aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorRobert Richter <robert.richter@amd.com>2009-06-12 17:58:48 +0200
committerRobert Richter <robert.richter@amd.com>2009-06-12 17:58:48 +0200
commit1241eb8f136bf3ea409f61590e7663465906d158 (patch)
treec8384237f19bb1094d9c90825a74e28393b60d00 /arch
parent51563a0e5650d0d76539625388d72d62b34c726e (diff)
parent940010c5a314a7bd9b498593bc6ba1718ac5aec5 (diff)
downloadlinux-2.6.38-lt-ux500-1241eb8f136bf3ea409f61590e7663465906d158.tar.gz
Merge commit 'tip/perfcounters-for-linus' into oprofile/master
Conflicts: arch/x86/oprofile/op_model_ppro.c Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/sys_dp264.c8
-rw-r--r--arch/alpha/kernel/sys_titan.c4
-rw-r--r--arch/arm/common/gic.c4
-rw-r--r--arch/cris/arch-v32/kernel/irq.c4
-rw-r--r--arch/ia64/hp/sim/hpsim_irq.c3
-rw-r--r--arch/ia64/kernel/acpi.c5
-rw-r--r--arch/ia64/kernel/iosapic.c10
-rw-r--r--arch/ia64/kernel/msi_ia64.c16
-rw-r--r--arch/ia64/sn/kernel/irq.c4
-rw-r--r--arch/ia64/sn/kernel/msi_sn.c8
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c8
-rw-r--r--arch/mips/include/asm/irq.h2
-rw-r--r--arch/mips/kernel/irq-gic.c5
-rw-r--r--arch/mips/mti-malta/malta-smtc.c4
-rw-r--r--arch/mips/sibyte/bcm1480/irq.c8
-rw-r--r--arch/mips/sibyte/sb1250/irq.c8
-rw-r--r--arch/parisc/kernel/irq.c6
-rw-r--r--arch/powerpc/include/asm/hw_irq.h39
-rw-r--r--arch/powerpc/include/asm/paca.h1
-rw-r--r--arch/powerpc/include/asm/perf_counter.h98
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/systbl.h2
-rw-r--r--arch/powerpc/include/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/Makefile3
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/entry_64.S9
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/perf_counter.c1263
-rw-r--r--arch/powerpc/kernel/power4-pmu.c598
-rw-r--r--arch/powerpc/kernel/power5+-pmu.c671
-rw-r--r--arch/powerpc/kernel/power5-pmu.c611
-rw-r--r--arch/powerpc/kernel/power6-pmu.c532
-rw-r--r--arch/powerpc/kernel/power7-pmu.c357
-rw-r--r--arch/powerpc/kernel/ppc970-pmu.c482
-rw-r--r--arch/powerpc/mm/fault.c10
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype1
-rw-r--r--arch/powerpc/platforms/pseries/xics.c12
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/mpic.h2
-rw-r--r--arch/sparc/include/asm/thread_info_64.h4
-rw-r--r--arch/sparc/kernel/irq_64.c12
-rw-r--r--arch/x86/Kbuild16
-rw-r--r--arch/x86/Kconfig54
-rw-r--r--arch/x86/Kconfig.debug20
-rw-r--r--arch/x86/Makefile19
-rw-r--r--arch/x86/boot/.gitignore2
-rw-r--r--arch/x86/boot/Makefile29
-rw-r--r--arch/x86/boot/a20.c9
-rw-r--r--arch/x86/boot/apm.c76
-rw-r--r--arch/x86/boot/bioscall.S82
-rw-r--r--arch/x86/boot/boot.h48
-rw-r--r--arch/x86/boot/compressed/.gitignore3
-rw-r--r--arch/x86/boot/compressed/Makefile54
-rw-r--r--arch/x86/boot/compressed/head_32.S194
-rw-r--r--arch/x86/boot/compressed/head_64.S169
-rw-r--r--arch/x86/boot/compressed/misc.c12
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c97
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S (renamed from arch/x86/boot/compressed/vmlinux_64.lds)29
-rw-r--r--arch/x86/boot/compressed/vmlinux.scr10
-rw-r--r--arch/x86/boot/compressed/vmlinux_32.lds43
-rw-r--r--arch/x86/boot/edd.c71
-rw-r--r--arch/x86/boot/header.S30
-rw-r--r--arch/x86/boot/main.c39
-rw-r--r--arch/x86/boot/mca.c27
-rw-r--r--arch/x86/boot/memory.c79
-rw-r--r--arch/x86/boot/regs.c29
-rw-r--r--arch/x86/boot/setup.ld6
-rw-r--r--arch/x86/boot/tty.c52
-rw-r--r--arch/x86/boot/video-bios.c27
-rw-r--r--arch/x86/boot/video-vesa.c137
-rw-r--r--arch/x86/boot/video-vga.c95
-rw-r--r--arch/x86/boot/video.c42
-rw-r--r--arch/x86/boot/video.h14
-rw-r--r--arch/x86/configs/i386_defconfig148
-rw-r--r--arch/x86/configs/x86_64_defconfig151
-rw-r--r--arch/x86/ia32/ia32entry.S4
-rw-r--r--arch/x86/include/asm/alternative.h59
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h55
-rw-r--r--arch/x86/include/asm/apic.h33
-rw-r--r--arch/x86/include/asm/apicdef.h8
-rw-r--r--arch/x86/include/asm/atomic_32.h236
-rw-r--r--arch/x86/include/asm/boot.h15
-rw-r--r--arch/x86/include/asm/bootparam.h3
-rw-r--r--arch/x86/include/asm/cpu_debug.h101
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/ds.h82
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h27
-rw-r--r--arch/x86/include/asm/i387.h43
-rw-r--r--arch/x86/include/asm/i8259.h4
-rw-r--r--arch/x86/include/asm/intel_arch_perfmon.h31
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/iomap.h5
-rw-r--r--arch/x86/include/asm/irq_remapping.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h9
-rw-r--r--arch/x86/include/asm/k8.h13
-rw-r--r--arch/x86/include/asm/microcode.h25
-rw-r--r--arch/x86/include/asm/mpspec.h15
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/include/asm/numa_64.h10
-rw-r--r--arch/x86/include/asm/page_32_types.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h22
-rw-r--r--arch/x86/include/asm/page_types.h6
-rw-r--r--arch/x86/include/asm/paravirt.h22
-rw-r--r--arch/x86/include/asm/perf_counter.h100
-rw-r--r--arch/x86/include/asm/pgtable.h4
-rw-r--r--arch/x86/include/asm/pgtable_64.h6
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/processor.h47
-rw-r--r--arch/x86/include/asm/ptrace.h9
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/smp.h2
-rw-r--r--arch/x86/include/asm/sparsemem.h2
-rw-r--r--arch/x86/include/asm/syscalls.h45
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/tlbflush.h8
-rw-r--r--arch/x86/include/asm/topology.h3
-rw-r--r--arch/x86/include/asm/traps.h5
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h5
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h6
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c156
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile2
-rw-r--r--arch/x86/kernel/acpi/realmode/bioscall.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/regs.c1
-rw-r--r--arch/x86/kernel/amd_iommu.c500
-rw-r--r--arch/x86/kernel/amd_iommu_init.c273
-rw-r--r--arch/x86/kernel/apic/apic.c314
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c902
-rw-r--r--arch/x86/kernel/apic/nmi.c2
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_64.c2
-rw-r--r--arch/x86/kernel/apic/summit_32.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c20
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile12
-rw-r--r--arch/x86/kernel/cpu/amd.c12
-rw-r--r--arch/x86/kernel/cpu/common.c25
-rw-r--r--arch/x86/kernel/cpu/cpu_debug.c431
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c6
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c153
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c24
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h15
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c6
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c1704
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/ds.c921
-rw-r--r--arch/x86/kernel/ds_selftest.c408
-rw-r--r--arch/x86/kernel/ds_selftest.h15
-rw-r--r--arch/x86/kernel/dumpstack.h1
-rw-r--r--arch/x86/kernel/e820.c46
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/entry_64.S29
-rw-r--r--arch/x86/kernel/head_32.S7
-rw-r--r--arch/x86/kernel/irq.c30
-rw-r--r--arch/x86/kernel/irqinit.c (renamed from arch/x86/kernel/irqinit_32.c)152
-rw-r--r--arch/x86/kernel/irqinit_64.c177
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/microcode_amd.c70
-rw-r--r--arch/x86/kernel/microcode_core.c329
-rw-r--r--arch/x86/kernel/microcode_intel.c90
-rw-r--r--arch/x86/kernel/mpparse.c34
-rw-r--r--arch/x86/kernel/paravirt.c56
-rw-r--r--arch/x86/kernel/pci-calgary_64.c54
-rw-r--r--arch/x86/kernel/pci-gart_64.c55
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/process.c20
-rw-r--r--arch/x86/kernel/process_32.c20
-rw-r--r--arch/x86/kernel/process_64.c20
-rw-r--r--arch/x86/kernel/ptrace.c284
-rw-r--r--arch/x86/kernel/quirks.c37
-rw-r--r--arch/x86/kernel/reboot.c9
-rw-r--r--arch/x86/kernel/setup.c40
-rw-r--r--arch/x86/kernel/setup_percpu.c8
-rw-r--r--arch/x86/kernel/signal.c1
-rw-r--r--arch/x86/kernel/smp.c20
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/tlb_uv.c15
-rw-r--r--arch/x86/kernel/traps.c20
-rw-r--r--arch/x86/kernel/tsc.c19
-rw-r--r--arch/x86/kernel/tsc_sync.c14
-rw-r--r--arch/x86/kernel/vm86_32.c13
-rw-r--r--arch/x86/kernel/vmi_32.c20
-rw-r--r--arch/x86/kernel/vmlinux.lds.S430
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S229
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S298
-rw-r--r--arch/x86/kernel/vsyscall_64.c8
-rw-r--r--arch/x86/lguest/boot.c18
-rw-r--r--arch/x86/mm/dump_pagetables.c7
-rw-r--r--arch/x86/mm/fault.c69
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/mm/init.c78
-rw-r--r--arch/x86/mm/init_32.c61
-rw-r--r--arch/x86/mm/init_64.c47
-rw-r--r--arch/x86/mm/iomap_32.c1
-rw-r--r--arch/x86/mm/kmmio.c104
-rw-r--r--arch/x86/mm/memtest.c14
-rw-r--r--arch/x86/mm/mmio-mod.c2
-rw-r--r--arch/x86/mm/numa_64.c33
-rw-r--r--arch/x86/mm/pageattr.c14
-rw-r--r--arch/x86/mm/srat_64.c98
-rw-r--r--arch/x86/oprofile/nmi_int.c7
-rw-r--r--arch/x86/oprofile/op_model_ppro.c8
-rw-r--r--arch/x86/oprofile/op_x86_model.h2
-rw-r--r--arch/x86/pci/irq.c84
-rw-r--r--arch/x86/vdso/vdso32-setup.c6
-rw-r--r--arch/x86/vdso/vma.c8
-rw-r--r--arch/x86/xen/enlighten.c65
-rw-r--r--arch/x86/xen/mmu.c23
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-ops.h1
230 files changed, 12590 insertions, 4676 deletions
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9c9d1fd4155..5bd5259324b 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
}
-static void
+static int
dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&dp264_irq_lock);
cpu_set_irq_affinity(irq, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
+
+ return 0;
}
-static void
+static int
clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&dp264_irq_lock);
cpu_set_irq_affinity(irq - 16, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
+
+ return 0;
}
static struct hw_interrupt_type dp264_irq_type = {
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 27f840a4ad3..8dd239ebdb9 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
-static void
+static int
titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&titan_irq_lock);
titan_cpu_set_irq_affinity(irq - 16, *affinity);
titan_update_irq_hw(titan_cached_irq_mask);
spin_unlock(&titan_irq_lock);
+
+ return 0;
}
static void
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 3e1714c6523..664c7b8b1ba 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq)
}
#ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
{
void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
val |= 1 << (cpu + shift);
writel(val, reg);
spin_unlock(&irq_controller_lock);
+
+ return 0;
}
#endif
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index df3925cb1c7..d70b445f4a8 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq)
{
}
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
{
unsigned long flags;
spin_lock_irqsave(&irq_lock, flags);
irq_allocations[irq - FIRST_IRQ].mask = *dest;
spin_unlock_irqrestore(&irq_lock, flags);
+
+ return 0;
}
static struct irq_chip crisv32_irq_type = {
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index cc0a3182db3..acb5047ab57 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq)
{
}
-static void
+static int
hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
{
+ return 0;
}
static struct hw_interrupt_type irq_type_hp_sim = {
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5510317db37..baec6f00f7f 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void)
* success: return IRQ number (>=0)
* failure: return < 0
*/
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
{
if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
return gsi;
@@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
fadt = (struct acpi_table_fadt *)fadt_header;
- acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+ acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+ ACPI_ACTIVE_LOW);
return 0;
}
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 166e0d839fa..f92cef47bf8 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -329,7 +329,7 @@ unmask_irq (unsigned int irq)
}
-static void
+static int
iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
{
#ifdef CONFIG_SMP
@@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
cpu = cpumask_first_and(cpu_online_mask, mask);
if (cpu >= nr_cpu_ids)
- return;
+ return -1;
if (irq_prepare_move(irq, cpu))
- return;
+ return -1;
dest = cpu_physical_id(cpu);
if (!iosapic_intr_info[irq].count)
- return; /* not an IOSAPIC interrupt */
+ return -1; /* not an IOSAPIC interrupt */
set_irq_affinity_info(irq, dest, redir);
@@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
}
+
#endif
+ return 0;
}
/*
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2b15e233f7f..0f8ade9331b 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -12,7 +12,7 @@
static struct irq_chip ia64_msi_chip;
#ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
+static int ia64_set_msi_irq_affinity(unsigned int irq,
const cpumask_t *cpu_mask)
{
struct msi_msg msg;
@@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
int cpu = first_cpu(*cpu_mask);
if (!cpu_online(cpu))
- return;
+ return -1;
if (irq_prepare_move(irq, cpu))
- return;
+ return -1;
read_msi_msg(irq, &msg);
@@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
write_msi_msg(irq, &msg);
cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
struct msi_msg msg;
int cpu = cpumask_first(mask);
if (!cpu_online(cpu))
- return;
+ return -1;
if (irq_prepare_move(irq, cpu))
- return;
+ return -1;
dmar_msi_read(irq, &msg);
@@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
dmar_msi_write(irq, &msg);
cpumask_copy(irq_desc[irq].affinity, mask);
+
+ return 0;
}
#endif /* CONFIG_SMP */
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 66fd705e82c..764f26abac0 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,7 +227,7 @@ finish_up:
return new_irq_info;
}
-static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
+static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
{
struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
nasid_t nasid;
@@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
sn_irq_lh[irq], list)
(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+ return 0;
}
#ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 81e428943d7..fbbfb970120 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
}
#ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq,
+static int sn_set_msi_irq_affinity(unsigned int irq,
const struct cpumask *cpu_mask)
{
struct msi_msg msg;
@@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
cpu = cpumask_first(cpu_mask);
sn_irq_info = sn_msi_info[irq].sn_irq_info;
if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
- return;
+ return -1;
/*
* Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
sn_msi_info[irq].sn_irq_info = new_irq_info;
if (new_irq_info == NULL)
- return;
+ return -1;
/*
* Map the xio address into bus space
@@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
write_msi_msg(irq, &msg);
cpumask_copy(irq_desc[irq].affinity, cpu_mask);
+
+ return 0;
}
#endif /* CONFIG_SMP */
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 1c19af8daa6..d3a0c8154be 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq)
}
#ifdef CONFIG_SMP
-static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
{
int cpu;
int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask
*/
cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
write_unlock(&octeon_irq_ciu0_rwlock);
+
+ return 0;
}
#endif
@@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq)
}
#ifdef CONFIG_SMP
-static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
{
int cpu;
int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask
*/
cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
write_unlock(&octeon_irq_ciu1_rwlock);
+
+ return 0;
}
#endif
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 3214ade02d1..4f1eed107b0 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq)
#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
#include <linux/cpumask.h>
-extern void plat_set_irq_affinity(unsigned int irq,
+extern int plat_set_irq_affinity(unsigned int irq,
const struct cpumask *affinity);
extern void smtc_forward_irq(unsigned int irq);
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 87deb8f6c45..3f43c2e3aa5 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
static DEFINE_SPINLOCK(gic_lock);
-static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
cpumask_t tmp = CPU_MASK_NONE;
unsigned long flags;
@@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
cpumask_and(&tmp, cpumask, cpu_online_mask);
if (cpus_empty(tmp))
- return;
+ return -1;
/* Assumption : cpumask refers to a single CPU */
spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
cpumask_copy(irq_desc[irq].affinity, cpumask);
spin_unlock_irqrestore(&gic_lock, flags);
+ return 0;
}
#endif
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ba31888fef..499ffe5475d 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = {
*/
-void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
{
cpumask_t tmask;
int cpu = 0;
@@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
/* Do any generic SMTC IRQ affinity setup */
smtc_set_irq_affinity(irq, tmask);
+
+ return 0;
}
#endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index c147c4b35d3..690de06bde9 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
static void disable_bcm1480_irq(unsigned int irq);
static void ack_bcm1480_irq(unsigned int irq);
#ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
#endif
#ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
}
#ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
{
int i = 0, old_cpu, cpu, int_on, k;
u64 cur_ints;
@@ -118,7 +118,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
if (cpumask_weight(mask) != 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
- return;
+ return -1;
}
i = cpumask_first(mask);
@@ -152,6 +152,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
}
}
spin_unlock_irqrestore(&bcm1480_imr_lock, flags);
+
+ return 0;
}
#endif
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 38cb998ade2..409dec79886 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
static void disable_sb1250_irq(unsigned int irq);
static void ack_sb1250_irq(unsigned int irq);
#ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
#endif
#ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq)
}
#ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
{
int i = 0, old_cpu, cpu, int_on;
u64 cur_ints;
@@ -113,7 +113,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
if (cpumask_weight(mask) > 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
- return;
+ return -1;
}
/* Convert logical CPU to physical CPU */
@@ -143,6 +143,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
R_IMR_INTERRUPT_MASK));
}
spin_unlock_irqrestore(&sb1250_imr_lock, flags);
+
+ return 0;
}
#endif
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4ea4229d765..8007f1e6572 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest)
return cpu_dest;
}
-static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
{
int cpu_dest;
cpu_dest = cpu_check_affinity(irq, dest);
if (cpu_dest < 0)
- return;
+ return -1;
cpumask_copy(&irq_desc[irq].affinity, dest);
+
+ return 0;
}
#endif
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7e034b0a6d..20a44d0c9fd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
*/
struct irq_chip;
+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long test_perf_counter_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_counter_pending)));
+ return x;
+}
+
+static inline void set_perf_counter_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+extern void perf_counter_do_pending(void);
+
+#else
+
+static inline unsigned long test_perf_counter_pending(void)
+{
+ return 0;
+}
+
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
+static inline void perf_counter_do_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3aedf14..6ef05572301 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
+ u8 perf_counter_pending; /* PM interrupt while soft-disabled */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 00000000000..cc7c887705b
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,98 @@
+/*
+ * Performance counter support - PowerPC-specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#define MAX_HWCOUNTERS 8
+#define MAX_EVENT_ALTERNATIVES 8
+#define MAX_LIMITED_HWCOUNTERS 2
+
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+ int n_counter;
+ int max_alternatives;
+ u64 add_fields;
+ u64 test_adder;
+ int (*compute_mmcr)(u64 events[], int n_ev,
+ unsigned int hwc[], u64 mmcr[]);
+ int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
+ int (*get_alternatives)(u64 event, unsigned int flags,
+ u64 alt[]);
+ void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
+ int (*limited_pmc_event)(u64 event);
+ u32 flags;
+ int n_generic;
+ int *generic_events;
+ int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+extern struct power_pmu *ppmu;
+
+/*
+ * Values for power_pmu.flags
+ */
+#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
+#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
+
+/*
+ * Values for flags to get_alternatives()
+ */
+#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
+#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
+#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
+
+struct pt_regs;
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+
+/*
+ * The power_pmu.get_constraint function returns a 64-bit value and
+ * a 64-bit mask that express the constraints between this event and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event. For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits. The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class. A field of k bits can be used for
+ * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set. In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play. The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes. (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.) For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits. The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event's class set. The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e8018d540e8..fb359b0a693 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -492,11 +492,13 @@
#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
#define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312
+#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
#define MMCRA_SLOT_SHIFT 24
#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */
#define POWER6_MMCRA_SIHV 0x0000040000000000ULL
#define POWER6_MMCRA_SIPR 0x0000020000000000ULL
#define POWER6_MMCRA_THRM 0x00000020UL
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d98a30dfd41..a0b92de51c7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1)
SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1)
-SYSCALL(ni_syscall)
+SYSCALL_SPU(perf_counter_open)
COMPAT_SYS_SPU(preadv)
COMPAT_SYS_SPU(pwritev)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3f06f8ec81c..4badac2d11d 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,6 +341,7 @@
#define __NR_dup3 316
#define __NR_pipe2 317
#define __NR_inotify_init1 318
+#define __NR_perf_counter_open 319
#define __NR_preadv 320
#define __NR_pwritev 321
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fbda4a..a2c683403c2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \
+ power5-pmu.o power5+-pmu.o power6-pmu.o \
+ power7-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc05394..e981d1ce191 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+ DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc3233047..43e073477c3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
+#ifdef CONFIG_PERF_COUNTERS
+ /* check paca->perf_counter_pending if we're enabling ints */
+ lbz r3,PACAPERFPEND(r13)
+ and. r3,r3,r5
+ beq 27f
+ bl .perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8c1a4966867..feff792ed0f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
iseries_handle_interrupts();
}
+ if (test_perf_counter_pending()) {
+ clear_perf_counter_pending();
+ perf_counter_do_pending();
+ }
+
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 00000000000..bb202388170
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,1263 @@
+/*
+ * Performance counter support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_counter.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+
+struct cpu_hw_counters {
+ int n_counters;
+ int n_percpu;
+ int disabled;
+ int n_added;
+ int n_limited;
+ u8 pmcs_enabled;
+ struct perf_counter *counter[MAX_HWCOUNTERS];
+ u64 events[MAX_HWCOUNTERS];
+ unsigned int flags[MAX_HWCOUNTERS];
+ u64 mmcr[3];
+ struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
+ u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
+};
+DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+struct power_pmu *ppmu;
+
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
+static void perf_counter_interrupt(struct pt_regs *regs);
+
+void perf_counter_print_debug(void)
+{
+}
+
+/*
+ * Read one performance monitor counter (PMC).
+ */
+static unsigned long read_pmc(int idx)
+{
+ unsigned long val;
+
+ switch (idx) {
+ case 1:
+ val = mfspr(SPRN_PMC1);
+ break;
+ case 2:
+ val = mfspr(SPRN_PMC2);
+ break;
+ case 3:
+ val = mfspr(SPRN_PMC3);
+ break;
+ case 4:
+ val = mfspr(SPRN_PMC4);
+ break;
+ case 5:
+ val = mfspr(SPRN_PMC5);
+ break;
+ case 6:
+ val = mfspr(SPRN_PMC6);
+ break;
+ case 7:
+ val = mfspr(SPRN_PMC7);
+ break;
+ case 8:
+ val = mfspr(SPRN_PMC8);
+ break;
+ default:
+ printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+ val = 0;
+ }
+ return val;
+}
+
+/*
+ * Write one PMC.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+ switch (idx) {
+ case 1:
+ mtspr(SPRN_PMC1, val);
+ break;
+ case 2:
+ mtspr(SPRN_PMC2, val);
+ break;
+ case 3:
+ mtspr(SPRN_PMC3, val);
+ break;
+ case 4:
+ mtspr(SPRN_PMC4, val);
+ break;
+ case 5:
+ mtspr(SPRN_PMC5, val);
+ break;
+ case 6:
+ mtspr(SPRN_PMC6, val);
+ break;
+ case 7:
+ mtspr(SPRN_PMC7, val);
+ break;
+ case 8:
+ mtspr(SPRN_PMC8, val);
+ break;
+ default:
+ printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+ }
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event[].
+ */
+static int power_check_constraints(u64 event[], unsigned int cflags[],
+ int n_ev)
+{
+ u64 mask, value, nv;
+ u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+ int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
+ int i, j;
+ u64 addf = ppmu->add_fields;
+ u64 tadd = ppmu->test_adder;
+
+ if (n_ev > ppmu->n_counter)
+ return -1;
+
+ /* First see if the events will go on as-is */
+ for (i = 0; i < n_ev; ++i) {
+ if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+ && !ppmu->limited_pmc_event(event[i])) {
+ ppmu->get_alternatives(event[i], cflags[i],
+ alternatives[i]);
+ event[i] = alternatives[i][0];
+ }
+ if (ppmu->get_constraint(event[i], &amasks[i][0],
+ &avalues[i][0]))
+ return -1;
+ }
+ value = mask = 0;
+ for (i = 0; i < n_ev; ++i) {
+ nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
+ if ((((nv + tadd) ^ value) & mask) != 0 ||
+ (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
+ break;
+ value = nv;
+ mask |= amasks[i][0];
+ }
+ if (i == n_ev)
+ return 0; /* all OK */
+
+ /* doesn't work, gather alternatives... */
+ if (!ppmu->get_alternatives)
+ return -1;
+ for (i = 0; i < n_ev; ++i) {
+ choice[i] = 0;
+ n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
+ alternatives[i]);
+ for (j = 1; j < n_alt[i]; ++j)
+ ppmu->get_constraint(alternatives[i][j],
+ &amasks[i][j], &avalues[i][j]);
+ }
+
+ /* enumerate all possibilities and see if any will work */
+ i = 0;
+ j = -1;
+ value = mask = nv = 0;
+ while (i < n_ev) {
+ if (j >= 0) {
+ /* we're backtracking, restore context */
+ value = svalues[i];
+ mask = smasks[i];
+ j = choice[i];
+ }
+ /*
+ * See if any alternative k for event i,
+ * where k > j, will satisfy the constraints.
+ */
+ while (++j < n_alt[i]) {
+ nv = (value | avalues[i][j]) +
+ (value & avalues[i][j] & addf);
+ if ((((nv + tadd) ^ value) & mask) == 0 &&
+ (((nv + tadd) ^ avalues[i][j])
+ & amasks[i][j]) == 0)
+ break;
+ }
+ if (j >= n_alt[i]) {
+ /*
+ * No feasible alternative, backtrack
+ * to event i-1 and continue enumerating its
+ * alternatives from where we got up to.
+ */
+ if (--i < 0)
+ return -1;
+ } else {
+ /*
+ * Found a feasible alternative for event i,
+ * remember where we got up to with this event,
+ * go on to the next event, and start with
+ * the first alternative for it.
+ */
+ choice[i] = j;
+ svalues[i] = value;
+ smasks[i] = mask;
+ value = nv;
+ mask |= amasks[i][j];
+ ++i;
+ j = -1;
+ }
+ }
+
+ /* OK, we have a feasible combination, tell the caller the solution */
+ for (i = 0; i < n_ev; ++i)
+ event[i] = alternatives[i][choice[i]];
+ return 0;
+}
+
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
+ int n_prev, int n_new)
+{
+ int eu = 0, ek = 0, eh = 0;
+ int i, n, first;
+ struct perf_counter *counter;
+
+ n = n_prev + n_new;
+ if (n <= 1)
+ return 0;
+
+ first = 1;
+ for (i = 0; i < n; ++i) {
+ if (cflags[i] & PPMU_LIMITED_PMC_OK) {
+ cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+ continue;
+ }
+ counter = ctrs[i];
+ if (first) {
+ eu = counter->attr.exclude_user;
+ ek = counter->attr.exclude_kernel;
+ eh = counter->attr.exclude_hv;
+ first = 0;
+ } else if (counter->attr.exclude_user != eu ||
+ counter->attr.exclude_kernel != ek ||
+ counter->attr.exclude_hv != eh) {
+ return -EAGAIN;
+ }
+ }
+
+ if (eu || ek || eh)
+ for (i = 0; i < n; ++i)
+ if (cflags[i] & PPMU_LIMITED_PMC_OK)
+ cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
+ return 0;
+}
+
+static void power_pmu_read(struct perf_counter *counter)
+{
+ long val, delta, prev;
+
+ if (!counter->hw.idx)
+ return;
+ /*
+ * Performance monitor interrupts come even when interrupts
+ * are soft-disabled, as long as interrupts are hard-enabled.
+ * Therefore we treat them like NMIs.
+ */
+ do {
+ prev = atomic64_read(&counter->hw.prev_count);
+ barrier();
+ val = read_pmc(counter->hw.idx);
+ } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
+
+ /* The counters are only 32 bits wide */
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &counter->count);
+ atomic64_sub(delta, &counter->hw.period_left);
+}
+
+/*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts. This tells
+ * us if `counter' is using such a PMC.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+ return (ppmu->flags & PPMU_LIMITED_PMC5_6)
+ && (pmcnum == 5 || pmcnum == 6);
+}
+
+static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
+ unsigned long pmc5, unsigned long pmc6)
+{
+ struct perf_counter *counter;
+ u64 val, prev, delta;
+ int i;
+
+ for (i = 0; i < cpuhw->n_limited; ++i) {
+ counter = cpuhw->limited_counter[i];
+ if (!counter->hw.idx)
+ continue;
+ val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+ prev = atomic64_read(&counter->hw.prev_count);
+ counter->hw.idx = 0;
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &counter->count);
+ }
+}
+
+static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
+ unsigned long pmc5, unsigned long pmc6)
+{
+ struct perf_counter *counter;
+ u64 val;
+ int i;
+
+ for (i = 0; i < cpuhw->n_limited; ++i) {
+ counter = cpuhw->limited_counter[i];
+ counter->hw.idx = cpuhw->limited_hwidx[i];
+ val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+ atomic64_set(&counter->hw.prev_count, val);
+ perf_counter_update_userpage(counter);
+ }
+}
+
+/*
+ * Since limited counters don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other counters. We try to keep the values from the limited
+ * counters as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited counters as small and consistent as possible.
+ * Therefore, if any limited counters are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
+{
+ unsigned long pmc5, pmc6;
+
+ if (!cpuhw->n_limited) {
+ mtspr(SPRN_MMCR0, mmcr0);
+ return;
+ }
+
+ /*
+ * Write MMCR0, then read PMC5 and PMC6 immediately.
+ * To ensure we don't get a performance monitor interrupt
+ * between writing MMCR0 and freezing/thawing the limited
+ * counters, we first write MMCR0 with the counter overflow
+ * interrupt enable bits turned off.
+ */
+ asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+ : "=&r" (pmc5), "=&r" (pmc6)
+ : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
+ "i" (SPRN_MMCR0),
+ "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+ if (mmcr0 & MMCR0_FC)
+ freeze_limited_counters(cpuhw, pmc5, pmc6);
+ else
+ thaw_limited_counters(cpuhw, pmc5, pmc6);
+
+ /*
+ * Write the full MMCR0 including the counter overflow interrupt
+ * enable bits, if necessary.
+ */
+ if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
+ mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/*
+ * Disable all counters to prevent PMU interrupts and to allow
+ * counters to be added or removed.
+ */
+void hw_perf_disable(void)
+{
+ struct cpu_hw_counters *cpuhw;
+ unsigned long ret;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+
+ ret = cpuhw->disabled;
+ if (!ret) {
+ cpuhw->disabled = 1;
+ cpuhw->n_added = 0;
+
+ /*
+ * Check if we ever enabled the PMU on this cpu.
+ */
+ if (!cpuhw->pmcs_enabled) {
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+ cpuhw->pmcs_enabled = 1;
+ }
+
+ /*
+ * Disable instruction sampling if it was enabled
+ */
+ if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ mtspr(SPRN_MMCRA,
+ cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ mb();
+ }
+
+ /*
+ * Set the 'freeze counters' bit.
+ * The barrier is to make sure the mtspr has been
+ * executed and the PMU has frozen the counters
+ * before we return.
+ */
+ write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
+ mb();
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * Re-enable all counters if disable == 0.
+ * If we were previously disabled and counters were added, then
+ * put the new config on the PMU.
+ */
+void hw_perf_enable(void)
+{
+ struct perf_counter *counter;
+ struct cpu_hw_counters *cpuhw;
+ unsigned long flags;
+ long i;
+ unsigned long val;
+ s64 left;
+ unsigned int hwc_index[MAX_HWCOUNTERS];
+ int n_lim;
+ int idx;
+
+ local_irq_save(flags);
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ if (!cpuhw->disabled) {
+ local_irq_restore(flags);
+ return;
+ }
+ cpuhw->disabled = 0;
+
+ /*
+ * If we didn't change anything, or only removed counters,
+ * no need to recalculate MMCR* settings and reset the PMCs.
+ * Just reenable the PMU with the current MMCR* settings
+ * (possibly updated for removal of counters).
+ */
+ if (!cpuhw->n_added) {
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ if (cpuhw->n_counters == 0)
+ get_lppaca()->pmcregs_in_use = 0;
+ goto out_enable;
+ }
+
+ /*
+ * Compute MMCR* values for the new set of counters
+ */
+ if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
+ cpuhw->mmcr)) {
+ /* shouldn't ever get here */
+ printk(KERN_ERR "oops compute_mmcr failed\n");
+ goto out;
+ }
+
+ /*
+ * Add in MMCR0 freeze bits corresponding to the
+ * attr.exclude_* bits for the first counter.
+ * We have already checked that all counters have the
+ * same values for these bits as the first counter.
+ */
+ counter = cpuhw->counter[0];
+ if (counter->attr.exclude_user)
+ cpuhw->mmcr[0] |= MMCR0_FCP;
+ if (counter->attr.exclude_kernel)
+ cpuhw->mmcr[0] |= freeze_counters_kernel;
+ if (counter->attr.exclude_hv)
+ cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+ /*
+ * Write the new configuration to MMCR* with the freeze
+ * bit set and set the hardware counters to their initial values.
+ * Then unfreeze the counters.
+ */
+ get_lppaca()->pmcregs_in_use = 1;
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ | MMCR0_FC);
+
+ /*
+ * Read off any pre-existing counters that need to move
+ * to another PMC.
+ */
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
+ power_pmu_read(counter);
+ write_pmc(counter->hw.idx, 0);
+ counter->hw.idx = 0;
+ }
+ }
+
+ /*
+ * Initialize the PMCs for all the new and moved counters.
+ */
+ cpuhw->n_limited = n_lim = 0;
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (counter->hw.idx)
+ continue;
+ idx = hwc_index[i] + 1;
+ if (is_limited_pmc(idx)) {
+ cpuhw->limited_counter[n_lim] = counter;
+ cpuhw->limited_hwidx[n_lim] = idx;
+ ++n_lim;
+ continue;
+ }
+ val = 0;
+ if (counter->hw.sample_period) {
+ left = atomic64_read(&counter->hw.period_left);
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ }
+ atomic64_set(&counter->hw.prev_count, val);
+ counter->hw.idx = idx;
+ write_pmc(idx, val);
+ perf_counter_update_userpage(counter);
+ }
+ cpuhw->n_limited = n_lim;
+ cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+ mb();
+ write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+ /*
+ * Enable instruction sampling if necessary
+ */
+ if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ mb();
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ }
+
+ out:
+ local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_counter *group, int max_count,
+ struct perf_counter *ctrs[], u64 *events,
+ unsigned int *flags)
+{
+ int n = 0;
+ struct perf_counter *counter;
+
+ if (!is_software_counter(group)) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = group;
+ flags[n] = group->hw.counter_base;
+ events[n++] = group->hw.config;
+ }
+ list_for_each_entry(counter, &group->sibling_list, list_entry) {
+ if (!is_software_counter(counter) &&
+ counter->state != PERF_COUNTER_STATE_OFF) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = counter;
+ flags[n] = counter->hw.counter_base;
+ events[n++] = counter->hw.config;
+ }
+ }
+ return n;
+}
+
+static void counter_sched_in(struct perf_counter *counter, int cpu)
+{
+ counter->state = PERF_COUNTER_STATE_ACTIVE;
+ counter->oncpu = cpu;
+ counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
+ if (is_software_counter(counter))
+ counter->pmu->enable(counter);
+}
+
+/*
+ * Called to enable a whole group of counters.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ */
+int hw_perf_group_sched_in(struct perf_counter *group_leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx, int cpu)
+{
+ struct cpu_hw_counters *cpuhw;
+ long i, n, n0;
+ struct perf_counter *sub;
+
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ n0 = cpuhw->n_counters;
+ n = collect_events(group_leader, ppmu->n_counter - n0,
+ &cpuhw->counter[n0], &cpuhw->events[n0],
+ &cpuhw->flags[n0]);
+ if (n < 0)
+ return -EAGAIN;
+ if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
+ return -EAGAIN;
+ i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
+ if (i < 0)
+ return -EAGAIN;
+ cpuhw->n_counters = n0 + n;
+ cpuhw->n_added += n;
+
+ /*
+ * OK, this group can go on; update counter states etc.,
+ * and enable any software counters
+ */
+ for (i = n0; i < n0 + n; ++i)
+ cpuhw->counter[i]->hw.config = cpuhw->events[i];
+ cpuctx->active_oncpu += n;
+ n = 1;
+ counter_sched_in(group_leader, cpu);
+ list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+ if (sub->state != PERF_COUNTER_STATE_OFF) {
+ counter_sched_in(sub, cpu);
+ ++n;
+ }
+ }
+ ctx->nr_active += n;
+
+ return 1;
+}
+
+/*
+ * Add a counter to the PMU.
+ * If all counters are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_enable to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_pmu_enable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuhw;
+ unsigned long flags;
+ int n0;
+ int ret = -EAGAIN;
+
+ local_irq_save(flags);
+ perf_disable();
+
+ /*
+ * Add the counter to the list (if there is room)
+ * and check whether the total set is still feasible.
+ */
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ n0 = cpuhw->n_counters;
+ if (n0 >= ppmu->n_counter)
+ goto out;
+ cpuhw->counter[n0] = counter;
+ cpuhw->events[n0] = counter->hw.config;
+ cpuhw->flags[n0] = counter->hw.counter_base;
+ if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
+ goto out;
+ if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
+ goto out;
+
+ counter->hw.config = cpuhw->events[n0];
+ ++cpuhw->n_counters;
+ ++cpuhw->n_added;
+
+ ret = 0;
+ out:
+ perf_enable();
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * Remove a counter from the PMU.
+ */
+static void power_pmu_disable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuhw;
+ long i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ perf_disable();
+
+ power_pmu_read(counter);
+
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ if (counter == cpuhw->counter[i]) {
+ while (++i < cpuhw->n_counters)
+ cpuhw->counter[i-1] = cpuhw->counter[i];
+ --cpuhw->n_counters;
+ ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
+ if (counter->hw.idx) {
+ write_pmc(counter->hw.idx, 0);
+ counter->hw.idx = 0;
+ }
+ perf_counter_update_userpage(counter);
+ break;
+ }
+ }
+ for (i = 0; i < cpuhw->n_limited; ++i)
+ if (counter == cpuhw->limited_counter[i])
+ break;
+ if (i < cpuhw->n_limited) {
+ while (++i < cpuhw->n_limited) {
+ cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
+ cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+ }
+ --cpuhw->n_limited;
+ }
+ if (cpuhw->n_counters == 0) {
+ /* disable exceptions if no counters are running */
+ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ }
+
+ perf_enable();
+ local_irq_restore(flags);
+}
+
+/*
+ * Re-enable interrupts on a counter after they were throttled
+ * because they were coming too fast.
+ */
+static void power_pmu_unthrottle(struct perf_counter *counter)
+{
+ s64 val, left;
+ unsigned long flags;
+
+ if (!counter->hw.idx || !counter->hw.sample_period)
+ return;
+ local_irq_save(flags);
+ perf_disable();
+ power_pmu_read(counter);
+ left = counter->hw.sample_period;
+ counter->hw.last_period = left;
+ val = 0;
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ write_pmc(counter->hw.idx, val);
+ atomic64_set(&counter->hw.prev_count, val);
+ atomic64_set(&counter->hw.period_left, left);
+ perf_counter_update_userpage(counter);
+ perf_enable();
+ local_irq_restore(flags);
+}
+
+struct pmu power_pmu = {
+ .enable = power_pmu_enable,
+ .disable = power_pmu_disable,
+ .read = power_pmu_read,
+ .unthrottle = power_pmu_unthrottle,
+};
+
+/*
+ * Return 1 if we might be able to put counter on a limited PMC,
+ * or 0 if not.
+ * A counter can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
+ unsigned int flags)
+{
+ int n;
+ u64 alt[MAX_EVENT_ALTERNATIVES];
+
+ if (counter->attr.exclude_user
+ || counter->attr.exclude_kernel
+ || counter->attr.exclude_hv
+ || counter->attr.sample_period)
+ return 0;
+
+ if (ppmu->limited_pmc_event(ev))
+ return 1;
+
+ /*
+ * The requested event isn't on a limited PMC already;
+ * see if any alternative code goes on a limited PMC.
+ */
+ if (!ppmu->get_alternatives)
+ return 0;
+
+ flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+ n = ppmu->get_alternatives(ev, flags, alt);
+
+ return n > 0;
+}
+
+/*
+ * Find an alternative event that goes on a normal PMC, if possible,
+ * and return the event code, or 0 if there is no such alternative.
+ * (Note: event code 0 is "don't count" on all machines.)
+ */
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
+{
+ u64 alt[MAX_EVENT_ALTERNATIVES];
+ int n;
+
+ flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+ n = ppmu->get_alternatives(ev, flags, alt);
+ if (!n)
+ return 0;
+ return alt[0];
+}
+
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+ if (!atomic_add_unless(&num_counters, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_counters) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/*
+ * Translate a generic cache event config to a raw event code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+ unsigned long type, op, result;
+ int ev;
+
+ if (!ppmu->cache_events)
+ return -EINVAL;
+
+ /* unpack config */
+ type = config & 0xff;
+ op = (config >> 8) & 0xff;
+ result = (config >> 16) & 0xff;
+
+ if (type >= PERF_COUNT_HW_CACHE_MAX ||
+ op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+ result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ev = (*ppmu->cache_events)[type][op][result];
+ if (ev == 0)
+ return -EOPNOTSUPP;
+ if (ev == -1)
+ return -EINVAL;
+ *eventp = ev;
+ return 0;
+}
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+{
+ u64 ev;
+ unsigned long flags;
+ struct perf_counter *ctrs[MAX_HWCOUNTERS];
+ u64 events[MAX_HWCOUNTERS];
+ unsigned int cflags[MAX_HWCOUNTERS];
+ int n;
+ int err;
+
+ if (!ppmu)
+ return ERR_PTR(-ENXIO);
+ switch (counter->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ ev = counter->attr.config;
+ if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+ return ERR_PTR(-EOPNOTSUPP);
+ ev = ppmu->generic_events[ev];
+ break;
+ case PERF_TYPE_HW_CACHE:
+ err = hw_perf_cache_event(counter->attr.config, &ev);
+ if (err)
+ return ERR_PTR(err);
+ break;
+ case PERF_TYPE_RAW:
+ ev = counter->attr.config;
+ break;
+ }
+ counter->hw.config_base = ev;
+ counter->hw.idx = 0;
+
+ /*
+ * If we are not running on a hypervisor, force the
+ * exclude_hv bit to 0 so that we don't care what
+ * the user set it to.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ counter->attr.exclude_hv = 0;
+
+ /*
+ * If this is a per-task counter, then we can use
+ * PM_RUN_* events interchangeably with their non RUN_*
+ * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+ * XXX we should check if the task is an idle task.
+ */
+ flags = 0;
+ if (counter->ctx->task)
+ flags |= PPMU_ONLY_COUNT_RUN;
+
+ /*
+ * If this machine has limited counters, check whether this
+ * event could go on a limited counter.
+ */
+ if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
+ if (can_go_on_limited_pmc(counter, ev, flags)) {
+ flags |= PPMU_LIMITED_PMC_OK;
+ } else if (ppmu->limited_pmc_event(ev)) {
+ /*
+ * The requested event is on a limited PMC,
+ * but we can't use a limited PMC; see if any
+ * alternative goes on a normal PMC.
+ */
+ ev = normal_pmc_alternative(ev, flags);
+ if (!ev)
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /*
+ * If this is in a group, check if it can go on with all the
+ * other hardware counters in the group. We assume the counter
+ * hasn't been linked into its leader's sibling list at this point.
+ */
+ n = 0;
+ if (counter->group_leader != counter) {
+ n = collect_events(counter->group_leader, ppmu->n_counter - 1,
+ ctrs, events, cflags);
+ if (n < 0)
+ return ERR_PTR(-EINVAL);
+ }
+ events[n] = ev;
+ ctrs[n] = counter;
+ cflags[n] = flags;
+ if (check_excludes(ctrs, cflags, n, 1))
+ return ERR_PTR(-EINVAL);
+ if (power_check_constraints(events, cflags, n + 1))
+ return ERR_PTR(-EINVAL);
+
+ counter->hw.config = events[n];
+ counter->hw.counter_base = cflags[n];
+ counter->hw.last_period = counter->hw.sample_period;
+ atomic64_set(&counter->hw.period_left, counter->hw.last_period);
+
+ /*
+ * See if we need to reserve the PMU.
+ * If no counters are currently in use, then we have to take a
+ * mutex to ensure that we don't race with another task doing
+ * reserve_pmc_hardware or release_pmc_hardware.
+ */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_counters)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_counters) == 0 &&
+ reserve_pmc_hardware(perf_counter_interrupt))
+ err = -EBUSY;
+ else
+ atomic_inc(&num_counters);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ counter->destroy = hw_perf_counter_destroy;
+
+ if (err)
+ return ERR_PTR(err);
+ return &power_pmu;
+}
+
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested. Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_counter *counter, long val,
+ struct pt_regs *regs, int nmi)
+{
+ u64 period = counter->hw.sample_period;
+ s64 prev, delta, left;
+ int record = 0;
+ u64 addr, mmcra, sdsync;
+
+ /* we don't have to worry about interrupts here */
+ prev = atomic64_read(&counter->hw.prev_count);
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &counter->count);
+
+ /*
+ * See if the total period for this counter has expired,
+ * and update for the next period.
+ */
+ val = 0;
+ left = atomic64_read(&counter->hw.period_left) - delta;
+ if (period) {
+ if (left <= 0) {
+ left += period;
+ if (left <= 0)
+ left = period;
+ record = 1;
+ }
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ }
+
+ /*
+ * Finally record data if requested.
+ */
+ if (record) {
+ struct perf_sample_data data = {
+ .regs = regs,
+ .addr = 0,
+ .period = counter->hw.last_period,
+ };
+
+ if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
+ /*
+ * The user wants a data address recorded.
+ * If we're not doing instruction sampling,
+ * give them the SDAR (sampled data address).
+ * If we are doing instruction sampling, then only
+ * give them the SDAR if it corresponds to the
+ * instruction pointed to by SIAR; this is indicated
+ * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
+ */
+ mmcra = regs->dsisr;
+ sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+ POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+ if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+ data.addr = mfspr(SPRN_SDAR);
+ }
+ if (perf_counter_overflow(counter, nmi, &data)) {
+ /*
+ * Interrupts are coming too fast - throttle them
+ * by setting the counter to 0, so it will be
+ * at least 2^30 cycles until the next interrupt
+ * (assuming each counter counts at most 2 counts
+ * per cycle).
+ */
+ val = 0;
+ left = ~0ULL >> 1;
+ }
+ }
+
+ write_pmc(counter->hw.idx, val);
+ atomic64_set(&counter->hw.prev_count, val);
+ atomic64_set(&counter->hw.period_left, left);
+ perf_counter_update_userpage(counter);
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ unsigned long mmcra;
+
+ if (TRAP(regs) != 0xf00) {
+ /* not a PMU interrupt */
+ return user_mode(regs) ? PERF_EVENT_MISC_USER :
+ PERF_EVENT_MISC_KERNEL;
+ }
+
+ mmcra = regs->dsisr;
+ if (ppmu->flags & PPMU_ALT_SIPR) {
+ if (mmcra & POWER6_MMCRA_SIHV)
+ return PERF_EVENT_MISC_HYPERVISOR;
+ return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+ PERF_EVENT_MISC_KERNEL;
+ }
+ if (mmcra & MMCRA_SIHV)
+ return PERF_EVENT_MISC_HYPERVISOR;
+ return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+ PERF_EVENT_MISC_KERNEL;
+}
+
+/*
+ * Called from generic code to get the instruction pointer
+ * for an event.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ unsigned long mmcra;
+ unsigned long ip;
+ unsigned long slot;
+
+ if (TRAP(regs) != 0xf00)
+ return regs->nip; /* not a PMU interrupt */
+
+ ip = mfspr(SPRN_SIAR);
+ mmcra = regs->dsisr;
+ if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+ slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+ if (slot > 1)
+ ip += 4 * (slot - 1);
+ }
+ return ip;
+}
+
+/*
+ * Performance monitor interrupt stuff
+ */
+static void perf_counter_interrupt(struct pt_regs *regs)
+{
+ int i;
+ struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+ struct perf_counter *counter;
+ long val;
+ int found = 0;
+ int nmi;
+
+ if (cpuhw->n_limited)
+ freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
+ mfspr(SPRN_PMC6));
+
+ /*
+ * Overload regs->dsisr to store MMCRA so we only need to read it once.
+ */
+ regs->dsisr = mfspr(SPRN_MMCRA);
+
+ /*
+ * If interrupts were soft-disabled when this PMU interrupt
+ * occurred, treat it as an NMI.
+ */
+ nmi = !regs->softe;
+ if (nmi)
+ nmi_enter();
+ else
+ irq_enter();
+
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
+ continue;
+ val = read_pmc(counter->hw.idx);
+ if ((int)val < 0) {
+ /* counter has overflowed */
+ found = 1;
+ record_and_restart(counter, val, regs, nmi);
+ }
+ }
+
+ /*
+ * In case we didn't find and reset the counter that caused
+ * the interrupt, scan all counters and reset any that are
+ * negative, to avoid getting continual interrupts.
+ * Any that we processed in the previous loop will not be negative.
+ */
+ if (!found) {
+ for (i = 0; i < ppmu->n_counter; ++i) {
+ if (is_limited_pmc(i + 1))
+ continue;
+ val = read_pmc(i + 1);
+ if ((int)val < 0)
+ write_pmc(i + 1, 0);
+ }
+ }
+
+ /*
+ * Reset MMCR0 to its normal value. This will set PMXE and
+ * clear FC (freeze counters) and PMAO (perf mon alert occurred)
+ * and thus allow interrupts to occur again.
+ * XXX might want to use MSR.PM to keep the counters frozen until
+ * we get back out of this interrupt.
+ */
+ write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+ if (nmi)
+ nmi_exit();
+ else
+ irq_exit();
+}
+
+void hw_perf_counter_setup(int cpu)
+{
+ struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
+
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ cpuhw->mmcr[0] = MMCR0_FC;
+}
+
+extern struct power_pmu power4_pmu;
+extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
+extern struct power_pmu power6_pmu;
+extern struct power_pmu power7_pmu;
+
+static int init_perf_counters(void)
+{
+ unsigned long pvr;
+
+ /* XXX should get this from cputable */
+ pvr = mfspr(SPRN_PVR);
+ switch (PVR_VER(pvr)) {
+ case PV_POWER4:
+ case PV_POWER4p:
+ ppmu = &power4_pmu;
+ break;
+ case PV_970:
+ case PV_970FX:
+ case PV_970MP:
+ ppmu = &ppc970_pmu;
+ break;
+ case PV_POWER5:
+ ppmu = &power5_pmu;
+ break;
+ case PV_POWER5p:
+ ppmu = &power5p_pmu;
+ break;
+ case 0x3e:
+ ppmu = &power6_pmu;
+ break;
+ case 0x3f:
+ ppmu = &power7_pmu;
+ break;
+ }
+
+ /*
+ * Use FCHV to ignore kernel events if MSR.HV is set.
+ */
+ if (mfmsr() & MSR_HV)
+ freeze_counters_kernel = MMCR0_FCHV;
+
+ return 0;
+}
+
+arch_initcall(init_perf_counters);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 00000000000..07bd308a5fa
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,598 @@
+/*
+ * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER4
+ */
+#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_LOWER_SH 6
+#define PM_LOWER_MSK 1
+#define PM_LOWER_MSKS 0x40
+#define PM_BYTE_SH 4 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 3
+#define PM_PMCSEL_MSK 7
+
+/*
+ * Unit code values
+ */
+#define PM_FPU 1
+#define PM_ISU1 2
+#define PM_IFU 3
+#define PM_IDU0 4
+#define PM_ISU1_ALT 6
+#define PM_ISU2 7
+#define PM_IFU_ALT 8
+#define PM_LSU0 9
+#define PM_LSU1 0xc
+#define PM_GPS 0xf
+
+/*
+ * Bits in MMCR0 for POWER4
+ */
+#define MMCR0_PMC1SEL_SH 8
+#define MMCR0_PMC2SEL_SH 1
+#define MMCR_PMCSEL_MSK 0x1f
+
+/*
+ * Bits in MMCR1 for POWER4
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTC0SEL_SH 61
+#define MMCR1_TTM1SEL_SH 59
+#define MMCR1_TTC1SEL_SH 58
+#define MMCR1_TTM2SEL_SH 56
+#define MMCR1_TTC2SEL_SH 55
+#define MMCR1_TTM3SEL_SH 53
+#define MMCR1_TTC3SEL_SH 52
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 50
+#define MMCR1_TD_CP_DBG1SEL_SH 48
+#define MMCR1_TD_CP_DBG2SEL_SH 46
+#define MMCR1_TD_CP_DBG3SEL_SH 44
+#define MMCR1_DEBUG0SEL_SH 43
+#define MMCR1_DEBUG1SEL_SH 42
+#define MMCR1_DEBUG2SEL_SH 41
+#define MMCR1_DEBUG3SEL_SH 40
+#define MMCR1_PMC1_ADDER_SEL_SH 39
+#define MMCR1_PMC2_ADDER_SEL_SH 38
+#define MMCR1_PMC6_ADDER_SEL_SH 37
+#define MMCR1_PMC5_ADDER_SEL_SH 36
+#define MMCR1_PMC8_ADDER_SEL_SH 35
+#define MMCR1_PMC7_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC3SEL_SH 27
+#define MMCR1_PMC4SEL_SH 22
+#define MMCR1_PMC5SEL_SH 17
+#define MMCR1_PMC6SEL_SH 12
+#define MMCR1_PMC7SEL_SH 7
+#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
+
+static short mmcr1_adder_bits[8] = {
+ MMCR1_PMC1_ADDER_SEL_SH,
+ MMCR1_PMC2_ADDER_SEL_SH,
+ MMCR1_PMC3_ADDER_SEL_SH,
+ MMCR1_PMC4_ADDER_SEL_SH,
+ MMCR1_PMC5_ADDER_SEL_SH,
+ MMCR1_PMC6_ADDER_SEL_SH,
+ MMCR1_PMC7_ADDER_SEL_SH,
+ MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
+ * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
+ * \SMPL ||\TTC3SEL
+ * |\TTC_IFU_SEL
+ * \TTM2SEL0
+ *
+ * SMPL - SAMPLE_ENABLE constraint
+ * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
+ *
+ * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
+ * 55: UC1 error 0x0080_0000_0000_0000
+ * 54: FPU events needed 0x0040_0000_0000_0000
+ * 53: ISU1 events needed 0x0020_0000_0000_0000
+ * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
+ *
+ * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
+ * 51: UC2 error 0x0008_0000_0000_0000
+ * 50: FPU events needed 0x0004_0000_0000_0000
+ * 49: IFU events needed 0x0002_0000_0000_0000
+ * 48: LSU0 events needed 0x0001_0000_0000_0000
+ *
+ * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
+ * 47: UC3 error 0x8000_0000_0000
+ * 46: LSU0 events needed 0x4000_0000_0000
+ * 45: IFU events needed 0x2000_0000_0000
+ * 44: IDU0|ISU2 events needed 0x1000_0000_0000
+ * 43: ISU1 events needed 0x0800_0000_0000
+ *
+ * TTM2SEL0
+ * 42: 0 = IDU0 events needed
+ * 1 = ISU2 events needed 0x0400_0000_0000
+ *
+ * TTC_IFU_SEL
+ * 41: 0 = IFU.U events needed
+ * 1 = IFU.L events needed 0x0200_0000_0000
+ *
+ * TTC3SEL
+ * 40: 0 = LSU1.U events needed
+ * 1 = LSU1.L events needed 0x0100_0000_0000
+ *
+ * PS1
+ * 39: PS1 error 0x0080_0000_0000
+ * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ * 35: PS2 error 0x0008_0000_0000
+ * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ * 28-31: Byte 0 event source 0xf000_0000
+ * 1 = FPU
+ * 2 = ISU1
+ * 3 = IFU
+ * 4 = IDU0
+ * 7 = ISU2
+ * 9 = LSU0
+ * c = LSU1
+ * f = GPS
+ *
+ * B1, B2, B3
+ * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P8
+ * 15: P8 error 0x8000
+ * 14-15: Count of events needing PMC8
+ *
+ * P1..P7
+ * 0-13: Count of events needing PMC1..PMC7
+ *
+ * Note: this doesn't allow events using IFU.U to be combined with events
+ * using IFU.L, though that is feasible (using TTM0 and TTM2). However
+ * there are no listed events for IFU.L (they are debug events not
+ * verified for performance monitoring) so this shouldn't cause a
+ * problem.
+ */
+
+static struct unitinfo {
+ u64 value, mask;
+ int unit;
+ int lowerbit;
+} p4_unitinfo[16] = {
+ [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
+ [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+ [PM_ISU1_ALT] =
+ { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+ [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+ [PM_IFU_ALT] =
+ { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+ [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
+ [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
+ [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
+ [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
+ [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
+};
+
+static unsigned char direct_marked_event[8] = {
+ (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+ (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+ (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
+ (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+ (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
+ (1<<3) | (1<<4) | (1<<5),
+ /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+ (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+ (1<<4), /* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p4_marked_instr_event(u64 event)
+{
+ int pmc, psel, unit, byte, bit;
+ unsigned int mask;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = event & PM_PMCSEL_MSK;
+ if (pmc) {
+ if (direct_marked_event[pmc - 1] & (1 << psel))
+ return 1;
+ if (psel == 0) /* add events */
+ bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+ else if (psel == 6) /* decode events */
+ bit = 4;
+ else
+ return 0;
+ } else
+ bit = psel;
+
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ mask = 0;
+ switch (unit) {
+ case PM_LSU1:
+ if (event & PM_LOWER_MSKS)
+ mask = 1 << 28; /* byte 7 bit 4 */
+ else
+ mask = 6 << 24; /* byte 3 bits 1 and 2 */
+ break;
+ case PM_LSU0:
+ /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
+ mask = 0x083dff00;
+ }
+ return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, lower, sh;
+ u64 mask = 0, value = 0;
+ int grp = -1;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 8)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ grp = ((pmc - 1) >> 1) & 1;
+ }
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit) {
+ lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
+
+ /*
+ * Bus events on bytes 0 and 2 can be counted
+ * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+ */
+ if (!pmc)
+ grp = byte & 1;
+
+ if (!p4_unitinfo[unit].unit)
+ return -1;
+ mask |= p4_unitinfo[unit].mask;
+ value |= p4_unitinfo[unit].value;
+ sh = p4_unitinfo[unit].lowerbit;
+ if (sh > 1)
+ value |= (u64)lower << sh;
+ else if (lower != sh)
+ return -1;
+ unit = p4_unitinfo[unit].unit;
+
+ /* Set byte lane select field */
+ mask |= 0xfULL << (28 - 4 * byte);
+ value |= (u64)unit << (28 - 4 * byte);
+ }
+ if (grp == 0) {
+ /* increment PMC1/2/5/6 field */
+ mask |= 0x8000000000ull;
+ value |= 0x1000000000ull;
+ } else {
+ /* increment PMC3/4/7/8 field */
+ mask |= 0x800000000ull;
+ value |= 0x100000000ull;
+ }
+
+ /* Marked instruction events need sample_enable set */
+ if (p4_marked_instr_event(event)) {
+ mask |= 1ull << 56;
+ value |= 1ull << 56;
+ }
+
+ /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
+ if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
+ mask |= 1ull << 56;
+
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+static unsigned int ppc_inst_cmpl[] = {
+ 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
+};
+
+static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int i, j, na;
+
+ alt[0] = event;
+ na = 1;
+
+ /* 2 possibilities for PM_GRP_DISP_REJECT */
+ if (event == 0x8003 || event == 0x0224) {
+ alt[1] = event ^ (0x8003 ^ 0x0224);
+ return 2;
+ }
+
+ /* 2 possibilities for PM_ST_MISS_L1 */
+ if (event == 0x0c13 || event == 0x0c23) {
+ alt[1] = event ^ (0x0c13 ^ 0x0c23);
+ return 2;
+ }
+
+ /* several possibilities for PM_INST_CMPL */
+ for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
+ if (event == ppc_inst_cmpl[i]) {
+ for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
+ if (j != i)
+ alt[na++] = ppc_inst_cmpl[j];
+ break;
+ }
+ }
+
+ return na;
+}
+
+static int p4_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+ unsigned int pmc, unit, byte, psel, lower;
+ unsigned int ttm, grp;
+ unsigned int pmc_inuse = 0;
+ unsigned int pmc_grp_use[2];
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ unsigned int unitlower = 0;
+ int i;
+
+ if (n_ev > 8)
+ return -1;
+
+ /* First pass to count resource use */
+ pmc_grp_use[0] = pmc_grp_use[1] = 0;
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ /* count 1/2/5/6 vs 3/4/7/8 use */
+ ++pmc_grp_use[((pmc - 1) >> 1) & 1];
+ }
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
+ if (unit) {
+ if (!pmc)
+ ++pmc_grp_use[byte & 1];
+ if (unit == 6 || unit == 8)
+ /* map alt ISU1/IFU codes: 6->2, 8->3 */
+ unit = (unit >> 1) - 1;
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ lower <<= unit;
+ if (unituse[unit] && lower != (unitlower & lower))
+ return -1;
+ unituse[unit] = 1;
+ unitlower |= lower;
+ }
+ }
+ if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+ return -1;
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
+ * Each TTMx can only select one unit, but since
+ * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
+ * we have some choices.
+ */
+ if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
+ unituse[6] = 1; /* Move 2 to 6 */
+ unituse[2] = 0;
+ }
+ if (unituse[3] & (unituse[1] | unituse[2])) {
+ unituse[8] = 1; /* Move 3 to 8 */
+ unituse[3] = 0;
+ unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
+ }
+ /* Check only one unit per TTMx */
+ if (unituse[1] + unituse[2] + unituse[3] > 1 ||
+ unituse[4] + unituse[6] + unituse[7] > 1 ||
+ unituse[8] + unituse[9] > 1 ||
+ (unituse[5] | unituse[10] | unituse[11] |
+ unituse[13] | unituse[14]))
+ return -1;
+
+ /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
+ mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
+ mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
+ mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
+
+ /* Set TTCxSEL fields. */
+ if (unitlower & 0xe)
+ mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
+ if (unitlower & 0xf0)
+ mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
+ if (unitlower & 0xf00)
+ mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
+ if (unitlower & 0x7000)
+ mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
+
+ /* Set byte lane select fields. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit == 0xf) {
+ /* special case for GPS */
+ mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
+ } else {
+ if (!unituse[unit])
+ ttm = unit - 1; /* 2->1, 3->2 */
+ else
+ ttm = unit >> 2;
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
+ }
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ if (!pmc) {
+ /* Bus event or 00xxx direct event (off or cycles) */
+ if (unit)
+ psel |= 0x10 | ((byte & 2) << 2);
+ for (pmc = 0; pmc < 8; ++pmc) {
+ if (pmc_inuse & (1 << pmc))
+ continue;
+ grp = (pmc >> 1) & 1;
+ if (unit) {
+ if (grp == (byte & 1))
+ break;
+ } else if (pmc_grp_use[grp] < 4) {
+ ++pmc_grp_use[grp];
+ break;
+ }
+ }
+ pmc_inuse |= 1 << pmc;
+ } else {
+ /* Direct event */
+ --pmc;
+ if (psel == 0 && (byte & 2))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+ else if (psel == 6 && byte == 3)
+ /* seem to need to set sample_enable here */
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ psel |= 8;
+ }
+ if (pmc <= 1)
+ mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
+ else
+ mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+ if (pmc == 7) /* PMC8 */
+ mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
+ hwc[i] = pmc;
+ if (p4_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ }
+
+ if (pmc_inuse & 1)
+ mmcr0 |= MMCR0_PMC1CE;
+ if (pmc_inuse & 0xfe)
+ mmcr0 |= MMCR0_PMCjCE;
+
+ mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
+
+ /* Return MMCRx values */
+ mmcr[0] = mmcr0;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ /*
+ * Setting the PMCxSEL field to 0 disables PMC x.
+ * (Note that pmc is 0-based here, not 1-based.)
+ */
+ if (pmc <= 1) {
+ mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
+ } else {
+ mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
+ if (pmc == 7)
+ mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
+ }
+}
+
+static int p4_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 7,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x8c10, 0x3c10 },
+ [C(OP_WRITE)] = { 0x7c10, 0xc13 },
+ [C(OP_PREFETCH)] = { 0xc35, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { 0, 0 },
+ [C(OP_PREFETCH)] = { 0xc34, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x904 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x900 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x330, 0x331 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu power4_pmu = {
+ .n_counter = 8,
+ .max_alternatives = 5,
+ .add_fields = 0x0000001100005555ull,
+ .test_adder = 0x0011083300000000ull,
+ .compute_mmcr = p4_compute_mmcr,
+ .get_constraint = p4_get_constraint,
+ .get_alternatives = p4_get_alternatives,
+ .disable_pmc = p4_disable_pmc,
+ .n_generic = ARRAY_SIZE(p4_generic_events),
+ .generic_events = p4_generic_events,
+ .cache_events = &power4_cache_events,
+};
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 00000000000..41e5d2d958d
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,671 @@
+/*
+ * Performance counter support for POWER5+/++ (not POWER5) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
+ */
+#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_BYTE_SH 12 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 7
+#define PM_GRS_SH 8 /* Storage subsystem mux select */
+#define PM_GRS_MSK 7
+#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
+#define PM_PMCSEL_MSK 0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU 0
+#define PM_ISU0 1
+#define PM_IFU 2
+#define PM_ISU1 3
+#define PM_IDU 4
+#define PM_ISU0_ALT 6
+#define PM_GRS 7
+#define PM_LSU0 8
+#define PM_LSU1 0xc
+#define PM_LASTUNIT 0xc
+
+/*
+ * Bits in MMCR1 for POWER5+
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTM1SEL_SH 60
+#define MMCR1_TTM2SEL_SH 58
+#define MMCR1_TTM3SEL_SH 56
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 54
+#define MMCR1_TD_CP_DBG1SEL_SH 52
+#define MMCR1_TD_CP_DBG2SEL_SH 50
+#define MMCR1_TD_CP_DBG3SEL_SH 48
+#define MMCR1_GRS_L2SEL_SH 46
+#define MMCR1_GRS_L2SEL_MSK 3
+#define MMCR1_GRS_L3SEL_SH 44
+#define MMCR1_GRS_L3SEL_MSK 3
+#define MMCR1_GRS_MCSEL_SH 41
+#define MMCR1_GRS_MCSEL_MSK 7
+#define MMCR1_GRS_FABSEL_SH 39
+#define MMCR1_GRS_FABSEL_MSK 3
+#define MMCR1_PMC1_ADDER_SEL_SH 35
+#define MMCR1_PMC2_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC1SEL_SH 25
+#define MMCR1_PMC2SEL_SH 17
+#define MMCR1_PMC3SEL_SH 9
+#define MMCR1_PMC4SEL_SH 1
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
+ * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ * 51: NC error 0x0008_0000_0000_0000
+ * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ * 46-47: GRS_L2SEL value
+ * 44-45: GRS_L3SEL value
+ * 41-44: GRS_MCSEL value
+ * 39-40: GRS_FABSEL value
+ * Note that these match up with their bit positions in MMCR1
+ *
+ * T0 - TTM0 constraint
+ * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
+ *
+ * T1 - TTM1 constraint
+ * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ * 33: UC3 error 0x02_0000_0000
+ * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
+ * 31: ISU0 events needed 0x01_8000_0000
+ * 30: IDU|GRS events needed 0x00_4000_0000
+ *
+ * B0
+ * 24-27: Byte 0 event source 0x0f00_0000
+ * Encoding as for the event code
+ *
+ * B1, B2, B3
+ * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P6
+ * 11: P6 error 0x800
+ * 10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ * 0-9: Count of events needing PMC1..PMC5
+ */
+
+static const int grsel_shift[8] = {
+ MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+ MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+ MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+ [PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
+ [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
+ [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
+ [PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
+ [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
+ [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
+};
+
+static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, sh;
+ int bit, fmask;
+ u64 mask = 0, value = 0;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
+ return -1;
+ }
+ if (event & PM_BUSEVENT_MSK) {
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ mask |= unit_cons[unit][0];
+ value |= unit_cons[unit][1];
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+ ++unit;
+ byte &= 3;
+ }
+ if (unit == PM_GRS) {
+ bit = event & 7;
+ fmask = (bit == 6)? 7: 3;
+ sh = grsel_shift[bit];
+ mask |= (u64)fmask << sh;
+ value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+ }
+ /* Set byte lane select field */
+ mask |= 0xfULL << (24 - 4 * byte);
+ value |= (u64)unit << (24 - 4 * byte);
+ }
+ if (pmc < 5) {
+ /* need a counter from PMC1-4 set */
+ mask |= 0x8000000000000ull;
+ value |= 0x1000000000000ull;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+static int power5p_limited_pmc_event(u64 event)
+{
+ int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+ return pmc == 5 || pmc == 6;
+}
+
+#define MAX_ALT 3 /* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
+ { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
+ { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
+ { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
+ { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
+ { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
+ { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
+ { 0x100005, 0x600005 }, /* PM_RUN_CYC */
+ { 0x100009, 0x200009 }, /* PM_INST_CMPL */
+ { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
+ { 0x300009, 0x400009 }, /* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(unsigned int event)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ break;
+ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+ if (event == event_alternatives[i][j])
+ return i;
+ }
+ return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+ /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
+ /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
+ /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
+ /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters. This returns the alternative
+ * event code for those that do, or -1 otherwise. This also handles
+ * alternative PCMSEL values for add events.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+ int pmc, altpmc, pp, j;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc == 0 || pmc > 4)
+ return -1;
+ altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
+ pp = event & PM_PMCSEL_MSK;
+ for (j = 0; j < 4; ++j) {
+ if (bytedecode_alternatives[pmc - 1][j] == pp) {
+ return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+ (altpmc << PM_PMC_SH) |
+ bytedecode_alternatives[altpmc - 1][j];
+ }
+ }
+
+ /* new decode alternatives for power5+ */
+ if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
+ return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
+ if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
+ return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
+
+ /* alternative add event encodings */
+ if (pp == 0x10 || pp == 0x28)
+ return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
+ (altpmc << PM_PMC_SH);
+
+ return -1;
+}
+
+static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int i, j, nalt = 1;
+ int nlim;
+ s64 ae;
+
+ alt[0] = event;
+ nalt = 1;
+ nlim = power5p_limited_pmc_event(event);
+ i = find_alternative(event);
+ if (i >= 0) {
+ for (j = 0; j < MAX_ALT; ++j) {
+ ae = event_alternatives[i][j];
+ if (ae && ae != event)
+ alt[nalt++] = ae;
+ nlim += power5p_limited_pmc_event(ae);
+ }
+ } else {
+ ae = find_alternative_bdecode(event);
+ if (ae > 0)
+ alt[nalt++] = ae;
+ }
+
+ if (flags & PPMU_ONLY_COUNT_RUN) {
+ /*
+ * We're only counting in RUN state,
+ * so PM_CYC is equivalent to PM_RUN_CYC
+ * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+ * This doesn't include alternatives that don't provide
+ * any extra flexibility in assigning PMCs (e.g.
+ * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
+ * Note that even with these additional alternatives
+ * we never end up with more than 3 alternatives for any event.
+ */
+ j = nalt;
+ for (i = 0; i < nalt; ++i) {
+ switch (alt[i]) {
+ case 0xf: /* PM_CYC */
+ alt[j++] = 0x600005; /* PM_RUN_CYC */
+ ++nlim;
+ break;
+ case 0x600005: /* PM_RUN_CYC */
+ alt[j++] = 0xf;
+ break;
+ case 0x100009: /* PM_INST_CMPL */
+ alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
+ ++nlim;
+ break;
+ case 0x500009: /* PM_RUN_INST_CMPL */
+ alt[j++] = 0x100009; /* PM_INST_CMPL */
+ alt[j++] = 0x200009;
+ break;
+ }
+ }
+ nalt = j;
+ }
+
+ if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+ /* remove the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (!power5p_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+ /* remove all but the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (power5p_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ }
+
+ return nalt;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+ 0, /* 00 */
+ 0x1f, /* 01 PM_IOPS_CMPL */
+ 0x2, /* 02 PM_MRK_GRP_DISP */
+ 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+ 0, /* 04 */
+ 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+ 0x80, /* 06 */
+ 0x80, /* 07 */
+ 0, 0, 0,/* 08 - 0a */
+ 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+ 0, /* 0c */
+ 0x80, /* 0d */
+ 0x80, /* 0e */
+ 0, /* 0f */
+ 0, /* 10 */
+ 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+ 0, /* 12 */
+ 0x10, /* 13 PM_MRK_GRP_CMPL */
+ 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+ 0x2, /* 15 PM_MRK_GRP_ISSUED */
+ 0x80, /* 16 */
+ 0x80, /* 17 */
+ 0, 0, 0, 0, 0,
+ 0x80, /* 1d */
+ 0x80, /* 1e */
+ 0, /* 1f */
+ 0x80, /* 20 */
+ 0x80, /* 21 */
+ 0x80, /* 22 */
+ 0x80, /* 23 */
+ 0x80, /* 24 */
+ 0x80, /* 25 */
+ 0x80, /* 26 */
+ 0x80, /* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5p_marked_instr_event(u64 event)
+{
+ int pmc, psel;
+ int bit, byte, unit;
+ u32 mask;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = event & PM_PMCSEL_MSK;
+ if (pmc >= 5)
+ return 0;
+
+ bit = -1;
+ if (psel < sizeof(direct_event_is_marked)) {
+ if (direct_event_is_marked[psel] & (1 << pmc))
+ return 1;
+ if (direct_event_is_marked[psel] & 0x80)
+ bit = 4;
+ else if (psel == 0x08)
+ bit = pmc - 1;
+ else if (psel == 0x10)
+ bit = 4 - pmc;
+ else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+ bit = 4;
+ } else if ((psel & 0x48) == 0x40) {
+ bit = psel & 7;
+ } else if (psel == 0x28) {
+ bit = pmc - 1;
+ } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
+ bit = 4;
+ }
+
+ if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+ return 0;
+
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit == PM_LSU0) {
+ /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+ mask = 0x5dff00;
+ } else if (unit == PM_LSU1 && byte >= 4) {
+ byte -= 4;
+ /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
+ mask = 0x5f11c000;
+ } else
+ return 0;
+
+ return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5p_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ u64 mmcra = 0;
+ unsigned int pmc, unit, byte, psel;
+ unsigned int ttm;
+ int i, isbus, bit, grsel;
+ unsigned int pmc_inuse = 0;
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ int ttmuse;
+
+ if (n_ev > 6)
+ return -1;
+
+ /* First pass to count resource use */
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ }
+ if (event[i] & PM_BUSEVENT_MSK) {
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ ++unit;
+ byte &= 3;
+ }
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ unituse[unit] = 1;
+ }
+ }
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+ * choice we have to deal with.
+ */
+ if (unituse[PM_ISU0] &
+ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+ unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
+ unituse[PM_ISU0] = 0;
+ }
+ /* Set TTM[01]SEL fields. */
+ ttmuse = 0;
+ for (i = PM_FPU; i <= PM_ISU1; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+ }
+ ttmuse = 0;
+ for (; i <= PM_GRS; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+ }
+ if (ttmuse > 1)
+ return -1;
+
+ /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+ /* get ISU0 through TTM1 rather than TTM0 */
+ unit = PM_ISU0_ALT;
+ } else if (unit == PM_LSU1 + 1) {
+ /* select lower word of LSU1 for this byte */
+ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+ }
+ ttm = unit >> 2;
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ isbus = event[i] & PM_BUSEVENT_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ for (pmc = 0; pmc < 4; ++pmc) {
+ if (!(pmc_inuse & (1 << pmc)))
+ break;
+ }
+ if (pmc >= 4)
+ return -1;
+ pmc_inuse |= 1 << pmc;
+ } else if (pmc <= 4) {
+ /* Direct event */
+ --pmc;
+ if (isbus && (byte & 2) &&
+ (psel == 8 || psel == 0x10 || psel == 0x28))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+ } else {
+ /* Instructions or run cycles on PMC5/6 */
+ --pmc;
+ }
+ if (isbus && unit == PM_GRS) {
+ bit = psel & 7;
+ grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+ mmcr1 |= (u64)grsel << grsel_shift[bit];
+ }
+ if (power5p_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
+ /* select alternate byte lane */
+ psel |= 0x10;
+ if (pmc <= 3)
+ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+ hwc[i] = pmc;
+ }
+
+ /* Return MMCRx values */
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0x3e)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ if (pmc <= 3)
+ mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5p_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
+ [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
+ [C(OP_PREFETCH)] = { 0xc70e7, -1 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { 0, 0 },
+ [C(OP_PREFETCH)] = { 0xc50c3, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0xc20e4, 0x800c4 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x800c0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x230e4, 0x230e5 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu power5p_pmu = {
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x7000000000055ull,
+ .test_adder = 0x3000040000000ull,
+ .compute_mmcr = power5p_compute_mmcr,
+ .get_constraint = power5p_get_constraint,
+ .get_alternatives = power5p_get_alternatives,
+ .disable_pmc = power5p_disable_pmc,
+ .limited_pmc_event = power5p_limited_pmc_event,
+ .flags = PPMU_LIMITED_PMC5_6,
+ .n_generic = ARRAY_SIZE(power5p_generic_events),
+ .generic_events = power5p_generic_events,
+ .cache_events = &power5p_cache_events,
+};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 00000000000..05600b66221
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,611 @@
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5 (not POWER5++)
+ */
+#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_BYTE_SH 12 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 7
+#define PM_GRS_SH 8 /* Storage subsystem mux select */
+#define PM_GRS_MSK 7
+#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
+#define PM_PMCSEL_MSK 0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU 0
+#define PM_ISU0 1
+#define PM_IFU 2
+#define PM_ISU1 3
+#define PM_IDU 4
+#define PM_ISU0_ALT 6
+#define PM_GRS 7
+#define PM_LSU0 8
+#define PM_LSU1 0xc
+#define PM_LASTUNIT 0xc
+
+/*
+ * Bits in MMCR1 for POWER5
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTM1SEL_SH 60
+#define MMCR1_TTM2SEL_SH 58
+#define MMCR1_TTM3SEL_SH 56
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 54
+#define MMCR1_TD_CP_DBG1SEL_SH 52
+#define MMCR1_TD_CP_DBG2SEL_SH 50
+#define MMCR1_TD_CP_DBG3SEL_SH 48
+#define MMCR1_GRS_L2SEL_SH 46
+#define MMCR1_GRS_L2SEL_MSK 3
+#define MMCR1_GRS_L3SEL_SH 44
+#define MMCR1_GRS_L3SEL_MSK 3
+#define MMCR1_GRS_MCSEL_SH 41
+#define MMCR1_GRS_MCSEL_MSK 7
+#define MMCR1_GRS_FABSEL_SH 39
+#define MMCR1_GRS_FABSEL_MSK 3
+#define MMCR1_PMC1_ADDER_SEL_SH 35
+#define MMCR1_PMC2_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC1SEL_SH 25
+#define MMCR1_PMC2SEL_SH 17
+#define MMCR1_PMC3SEL_SH 9
+#define MMCR1_PMC4SEL_SH 1
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
+ * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
+ *
+ * T0 - TTM0 constraint
+ * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
+ *
+ * NC - number of counters
+ * 51: NC error 0x0008_0000_0000_0000
+ * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ * 46-47: GRS_L2SEL value
+ * 44-45: GRS_L3SEL value
+ * 41-44: GRS_MCSEL value
+ * 39-40: GRS_FABSEL value
+ * Note that these match up with their bit positions in MMCR1
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ * 37: UC3 error 0x20_0000_0000
+ * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
+ * 35: ISU0 events needed 0x08_0000_0000
+ * 34: IDU|GRS events needed 0x04_0000_0000
+ *
+ * PS1
+ * 33: PS1 error 0x2_0000_0000
+ * 31-32: count of events needing PMC1/2 0x1_8000_0000
+ *
+ * PS2
+ * 30: PS2 error 0x4000_0000
+ * 28-29: count of events needing PMC3/4 0x3000_0000
+ *
+ * B0
+ * 24-27: Byte 0 event source 0x0f00_0000
+ * Encoding as for the event code
+ *
+ * B1, B2, B3
+ * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P1..P6
+ * 0-11: Count of events needing PMC1..PMC6
+ */
+
+static const int grsel_shift[8] = {
+ MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+ MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+ MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+ [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
+ [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
+ [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
+ [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
+ [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
+ [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
+};
+
+static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, sh;
+ int bit, fmask;
+ u64 mask = 0, value = 0;
+ int grp = -1;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ if (pmc <= 4)
+ grp = (pmc - 1) >> 1;
+ else if (event != 0x500009 && event != 0x600005)
+ return -1;
+ }
+ if (event & PM_BUSEVENT_MSK) {
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ mask |= unit_cons[unit][0];
+ value |= unit_cons[unit][1];
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+ ++unit;
+ byte &= 3;
+ }
+ if (unit == PM_GRS) {
+ bit = event & 7;
+ fmask = (bit == 6)? 7: 3;
+ sh = grsel_shift[bit];
+ mask |= (u64)fmask << sh;
+ value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+ }
+ /*
+ * Bus events on bytes 0 and 2 can be counted
+ * on PMC1/2; bytes 1 and 3 on PMC3/4.
+ */
+ if (!pmc)
+ grp = byte & 1;
+ /* Set byte lane select field */
+ mask |= 0xfULL << (24 - 4 * byte);
+ value |= (u64)unit << (24 - 4 * byte);
+ }
+ if (grp == 0) {
+ /* increment PMC1/2 field */
+ mask |= 0x200000000ull;
+ value |= 0x080000000ull;
+ } else if (grp == 1) {
+ /* increment PMC3/4 field */
+ mask |= 0x40000000ull;
+ value |= 0x10000000ull;
+ }
+ if (pmc < 5) {
+ /* need a counter from PMC1-4 set */
+ mask |= 0x8000000000000ull;
+ value |= 0x1000000000000ull;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+#define MAX_ALT 3 /* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
+ { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
+ { 0x100005, 0x600005 }, /* PM_RUN_CYC */
+ { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
+ { 0x300009, 0x400009 }, /* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u64 event)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ break;
+ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+ if (event == event_alternatives[i][j])
+ return i;
+ }
+ return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+ /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
+ /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
+ /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
+ /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters. This returns the alternative
+ * event code for those that do, or -1 otherwise.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+ int pmc, altpmc, pp, j;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc == 0 || pmc > 4)
+ return -1;
+ altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
+ pp = event & PM_PMCSEL_MSK;
+ for (j = 0; j < 4; ++j) {
+ if (bytedecode_alternatives[pmc - 1][j] == pp) {
+ return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+ (altpmc << PM_PMC_SH) |
+ bytedecode_alternatives[altpmc - 1][j];
+ }
+ }
+ return -1;
+}
+
+static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int i, j, nalt = 1;
+ s64 ae;
+
+ alt[0] = event;
+ nalt = 1;
+ i = find_alternative(event);
+ if (i >= 0) {
+ for (j = 0; j < MAX_ALT; ++j) {
+ ae = event_alternatives[i][j];
+ if (ae && ae != event)
+ alt[nalt++] = ae;
+ }
+ } else {
+ ae = find_alternative_bdecode(event);
+ if (ae > 0)
+ alt[nalt++] = ae;
+ }
+ return nalt;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+ 0, /* 00 */
+ 0x1f, /* 01 PM_IOPS_CMPL */
+ 0x2, /* 02 PM_MRK_GRP_DISP */
+ 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+ 0, /* 04 */
+ 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+ 0x80, /* 06 */
+ 0x80, /* 07 */
+ 0, 0, 0,/* 08 - 0a */
+ 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+ 0, /* 0c */
+ 0x80, /* 0d */
+ 0x80, /* 0e */
+ 0, /* 0f */
+ 0, /* 10 */
+ 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+ 0, /* 12 */
+ 0x10, /* 13 PM_MRK_GRP_CMPL */
+ 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+ 0x2, /* 15 PM_MRK_GRP_ISSUED */
+ 0x80, /* 16 */
+ 0x80, /* 17 */
+ 0, 0, 0, 0, 0,
+ 0x80, /* 1d */
+ 0x80, /* 1e */
+ 0, /* 1f */
+ 0x80, /* 20 */
+ 0x80, /* 21 */
+ 0x80, /* 22 */
+ 0x80, /* 23 */
+ 0x80, /* 24 */
+ 0x80, /* 25 */
+ 0x80, /* 26 */
+ 0x80, /* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5_marked_instr_event(u64 event)
+{
+ int pmc, psel;
+ int bit, byte, unit;
+ u32 mask;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = event & PM_PMCSEL_MSK;
+ if (pmc >= 5)
+ return 0;
+
+ bit = -1;
+ if (psel < sizeof(direct_event_is_marked)) {
+ if (direct_event_is_marked[psel] & (1 << pmc))
+ return 1;
+ if (direct_event_is_marked[psel] & 0x80)
+ bit = 4;
+ else if (psel == 0x08)
+ bit = pmc - 1;
+ else if (psel == 0x10)
+ bit = 4 - pmc;
+ else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+ bit = 4;
+ } else if ((psel & 0x58) == 0x40)
+ bit = psel & 7;
+
+ if (!(event & PM_BUSEVENT_MSK))
+ return 0;
+
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit == PM_LSU0) {
+ /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+ mask = 0x5dff00;
+ } else if (unit == PM_LSU1 && byte >= 4) {
+ byte -= 4;
+ /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
+ mask = 0x5f00c0aa;
+ } else
+ return 0;
+
+ return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ u64 mmcra = 0;
+ unsigned int pmc, unit, byte, psel;
+ unsigned int ttm, grp;
+ int i, isbus, bit, grsel;
+ unsigned int pmc_inuse = 0;
+ unsigned int pmc_grp_use[2];
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ int ttmuse;
+
+ if (n_ev > 6)
+ return -1;
+
+ /* First pass to count resource use */
+ pmc_grp_use[0] = pmc_grp_use[1] = 0;
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ /* count 1/2 vs 3/4 use */
+ if (pmc <= 4)
+ ++pmc_grp_use[(pmc - 1) >> 1];
+ }
+ if (event[i] & PM_BUSEVENT_MSK) {
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ ++unit;
+ byte &= 3;
+ }
+ if (!pmc)
+ ++pmc_grp_use[byte & 1];
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ unituse[unit] = 1;
+ }
+ }
+ if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
+ return -1;
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+ * choice we have to deal with.
+ */
+ if (unituse[PM_ISU0] &
+ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+ unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
+ unituse[PM_ISU0] = 0;
+ }
+ /* Set TTM[01]SEL fields. */
+ ttmuse = 0;
+ for (i = PM_FPU; i <= PM_ISU1; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+ }
+ ttmuse = 0;
+ for (; i <= PM_GRS; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+ }
+ if (ttmuse > 1)
+ return -1;
+
+ /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+ /* get ISU0 through TTM1 rather than TTM0 */
+ unit = PM_ISU0_ALT;
+ } else if (unit == PM_LSU1 + 1) {
+ /* select lower word of LSU1 for this byte */
+ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+ }
+ ttm = unit >> 2;
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ isbus = event[i] & PM_BUSEVENT_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ for (pmc = 0; pmc < 4; ++pmc) {
+ if (pmc_inuse & (1 << pmc))
+ continue;
+ grp = (pmc >> 1) & 1;
+ if (isbus) {
+ if (grp == (byte & 1))
+ break;
+ } else if (pmc_grp_use[grp] < 2) {
+ ++pmc_grp_use[grp];
+ break;
+ }
+ }
+ pmc_inuse |= 1 << pmc;
+ } else if (pmc <= 4) {
+ /* Direct event */
+ --pmc;
+ if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+ } else {
+ /* Instructions or run cycles on PMC5/6 */
+ --pmc;
+ }
+ if (isbus && unit == PM_GRS) {
+ bit = psel & 7;
+ grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+ mmcr1 |= (u64)grsel << grsel_shift[bit];
+ }
+ if (power5_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ if (pmc <= 3)
+ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+ hwc[i] = pmc;
+ }
+
+ /* Return MMCRx values */
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0x3e)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ if (pmc <= 3)
+ mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
+ [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
+ [C(OP_PREFETCH)] = { 0xc70e7, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x3c309b },
+ [C(OP_WRITE)] = { 0, 0 },
+ [C(OP_PREFETCH)] = { 0xc50c3, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x2c4090, 0x800c4 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x800c0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x230e4, 0x230e5 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu power5_pmu = {
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x7000090000555ull,
+ .test_adder = 0x3000490000000ull,
+ .compute_mmcr = power5_compute_mmcr,
+ .get_constraint = power5_get_constraint,
+ .get_alternatives = power5_get_alternatives,
+ .disable_pmc = power5_disable_pmc,
+ .n_generic = ARRAY_SIZE(power5_generic_events),
+ .generic_events = power5_generic_events,
+ .cache_events = &power5_cache_events,
+};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 00000000000..46f74bebcfd
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,532 @@
+/*
+ * Performance counter support for POWER6 processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER6
+ */
+#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0x7
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */
+#define PM_UNIT_MSK 0xf
+#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
+#define PM_LLAV 0x8000 /* Load lookahead match value */
+#define PM_LLA 0x4000 /* Load lookahead match enable */
+#define PM_BYTE_SH 12 /* Byte of event bus to use */
+#define PM_BYTE_MSK 3
+#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
+#define PM_SUBUNIT_MSK 7
+#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
+#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
+#define PM_BUSEVENT_MSK 0xf3700
+
+/*
+ * Bits in MMCR1 for POWER6
+ */
+#define MMCR1_TTM0SEL_SH 60
+#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
+#define MMCR1_TTMSEL_MSK 0xf
+#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
+#define MMCR1_NESTSEL_SH 45
+#define MMCR1_NESTSEL_MSK 0x7
+#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
+#define MMCR1_PMC1_LLA ((u64)1 << 44)
+#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
+#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
+#define MMCR1_PMC1SEL_SH 24
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0xff
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value >> 1.
+ * Bottom 4 bits are a map of which PMCs are interesting,
+ * top 4 bits say what sort of event:
+ * 0 = direct marked event,
+ * 1 = byte decode event,
+ * 4 = add/and event (PMC1 -> bits 0 & 4),
+ * 5 = add/and event (PMC1 -> bits 1 & 5),
+ * 6 = add/and event (PMC1 -> bits 2 & 6),
+ * 7 = add/and event (PMC1 -> bits 3 & 7).
+ */
+static unsigned char direct_event_is_marked[0x60 >> 1] = {
+ 0, /* 00 */
+ 0, /* 02 */
+ 0, /* 04 */
+ 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+ 0x04, /* 08 PM_MRK_DFU_FIN */
+ 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
+ 0, /* 0c */
+ 0, /* 0e */
+ 0x02, /* 10 PM_MRK_INST_DISP */
+ 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
+ 0, /* 14 */
+ 0, /* 16 */
+ 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
+ 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
+ 0x01, /* 1c PM_MRK_INST_ISSUED */
+ 0, /* 1e */
+ 0, /* 20 */
+ 0, /* 22 */
+ 0, /* 24 */
+ 0, /* 26 */
+ 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
+ 0, /* 2a */
+ 0, /* 2c */
+ 0, /* 2e */
+ 0x4f, /* 30 */
+ 0x7f, /* 32 */
+ 0x4f, /* 34 */
+ 0x5f, /* 36 */
+ 0x6f, /* 38 */
+ 0x4f, /* 3a */
+ 0, /* 3c */
+ 0x08, /* 3e PM_MRK_INST_TIMEO */
+ 0x1f, /* 40 */
+ 0x1f, /* 42 */
+ 0x1f, /* 44 */
+ 0x1f, /* 46 */
+ 0x1f, /* 48 */
+ 0x1f, /* 4a */
+ 0x1f, /* 4c */
+ 0x1f, /* 4e */
+ 0, /* 50 */
+ 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
+ 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
+ 0x02, /* 56 PM_MRK_LD_MISS_L1 */
+ 0, /* 58 */
+ 0, /* 5a */
+ 0, /* 5c */
+ 0, /* 5e */
+};
+
+/*
+ * Masks showing for each unit which bits are marked events.
+ * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
+ */
+static u32 marked_bus_events[16] = {
+ 0x01000000, /* direct events set 1: byte 3 bit 0 */
+ 0x00010000, /* direct events set 2: byte 2 bit 0 */
+ 0, 0, 0, 0, /* IDU, IFU, nest: nothing */
+ 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
+ 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
+ 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
+ 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
+ 0, /* LSU set 3 */
+ 0x00000010, /* VMX set 3: byte 0 bit 4 */
+ 0, /* BFP set 1 */
+ 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
+ 0, 0
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power6_marked_instr_event(u64 event)
+{
+ int pmc, psel, ptype;
+ int bit, byte, unit;
+ u32 mask;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
+ if (pmc >= 5)
+ return 0;
+
+ bit = -1;
+ if (psel < sizeof(direct_event_is_marked)) {
+ ptype = direct_event_is_marked[psel];
+ if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
+ return 0;
+ ptype >>= 4;
+ if (ptype == 0)
+ return 1;
+ if (ptype == 1)
+ bit = 0;
+ else
+ bit = ptype ^ (pmc - 1);
+ } else if ((psel & 0x48) == 0x40)
+ bit = psel & 7;
+
+ if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+ return 0;
+
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ mask = marked_bus_events[unit];
+ return (mask >> (byte * 8 + bit)) & 1;
+}
+
+/*
+ * Assign PMC numbers and compute MMCR1 value for a set of events
+ */
+static int p6_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ u64 mmcra = 0;
+ int i;
+ unsigned int pmc, ev, b, u, s, psel;
+ unsigned int ttmset = 0;
+ unsigned int pmc_inuse = 0;
+
+ if (n_ev > 6)
+ return -1;
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1; /* collision! */
+ pmc_inuse |= 1 << (pmc - 1);
+ }
+ }
+ for (i = 0; i < n_ev; ++i) {
+ ev = event[i];
+ pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ --pmc;
+ } else {
+ /* can go on any PMC; find a free one */
+ for (pmc = 0; pmc < 4; ++pmc)
+ if (!(pmc_inuse & (1 << pmc)))
+ break;
+ if (pmc >= 4)
+ return -1;
+ pmc_inuse |= 1 << pmc;
+ }
+ hwc[i] = pmc;
+ psel = ev & PM_PMCSEL_MSK;
+ if (ev & PM_BUSEVENT_MSK) {
+ /* this event uses the event bus */
+ b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
+ u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
+ /* check for conflict on this byte of event bus */
+ if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
+ return -1;
+ mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+ ttmset |= 1 << b;
+ if (u == 5) {
+ /* Nest events have a further mux */
+ s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+ if ((ttmset & 0x10) &&
+ MMCR1_NESTSEL(mmcr1) != s)
+ return -1;
+ ttmset |= 0x10;
+ mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+ }
+ if (0x30 <= psel && psel <= 0x3d) {
+ /* these need the PMCx_ADDR_SEL bits */
+ if (b >= 2)
+ mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
+ }
+ /* bus select values are different for PMC3/4 */
+ if (pmc >= 2 && (psel & 0x90) == 0x80)
+ psel ^= 0x20;
+ }
+ if (ev & PM_LLA) {
+ mmcr1 |= MMCR1_PMC1_LLA >> pmc;
+ if (ev & PM_LLAV)
+ mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
+ }
+ if (power6_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ if (pmc < 4)
+ mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+ }
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0xe)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ *
+ * 0-1 add field: number of uses of PMC1 (max 1)
+ * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ * 12-15 add field: number of uses of PMC1-4 (max 4)
+ * 16-19 select field: unit on byte 0 of event bus
+ * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ * 32-34 select field: nest (subunit) event selector
+ */
+static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, sh, subunit;
+ u64 mask = 0, value = 0;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ }
+ if (event & PM_BUSEVENT_MSK) {
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ sh = byte * 4 + (16 - PM_UNIT_SH);
+ mask |= PM_UNIT_MSKS << sh;
+ value |= (u64)(event & PM_UNIT_MSKS) << sh;
+ if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
+ subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+ mask |= (u64)PM_SUBUNIT_MSK << 32;
+ value |= (u64)subunit << 32;
+ }
+ }
+ if (pmc <= 4) {
+ mask |= 0x8000; /* add field for count of PMC1-4 uses */
+ value |= 0x1000;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+static int p6_limited_pmc_event(u64 event)
+{
+ int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+ return pmc == 5 || pmc == 6;
+}
+
+#define MAX_ALT 4 /* at most 4 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
+ { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
+ { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
+ { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
+ { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
+ { 0x10000e, 0x400010 }, /* PM_PURR */
+ { 0x100010, 0x4000f8 }, /* PM_FLUSH */
+ { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
+ { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
+ { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
+ { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
+ { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
+ { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
+ { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
+ { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
+ { 0x200012, 0x300012 }, /* PM_INST_DISP */
+ { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
+ { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
+ { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
+ { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
+ { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
+ { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
+ { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
+};
+
+/*
+ * This could be made more efficient with a binary search on
+ * a presorted list, if necessary
+ */
+static int find_alternatives_list(u64 event)
+{
+ int i, j;
+ unsigned int alt;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ return -1;
+ for (j = 0; j < MAX_ALT; ++j) {
+ alt = event_alternatives[i][j];
+ if (!alt || event < alt)
+ break;
+ if (event == alt)
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int i, j, nlim;
+ unsigned int psel, pmc;
+ unsigned int nalt = 1;
+ u64 aevent;
+
+ alt[0] = event;
+ nlim = p6_limited_pmc_event(event);
+
+ /* check the alternatives table */
+ i = find_alternatives_list(event);
+ if (i >= 0) {
+ /* copy out alternatives from list */
+ for (j = 0; j < MAX_ALT; ++j) {
+ aevent = event_alternatives[i][j];
+ if (!aevent)
+ break;
+ if (aevent != event)
+ alt[nalt++] = aevent;
+ nlim += p6_limited_pmc_event(aevent);
+ }
+
+ } else {
+ /* Check for alternative ways of computing sum events */
+ /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
+ psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc && (psel == 0x32 || psel == 0x34))
+ alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
+ ((5 - pmc) << PM_PMC_SH);
+
+ /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
+ if (pmc && (psel == 0x38 || psel == 0x3a))
+ alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
+ ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
+ }
+
+ if (flags & PPMU_ONLY_COUNT_RUN) {
+ /*
+ * We're only counting in RUN state,
+ * so PM_CYC is equivalent to PM_RUN_CYC,
+ * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
+ * This doesn't include alternatives that don't provide
+ * any extra flexibility in assigning PMCs (e.g.
+ * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
+ * Note that even with these additional alternatives
+ * we never end up with more than 4 alternatives for any event.
+ */
+ j = nalt;
+ for (i = 0; i < nalt; ++i) {
+ switch (alt[i]) {
+ case 0x1e: /* PM_CYC */
+ alt[j++] = 0x600005; /* PM_RUN_CYC */
+ ++nlim;
+ break;
+ case 0x10000a: /* PM_RUN_CYC */
+ alt[j++] = 0x1e; /* PM_CYC */
+ break;
+ case 2: /* PM_INST_CMPL */
+ alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
+ ++nlim;
+ break;
+ case 0x500009: /* PM_RUN_INST_CMPL */
+ alt[j++] = 2; /* PM_INST_CMPL */
+ break;
+ case 0x10000e: /* PM_PURR */
+ alt[j++] = 0x4000f4; /* PM_RUN_PURR */
+ break;
+ case 0x4000f4: /* PM_RUN_PURR */
+ alt[j++] = 0x10000e; /* PM_PURR */
+ break;
+ }
+ }
+ nalt = j;
+ }
+
+ if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+ /* remove the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (!p6_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+ /* remove all but the limited PMC events */
+ j = 0;
+ for (i = 0; i < nalt; ++i) {
+ if (p6_limited_pmc_event(alt[i])) {
+ alt[j] = alt[i];
+ ++j;
+ }
+ }
+ nalt = j;
+ }
+
+ return nalt;
+}
+
+static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ /* Set PMCxSEL to 0 to disable PMCx */
+ if (pmc <= 3)
+ mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power6_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 2,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x80082, 0x80080 },
+ [C(OP_WRITE)] = { 0x80086, 0x80088 },
+ [C(OP_PREFETCH)] = { 0x810a4, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x100056 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0x4008c, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x150730, 0x250532 },
+ [C(OP_WRITE)] = { 0x250432, 0x150432 },
+ [C(OP_PREFETCH)] = { 0x810a6, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x20000e },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x420ce },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x430e6, 0x400052 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu power6_pmu = {
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x1555,
+ .test_adder = 0x3000,
+ .compute_mmcr = p6_compute_mmcr,
+ .get_constraint = p6_get_constraint,
+ .get_alternatives = p6_get_alternatives,
+ .disable_pmc = p6_disable_pmc,
+ .limited_pmc_event = p6_limited_pmc_event,
+ .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
+ .n_generic = ARRAY_SIZE(power6_generic_events),
+ .generic_events = power6_generic_events,
+ .cache_events = &power6_cache_events,
+};
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
new file mode 100644
index 00000000000..b3f7d1216ba
--- /dev/null
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -0,0 +1,357 @@
+/*
+ * Performance counter support for POWER7 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER7
+ */
+#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_COMBINE_SH 11 /* Combined event bit */
+#define PM_COMBINE_MSK 1
+#define PM_COMBINE_MSKS 0x800
+#define PM_L2SEL_SH 8 /* L2 event select */
+#define PM_L2SEL_MSK 7
+#define PM_PMCSEL_MSK 0xff
+
+/*
+ * Bits in MMCR1 for POWER7
+ */
+#define MMCR1_TTM0SEL_SH 60
+#define MMCR1_TTM1SEL_SH 56
+#define MMCR1_TTM2SEL_SH 52
+#define MMCR1_TTM3SEL_SH 48
+#define MMCR1_TTMSEL_MSK 0xf
+#define MMCR1_L2SEL_SH 45
+#define MMCR1_L2SEL_MSK 7
+#define MMCR1_PMC1_COMBINE_SH 35
+#define MMCR1_PMC2_COMBINE_SH 34
+#define MMCR1_PMC3_COMBINE_SH 33
+#define MMCR1_PMC4_COMBINE_SH 32
+#define MMCR1_PMC1SEL_SH 24
+#define MMCR1_PMC2SEL_SH 16
+#define MMCR1_PMC3SEL_SH 8
+#define MMCR1_PMC4SEL_SH 0
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0xff
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * [ ><><><><><><>
+ * NC P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ * 15: NC error 0x8000
+ * 12-14: number of events needing PMC1-4 0x7000
+ *
+ * P6
+ * 11: P6 error 0x800
+ * 10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ * 0-9: Count of events needing PMC1..PMC5
+ */
+
+static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, sh;
+ u64 mask = 0, value = 0;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
+ return -1;
+ }
+ if (pmc < 5) {
+ /* need a counter from PMC1-4 set */
+ mask |= 0x8000;
+ value |= 0x1000;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+#define MAX_ALT 2 /* at most 2 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
+ { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
+ { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u64 event)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ break;
+ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+ if (event == event_alternatives[i][j])
+ return i;
+ }
+ return -1;
+}
+
+static s64 find_alternative_decode(u64 event)
+{
+ int pmc, psel;
+
+ /* this only handles the 4x decode events */
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = event & PM_PMCSEL_MSK;
+ if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
+ return event - (1 << PM_PMC_SH) + 8;
+ if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
+ return event + (1 << PM_PMC_SH) - 8;
+ return -1;
+}
+
+static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int i, j, nalt = 1;
+ s64 ae;
+
+ alt[0] = event;
+ nalt = 1;
+ i = find_alternative(event);
+ if (i >= 0) {
+ for (j = 0; j < MAX_ALT; ++j) {
+ ae = event_alternatives[i][j];
+ if (ae && ae != event)
+ alt[nalt++] = ae;
+ }
+ } else {
+ ae = find_alternative_decode(event);
+ if (ae > 0)
+ alt[nalt++] = ae;
+ }
+
+ if (flags & PPMU_ONLY_COUNT_RUN) {
+ /*
+ * We're only counting in RUN state,
+ * so PM_CYC is equivalent to PM_RUN_CYC
+ * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+ * This doesn't include alternatives that don't provide
+ * any extra flexibility in assigning PMCs.
+ */
+ j = nalt;
+ for (i = 0; i < nalt; ++i) {
+ switch (alt[i]) {
+ case 0x1e: /* PM_CYC */
+ alt[j++] = 0x600f4; /* PM_RUN_CYC */
+ break;
+ case 0x600f4: /* PM_RUN_CYC */
+ alt[j++] = 0x1e;
+ break;
+ case 0x2: /* PM_PPC_CMPL */
+ alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
+ break;
+ case 0x500fa: /* PM_RUN_INST_CMPL */
+ alt[j++] = 0x2; /* PM_PPC_CMPL */
+ break;
+ }
+ }
+ nalt = j;
+ }
+
+ return nalt;
+}
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power7_marked_instr_event(u64 event)
+{
+ int pmc, psel;
+ int unit;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */
+ if (pmc >= 5)
+ return 0;
+
+ switch (psel >> 4) {
+ case 2:
+ return pmc == 2 || pmc == 4;
+ case 3:
+ if (psel == 0x3c)
+ return pmc == 1;
+ if (psel == 0x3e)
+ return pmc != 2;
+ return 1;
+ case 4:
+ case 5:
+ return unit == 0xd;
+ case 6:
+ if (psel == 0x64)
+ return pmc >= 3;
+ case 8:
+ return unit == 0xd;
+ }
+ return 0;
+}
+
+static int power7_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ u64 mmcra = 0;
+ unsigned int pmc, unit, combine, l2sel, psel;
+ unsigned int pmc_inuse = 0;
+ int i;
+
+ /* First pass to count resource use */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ }
+ }
+
+ /* Second pass: assign PMCs, set all MMCR1 fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
+ l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ for (pmc = 0; pmc < 4; ++pmc) {
+ if (!(pmc_inuse & (1 << pmc)))
+ break;
+ }
+ if (pmc >= 4)
+ return -1;
+ pmc_inuse |= 1 << pmc;
+ } else {
+ /* Direct or decoded event */
+ --pmc;
+ }
+ if (pmc <= 3) {
+ mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
+ mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
+ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+ if (unit == 6) /* L2 events */
+ mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
+ }
+ if (power7_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ hwc[i] = pmc;
+ }
+
+ /* Return MMCRx values */
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0x3e)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ if (pmc <= 3)
+ mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power7_generic_events[] = {
+ [PERF_COUNT_CPU_CYCLES] = 0x1e,
+ [PERF_COUNT_INSTRUCTIONS] = 2,
+ [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
+ [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
+ [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x400f0, 0xc880 },
+ [C(OP_WRITE)] = { 0, 0x300f0 },
+ [C(OP_PREFETCH)] = { 0xd8b8, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x200fc },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0x408a, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x6080, 0x6084 },
+ [C(OP_WRITE)] = { 0x6082, 0x6086 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x300fc },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x400fc },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x10068, 0x400f6 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu power7_pmu = {
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT + 1,
+ .add_fields = 0x1555ull,
+ .test_adder = 0x3000ull,
+ .compute_mmcr = power7_compute_mmcr,
+ .get_constraint = power7_get_constraint,
+ .get_alternatives = power7_get_alternatives,
+ .disable_pmc = power7_disable_pmc,
+ .n_generic = ARRAY_SIZE(power7_generic_events),
+ .generic_events = power7_generic_events,
+ .cache_events = &power7_cache_events,
+};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 00000000000..ba0a357a89f
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,482 @@
+/*
+ * Performance counter support for PPC970-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for PPC970
+ */
+#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_SPCSEL_SH 6
+#define PM_SPCSEL_MSK 3
+#define PM_BYTE_SH 4 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 3
+#define PM_PMCSEL_MSK 0xf
+
+/* Values in PM_UNIT field */
+#define PM_NONE 0
+#define PM_FPU 1
+#define PM_VPU 2
+#define PM_ISU 3
+#define PM_IFU 4
+#define PM_IDU 5
+#define PM_STS 6
+#define PM_LSU0 7
+#define PM_LSU1U 8
+#define PM_LSU1L 9
+#define PM_LASTUNIT 9
+
+/*
+ * Bits in MMCR0 for PPC970
+ */
+#define MMCR0_PMC1SEL_SH 8
+#define MMCR0_PMC2SEL_SH 1
+#define MMCR_PMCSEL_MSK 0x1f
+
+/*
+ * Bits in MMCR1 for PPC970
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTM1SEL_SH 59
+#define MMCR1_TTM3SEL_SH 53
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 50
+#define MMCR1_TD_CP_DBG1SEL_SH 48
+#define MMCR1_TD_CP_DBG2SEL_SH 46
+#define MMCR1_TD_CP_DBG3SEL_SH 44
+#define MMCR1_PMC1_ADDER_SEL_SH 39
+#define MMCR1_PMC2_ADDER_SEL_SH 38
+#define MMCR1_PMC6_ADDER_SEL_SH 37
+#define MMCR1_PMC5_ADDER_SEL_SH 36
+#define MMCR1_PMC8_ADDER_SEL_SH 35
+#define MMCR1_PMC7_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC3SEL_SH 27
+#define MMCR1_PMC4SEL_SH 22
+#define MMCR1_PMC5SEL_SH 17
+#define MMCR1_PMC6SEL_SH 12
+#define MMCR1_PMC7SEL_SH 7
+#define MMCR1_PMC8SEL_SH 2
+
+static short mmcr1_adder_bits[8] = {
+ MMCR1_PMC1_ADDER_SEL_SH,
+ MMCR1_PMC2_ADDER_SEL_SH,
+ MMCR1_PMC3_ADDER_SEL_SH,
+ MMCR1_PMC4_ADDER_SEL_SH,
+ MMCR1_PMC5_ADDER_SEL_SH,
+ MMCR1_PMC6_ADDER_SEL_SH,
+ MMCR1_PMC7_ADDER_SEL_SH,
+ MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
+ * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
+ *
+ * SP - SPCSEL constraint
+ * 48-49: SPCSEL value 0x3_0000_0000_0000
+ *
+ * T0 - TTM0 constraint
+ * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
+ * 43: UC3 error 0x0800_0000_0000
+ * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
+ * 41: ISU events needed 0x0200_0000_0000
+ * 40: IDU|STS events needed 0x0100_0000_0000
+ *
+ * PS1
+ * 39: PS1 error 0x0080_0000_0000
+ * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ * 35: PS2 error 0x0008_0000_0000
+ * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ * 28-31: Byte 0 event source 0xf000_0000
+ * Encoding as for the event code
+ *
+ * B1, B2, B3
+ * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P1
+ * 15: P1 error 0x8000
+ * 14-15: Count of events needing PMC1
+ *
+ * P2..P8
+ * 0-13: Count of events needing PMC2..PMC8
+ */
+
+static unsigned char direct_marked_event[8] = {
+ (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+ (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+ (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
+ (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+ (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
+ (1<<3) | (1<<4) | (1<<5),
+ /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+ (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+ (1<<4) /* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p970_marked_instr_event(u64 event)
+{
+ int pmc, psel, unit, byte, bit;
+ unsigned int mask;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ psel = event & PM_PMCSEL_MSK;
+ if (pmc) {
+ if (direct_marked_event[pmc - 1] & (1 << psel))
+ return 1;
+ if (psel == 0) /* add events */
+ bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+ else if (psel == 7 || psel == 13) /* decode events */
+ bit = 4;
+ else
+ return 0;
+ } else
+ bit = psel;
+
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ mask = 0;
+ switch (unit) {
+ case PM_VPU:
+ mask = 0x4c; /* byte 0 bits 2,3,6 */
+ case PM_LSU0:
+ /* byte 2 bits 0,2,3,4,6; all of byte 1 */
+ mask = 0x085dff00;
+ case PM_LSU1L:
+ mask = 0x50 << 24; /* byte 3 bits 4,6 */
+ break;
+ }
+ return (mask >> (byte * 8 + bit)) & 1;
+}
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+ [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
+ [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
+ [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
+ [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
+ [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
+ [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
+};
+
+static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, sh, spcsel;
+ u64 mask = 0, value = 0;
+ int grp = -1;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 8)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ grp = ((pmc - 1) >> 1) & 1;
+ }
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit) {
+ if (unit > PM_LASTUNIT)
+ return -1;
+ mask |= unit_cons[unit][0];
+ value |= unit_cons[unit][1];
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ /*
+ * Bus events on bytes 0 and 2 can be counted
+ * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+ */
+ if (!pmc)
+ grp = byte & 1;
+ /* Set byte lane select field */
+ mask |= 0xfULL << (28 - 4 * byte);
+ value |= (u64)unit << (28 - 4 * byte);
+ }
+ if (grp == 0) {
+ /* increment PMC1/2/5/6 field */
+ mask |= 0x8000000000ull;
+ value |= 0x1000000000ull;
+ } else if (grp == 1) {
+ /* increment PMC3/4/7/8 field */
+ mask |= 0x800000000ull;
+ value |= 0x100000000ull;
+ }
+ spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+ if (spcsel) {
+ mask |= 3ull << 48;
+ value |= (u64)spcsel << 48;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ alt[0] = event;
+
+ /* 2 alternatives for LSU empty */
+ if (event == 0x2002 || event == 0x3002) {
+ alt[1] = event ^ 0x1000;
+ return 2;
+ }
+
+ return 1;
+}
+
+static int p970_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+ unsigned int pmc, unit, byte, psel;
+ unsigned int ttm, grp;
+ unsigned int pmc_inuse = 0;
+ unsigned int pmc_grp_use[2];
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
+ unsigned char ttmuse[2];
+ unsigned char pmcsel[8];
+ int i;
+ int spcsel;
+
+ if (n_ev > 8)
+ return -1;
+
+ /* First pass to count resource use */
+ pmc_grp_use[0] = pmc_grp_use[1] = 0;
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ /* count 1/2/5/6 vs 3/4/7/8 use */
+ ++pmc_grp_use[((pmc - 1) >> 1) & 1];
+ }
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit) {
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (!pmc)
+ ++pmc_grp_use[byte & 1];
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ unituse[unit] = 1;
+ }
+ }
+ if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+ return -1;
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * PM_ISU can go either on TTM0 or TTM1, but that's the only
+ * choice we have to deal with.
+ */
+ if (unituse[PM_ISU] &
+ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
+ unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
+ /* Set TTM[01]SEL fields. */
+ ttmuse[0] = ttmuse[1] = 0;
+ for (i = PM_FPU; i <= PM_STS; ++i) {
+ if (!unituse[i])
+ continue;
+ ttm = unitmap[i];
+ ++ttmuse[(ttm >> 2) & 1];
+ mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
+ }
+ /* Check only one unit per TTMx */
+ if (ttmuse[0] > 1 || ttmuse[1] > 1)
+ return -1;
+
+ /* Set byte lane select fields and TTM3SEL. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit <= PM_STS)
+ ttm = (unitmap[unit] >> 2) & 1;
+ else if (unit == PM_LSU0)
+ ttm = 2;
+ else {
+ ttm = 3;
+ if (unit == PM_LSU1L && byte >= 2)
+ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+ }
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ if (unit)
+ psel |= 0x10 | ((byte & 2) << 2);
+ else
+ psel |= 8;
+ for (pmc = 0; pmc < 8; ++pmc) {
+ if (pmc_inuse & (1 << pmc))
+ continue;
+ grp = (pmc >> 1) & 1;
+ if (unit) {
+ if (grp == (byte & 1))
+ break;
+ } else if (pmc_grp_use[grp] < 4) {
+ ++pmc_grp_use[grp];
+ break;
+ }
+ }
+ pmc_inuse |= 1 << pmc;
+ } else {
+ /* Direct event */
+ --pmc;
+ if (psel == 0 && (byte & 2))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+ }
+ pmcsel[pmc] = psel;
+ hwc[i] = pmc;
+ spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+ mmcr1 |= spcsel;
+ if (p970_marked_instr_event(event[i]))
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+ }
+ for (pmc = 0; pmc < 2; ++pmc)
+ mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
+ for (; pmc < 8; ++pmc)
+ mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+ if (pmc_inuse & 1)
+ mmcr0 |= MMCR0_PMC1CE;
+ if (pmc_inuse & 0xfe)
+ mmcr0 |= MMCR0_PMCjCE;
+
+ mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
+
+ /* Return MMCRx values */
+ mmcr[0] = mmcr0;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ int shift, i;
+
+ if (pmc <= 1) {
+ shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+ i = 0;
+ } else {
+ shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+ i = 1;
+ }
+ /*
+ * Setting the PMCxSEL field to 0x08 disables PMC x.
+ */
+ mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
+}
+
+static int ppc970_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 7,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 1,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x8810, 0x3810 },
+ [C(OP_WRITE)] = { 0x7810, 0x813 },
+ [C(OP_PREFETCH)] = { 0x731, 0 },
+ },
+ [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { 0, 0 },
+ [C(OP_PREFETCH)] = { 0x733, 0 },
+ },
+ [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x704 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0x700 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0x431, 0x327 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+struct power_pmu ppc970_pmu = {
+ .n_counter = 8,
+ .max_alternatives = 2,
+ .add_fields = 0x001100005555ull,
+ .test_adder = 0x013300000000ull,
+ .compute_mmcr = p970_compute_mmcr,
+ .get_constraint = p970_get_constraint,
+ .get_alternatives = p970_get_alternatives,
+ .disable_pmc = p970_disable_pmc,
+ .n_generic = ARRAY_SIZE(ppc970_generic_events),
+ .generic_events = ppc970_generic_events,
+ .cache_events = &ppc970_cache_events,
+};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 76993941cac..5beffc8f481 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <linux/perf_counter.h>
#include <asm/firmware.h>
#include <asm/page.h>
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -309,6 +312,8 @@ good_area:
}
if (ret & VM_FAULT_MAJOR) {
current->maj_flt++;
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
#ifdef CONFIG_PPC_SMLPAR
if (firmware_has_feature(FW_FEATURE_CMO)) {
preempt_disable();
@@ -316,8 +321,11 @@ good_area:
preempt_enable();
}
#endif
- } else
+ } else {
current->min_flt++;
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
up_read(&mm->mmap_sem);
return 0;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9da795e4933..732ee93a8e9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
+ select HAVE_PERF_COUNTERS
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 80b513449f4..be3581a8c29 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq)
lpar_xirr_info_set((0xff << 24) | irq);
}
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
{
unsigned int irq;
int status;
@@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
irq = (unsigned int)irq_map[virq].hwirq;
if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
- return;
+ return -1;
status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
if (status) {
printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
__func__, irq, status);
- return;
+ return -1;
}
/*
@@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
printk(KERN_WARNING
"%s: No online cpus in the mask %s for irq %d\n",
__func__, cpulist, virq);
- return;
+ return -1;
}
status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
if (status) {
printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
__func__, irq, status);
- return;
+ return -1;
}
+
+ return 0;
}
static struct irq_chip xics_pic_direct = {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0efc12d1a3d..352d8c3ef52 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq)
#endif /* CONFIG_SMP */
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
mpic_physmask(cpus_addr(tmp)[0]));
}
+
+ return 0;
}
static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 3cef2af10f4..eff433c322a 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
#endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 639ac805448..65865726b28 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -102,8 +102,8 @@ struct thread_info {
#define TI_KERN_CNTD1 0x00000488
#define TI_PCR 0x00000490
#define TI_RESTART_BLOCK 0x00000498
-#define TI_KUNA_REGS 0x000004c0
-#define TI_KUNA_INSN 0x000004c8
+#define TI_KUNA_REGS 0x000004c8
+#define TI_KUNA_INSN 0x000004d0
#define TI_FPREGS 0x00000500
/* We embed this in the uppermost byte of thread_info->flags */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe921a4..e5e78f9cfc9 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
}
}
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
const struct cpumask *mask)
{
sun4u_irq_enable(virt_irq);
+
+ return 0;
}
/* Don't do anything. The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
ino, err);
}
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
const struct cpumask *mask)
{
unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
if (err != HV_EOK)
printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
"err(%d)\n", ino, cpuid, err);
+
+ return 0;
}
static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
dev_handle, dev_ino, err);
}
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
const struct cpumask *mask)
{
unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
"err(%d)\n",
dev_handle, dev_ino, cpuid, err);
+
+ return 0;
}
static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 00000000000..ad8ec356fb3
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,16 @@
+
+obj-$(CONFIG_KVM) += kvm/
+
+# Xen paravirtualization support
+obj-$(CONFIG_XEN) += xen/
+
+# lguest paravirtualization support
+obj-$(CONFIG_LGUEST_GUEST) += lguest/
+
+obj-y += kernel/
+obj-y += mm/
+
+obj-y += crypto/
+obj-y += vdso/
+obj-$(CONFIG_IA32_EMULATION) += ia32/
+
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a2e9a..68f5578fe38 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,11 @@ config X86
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+config OUTPUT_FORMAT
+ string
+ default "elf32-i386" if X86_32
+ default "elf64-x86-64" if X86_64
+
config ARCH_DEFCONFIG
string
default "arch/x86/configs/i386_defconfig" if X86_32
@@ -274,15 +279,9 @@ config SPARSE_IRQ
If you don't know what to do here, say N.
-config NUMA_MIGRATE_IRQ_DESC
- bool "Move irq desc when changing irq smp_affinity"
+config NUMA_IRQ_DESC
+ def_bool y
depends on SPARSE_IRQ && NUMA
- depends on BROKEN
- default n
- ---help---
- This enables moving irq_desc to cpu/node that irq will use handled.
-
- If you don't know what to do here, say N.
config X86_MPPARSE
bool "Enable MPS table" if ACPI
@@ -355,7 +354,7 @@ config X86_UV
depends on X86_64
depends on X86_EXTENDED_PLATFORM
depends on NUMA
- select X86_X2APIC
+ depends on X86_X2APIC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
@@ -740,6 +739,7 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y
@@ -1466,9 +1466,7 @@ config KEXEC_JUMP
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
- default "0x1000000" if X86_NUMAQ
- default "0x200000" if X86_64
- default "0x100000"
+ default "0x1000000"
---help---
This gives the physical address where the kernel is loaded.
@@ -1487,15 +1485,15 @@ config PHYSICAL_START
to be specifically compiled to run from a specific memory area
(normally a reserved region) and this option comes handy.
- So if you are using bzImage for capturing the crash dump, leave
- the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
- Otherwise if you plan to use vmlinux for capturing the crash dump
- change this value to start of the reserved region (Typically 16MB
- 0x1000000). In other words, it can be set based on the "X" value as
- specified in the "crashkernel=YM@XM" command line boot parameter
- passed to the panic-ed kernel. Typically this parameter is set as
- crashkernel=64M@16M. Please take a look at
- Documentation/kdump/kdump.txt for more details about crash dumps.
+ So if you are using bzImage for capturing the crash dump,
+ leave the value here unchanged to 0x1000000 and set
+ CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux
+ for capturing the crash dump change this value to start of
+ the reserved region. In other words, it can be set based on
+ the "X" value as specified in the "crashkernel=YM@XM"
+ command line boot parameter passed to the panic-ed
+ kernel. Please take a look at Documentation/kdump/kdump.txt
+ for more details about crash dumps.
Usage of bzImage for capturing the crash dump is recommended as
one does not have to build two kernels. Same kernel can be used
@@ -1508,8 +1506,8 @@ config PHYSICAL_START
Don't change this unless you know what you are doing.
config RELOCATABLE
- bool "Build a relocatable kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "Build a relocatable kernel"
+ default y
---help---
This builds a kernel image that retains relocation information
so it can be loaded someplace besides the default 1MB.
@@ -1524,12 +1522,16 @@ config RELOCATABLE
it has been loaded at and the compile time physical address
(CONFIG_PHYSICAL_START) is ignored.
+# Relocation on x86-32 needs some additional build support
+config X86_NEED_RELOCS
+ def_bool y
+ depends on X86_32 && RELOCATABLE
+
config PHYSICAL_ALIGN
hex
prompt "Alignment value to which kernel should be aligned" if X86_32
- default "0x100000" if X86_32
- default "0x200000" if X86_64
- range 0x2000 0x400000
+ default "0x1000000"
+ range 0x2000 0x1000000
---help---
This value puts the alignment restrictions on physical address
where kernel is loaded and run from. Kernel is compiled for an
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e73317..d105f29bb6b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,14 +159,30 @@ config IOMMU_DEBUG
options. See Documentation/x86_64/boot-options.txt for more
details.
+config IOMMU_STRESS
+ bool "Enable IOMMU stress-test mode"
+ ---help---
+ This option disables various optimizations in IOMMU related
+ code to do real stress testing of the IOMMU code. This option
+ will cause a performance drop and should only be enabled for
+ testing.
+
config IOMMU_LEAK
bool "IOMMU leak tracing"
- depends on DEBUG_KERNEL
- depends on IOMMU_DEBUG
+ depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config X86_DS_SELFTEST
+ bool "DS selftest"
+ default y
+ depends on DEBUG_KERNEL
+ depends on X86_DS
+ ---help---
+ Perform Debug Store selftests at boot time.
+ If in doubt, say "N".
+
config HAVE_MMIOTRACE_SUPPORT
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8c86b72afdc..edbd0ca6206 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,8 +7,6 @@ else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
-core-$(CONFIG_KVM) += arch/x86/kvm/
-
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
# e.g.: obj-y += foo_$(BITS).o
@@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/
-# Sub architecture files that needs linking first
-core-y += $(fcore-y)
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
-
-core-y += arch/x86/kernel/
-core-y += arch/x86/mm/
-
-core-y += arch/x86/crypto/
-core-y += arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+# See arch/x86/Kbuild for content of core part of the kernel
+core-y += arch/x86/
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 172cf8a98bd..851fe936d24 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,6 +3,8 @@ bzImage
cpustr.h
mkcpustr
offsets.h
+voffset.h
+zoffset.h
setup
setup.bin
setup.elf
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e7505..8d16ada2504 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,9 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed
-setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y += printf.o string.o tty.o video.o video-mode.o version.o
+setup-y += printf.o regs.o string.o tty.o video.o video-mode.o
+setup-y += version.o
setup-$(CONFIG_X86_APM_BOOT) += apm.o
# The link order of the video-*.o modules can matter. In particular,
@@ -86,19 +87,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-offsets := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
-quiet_cmd_offsets = OFFSETS $@
- cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+quiet_cmd_voffset = VOFFSET $@
+ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
-$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
- $(call if_changed,offsets)
+targets += voffset.h
+$(obj)/voffset.h: vmlinux FORCE
+ $(call if_changed,voffset)
+
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,zoffset)
-targets += offsets.h
AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/offsets.h
+$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 7c19ce8c244..64a31a6d751 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,7 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
- * Copyright 2009 Intel Corporation
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -90,8 +90,11 @@ static int a20_test_long(void)
static void enable_a20_bios(void)
{
- asm volatile("pushfl; int $0x15; popfl"
- : : "a" ((u16)0x2401));
+ struct biosregs ireg;
+
+ initregs(&ireg);
+ ireg.ax = 0x2401;
+ intcall(0x15, &ireg, NULL);
}
static void enable_a20_kbc(void)
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index 7aa6033001f..ee274834ea8 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* Original APM BIOS checking by Stephen Rothwell, May 1994
* (sfr@canb.auug.org.au)
@@ -19,75 +20,56 @@
int query_apm_bios(void)
{
- u16 ax, bx, cx, dx, di;
- u32 ebx, esi;
- u8 err;
+ struct biosregs ireg, oreg;
/* APM BIOS installation check */
- ax = 0x5300;
- bx = cx = 0;
- asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
- : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
- : : "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x53;
+ intcall(0x15, &ireg, &oreg);
- if (err)
+ if (oreg.flags & X86_EFLAGS_CF)
return -1; /* No APM BIOS */
- if (bx != 0x504d) /* "PM" signature */
+ if (oreg.bx != 0x504d) /* "PM" signature */
return -1;
- if (!(cx & 0x02)) /* 32 bits supported? */
+ if (!(oreg.cx & 0x02)) /* 32 bits supported? */
return -1;
/* Disconnect first, just in case */
- ax = 0x5304;
- bx = 0;
- asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
- : "+a" (ax), "+b" (bx)
- : : "ecx", "edx", "esi", "edi");
-
- /* Paranoia */
- ebx = esi = 0;
- cx = dx = di = 0;
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
/* 32-bit connect */
- asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6"
- : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx),
- "+S" (esi), "+D" (di), "=m" (err)
- : "a" (0x5303));
-
- boot_params.apm_bios_info.cseg = ax;
- boot_params.apm_bios_info.offset = ebx;
- boot_params.apm_bios_info.cseg_16 = cx;
- boot_params.apm_bios_info.dseg = dx;
- boot_params.apm_bios_info.cseg_len = (u16)esi;
- boot_params.apm_bios_info.cseg_16_len = esi >> 16;
- boot_params.apm_bios_info.dseg_len = di;
-
- if (err)
+ ireg.al = 0x03;
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.apm_bios_info.cseg = oreg.ax;
+ boot_params.apm_bios_info.offset = oreg.ebx;
+ boot_params.apm_bios_info.cseg_16 = oreg.cx;
+ boot_params.apm_bios_info.dseg = oreg.dx;
+ boot_params.apm_bios_info.cseg_len = oreg.si;
+ boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+ boot_params.apm_bios_info.dseg_len = oreg.di;
+
+ if (oreg.flags & X86_EFLAGS_CF)
return -1;
/* Redo the installation check as the 32-bit connect;
some BIOSes return different flags this way... */
- ax = 0x5300;
- bx = cx = 0;
- asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
- : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
- : : "esi", "edi");
+ ireg.al = 0x00;
+ intcall(0x15, &ireg, &oreg);
- if (err || bx != 0x504d) {
+ if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
/* Failure with 32-bit connect, try to disconect and ignore */
- ax = 0x5304;
- bx = 0;
- asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
- : "+a" (ax), "+b" (bx)
- : : "ecx", "edx", "esi", "edi");
+ ireg.al = 0x04;
+ intcall(0x15, &ireg, NULL);
return -1;
}
- boot_params.apm_bios_info.version = ax;
- boot_params.apm_bios_info.flags = cx;
+ boot_params.apm_bios_info.version = oreg.ax;
+ boot_params.apm_bios_info.flags = oreg.cx;
return 0;
}
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 00000000000..507793739ea
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+ .code16
+ .text
+ .globl intcall
+ .type intcall, @function
+intcall:
+ /* Self-modify the INT instruction. Ugly, but works. */
+ cmpb %al, 3f
+ je 1f
+ movb %al, 3f
+ jmp 1f /* Synchronize pipeline */
+1:
+ /* Save state */
+ pushfl
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Copy input state to stack frame */
+ subw $44, %sp
+ movw %dx, %si
+ movw %sp, %di
+ movw $11, %cx
+ rep; movsd
+
+ /* Pop full state from the stack */
+ popal
+ popw %gs
+ popw %fs
+ popw %es
+ popw %ds
+ popfl
+
+ /* Actual INT */
+ .byte 0xcd /* INT opcode */
+3: .byte 0
+
+ /* Push full state to the stack */
+ pushfl
+ pushw %ds
+ pushw %es
+ pushw %fs
+ pushw %gs
+ pushal
+
+ /* Re-establish C environment invariants */
+ cld
+ movzwl %sp, %esp
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+
+ /* Copy output state from stack frame */
+ movw 68(%esp), %di /* Original %cx == 3rd argument */
+ andw %di, %di
+ jz 4f
+ movw %sp, %si
+ movw $11, %cx
+ rep; movsd
+4: addw $44, %sp
+
+ /* Restore state and return */
+ popal
+ popw %gs
+ popw %fs
+ popfl
+ retl
+ .size intcall, .-intcall
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7b2692e897e..98239d2658f 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -26,6 +27,7 @@
#include <asm/setup.h>
#include "bitops.h"
#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -241,6 +243,49 @@ int enable_a20(void);
/* apm.c */
int query_apm_bios(void);
+/* bioscall.c */
+struct biosregs {
+ union {
+ struct {
+ u32 edi;
+ u32 esi;
+ u32 ebp;
+ u32 _esp;
+ u32 ebx;
+ u32 edx;
+ u32 ecx;
+ u32 eax;
+ u32 _fsgs;
+ u32 _dses;
+ u32 eflags;
+ };
+ struct {
+ u16 di, hdi;
+ u16 si, hsi;
+ u16 bp, hbp;
+ u16 _sp, _hsp;
+ u16 bx, hbx;
+ u16 dx, hdx;
+ u16 cx, hcx;
+ u16 ax, hax;
+ u16 gs, fs;
+ u16 es, ds;
+ u16 flags, hflags;
+ };
+ struct {
+ u8 dil, dih, edi2, edi3;
+ u8 sil, sih, esi2, esi3;
+ u8 bpl, bph, ebp2, ebp3;
+ u8 _spl, _sph, _esp2, _esp3;
+ u8 bl, bh, ebx2, ebx3;
+ u8 dl, dh, edx2, edx3;
+ u8 cl, ch, ecx2, ecx3;
+ u8 al, ah, eax2, eax3;
+ };
+ };
+};
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
@@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...);
int vsprintf(char *buf, const char *fmt, va_list args);
int printf(const char *fmt, ...);
+/* regs.c */
+void initregs(struct biosregs *regs);
+
/* string.c */
int strcmp(const char *str1, const char *str2);
size_t strnlen(const char *s, size_t maxlen);
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 63eff3b04d0..4a46fab7162 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,6 @@
relocs
vmlinux.bin.all
vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 65551c9f857..49c8a4c37d7 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -19,7 +19,9 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+hostprogs-y := mkpiggy
+
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
$(call if_changed,ld)
@:
@@ -29,7 +31,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_32) += relocs
+hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -37,46 +39,22 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
$(call if_changed,relocs)
vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD $@
- cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,relocbin)
-
-ifeq ($(CONFIG_X86_32),y)
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
- $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
- $(call if_changed,lzma)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzma)
-endif
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-else
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
- $(call if_changed,lzma)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-endif
+quiet_cmd_mkpiggy = MKPIGGY $@
+ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
-suffix_$(CONFIG_KERNEL_GZIP) = gz
-suffix_$(CONFIG_KERNEL_BZIP2) = bz2
-suffix_$(CONFIG_KERNEL_LZMA) = lzma
-
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
- $(call if_changed,ld)
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+ $(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3a8a866fb2e..75e4f001e70 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -12,16 +12,16 @@
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
- * Page 0 is deliberately kept safe, since System Management Mode code in
+ * Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
- * useful for future device drivers that either access the BIOS via VM86
+ * useful for future device drivers that either access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
-.text
+ .text
#include <linux/linkage.h>
#include <asm/segment.h>
@@ -29,161 +29,151 @@
#include <asm/boot.h>
#include <asm/asm-offsets.h>
-.section ".text.head","ax",@progbits
+ .section ".text.head","ax",@progbits
ENTRY(startup_32)
cld
- /* test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments */
- testb $(1<<6), BP_loadflags(%esi)
- jnz 1f
+ /*
+ * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+ * us to not reload segments
+ */
+ testb $(1<<6), BP_loadflags(%esi)
+ jnz 1f
cli
- movl $(__BOOT_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
- movl %eax,%ss
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
1:
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
- leal (0x1e4+4)(%esi), %esp
- call 1f
-1: popl %ebp
- subl $1b, %ebp
+ leal (BP_scratch+4)(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+/*
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
- movl %ebp, %ebx
- addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
- andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+ movl %ebp, %ebx
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
#else
- movl $LOAD_PHYSICAL_ADDR, %ebx
+ movl $LOAD_PHYSICAL_ADDR, %ebx
#endif
- /* Replace the compressed data size with the uncompressed size */
- subl input_len(%ebp), %ebx
- movl output_len(%ebp), %eax
- addl %eax, %ebx
- /* Add 8 bytes for every 32K input block */
- shrl $12, %eax
- addl %eax, %ebx
- /* Add 32K + 18 bytes of extra slack */
- addl $(32768 + 18), %ebx
- /* Align on a 4K boundary */
- addl $4095, %ebx
- andl $~4095, %ebx
-
-/* Copy the compressed kernel to the end of our buffer
+ /* Target address to relocate to for decompression */
+ addl $z_extract_offset, %ebx
+
+ /* Set up the stack */
+ leal boot_stack_end(%ebx), %esp
+
+ /* Zero EFLAGS */
+ pushl $0
+ popfl
+
+/*
+ * Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
- pushl %esi
- leal _end(%ebp), %esi
- leal _end(%ebx), %edi
- movl $(_end - startup_32), %ecx
+ pushl %esi
+ leal (_bss-4)(%ebp), %esi
+ leal (_bss-4)(%ebx), %edi
+ movl $(_bss - startup_32), %ecx
+ shrl $2, %ecx
std
- rep
- movsb
+ rep movsl
cld
- popl %esi
-
-/* Compute the kernel start address.
- */
-#ifdef CONFIG_RELOCATABLE
- addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
- andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
-#else
- movl $LOAD_PHYSICAL_ADDR, %ebp
-#endif
+ popl %esi
/*
* Jump to the relocated address.
*/
- leal relocated(%ebx), %eax
- jmp *%eax
+ leal relocated(%ebx), %eax
+ jmp *%eax
ENDPROC(startup_32)
-.section ".text"
+ .text
relocated:
/*
- * Clear BSS
- */
- xorl %eax,%eax
- leal _edata(%ebx),%edi
- leal _end(%ebx), %ecx
- subl %edi,%ecx
- cld
- rep
- stosb
-
-/*
- * Setup the stack for the decompressor
+ * Clear BSS (stack is currently empty)
*/
- leal boot_stack_end(%ebx), %esp
+ xorl %eax, %eax
+ leal _bss(%ebx), %edi
+ leal _ebss(%ebx), %ecx
+ subl %edi, %ecx
+ shrl $2, %ecx
+ rep stosl
/*
* Do the decompression, and jump to the new kernel..
*/
- movl output_len(%ebx), %eax
- pushl %eax
- # push arguments for decompress_kernel:
- pushl %ebp # output address
- movl input_len(%ebx), %eax
- pushl %eax # input_len
- leal input_data(%ebx), %eax
- pushl %eax # input_data
- leal boot_heap(%ebx), %eax
- pushl %eax # heap area
- pushl %esi # real mode pointer
- call decompress_kernel
- addl $20, %esp
- popl %ecx
+ leal z_extract_offset_negative(%ebx), %ebp
+ /* push arguments for decompress_kernel: */
+ pushl %ebp /* output address */
+ pushl $z_input_len /* input_len */
+ leal input_data(%ebx), %eax
+ pushl %eax /* input_data */
+ leal boot_heap(%ebx), %eax
+ pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+ call decompress_kernel
+ addl $20, %esp
#if CONFIG_RELOCATABLE
-/* Find the address of the relocations.
+/*
+ * Find the address of the relocations.
*/
- movl %ebp, %edi
- addl %ecx, %edi
+ leal z_output_len(%ebp), %edi
-/* Calculate the delta between where vmlinux was compiled to run
+/*
+ * Calculate the delta between where vmlinux was compiled to run
* and where it was actually loaded.
*/
- movl %ebp, %ebx
- subl $LOAD_PHYSICAL_ADDR, %ebx
- jz 2f /* Nothing to be done if loaded at compiled addr. */
+ movl %ebp, %ebx
+ subl $LOAD_PHYSICAL_ADDR, %ebx
+ jz 2f /* Nothing to be done if loaded at compiled addr. */
/*
* Process relocations.
*/
-1: subl $4, %edi
- movl 0(%edi), %ecx
- testl %ecx, %ecx
- jz 2f
- addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
- jmp 1b
+1: subl $4, %edi
+ movl (%edi), %ecx
+ testl %ecx, %ecx
+ jz 2f
+ addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
+ jmp 1b
2:
#endif
/*
* Jump to the decompressed kernel.
*/
- xorl %ebx,%ebx
- jmp *%ebp
+ xorl %ebx, %ebx
+ jmp *%ebp
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ed4a8294800..f62c284db9e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -21,8 +21,8 @@
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
-.code32
-.text
+ .code32
+ .text
#include <linux/linkage.h>
#include <asm/segment.h>
@@ -33,12 +33,14 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
-.section ".text.head"
+ .section ".text.head"
.code32
ENTRY(startup_32)
cld
- /* test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments */
+ /*
+ * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+ * us to not reload segments
+ */
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
@@ -49,14 +51,15 @@ ENTRY(startup_32)
movl %eax, %ss
1:
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
- leal (0x1e4+4)(%esi), %esp
+ leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
@@ -70,32 +73,28 @@ ENTRY(startup_32)
testl %eax, %eax
jnz no_longmode
-/* Compute the delta between where we were compiled to run at
+/*
+ * Compute the delta between where we were compiled to run at
* and where the code will actually run at.
- */
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
- addl $(PMD_PAGE_SIZE -1), %ebx
- andl $PMD_PAGE_MASK, %ebx
+ movl BP_kernel_alignment(%esi), %eax
+ decl %eax
+ addl %eax, %ebx
+ notl %eax
+ andl %eax, %ebx
#else
- movl $CONFIG_PHYSICAL_START, %ebx
+ movl $LOAD_PHYSICAL_ADDR, %ebx
#endif
- /* Replace the compressed data size with the uncompressed size */
- subl input_len(%ebp), %ebx
- movl output_len(%ebp), %eax
- addl %eax, %ebx
- /* Add 8 bytes for every 32K input block */
- shrl $12, %eax
- addl %eax, %ebx
- /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
- addl $(32768 + 18 + 4095), %ebx
- andl $~4095, %ebx
+ /* Target address to relocate to for decompression */
+ addl $z_extract_offset, %ebx
/*
* Prepare for entering 64 bit mode
@@ -114,7 +113,7 @@ ENTRY(startup_32)
/*
* Build early 4G boot pagetable
*/
- /* Initialize Page tables to 0*/
+ /* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
movl $((4096*6)/4), %ecx
@@ -155,7 +154,8 @@ ENTRY(startup_32)
btsl $_EFER_LME, %eax
wrmsr
- /* Setup for the jump to 64bit mode
+ /*
+ * Setup for the jump to 64bit mode
*
* When the jump is performend we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
@@ -184,7 +184,8 @@ no_longmode:
#include "../../kernel/verify_cpu_64.S"
- /* Be careful here startup_64 needs to be at a predictable
+ /*
+ * Be careful here startup_64 needs to be at a predictable
* address so I can export it in an ELF header. Bootloaders
* should look at the ELF header to find this address, as
* it may change in the future.
@@ -192,7 +193,8 @@ no_longmode:
.code64
.org 0x200
ENTRY(startup_64)
- /* We come here either from startup_32 or directly from a
+ /*
+ * We come here either from startup_32 or directly from a
* 64bit bootloader. If we come here from a bootloader we depend on
* an identity mapped page table being provied that maps our
* entire text+data+bss and hopefully all of memory.
@@ -209,50 +211,54 @@ ENTRY(startup_64)
movl $0x20, %eax
ltr %ax
- /* Compute the decompressed kernel start address. It is where
+ /*
+ * Compute the decompressed kernel start address. It is where
* we were loaded at aligned to a 2M boundary. %rbp contains the
* decompressed kernel start address.
*
* If it is a relocatable kernel then decompress and run the kernel
* from load address aligned to 2MB addr, otherwise decompress and
- * run the kernel from CONFIG_PHYSICAL_START
+ * run the kernel from LOAD_PHYSICAL_ADDR
+ *
+ * We cannot rely on the calculation done in 32-bit mode, since we
+ * may have been invoked via the 64-bit entry point.
*/
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(PMD_PAGE_SIZE - 1), %rbp
- andq $PMD_PAGE_MASK, %rbp
- movq %rbp, %rbx
+ movl BP_kernel_alignment(%rsi), %eax
+ decl %eax
+ addq %rax, %rbp
+ notq %rax
+ andq %rax, %rbp
#else
- movq $CONFIG_PHYSICAL_START, %rbp
- movq %rbp, %rbx
+ movq $LOAD_PHYSICAL_ADDR, %rbp
#endif
- /* Replace the compressed data size with the uncompressed size */
- movl input_len(%rip), %eax
- subq %rax, %rbx
- movl output_len(%rip), %eax
- addq %rax, %rbx
- /* Add 8 bytes for every 32K input block */
- shrq $12, %rax
- addq %rax, %rbx
- /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
- addq $(32768 + 18 + 4095), %rbx
- andq $~4095, %rbx
-
-/* Copy the compressed kernel to the end of our buffer
+ /* Target address to relocate to for decompression */
+ leaq z_extract_offset(%rbp), %rbx
+
+ /* Set up the stack */
+ leaq boot_stack_end(%rbx), %rsp
+
+ /* Zero EFLAGS */
+ pushq $0
+ popfq
+
+/*
+ * Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
- leaq _end_before_pgt(%rip), %r8
- leaq _end_before_pgt(%rbx), %r9
- movq $_end_before_pgt /* - $startup_32 */, %rcx
-1: subq $8, %r8
- subq $8, %r9
- movq 0(%r8), %rax
- movq %rax, 0(%r9)
- subq $8, %rcx
- jnz 1b
+ pushq %rsi
+ leaq (_bss-8)(%rip), %rsi
+ leaq (_bss-8)(%rbx), %rdi
+ movq $_bss /* - $startup_32 */, %rcx
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ popq %rsi
/*
* Jump to the relocated address.
@@ -260,37 +266,28 @@ ENTRY(startup_64)
leaq relocated(%rbx), %rax
jmp *%rax
-.section ".text"
+ .text
relocated:
/*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
*/
- xorq %rax, %rax
- leaq _edata(%rbx), %rdi
- leaq _end_before_pgt(%rbx), %rcx
+ xorl %eax, %eax
+ leaq _bss(%rip), %rdi
+ leaq _ebss(%rip), %rcx
subq %rdi, %rcx
- cld
- rep
- stosb
-
- /* Setup the stack */
- leaq boot_stack_end(%rip), %rsp
-
- /* zero EFLAGS after setting rsp */
- pushq $0
- popfq
+ shrq $3, %rcx
+ rep stosq
/*
* Do the decompression, and jump to the new kernel..
*/
- pushq %rsi # Save the real mode argument
- movq %rsi, %rdi # real mode address
- leaq boot_heap(%rip), %rsi # malloc area for uncompression
- leaq input_data(%rip), %rdx # input_data
- movl input_len(%rip), %eax
- movq %rax, %rcx # input_len
- movq %rbp, %r8 # output
+ pushq %rsi /* Save the real mode argument */
+ movq %rsi, %rdi /* real mode address */
+ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
+ leaq input_data(%rip), %rdx /* input_data */
+ movl $z_input_len, %ecx /* input_len */
+ movq %rbp, %r8 /* output target address */
call decompress_kernel
popq %rsi
@@ -311,11 +308,21 @@ gdt:
.quad 0x0000000000000000 /* TS continued */
gdt_end:
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+ .bss
+ .balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+ .section ".pgtable","a",@nobits
+ .balign 4096
+pgtable:
+ .fill 6*4096, 1, 0
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index e45be73684f..842b2a36174 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,21 +325,19 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+ error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
- if ((unsigned long)output & (__KERNEL_ALIGN - 1))
- error("Destination address not 2M aligned");
- if ((unsigned long)output >= 0xffffffffffUL)
+ if (heap > 0x3fffffffffffUL)
error("Destination address too large");
#else
- if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
- error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
error("Destination address too large");
+#endif
#ifndef CONFIG_RELOCATABLE
- if ((u32)output != LOAD_PHYSICAL_ADDR)
+ if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Wrong destination address");
#endif
-#endif
if (!quiet)
putstr("\nDecompressing Linux... ");
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 00000000000..bcbd36c4143
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Compute the desired load offset from a compressed program; outputs
+ * a small assembly wrapper with the appropriate symbols defined.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+static uint32_t getle32(const void *p)
+{
+ const uint8_t *cp = p;
+
+ return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
+ ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
+}
+
+int main(int argc, char *argv[])
+{
+ uint32_t olen;
+ long ilen;
+ unsigned long offs;
+ FILE *f;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+ return 1;
+ }
+
+ /* Get the information for the compressed kernel image first */
+
+ f = fopen(argv[1], "r");
+ if (!f) {
+ perror(argv[1]);
+ return 1;
+ }
+
+
+ if (fseek(f, -4L, SEEK_END)) {
+ perror(argv[1]);
+ }
+ fread(&olen, sizeof olen, 1, f);
+ ilen = ftell(f);
+ olen = getle32(&olen);
+ fclose(f);
+
+ /*
+ * Now we have the input (compressed) and output (uncompressed)
+ * sizes, compute the necessary decompression offset...
+ */
+
+ offs = (olen > ilen) ? olen - ilen : 0;
+ offs += olen >> 12; /* Add 8 bytes for each 32K block */
+ offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */
+ offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+
+ printf(".section \".rodata.compressed\",\"a\",@progbits\n");
+ printf(".globl z_input_len\n");
+ printf("z_input_len = %lu\n", ilen);
+ printf(".globl z_output_len\n");
+ printf("z_output_len = %lu\n", (unsigned long)olen);
+ printf(".globl z_extract_offset\n");
+ printf("z_extract_offset = 0x%lx\n", offs);
+ /* z_extract_offset_negative allows simplification of head_32.S */
+ printf(".globl z_extract_offset_negative\n");
+ printf("z_extract_offset_negative = -0x%lx\n", offs);
+
+ printf(".globl input_data, input_data_end\n");
+ printf("input_data:\n");
+ printf(".incbin \"%s\"\n", argv[1]);
+ printf("input_data_end:\n");
+
+ return 0;
+}
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux.lds.S
index bef1ac891bc..cc353e1b3ff 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,6 +1,17 @@
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#undef i386
+
+#include <asm/page_types.h>
+
+#ifdef CONFIG_X86_64
OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
SECTIONS
{
/* Be careful parts of head_64.S assume startup_32 is at
@@ -33,16 +44,22 @@ SECTIONS
*(.data.*)
_edata = . ;
}
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.bss : {
_bss = . ;
*(.bss)
*(.bss.*)
*(COMMON)
- . = ALIGN(8);
- _end_before_pgt = . ;
- . = ALIGN(4096);
- pgtable = . ;
- . = . + 4096 * 6;
+ . = ALIGN(8); /* For convenience during zeroing */
_ebss = .;
}
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ .pgtable : {
+ _pgtable = . ;
+ *(.pgtable)
+ _epgtable = . ;
+ }
+#endif
+ _end = .;
}
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr
deleted file mode 100644
index f02382ae5c4..00000000000
--- a/arch/x86/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
- .rodata.compressed : {
- input_len = .;
- LONG(input_data_end - input_data) input_data = .;
- *(.data)
- output_len = . - 4;
- input_data_end = .;
- }
-}
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
deleted file mode 100644
index bb3c48379c4..00000000000
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ /dev/null
@@ -1,43 +0,0 @@
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(startup_32)
-SECTIONS
-{
- /* Be careful parts of head_32.S assume startup_32 is at
- * address 0.
- */
- . = 0;
- .text.head : {
- _head = . ;
- *(.text.head)
- _ehead = . ;
- }
- .rodata.compressed : {
- *(.rodata.compressed)
- }
- .text : {
- _text = .; /* Text */
- *(.text)
- *(.text.*)
- _etext = . ;
- }
- .rodata : {
- _rodata = . ;
- *(.rodata) /* read-only data */
- *(.rodata.*)
- _erodata = . ;
- }
- .data : {
- _data = . ;
- *(.data)
- *(.data.*)
- _edata = . ;
- }
- .bss : {
- _bss = . ;
- *(.bss)
- *(.bss.*)
- *(COMMON)
- _end = . ;
- }
-}
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 1aae8f3e5ca..c501a5b466f 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -22,17 +23,17 @@
*/
static int read_mbr(u8 devno, void *buf)
{
- u16 ax, bx, cx, dx;
+ struct biosregs ireg, oreg;
- ax = 0x0201; /* Legacy Read, one sector */
- cx = 0x0001; /* Sector 0-0-1 */
- dx = devno;
- bx = (size_t)buf;
- asm volatile("pushfl; stc; int $0x13; setc %%al; popfl"
- : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
- : : "esi", "edi", "memory");
+ initregs(&ireg);
+ ireg.ax = 0x0201; /* Legacy Read, one sector */
+ ireg.cx = 0x0001; /* Sector 0-0-1 */
+ ireg.dl = devno;
+ ireg.bx = (size_t)buf;
- return -(u8)ax; /* 0 or -1 */
+ intcall(0x13, &ireg, &oreg);
+
+ return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
@@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
static int get_edd_info(u8 devno, struct edd_info *ei)
{
- u16 ax, bx, cx, dx, di;
+ struct biosregs ireg, oreg;
memset(ei, 0, sizeof *ei);
/* Check Extensions Present */
- ax = 0x4100;
- bx = EDDMAGIC1;
- dx = devno;
- asm("pushfl; stc; int $0x13; setc %%al; popfl"
- : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
- : : "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x41;
+ ireg.bx = EDDMAGIC1;
+ ireg.dl = devno;
+ intcall(0x13, &ireg, &oreg);
- if ((u8)ax)
+ if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No extended information */
- if (bx != EDDMAGIC2)
+ if (oreg.bx != EDDMAGIC2)
return -1;
ei->device = devno;
- ei->version = ax >> 8; /* EDD version number */
- ei->interface_support = cx; /* EDD functionality subsets */
+ ei->version = oreg.ah; /* EDD version number */
+ ei->interface_support = oreg.cx; /* EDD functionality subsets */
/* Extended Get Device Parameters */
ei->params.length = sizeof(ei->params);
- ax = 0x4800;
- dx = devno;
- asm("pushfl; int $0x13; popfl"
- : "+a" (ax), "+d" (dx), "=m" (ei->params)
- : "S" (&ei->params)
- : "ebx", "ecx", "edi");
+ ireg.ah = 0x48;
+ ireg.si = (size_t)&ei->params;
+ intcall(0x13, &ireg, &oreg);
/* Get legacy CHS parameters */
/* Ralf Brown recommends setting ES:DI to 0:0 */
- ax = 0x0800;
- dx = devno;
- di = 0;
- asm("pushw %%es; "
- "movw %%di,%%es; "
- "pushfl; stc; int $0x13; setc %%al; popfl; "
- "popw %%es"
- : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
- : : "esi");
-
- if ((u8)ax == 0) {
- ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
- ei->legacy_max_head = dx >> 8;
- ei->legacy_sectors_per_track = cx & 0x3f;
+ ireg.ah = 0x08;
+ ireg.es = 0;
+ intcall(0x13, &ireg, &oreg);
+
+ if (!(oreg.eflags & X86_EFLAGS_CF)) {
+ ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+ ei->legacy_max_head = oreg.dh;
+ ei->legacy_sectors_per_track = oreg.cl & 0x3f;
}
return 0;
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 5d84d1c74e4..b31cc54b464 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -22,7 +22,8 @@
#include <asm/page_types.h>
#include <asm/setup.h>
#include "boot.h"
-#include "offsets.h"
+#include "voffset.h"
+#include "zoffset.h"
BOOTSEG = 0x07C0 /* original address of boot-sector */
SYSSEG = 0x1000 /* historical load address >> 4 */
@@ -115,7 +116,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x0209 # header version number (>= 0x0105)
+ .word 0x020a # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -168,7 +169,11 @@ heap_end_ptr: .word _end+STACK_SIZE-512
# end of setup code can be used by setup
# for local heap purposes.
-pad1: .word 0
+ext_loader_ver:
+ .byte 0 # Extended boot loader version
+ext_loader_type:
+ .byte 0 # Extended boot loader type
+
cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# If nonzero, a 32-bit pointer
# to the kernel command line.
@@ -200,7 +205,7 @@ relocatable_kernel: .byte 1
#else
relocatable_kernel: .byte 0
#endif
-pad2: .byte 0
+min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
pad3: .word 0
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
@@ -212,16 +217,27 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07
hardware_subarch_data: .quad 0
-payload_offset: .long input_data
-payload_length: .long input_data_end-input_data
+payload_offset: .long ZO_input_data
+payload_length: .long ZO_z_input_len
setup_data: .quad 0 # 64-bit physical pointer to
# single linked list of
# struct setup_data
+pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
+
+#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#define VO_INIT_SIZE (VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+#define INIT_SIZE ZO_INIT_SIZE
+#else
+#define INIT_SIZE VO_INIT_SIZE
+#endif
+init_size: .long INIT_SIZE # kernel initialization size
+
# End of setup header #####################################################
- .section ".inittext", "ax"
+ .section ".entrytext", "ax"
start_of_setup:
#ifdef SAFE_RESET_DISK_CONTROLLER
# Reset the disk controller.
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 58f0415d3ae..140172b895b 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -61,11 +62,10 @@ static void copy_boot_params(void)
*/
static void keyboard_set_repeat(void)
{
- u16 ax = 0x0305;
- u16 bx = 0;
- asm volatile("int $0x16"
- : "+a" (ax), "+b" (bx)
- : : "ecx", "edx", "esi", "edi");
+ struct biosregs ireg;
+ initregs(&ireg);
+ ireg.ax = 0x0305;
+ intcall(0x16, &ireg, NULL);
}
/*
@@ -73,18 +73,22 @@ static void keyboard_set_repeat(void)
*/
static void query_ist(void)
{
+ struct biosregs ireg, oreg;
+
/* Some older BIOSes apparently crash on this call, so filter
it from machines too old to have SpeedStep at all. */
if (cpu.level < 6)
return;
- asm("int $0x15"
- : "=a" (boot_params.ist_info.signature),
- "=b" (boot_params.ist_info.command),
- "=c" (boot_params.ist_info.event),
- "=d" (boot_params.ist_info.perf_level)
- : "a" (0x0000e980), /* IST Support */
- "d" (0x47534943)); /* Request value */
+ initregs(&ireg);
+ ireg.ax = 0xe980; /* IST Support */
+ ireg.edx = 0x47534943; /* Request value */
+ intcall(0x15, &ireg, &oreg);
+
+ boot_params.ist_info.signature = oreg.eax;
+ boot_params.ist_info.command = oreg.ebx;
+ boot_params.ist_info.event = oreg.ecx;
+ boot_params.ist_info.perf_level = oreg.edx;
}
/*
@@ -93,13 +97,12 @@ static void query_ist(void)
static void set_bios_mode(void)
{
#ifdef CONFIG_X86_64
- u32 eax, ebx;
+ struct biosregs ireg;
- eax = 0xec00;
- ebx = 2;
- asm volatile("int $0x15"
- : "+a" (eax), "+b" (ebx)
- : : "ecx", "edx", "esi", "edi");
+ initregs(&ireg);
+ ireg.ax = 0xec00;
+ ireg.bx = 2;
+ intcall(0x15, &ireg, NULL);
#endif
}
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 911eaae5d69..a95a531148e 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -16,26 +17,22 @@
int query_mca(void)
{
- u8 err;
- u16 es, bx, len;
-
- asm("pushw %%es ; "
- "int $0x15 ; "
- "setc %0 ; "
- "movw %%es, %1 ; "
- "popw %%es"
- : "=acd" (err), "=acdSD" (es), "=b" (bx)
- : "a" (0xc000));
-
- if (err)
+ struct biosregs ireg, oreg;
+ u16 len;
+
+ initregs(&ireg);
+ ireg.ah = 0xc0;
+ intcall(0x15, &ireg, &oreg);
+
+ if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No MCA present */
- set_fs(es);
- len = rdfs16(bx);
+ set_fs(oreg.es);
+ len = rdfs16(oreg.bx);
if (len > sizeof(boot_params.sys_desc_table))
len = sizeof(boot_params.sys_desc_table);
- copy_from_fs(&boot_params.sys_desc_table, bx, len);
+ copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
return 0;
}
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 74b3d2ba84e..cae3feb1035 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -20,12 +20,16 @@
static int detect_memory_e820(void)
{
int count = 0;
- u32 next = 0;
- u32 size, id, edi;
- u8 err;
+ struct biosregs ireg, oreg;
struct e820entry *desc = boot_params.e820_map;
static struct e820entry buf; /* static so it is zeroed */
+ initregs(&ireg);
+ ireg.ax = 0xe820;
+ ireg.cx = sizeof buf;
+ ireg.edx = SMAP;
+ ireg.di = (size_t)&buf;
+
/*
* Note: at least one BIOS is known which assumes that the
* buffer pointed to by one e820 call is the same one as
@@ -41,22 +45,13 @@ static int detect_memory_e820(void)
*/
do {
- size = sizeof buf;
-
- /* Important: %edx and %esi are clobbered by some BIOSes,
- so they must be either used for the error output
- or explicitly marked clobbered. Given that, assume there
- is something out there clobbering %ebp and %edi, too. */
- asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
- : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
- "=D" (edi), "+m" (buf)
- : "D" (&buf), "d" (SMAP), "a" (0xe820)
- : "esi");
+ intcall(0x15, &ireg, &oreg);
+ ireg.ebx = oreg.ebx; /* for next iteration... */
/* BIOSes which terminate the chain with CF = 1 as opposed
to %ebx = 0 don't always report the SMAP signature on
the final, failing, probe. */
- if (err)
+ if (oreg.eflags & X86_EFLAGS_CF)
break;
/* Some BIOSes stop returning SMAP in the middle of
@@ -64,60 +59,64 @@ static int detect_memory_e820(void)
screwed up the map at that point, we might have a
partial map, the full map, or complete garbage, so
just return failure. */
- if (id != SMAP) {
+ if (oreg.eax != SMAP) {
count = 0;
break;
}
*desc++ = buf;
count++;
- } while (next && count < ARRAY_SIZE(boot_params.e820_map));
+ } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
return boot_params.e820_entries = count;
}
static int detect_memory_e801(void)
{
- u16 ax, bx, cx, dx;
- u8 err;
+ struct biosregs ireg, oreg;
- bx = cx = dx = 0;
- ax = 0xe801;
- asm("stc; int $0x15; setc %0"
- : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
+ initregs(&ireg);
+ ireg.ax = 0xe801;
+ intcall(0x15, &ireg, &oreg);
- if (err)
+ if (oreg.eflags & X86_EFLAGS_CF)
return -1;
/* Do we really need to do this? */
- if (cx || dx) {
- ax = cx;
- bx = dx;
+ if (oreg.cx || oreg.dx) {
+ oreg.ax = oreg.cx;
+ oreg.bx = oreg.dx;
}
- if (ax > 15*1024)
+ if (oreg.ax > 15*1024) {
return -1; /* Bogus! */
-
- /* This ignores memory above 16MB if we have a memory hole
- there. If someone actually finds a machine with a memory
- hole at 16MB and no support for 0E820h they should probably
- generate a fake e820 map. */
- boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
+ } else if (oreg.ax == 15*1024) {
+ boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+ } else {
+ /*
+ * This ignores memory above 16MB if we have a memory
+ * hole there. If someone actually finds a machine
+ * with a memory hole at 16MB and no support for
+ * 0E820h they should probably generate a fake e820
+ * map.
+ */
+ boot_params.alt_mem_k = oreg.ax;
+ }
return 0;
}
static int detect_memory_88(void)
{
- u16 ax;
- u8 err;
+ struct biosregs ireg, oreg;
- ax = 0x8800;
- asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
+ initregs(&ireg);
+ ireg.ah = 0x88;
+ intcall(0x15, &ireg, &oreg);
- boot_params.screen_info.ext_mem_k = ax;
+ boot_params.screen_info.ext_mem_k = oreg.ax;
- return -err;
+ return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
int detect_memory(void)
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 00000000000..958019b1cfa
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+
+void initregs(struct biosregs *reg)
+{
+ memset(reg, 0, sizeof *reg);
+ reg->eflags |= X86_EFLAGS_CF;
+ reg->ds = ds();
+ reg->es = ds();
+ reg->fs = fs();
+ reg->gs = gs();
+}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index bb8dc2de796..0f6ec455a2b 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -15,8 +15,11 @@ SECTIONS
. = 497;
.header : { *(.header) }
+ .entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
.initdata : { *(.initdata) }
+ __end_init = .;
+
.text : { *(.text) }
.text32 : { *(.text32) }
@@ -52,4 +55,7 @@ SECTIONS
. = ASSERT(_end <= 0x8000, "Setup too big!");
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+ /* Necessary for the very-old-loader check to work... */
+ . = ASSERT(__end_init <= 5*512, "init sections too big!");
+
}
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 7e8e8b25f5f..01ec69c901c 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -22,24 +23,23 @@
void __attribute__((section(".inittext"))) putchar(int ch)
{
- unsigned char c = ch;
+ struct biosregs ireg;
- if (c == '\n')
+ if (ch == '\n')
putchar('\r'); /* \n -> \r\n */
- /* int $0x10 is known to have bugs involving touching registers
- it shouldn't. Be extra conservative... */
- asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
- : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
+ initregs(&ireg);
+ ireg.bx = 0x0007;
+ ireg.cx = 0x0001;
+ ireg.ah = 0x0e;
+ ireg.al = ch;
+ intcall(0x10, &ireg, NULL);
}
void __attribute__((section(".inittext"))) puts(const char *str)
{
- int n = 0;
- while (*str) {
+ while (*str)
putchar(*str++);
- n++;
- }
}
/*
@@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str)
static u8 gettime(void)
{
- u16 ax = 0x0200;
- u16 cx, dx;
+ struct biosregs ireg, oreg;
- asm volatile("int $0x1a"
- : "+a" (ax), "=c" (cx), "=d" (dx)
- : : "ebx", "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x02;
+ intcall(0x1a, &ireg, &oreg);
- return dx >> 8;
+ return oreg.dh;
}
/*
@@ -64,19 +63,24 @@ static u8 gettime(void)
*/
int getchar(void)
{
- u16 ax = 0;
- asm volatile("int $0x16" : "+a" (ax));
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ /* ireg.ah = 0x00; */
+ intcall(0x16, &ireg, &oreg);
- return ax & 0xff;
+ return oreg.al;
}
static int kbd_pending(void)
{
- u8 pending;
- asm volatile("int $0x16; setnz %0"
- : "=qm" (pending)
- : "a" (0x0100));
- return pending;
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+ ireg.ah = 0x01;
+ intcall(0x16, &ireg, &oreg);
+
+ return !(oreg.eflags & X86_EFLAGS_ZF);
}
void kbd_flush(void)
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 3fa979c9c36..d660be49236 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi)
static int set_bios_mode(u8 mode)
{
- u16 ax;
+ struct biosregs ireg, oreg;
u8 new_mode;
- ax = mode; /* AH=0x00 Set Video Mode */
- asm volatile(INT10
- : "+a" (ax)
- : : "ebx", "ecx", "edx", "esi", "edi");
+ initregs(&ireg);
+ ireg.al = mode; /* AH=0x00 Set Video Mode */
+ intcall(0x10, &ireg, NULL);
- ax = 0x0f00; /* Get Current Video Mode */
- asm volatile(INT10
- : "+a" (ax)
- : : "ebx", "ecx", "edx", "esi", "edi");
+
+ ireg.ah = 0x0f; /* Get Current Video Mode */
+ intcall(0x10, &ireg, &oreg);
do_restore = 1; /* Assume video contents were lost */
- new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */
+
+ /* Not all BIOSes are clean with the top bit */
+ new_mode = ireg.al & 0x7f;
if (new_mode == mode)
return 0; /* Mode change OK */
@@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode)
/* Mode setting failed, but we didn't end up where we
started. That's bad. Try to revert to the original
video mode. */
- ax = boot_params.screen_info.orig_video_mode;
- asm volatile(INT10
- : "+a" (ax)
- : : "ebx", "ecx", "edx", "esi", "edi");
+ ireg.ax = boot_params.screen_info.orig_video_mode;
+ intcall(0x10, &ireg, NULL);
}
#endif
return -1;
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4a58c8ce3f6..c700147d6ff 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {}
static int vesa_probe(void)
{
#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
- u16 ax, cx, di;
+ struct biosregs ireg, oreg;
u16 mode;
addr_t mode_ptr;
struct mode_info *mi;
@@ -39,13 +40,12 @@ static int vesa_probe(void)
video_vesa.modes = GET_HEAP(struct mode_info, 0);
- ax = 0x4f00;
- di = (size_t)&vginfo;
- asm(INT10
- : "+a" (ax), "+D" (di), "=m" (vginfo)
- : : "ebx", "ecx", "edx", "esi");
+ initregs(&ireg);
+ ireg.ax = 0x4f00;
+ ireg.di = (size_t)&vginfo;
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f ||
+ if (ireg.ax != 0x004f ||
vginfo.signature != VESA_MAGIC ||
vginfo.version < 0x0102)
return 0; /* Not present */
@@ -65,14 +65,12 @@ static int vesa_probe(void)
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
- ax = 0x4f01;
- cx = mode;
- di = (size_t)&vminfo;
- asm(INT10
- : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
- : : "ebx", "edx", "esi");
+ ireg.ax = 0x4f01;
+ ireg.cx = mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f)
+ if (ireg.ax != 0x004f)
continue;
if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -111,20 +109,19 @@ static int vesa_probe(void)
static int vesa_set_mode(struct mode_info *mode)
{
- u16 ax, bx, cx, di;
+ struct biosregs ireg, oreg;
int is_graphic;
u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
- ax = 0x4f01;
- cx = vesa_mode;
- di = (size_t)&vminfo;
- asm(INT10
- : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
- : : "ebx", "edx", "esi");
+ initregs(&ireg);
+ ireg.ax = 0x4f01;
+ ireg.cx = vesa_mode;
+ ireg.di = (size_t)&vminfo;
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f)
+ if (oreg.ax != 0x004f)
return -1;
if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode)
}
- ax = 0x4f02;
- bx = vesa_mode;
- di = 0;
- asm volatile(INT10
- : "+a" (ax), "+b" (bx), "+D" (di)
- : : "ecx", "edx", "esi");
+ initregs(&ireg);
+ ireg.ax = 0x4f02;
+ ireg.bx = vesa_mode;
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f)
+ if (oreg.ax != 0x004f)
return -1;
graphic_mode = is_graphic;
@@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode)
/* Switch DAC to 8-bit mode */
static void vesa_dac_set_8bits(void)
{
+ struct biosregs ireg, oreg;
u8 dac_size = 6;
/* If possible, switch the DAC to 8-bit mode */
if (vginfo.capabilities & 1) {
- u16 ax, bx;
-
- ax = 0x4f08;
- bx = 0x0800;
- asm volatile(INT10
- : "+a" (ax), "+b" (bx)
- : : "ecx", "edx", "esi", "edi");
-
- if (ax == 0x004f)
- dac_size = bx >> 8;
+ initregs(&ireg);
+ ireg.ax = 0x4f08;
+ ireg.bh = 0x08;
+ intcall(0x10, &ireg, &oreg);
+ if (oreg.ax == 0x004f)
+ dac_size = oreg.bh;
}
/* Set the color sizes to the DAC size, and offsets to 0 */
- boot_params.screen_info.red_size = dac_size;
+ boot_params.screen_info.red_size = dac_size;
boot_params.screen_info.green_size = dac_size;
- boot_params.screen_info.blue_size = dac_size;
- boot_params.screen_info.rsvd_size = dac_size;
+ boot_params.screen_info.blue_size = dac_size;
+ boot_params.screen_info.rsvd_size = dac_size;
- boot_params.screen_info.red_pos = 0;
- boot_params.screen_info.green_pos = 0;
- boot_params.screen_info.blue_pos = 0;
- boot_params.screen_info.rsvd_pos = 0;
+ boot_params.screen_info.red_pos = 0;
+ boot_params.screen_info.green_pos = 0;
+ boot_params.screen_info.blue_pos = 0;
+ boot_params.screen_info.rsvd_pos = 0;
}
/* Save the VESA protected mode info */
static void vesa_store_pm_info(void)
{
- u16 ax, bx, di, es;
+ struct biosregs ireg, oreg;
- ax = 0x4f0a;
- bx = di = 0;
- asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
- : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
- : : "ecx", "esi");
+ initregs(&ireg);
+ ireg.ax = 0x4f0a;
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f)
+ if (oreg.ax != 0x004f)
return;
- boot_params.screen_info.vesapm_seg = es;
- boot_params.screen_info.vesapm_off = di;
+ boot_params.screen_info.vesapm_seg = oreg.es;
+ boot_params.screen_info.vesapm_off = oreg.di;
}
/*
@@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void)
void vesa_store_edid(void)
{
#ifdef CONFIG_FIRMWARE_EDID
- u16 ax, bx, cx, dx, di;
+ struct biosregs ireg, oreg;
/* Apparently used as a nonsense token... */
memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
@@ -260,33 +250,26 @@ void vesa_store_edid(void)
if (vginfo.version < 0x0200)
return; /* EDID requires VBE 2.0+ */
- ax = 0x4f15; /* VBE DDC */
- bx = 0x0000; /* Report DDC capabilities */
- cx = 0; /* Controller 0 */
- di = 0; /* ES:DI must be 0 by spec */
-
- /* Note: The VBE DDC spec is different from the main VESA spec;
- we genuinely have to assume all registers are destroyed here. */
-
- asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
- : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
- : : "esi", "edx");
+ initregs(&ireg);
+ ireg.ax = 0x4f15; /* VBE DDC */
+ /* ireg.bx = 0x0000; */ /* Report DDC capabilities */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ ireg.es = 0; /* ES:DI must be 0 by spec */
+ intcall(0x10, &ireg, &oreg);
- if (ax != 0x004f)
+ if (oreg.ax != 0x004f)
return; /* No EDID */
/* BH = time in seconds to transfer EDD information */
/* BL = DDC level supported */
- ax = 0x4f15; /* VBE DDC */
- bx = 0x0001; /* Read EDID */
- cx = 0; /* Controller 0 */
- dx = 0; /* EDID block number */
- di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
- asm(INT10
- : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
- "+c" (cx), "+D" (di)
- : : "esi");
+ ireg.ax = 0x4f15; /* VBE DDC */
+ ireg.bx = 0x0001; /* Read EDID */
+ /* ireg.cx = 0; */ /* Controller 0 */
+ /* ireg.dx = 0; */ /* EDID block number */
+ ireg.es = ds();
+ ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+ intcall(0x10, &ireg, &oreg);
#endif /* CONFIG_FIRMWARE_EDID */
}
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 9e0587a3776..8f8d827e254 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -39,30 +40,30 @@ static __videocard video_vga;
/* Set basic 80x25 mode */
static u8 vga_set_basic_mode(void)
{
+ struct biosregs ireg, oreg;
u16 ax;
u8 rows;
u8 mode;
+ initregs(&ireg);
+
#ifdef CONFIG_VIDEO_400_HACK
if (adapter >= ADAPTER_VGA) {
- asm volatile(INT10
- : : "a" (0x1202), "b" (0x0030)
- : "ecx", "edx", "esi", "edi");
+ ireg.ax = 0x1202;
+ ireg.bx = 0x0030;
+ intcall(0x10, &ireg, NULL);
}
#endif
ax = 0x0f00;
- asm volatile(INT10
- : "+a" (ax)
- : : "ebx", "ecx", "edx", "esi", "edi");
-
- mode = (u8)ax;
+ intcall(0x10, &ireg, &oreg);
+ mode = oreg.al;
set_fs(0);
rows = rdfs8(0x484); /* rows minus one */
#ifndef CONFIG_VIDEO_400_HACK
- if ((ax == 0x5003 || ax == 0x5007) &&
+ if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
(rows == 0 || rows == 24))
return mode;
#endif
@@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void)
mode = 3;
/* Set the mode */
- ax = mode;
- asm volatile(INT10
- : "+a" (ax)
- : : "ebx", "ecx", "edx", "esi", "edi");
+ ireg.ax = mode; /* AH=0: set mode */
+ intcall(0x10, &ireg, NULL);
do_restore = 1;
return mode;
}
@@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void)
static void vga_set_8font(void)
{
/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
/* Set 8x8 font */
- asm volatile(INT10 : : "a" (0x1112), "b" (0));
+ ireg.ax = 0x1112;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
/* Use alternate print screen */
- asm volatile(INT10 : : "a" (0x1200), "b" (0x20));
+ ireg.ax = 0x1200;
+ ireg.bl = 0x20;
+ intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
- asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 6-7 */
- asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0607;
+ intcall(0x10, &ireg, NULL);
}
static void vga_set_14font(void)
{
/* Set 9x14 font - 80x28 on VGA */
+ struct biosregs ireg;
+
+ initregs(&ireg);
/* Set 9x14 font */
- asm volatile(INT10 : : "a" (0x1111), "b" (0));
+ ireg.ax = 0x1111;
+ /* ireg.bl = 0; */
+ intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
- asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+ ireg.ax = 0x1201;
+ ireg.bl = 0x34;
+ intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 11-12 */
- asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
+ ireg.ax = 0x0100;
+ ireg.cx = 0x0b0c;
+ intcall(0x10, &ireg, NULL);
}
static void vga_set_80x43(void)
{
/* Set 80x43 mode on VGA (not EGA) */
+ struct biosregs ireg;
+
+ initregs(&ireg);
/* Set 350 scans */
- asm volatile(INT10 : : "a" (0x1201), "b" (0x30));
+ ireg.ax = 0x1201;
+ ireg.bl = 0x30;
+ intcall(0x10, &ireg, NULL);
/* Reset video mode */
- asm volatile(INT10 : : "a" (0x0003));
+ ireg.ax = 0x0003;
+ intcall(0x10, &ireg, NULL);
vga_set_8font();
}
@@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode)
*/
static int vga_probe(void)
{
- u16 ega_bx;
-
static const char *card_name[] = {
"CGA/MDA/HGC", "EGA", "VGA"
};
@@ -240,26 +263,26 @@ static int vga_probe(void)
sizeof(ega_modes)/sizeof(struct mode_info),
sizeof(vga_modes)/sizeof(struct mode_info),
};
- u8 vga_flag;
- asm(INT10
- : "=b" (ega_bx)
- : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
- : "ecx", "edx", "esi", "edi");
+ struct biosregs ireg, oreg;
+
+ initregs(&ireg);
+
+ ireg.ax = 0x1200;
+ ireg.bl = 0x10; /* Check EGA/VGA */
+ intcall(0x10, &ireg, &oreg);
#ifndef _WAKEUP
- boot_params.screen_info.orig_video_ega_bx = ega_bx;
+ boot_params.screen_info.orig_video_ega_bx = oreg.bx;
#endif
/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
- if ((u8)ega_bx != 0x10) {
+ if (oreg.bl != 0x10) {
/* EGA/VGA */
- asm(INT10
- : "=a" (vga_flag)
- : "a" (0x1a00)
- : "ebx", "ecx", "edx", "esi", "edi");
+ ireg.ax = 0x1a00;
+ intcall(0x10, &ireg, &oreg);
- if (vga_flag == 0x1a) {
+ if (oreg.al == 0x1a) {
adapter = ADAPTER_VGA;
#ifndef _WAKEUP
boot_params.screen_info.orig_video_isVGA = 1;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 3bef2c1febe..bad728b76fc 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -18,33 +19,29 @@
static void store_cursor_position(void)
{
- u16 curpos;
- u16 ax, bx;
+ struct biosregs ireg, oreg;
- ax = 0x0300;
- bx = 0;
- asm(INT10
- : "=d" (curpos), "+a" (ax), "+b" (bx)
- : : "ecx", "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x03;
+ intcall(0x10, &ireg, &oreg);
- boot_params.screen_info.orig_x = curpos;
- boot_params.screen_info.orig_y = curpos >> 8;
+ boot_params.screen_info.orig_x = oreg.dl;
+ boot_params.screen_info.orig_y = oreg.dh;
}
static void store_video_mode(void)
{
- u16 ax, page;
+ struct biosregs ireg, oreg;
/* N.B.: the saving of the video page here is a bit silly,
since we pretty much assume page 0 everywhere. */
- ax = 0x0f00;
- asm(INT10
- : "+a" (ax), "=b" (page)
- : : "ecx", "edx", "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x0f;
+ intcall(0x10, &ireg, &oreg);
/* Not all BIOSes are clean with respect to the top bit */
- boot_params.screen_info.orig_video_mode = ax & 0x7f;
- boot_params.screen_info.orig_video_page = page >> 8;
+ boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+ boot_params.screen_info.orig_video_page = oreg.bh;
}
/*
@@ -257,7 +254,7 @@ static void restore_screen(void)
int y;
addr_t dst = 0;
u16 *src = saved.data;
- u16 ax, bx, dx;
+ struct biosregs ireg;
if (graphic_mode)
return; /* Can't restore onto a graphic mode */
@@ -296,12 +293,11 @@ static void restore_screen(void)
}
/* Restore cursor position */
- ax = 0x0200; /* Set cursor position */
- bx = 0; /* Page number (<< 8) */
- dx = (saved.cury << 8)+saved.curx;
- asm volatile(INT10
- : "+a" (ax), "+b" (bx), "+d" (dx)
- : : "ecx", "esi", "edi");
+ initregs(&ireg);
+ ireg.ah = 0x02; /* Set cursor position */
+ ireg.dh = saved.cury;
+ ireg.dl = saved.curx;
+ intcall(0x10, &ireg, NULL);
}
#else
#define save_screen() ((void)0)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ee63f5d1446..5bb174a997f 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -112,20 +112,6 @@ extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
extern int do_restore; /* Restore screen contents */
extern int graphic_mode; /* Graphics mode with linear frame buffer */
-/*
- * int $0x10 is notorious for touching registers it shouldn't.
- * gcc doesn't like %ebp being clobbered, so define it as a push/pop
- * sequence here.
- *
- * A number of systems, including the original PC can clobber %bp in
- * certain circumstances, like when scrolling. There exists at least
- * one Trident video card which could clobber DS under a set of
- * circumstances that we are unlikely to encounter (scrolling when
- * using an extended graphics mode of more than 800x600 pixels), but
- * it's cheap insurance to deal with that here.
- */
-#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
-
/* Accessing VGA indexed registers */
static inline u8 in_idx(u16 port, u8 index)
{
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 235b81d0f6f..edb992ebef9 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,12 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:50:58 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:21:55 2009
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf32-i386"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
@@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_AUDIT_ARCH is not set
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_32_SMP=y
CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_32_LAZY_GS=y
CONFIG_KTIME_SCALAR=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_TASKSTATS=y
@@ -113,23 +123,26 @@ CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
@@ -139,6 +152,7 @@ CONFIG_AIO=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
@@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
CONFIG_HAVE_GENERIC_DMA_COHERENT=y
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
@@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
-CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BLK_DEV_BSG=y
# CONFIG_BLK_DEV_INTEGRITY is not set
@@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
CONFIG_SPARSE_IRQ=y
-CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
+# CONFIG_X86_BIGSMP is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_VSMP is not set
# CONFIG_X86_RDC321X is not set
+# CONFIG_X86_32_NON_STANDARD is not set
CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
@@ -230,8 +245,10 @@ CONFIG_M686=y
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_X86_XADD=y
# CONFIG_X86_PPRO_FENCE is not set
CONFIG_X86_WP_WORKS_OK=y
@@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_CYRIX_32=y
CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_CENTAUR=y
CONFIG_CPU_SUP_TRANSMETA_32=y
CONFIG_CPU_SUP_UMC_32=y
CONFIG_X86_DS=y
@@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
@@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
CONFIG_HIGHPTE=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
@@ -312,6 +332,7 @@ CONFIG_MTRR=y
CONFIG_X86_PAT=y
CONFIG_EFI=y
CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
@@ -322,8 +343,9 @@ CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_KEXEC_JUMP is not set
CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
CONFIG_HOTPLUG_CPU=y
# CONFIG_COMPAT_VDSO is not set
# CONFIG_CMDLINE_BOOL is not set
@@ -363,7 +385,6 @@ CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
# CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
@@ -425,6 +446,7 @@ CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
+# CONFIG_DMAR is not set
CONFIG_PCIEPORTBUS=y
# CONFIG_HOTPLUG_PCI_PCIE is not set
CONFIG_PCIEAER=y
@@ -435,6 +457,7 @@ CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
@@ -481,7 +504,6 @@ CONFIG_NET=y
#
# Networking options
#
-CONFIG_COMPAT_NET_DEV_OPS=y
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
@@ -639,6 +661,7 @@ CONFIG_LLC=y
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
CONFIG_NET_SCHED=y
#
@@ -696,6 +719,7 @@ CONFIG_NET_SCH_FIFO=y
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
CONFIG_HAMRADIO=y
#
@@ -706,12 +730,10 @@ CONFIG_HAMRADIO=y
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
CONFIG_FIB_RULES=y
CONFIG_WIRELESS=y
CONFIG_CFG80211=y
# CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
CONFIG_WIRELESS_OLD_REGULATORY=y
CONFIG_WIRELESS_EXT=y
CONFIG_WIRELESS_EXT_SYSFS=y
@@ -789,6 +811,7 @@ CONFIG_MISC_DEVICES=y
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
# CONFIG_C2PORT is not set
#
@@ -842,6 +865,7 @@ CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
@@ -940,6 +964,7 @@ CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
# CONFIG_IFB is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
@@ -977,6 +1002,8 @@ CONFIG_MII=y
CONFIG_NET_VENDOR_3COM=y
# CONFIG_VORTEX is not set
# CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
# CONFIG_TULIP is not set
@@ -1026,6 +1053,7 @@ CONFIG_E1000=y
CONFIG_E1000E=y
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
@@ -1040,6 +1068,7 @@ CONFIG_BNX2=y
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
# CONFIG_JME is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1078,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_IXGBE is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_NIU is not set
@@ -1058,6 +1088,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_BNX2X is not set
# CONFIG_QLGE is not set
# CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
CONFIG_TR=y
# CONFIG_IBMOL is not set
# CONFIG_IBMLS is not set
@@ -1073,8 +1104,8 @@ CONFIG_WLAN_80211=y
# CONFIG_LIBERTAS is not set
# CONFIG_LIBERTAS_THINFIRM is not set
# CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
# CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
# CONFIG_AIRO_CS is not set
# CONFIG_PCMCIA_WL3501 is not set
# CONFIG_PRISM54 is not set
@@ -1084,21 +1115,21 @@ CONFIG_WLAN_80211=y
# CONFIG_RTL8187 is not set
# CONFIG_ADM8211 is not set
# CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
# CONFIG_P54_COMMON is not set
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
# CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
# CONFIG_IPW2100 is not set
# CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
# CONFIG_HOSTAP is not set
# CONFIG_B43 is not set
# CONFIG_B43LEGACY is not set
# CONFIG_ZD1211RW is not set
# CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
#
# Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1209,6 +1240,8 @@ CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_USB_WACOM is not set
CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
@@ -1303,6 +1336,7 @@ CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
CONFIG_HW_RANDOM_INTEL=y
CONFIG_HW_RANDOM_AMD=y
CONFIG_HW_RANDOM_GEODE=y
@@ -1390,7 +1424,6 @@ CONFIG_I2C_I801=y
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
@@ -1424,6 +1457,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_ADT7475 is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_I5K_AMB is not set
@@ -1433,6 +1467,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_FSCHER is not set
# CONFIG_SENSORS_FSCPOS is not set
# CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_CORETEMP is not set
@@ -1448,11 +1483,14 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
@@ -1643,7 +1681,6 @@ CONFIG_FB_EFI=y
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
-# CONFIG_FB_CYBLA is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
@@ -1652,6 +1689,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_VIRTUAL is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1738,6 +1776,8 @@ CONFIG_SND_PCI=y
# CONFIG_SND_INDIGO is not set
# CONFIG_SND_INDIGOIO is not set
# CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_ENS1370 is not set
@@ -1811,15 +1851,17 @@ CONFIG_USB_HIDDEV=y
#
# Special HID drivers
#
-CONFIG_HID_COMPAT=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1885,11 +1927,11 @@ CONFIG_USB_PRINTER=y
# CONFIG_USB_TMC is not set
#
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
#
#
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1931,7 +1973,6 @@ CONFIG_USB_LIBUSUAL=y
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
@@ -1947,6 +1988,7 @@ CONFIG_USB_LIBUSUAL=y
#
# OTG and related infrastructure
#
+# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
@@ -1958,8 +2000,10 @@ CONFIG_LEDS_CLASS=y
#
# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
#
# LED Triggers
@@ -1969,6 +2013,10 @@ CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
CONFIG_EDAC=y
@@ -2037,6 +2085,7 @@ CONFIG_DMADEVICES=y
# DMA Devices
#
# CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set
# CONFIG_STAGING is not set
CONFIG_X86_PLATFORM_DEVICES=y
@@ -2071,6 +2120,7 @@ CONFIG_DMIID=y
#
# CONFIG_EXT2_FS is not set
CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
@@ -2101,6 +2151,11 @@ CONFIG_AUTOFS4_FS=y
CONFIG_GENERIC_ACL=y
#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
@@ -2151,6 +2206,7 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
@@ -2164,7 +2220,6 @@ CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
@@ -2251,6 +2306,7 @@ CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
# CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
@@ -2266,6 +2322,7 @@ CONFIG_TIMER_STATS=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
CONFIG_DEBUG_BUGVERBOSE=y
@@ -2289,13 +2346,19 @@ CONFIG_FRAME_POINTER=y
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
#
# Tracers
@@ -2305,13 +2368,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
# CONFIG_SYSPROF_TRACER is not set
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
# CONFIG_BOOT_TRACER is not set
# CONFIG_TRACE_BRANCH_PROFILING is not set
# CONFIG_POWER_TRACER is not set
# CONFIG_STACK_TRACER is not set
# CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
@@ -2321,7 +2392,6 @@ CONFIG_EARLY_PRINTK=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_PER_CPU_MAPS is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
@@ -2329,7 +2399,7 @@ CONFIG_DEBUG_RODATA=y
CONFIG_DEBUG_NX_TEST=m
# CONFIG_4KSTACKS is not set
CONFIG_DOUBLEFAULT=y
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2365,6 +2435,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
CONFIG_CRYPTO=y
#
@@ -2380,10 +2452,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_AUTHENC=y
# CONFIG_CRYPTO_TEST is not set
@@ -2456,6 +2530,7 @@ CONFIG_CRYPTO_DES=y
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
# CONFIG_CRYPTO_LZO is not set
#
@@ -2467,11 +2542,13 @@ CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_GEODE is not set
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
# CONFIG_LGUEST is not set
# CONFIG_VIRTIO_PCI is not set
# CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
#
# Library routines
@@ -2489,7 +2566,10 @@ CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
CONFIG_AUDIT_GENERIC=y
CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d212ab4..cee1dd2e69b 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,12 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:44:16 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:22:00 2009
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
CONFIG_X86_64=y
CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
@@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
# CONFIG_KTIME_SCALAR is not set
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_TASKSTATS=y
@@ -114,23 +123,26 @@ CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
@@ -140,6 +152,7 @@ CONFIG_AIO=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
@@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
@@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
-CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BLK_DEV_BSG=y
# CONFIG_BLK_DEV_INTEGRITY is not set
CONFIG_BLOCK_COMPAT=y
@@ -195,12 +209,10 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
-# CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_VSMP is not set
+# CONFIG_X86_UV is not set
CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
@@ -230,10 +242,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_MCORE2 is not set
CONFIG_GENERIC_CPU=y
CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_TSC=y
CONFIG_X86_CMPXCHG64=y
@@ -242,7 +254,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_64=y
+CONFIG_CPU_SUP_CENTAUR=y
CONFIG_X86_DS=y
CONFIG_X86_PTRACE_BTS=y
CONFIG_HPET_TIMER=y
@@ -269,6 +281,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
# CONFIG_I8K is not set
CONFIG_MICROCODE=y
CONFIG_MICROCODE_INTEL=y
@@ -276,6 +289,7 @@ CONFIG_MICROCODE_AMD=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
CONFIG_DIRECT_GBPAGES=y
CONFIG_NUMA=y
@@ -309,6 +323,8 @@ CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
CONFIG_X86_RESERVE_LOW_64K=y
@@ -317,6 +333,7 @@ CONFIG_MTRR=y
CONFIG_X86_PAT=y
CONFIG_EFI=y
CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
@@ -325,9 +342,10 @@ CONFIG_HZ=1000
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
+# CONFIG_KEXEC_JUMP is not set
CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
CONFIG_HOTPLUG_CPU=y
# CONFIG_COMPAT_VDSO is not set
# CONFIG_CMDLINE_BOOL is not set
@@ -370,7 +388,6 @@ CONFIG_ACPI_NUMA=y
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
# CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
@@ -436,6 +453,7 @@ CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
CONFIG_ISA_DMA_API=y
CONFIG_K8_NB=y
CONFIG_PCCARD=y
@@ -481,7 +499,6 @@ CONFIG_NET=y
#
# Networking options
#
-CONFIG_COMPAT_NET_DEV_OPS=y
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
@@ -639,6 +656,7 @@ CONFIG_LLC=y
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
CONFIG_NET_SCHED=y
#
@@ -696,6 +714,7 @@ CONFIG_NET_SCH_FIFO=y
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
CONFIG_HAMRADIO=y
#
@@ -706,12 +725,10 @@ CONFIG_HAMRADIO=y
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
CONFIG_FIB_RULES=y
CONFIG_WIRELESS=y
CONFIG_CFG80211=y
# CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
CONFIG_WIRELESS_OLD_REGULATORY=y
CONFIG_WIRELESS_EXT=y
CONFIG_WIRELESS_EXT_SYSFS=y
@@ -788,9 +805,8 @@ CONFIG_MISC_DEVICES=y
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_SGI_XP is not set
# CONFIG_HP_ILO is not set
-# CONFIG_SGI_GRU is not set
+# CONFIG_ISL29003 is not set
# CONFIG_C2PORT is not set
#
@@ -844,6 +860,7 @@ CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
@@ -940,6 +957,7 @@ CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
# CONFIG_IFB is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
@@ -977,6 +995,8 @@ CONFIG_MII=y
CONFIG_NET_VENDOR_3COM=y
# CONFIG_VORTEX is not set
# CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
# CONFIG_TULIP is not set
@@ -1026,6 +1046,7 @@ CONFIG_E1000=y
# CONFIG_E1000E is not set
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
@@ -1040,6 +1061,7 @@ CONFIG_TIGON3=y
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
# CONFIG_JME is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1071,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_IXGBE is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_NIU is not set
@@ -1058,6 +1081,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_BNX2X is not set
# CONFIG_QLGE is not set
# CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
CONFIG_TR=y
# CONFIG_IBMOL is not set
# CONFIG_3C359 is not set
@@ -1072,8 +1096,8 @@ CONFIG_WLAN_80211=y
# CONFIG_LIBERTAS is not set
# CONFIG_LIBERTAS_THINFIRM is not set
# CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
# CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
# CONFIG_AIRO_CS is not set
# CONFIG_PCMCIA_WL3501 is not set
# CONFIG_PRISM54 is not set
@@ -1083,21 +1107,21 @@ CONFIG_WLAN_80211=y
# CONFIG_RTL8187 is not set
# CONFIG_ADM8211 is not set
# CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
# CONFIG_P54_COMMON is not set
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
# CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
# CONFIG_IPW2100 is not set
# CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
# CONFIG_HOSTAP is not set
# CONFIG_B43 is not set
# CONFIG_B43LEGACY is not set
# CONFIG_ZD1211RW is not set
# CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
#
# Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1208,6 +1232,8 @@ CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_USB_WACOM is not set
CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
@@ -1301,6 +1327,7 @@ CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
CONFIG_NVRAM=y
@@ -1382,7 +1409,6 @@ CONFIG_I2C_I801=y
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
@@ -1416,6 +1442,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_ADT7475 is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_I5K_AMB is not set
@@ -1425,6 +1452,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_FSCHER is not set
# CONFIG_SENSORS_FSCPOS is not set
# CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_CORETEMP is not set
@@ -1440,11 +1468,14 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
@@ -1635,6 +1666,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_VIRTUAL is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1720,6 +1752,8 @@ CONFIG_SND_PCI=y
# CONFIG_SND_INDIGO is not set
# CONFIG_SND_INDIGOIO is not set
# CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_ENS1370 is not set
@@ -1792,15 +1826,17 @@ CONFIG_USB_HIDDEV=y
#
# Special HID drivers
#
-CONFIG_HID_COMPAT=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1866,11 +1902,11 @@ CONFIG_USB_PRINTER=y
# CONFIG_USB_TMC is not set
#
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
#
#
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1912,7 +1948,6 @@ CONFIG_USB_LIBUSUAL=y
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
@@ -1928,6 +1963,7 @@ CONFIG_USB_LIBUSUAL=y
#
# OTG and related infrastructure
#
+# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
@@ -1939,8 +1975,10 @@ CONFIG_LEDS_CLASS=y
#
# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
#
# LED Triggers
@@ -1950,6 +1988,10 @@ CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
CONFIG_EDAC=y
@@ -2018,6 +2060,7 @@ CONFIG_DMADEVICES=y
# DMA Devices
#
# CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set
# CONFIG_STAGING is not set
CONFIG_X86_PLATFORM_DEVICES=y
@@ -2051,6 +2094,7 @@ CONFIG_DMIID=y
#
# CONFIG_EXT2_FS is not set
CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
@@ -2082,6 +2126,11 @@ CONFIG_AUTOFS4_FS=y
CONFIG_GENERIC_ACL=y
#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
@@ -2132,6 +2181,7 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
@@ -2145,7 +2195,6 @@ CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
@@ -2232,6 +2281,7 @@ CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
# CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
@@ -2247,6 +2297,7 @@ CONFIG_TIMER_STATS=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
@@ -2269,13 +2320,19 @@ CONFIG_FRAME_POINTER=y
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
#
# Tracers
@@ -2285,13 +2342,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
# CONFIG_SYSPROF_TRACER is not set
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
# CONFIG_BOOT_TRACER is not set
# CONFIG_TRACE_BRANCH_PROFILING is not set
# CONFIG_POWER_TRACER is not set
# CONFIG_STACK_TRACER is not set
# CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
@@ -2301,14 +2366,13 @@ CONFIG_EARLY_PRINTK=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_PER_CPU_MAPS is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
# CONFIG_IOMMU_DEBUG is not set
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2344,6 +2408,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
CONFIG_CRYPTO=y
#
@@ -2359,10 +2425,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_AUTHENC=y
# CONFIG_CRYPTO_TEST is not set
@@ -2414,6 +2482,7 @@ CONFIG_CRYPTO_SHA1=y
#
CONFIG_CRYPTO_AES=y
# CONFIG_CRYPTO_AES_X86_64 is not set
+# CONFIG_CRYPTO_AES_NI_INTEL is not set
# CONFIG_CRYPTO_ANUBIS is not set
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_BLOWFISH is not set
@@ -2435,6 +2504,7 @@ CONFIG_CRYPTO_DES=y
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
# CONFIG_CRYPTO_LZO is not set
#
@@ -2444,10 +2514,12 @@ CONFIG_CRYPTO_DES=y
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
# CONFIG_VIRTIO_PCI is not set
# CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
#
# Library routines
@@ -2464,7 +2536,10 @@ CONFIG_CRC32=y
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202086e..e590261ba05 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -825,9 +825,11 @@ ia32_sys_call_table:
.quad compat_sys_signalfd4
.quad sys_eventfd2
.quad sys_epoll_create1
- .quad sys_dup3 /* 330 */
+ .quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
.quad compat_sys_preadv
.quad compat_sys_pwritev
+ .quad compat_sys_rt_tgsigqueueinfo /* 335 */
+ .quad sys_perf_counter_open
ia32_syscall_end:
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f6aa18eadf7..1a37bcdc860 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
+#include <linux/stringify.h>
#include <asm/asm.h>
/*
@@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {}
const unsigned char *const *find_nop_table(void);
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature) \
+ \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR "661b\n" /* label */ \
+ _ASM_PTR "663f\n" /* new instruction */ \
+ " .byte " __stringify(feature) "\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement, \"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous"
+
/*
* Alternative instructions for different CPU types or capabilities.
*
@@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void);
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR "661b\n" /* label */ \
- _ASM_PTR "663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
+ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
/*
* Alternative inline assembly with input.
@@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void);
* Best is to use constraints that are fixed size (like (%1) ... "r")
* If you use variable sized constraints like "m" or "g" in the
* replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR "661b\n" /* label */ \
- _ASM_PTR "663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
+ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
+ : : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR "661b\n" /* label */ \
- _ASM_PTR "663f\n" /* new instruction */ \
- " .byte %c[feat]\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" : output : [feat] "i" (feature), ##input)
+ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
+ : output : "i" (0), ## input)
/*
* use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344329b..262e0282004 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
extern int amd_iommu_init_dma_ops(void);
extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9d22b..0c878caaa0a 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...) \
+ do { \
+ if (amd_iommu_dump) \
+ printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
+ } while(0);
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+ list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
+#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
/*
* This structure contains generic data for IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
};
/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+ /* address allocation bitmap */
+ unsigned long *bitmap;
+
+ /*
+ * Array of PTE pages for the aperture. In this array we save all the
+ * leaf pages of the domain page table used for the aperture. This way
+ * we don't need to walk the page table to find a specific PTE. We can
+ * just calculate its address in constant time.
+ */
+ u64 *pte_pages[64];
+
+ unsigned long offset;
+};
+
+/*
* Data container for a dma_ops specific protection domain
*/
struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
unsigned long aperture_size;
/* address we start to search for free addresses */
- unsigned long next_bit;
-
- /* address allocation bitmap */
- unsigned long *bitmap;
+ unsigned long next_address;
- /*
- * Array of PTE pages for the aperture. In this array we save all the
- * leaf pages of the domain page table used for the aperture. This way
- * we don't need to walk the page table to find a specific PTE. We can
- * just calculate its address in constant time.
- */
- u64 **pte_pages;
+ /* address space relevant data */
+ struct aperture_range *aperture[APERTURE_MAX_RANGES];
/* This will be set to true when TLB needs to be flushed */
bool need_flush;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f837742..bb7d4792584 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
extern void native_apic_icr_write(u32 low, u32 id);
extern u64 native_apic_icr_read(void);
-#define EIM_8BIT_APIC_ID 0
-#define EIM_32BIT_APIC_ID 1
+extern int x2apic_mode;
#ifdef CONFIG_X86_X2APIC
/*
@@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void)
return val;
}
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
extern void check_x2apic(void);
extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
extern void x2apic_icr_write(u32 low, u32 id);
static inline int x2apic_enabled(void)
{
@@ -183,6 +181,8 @@ static inline int x2apic_enabled(void)
return 1;
return 0;
}
+
+#define x2apic_supported() (cpu_has_x2apic)
#else
static inline void check_x2apic(void)
{
@@ -190,28 +190,20 @@ static inline void check_x2apic(void)
static inline void enable_x2apic(void)
{
}
-static inline void enable_IR_x2apic(void)
-{
-}
static inline int x2apic_enabled(void)
{
return 0;
}
-#define x2apic 0
-
+#define x2apic_preenabled 0
+#define x2apic_supported() 0
#endif
-extern int get_physical_broadcast(void);
+extern void enable_IR_x2apic(void);
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
- /* Docs say use 0 for future compatibility */
- native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
+extern int get_physical_broadcast(void);
+extern void apic_disable(void);
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC(void);
@@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_64
@@ -410,7 +402,7 @@ static inline unsigned default_get_apic_id(unsigned long x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_XAPIC(ver))
+ if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
return (x >> 24) & 0xFF;
else
return (x >> 24) & 0x0F;
@@ -478,6 +470,9 @@ static inline unsigned int read_apic_id(void)
extern void default_setup_apic_routing(void);
#ifdef CONFIG_X86_32
+
+extern struct apic apic_default;
+
/*
* Set up the logical destination ID.
*
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bc9514fb3b1..7ddb36ab933 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -22,6 +22,7 @@
# define APIC_INTEGRATED(x) (1)
#endif
#define APIC_XAPIC(x) ((x) >= 0x14)
+#define APIC_EXT_SPACE(x) ((x) & 0x80000000)
#define APIC_TASKPRI 0x80
#define APIC_TPRI_MASK 0xFFu
#define APIC_ARBPRI 0x90
@@ -116,7 +117,9 @@
#define APIC_TDR_DIV_32 0x8
#define APIC_TDR_DIV_64 0x9
#define APIC_TDR_DIV_128 0xA
-#define APIC_EILVT0 0x500
+#define APIC_EFEAT 0x400
+#define APIC_ECTRL 0x410
+#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
@@ -125,9 +128,6 @@
#define APIC_EILVT_MSG_NMI 0x4
#define APIC_EILVT_MSG_EXT 0x7
#define APIC_EILVT_MASKED (1 << 16)
-#define APIC_EILVT1 0x510
-#define APIC_EILVT2 0x520
-#define APIC_EILVT3 0x530
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define APIC_BASE_MSR 0x800
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fba422..aff9f1fcdcd 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
#define smp_mb__before_atomic_inc() barrier()
#define smp_mb__after_atomic_inc() barrier()
+/* An 64bit atomic type */
+
+typedef struct {
+ unsigned long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+#define __atomic64_read(ptr) ((ptr)->counter)
+
+static inline unsigned long long
+cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
+{
+ asm volatile(
+
+ LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
+
+ : "=A" (old)
+
+ : [ptr] "D" (ptr),
+ "A" (old),
+ "b" (ll_low(new)),
+ "c" (ll_high(new))
+
+ : "memory");
+
+ return old;
+}
+
+static inline unsigned long long
+atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
+ unsigned long long new_val)
+{
+ return cmpxchg8b(&ptr->counter, old_val, new_val);
+}
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ * @new_val: value to assign
+ * @old_val: old value that was there
+ *
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
+ */
+
+static inline unsigned long long
+atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
+{
+ unsigned long long old_val;
+
+ do {
+ old_val = atomic_read(ptr);
+ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+ return old_val;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ * @new_val: value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+{
+ atomic64_xchg(ptr, new_val);
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline unsigned long long atomic64_read(atomic64_t *ptr)
+{
+ unsigned long long curr_val;
+
+ do {
+ curr_val = __atomic64_read(ptr);
+ } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
+
+ return curr_val;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+static inline unsigned long long
+atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
+{
+ unsigned long long old_val, new_val;
+
+ do {
+ old_val = atomic_read(ptr);
+ new_val = old_val + delta;
+
+ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+ return new_val;
+}
+
+static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
+{
+ return atomic64_add_return(-delta, ptr);
+}
+
+static inline long atomic64_inc_return(atomic64_t *ptr)
+{
+ return atomic64_add_return(1, ptr);
+}
+
+static inline long atomic64_dec_return(atomic64_t *ptr)
+{
+ return atomic64_sub_return(1, ptr);
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
+{
+ atomic64_add_return(delta, ptr);
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
+{
+ atomic64_add(-delta, ptr);
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int
+atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
+{
+ unsigned long long old_val = atomic64_sub_return(delta, ptr);
+
+ return old_val == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+static inline void atomic64_inc(atomic64_t *ptr)
+{
+ atomic64_add(1, ptr);
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+static inline void atomic64_dec(atomic64_t *ptr)
+{
+ atomic64_sub(1, ptr);
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *ptr)
+{
+ return atomic64_sub_and_test(1, ptr);
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *ptr)
+{
+ return atomic64_sub_and_test(-1, ptr);
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int
+atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
+{
+ long long old_val = atomic64_add_return(delta, ptr);
+
+ return old_val < 0;
+}
+
#include <asm-generic/atomic.h>
#endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6ba23dd9fc9..418e632d4a8 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,11 +8,26 @@
#ifdef __KERNEL__
+#include <asm/page_types.h>
+
/* Physical address where kernel should be loaded. */
#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+ (CONFIG_PHYSICAL_ALIGN - 1)) \
& ~(CONFIG_PHYSICAL_ALIGN - 1))
+/* Minimum kernel alignment, as a power of two */
+#ifdef CONFIG_x86_64
+#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
+#else
+#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1)
+#endif
+#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
+
+#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
+ (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2))
+#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+#endif
+
#ifdef CONFIG_KERNEL_BZIP2
#define BOOT_HEAP_SIZE 0x400000
#else /* !CONFIG_KERNEL_BZIP2 */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 433adaebf9b..1724e8de317 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -50,7 +50,8 @@ struct setup_header {
__u32 ramdisk_size;
__u32 bootsect_kludge;
__u16 heap_end_ptr;
- __u16 _pad1;
+ __u8 ext_loader_ver;
+ __u8 ext_loader_type;
__u32 cmd_line_ptr;
__u32 initrd_addr_max;
__u32 kernel_alignment;
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
index 222802029fa..d96c1ee3a95 100644
--- a/arch/x86/include/asm/cpu_debug.h
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -86,105 +86,7 @@ enum cpu_file_bit {
CPU_VALUE_BIT, /* value */
};
-#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel Processor Families/Processor Number Series
- * -------------------------- ------------------------------------------
- * 05_01, 05_02, 05_04 Pentium, Pentium with MMX
- *
- * 06_01 Pentium Pro
- * 06_03, 06_05 Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III
- *
- * 06_09, 060D Pentium M
- *
- * 06_0E Core Duo, Core Solo
- *
- * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series,
- * Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- * Pentium dual-core
- * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C Atom
- *
- * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06 Xeon 7100, 5000 Series, Xeon MP,
- * Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
- CPU_NONE,
-/* Intel */
- CPU_INTEL_PENTIUM_BIT,
- CPU_INTEL_P6_BIT,
- CPU_INTEL_PENTIUM_M_BIT,
- CPU_INTEL_CORE_BIT,
- CPU_INTEL_CORE2_BIT,
- CPU_INTEL_ATOM_BIT,
- CPU_INTEL_XEON_P4_BIT,
- CPU_INTEL_XEON_MP_BIT,
-/* AMD */
- CPU_AMD_K6_BIT,
- CPU_AMD_K7_BIT,
- CPU_AMD_K8_BIT,
- CPU_AMD_0F_BIT,
- CPU_AMD_10_BIT,
- CPU_AMD_11_BIT,
-};
-
-#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT)
-#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT)
-#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT)
-#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT)
-#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT)
-#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT)
-#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT)
-#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT)
-
-#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON)
-#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON)
-#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT)
-#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX)
-#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT)
-#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all supported Intel CPUs */
-#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT)
-#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT)
-#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT)
-#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT)
-#define CPU_AMD_10 (1 << CPU_AMD_10_BIT)
-#define CPU_AMD_11 (1 << CPU_AMD_11_BIT)
-
-#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11)
-#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS)
-#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS)
-#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS)
-
-/* Select all supported AMD CPUs */
-#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS)
-
-/* Select all supported CPUs */
-#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL)
+#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
#define MAX_CPU_FILES 512
@@ -220,7 +122,6 @@ struct cpu_debug_range {
unsigned min; /* Register range min */
unsigned max; /* Register range max */
unsigned flag; /* Supported flags */
- unsigned model; /* Supported models */
};
#endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c397a..19af42138f7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,7 +22,7 @@
#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
@@ -94,6 +94,7 @@
#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -192,11 +193,11 @@ extern const char * const x86_power_flags[32];
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
#define setup_clear_cpu_cap(bit) do { \
clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+ set_bit(bit, (unsigned long *)cpu_caps_cleared); \
} while (0)
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
- clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
+ set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a8f672ba100..70dac199b09 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -15,8 +15,8 @@
* - buffer allocation (memory accounting)
*
*
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/
#ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@ enum ds_feature {
* The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism.
*
- * task: the task to request recording for;
- * NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@ enum ds_feature {
* -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags
*/
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
- void *base, size_t size,
- bts_ovfl_callback_t ovfl,
- size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
- void *base, size_t size,
- pebs_ovfl_callback_t ovfl,
- size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+ void *base, size_t size,
+ bts_ovfl_callback_t ovfl,
+ size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+ bts_ovfl_callback_t ovfl,
+ size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl,
+ size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl,
+ size_t th, unsigned int flags);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
+ * Must be called with irq's enabled.
+ *
* tracer: the tracer handle returned from ds_request_~()
*/
extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer);
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
/*
* The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@ struct bts_struct {
} lbr;
/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
struct {
- __u64 jiffies;
+ __u64 clock;
pid_t pid;
- } timestamp;
+ } event;
} variant;
};
@@ -201,8 +234,12 @@ struct bts_trace {
struct pebs_trace {
struct ds_trace ds;
- /* the PEBS reset value */
- unsigned long long reset_value;
+ /* the number of valid counters in the below array */
+ unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+ /* the counter reset value */
+ unsigned long long counter_reset[MAX_PEBS_COUNTERS];
};
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
* Returns 0 on success; -Eerrno on error
*
* tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
* value: the new counter reset value
*/
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+ unsigned int counter, u64 value);
/*
* Initialization
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
*/
extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
#else /* CONFIG_X86_DS */
struct cpuinfo_x86;
static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
static inline void ds_switch_to(struct task_struct *prev,
struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
- struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bedaf25..d750a10ccad 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_COUNTERS
-BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif
#ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e52f98..9ebc5c25503 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -13,6 +13,8 @@ typedef struct {
unsigned int irq_spurious_count;
#endif
unsigned int generic_irqs; /* arch dependent */
+ unsigned int apic_perf_irqs;
+ unsigned int apic_pending_irqs;
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea49bd7..6df45f63966 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,8 @@
extern void apic_timer_interrupt(void);
extern void generic_interrupt(void);
extern void error_interrupt(void);
+extern void perf_pending_interrupt(void);
+
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);
@@ -63,7 +65,26 @@ extern unsigned long io_apic_irqs;
extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+
+struct io_apic_irq_attr {
+ int ioapic;
+ int ioapic_pin;
+ int trigger;
+ int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+ int ioapic, int ioapic_pin,
+ int trigger, int polarity)
+{
+ irq_attr->ioapic = ioapic;
+ irq_attr->ioapic_pin = ioapic_pin;
+ irq_attr->trigger = trigger;
+ irq_attr->polarity = polarity;
+}
+
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+ struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
extern void enable_IO_APIC(void);
@@ -78,7 +99,11 @@ extern void eisa_set_level_irq(unsigned int irq);
/* SMP */
extern void smp_apic_timer_interrupt(struct pt_regs *);
extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_generic_interrupt(struct pt_regs *);
extern void smp_error_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_IO_APIC
+extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
+#endif
#ifdef CONFIG_SMP
extern void smp_reschedule_interrupt(struct pt_regs *);
extern void smp_call_function_interrupt(struct pt_regs *);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 71c9e518398..175adf58dd4 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
+#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "m" (*fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
return err;
}
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
- if (task_thread_info(tsk)->status & TS_XSAVE)
- return xrstor_checking(&tsk->thread.xstate->xsave);
- else
- return fxrstor_checking(&tsk->thread.xstate->fxsave);
-}
-
/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. The kernel data segment can be sometimes 0 and sometimes
@@ -120,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "0" (0));
@@ -185,12 +177,9 @@ static inline void tolerant_fwait(void)
asm volatile("fnclex ; fwait");
}
-static inline void restore_fpu(struct task_struct *tsk)
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
- if (task_thread_info(tsk)->status & TS_XSAVE) {
- xrstor_checking(&tsk->thread.xstate->xsave);
- return;
- }
/*
* The "nop" is needed to make the instructions the same
* length.
@@ -199,7 +188,9 @@ static inline void restore_fpu(struct task_struct *tsk)
"nop ; frstor %1",
"fxrstor %1",
X86_FEATURE_FXSR,
- "m" (tsk->thread.xstate->fxsave));
+ "m" (*fx));
+
+ return 0;
}
/* We need a safe address that is cheap to find and that is already
@@ -262,6 +253,14 @@ end:
#endif /* CONFIG_X86_64 */
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+ if (task_thread_info(tsk)->status & TS_XSAVE)
+ return xrstor_checking(&tsk->thread.xstate->xsave);
+ else
+ return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
+
/*
* Signal frame handlers...
*/
@@ -305,18 +304,18 @@ static inline void kernel_fpu_end(void)
/*
* Some instructions like VIA's padlock instructions generate a spurious
* DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context aswell. To prevent these kernel instructions
- * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
* should use them only in the context of irq_ts_save/restore()
*/
static inline int irq_ts_save(void)
{
/*
- * If we are in process context, we are ok to take a spurious DNA fault.
- * Otherwise, doing clts() in process context require pre-emption to
- * be disabled or some heavy lifting like kernel_fpu_begin()
+ * If in process context and not atomic, we can take a spurious DNA fault.
+ * Otherwise, doing clts() in process context requires disabling preemption
+ * or some heavy lifting like kernel_fpu_begin()
*/
- if (!in_interrupt())
+ if (!in_atomic())
return 0;
if (read_cr0() & X86_CR0_TS) {
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1a99e6c092a..58d7091eeb1 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip;
extern void mask_8259A(void);
extern void unmask_8259A(void);
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd068bc2..00000000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
-
-#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
- (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-
-union cpuid10_eax {
- struct {
- unsigned int version_id:8;
- unsigned int num_counters:8;
- unsigned int bit_width:8;
- unsigned int mask_length:8;
- } split;
- unsigned int full;
-};
-
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e43601..daf866ed061 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,22 +154,19 @@ extern int timer_through_8259;
extern int io_apic_get_unique_id(int ioapic, int apic_id);
extern int io_apic_get_version(int ioapic);
extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
- int edge_level, int active_high_low);
#endif /* CONFIG_ACPI */
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr);
extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_init_mappings(void);
-#ifdef CONFIG_X86_64
extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
- struct IO_APIC_route_entry **ioapic_entries);
-#endif
extern void probe_nr_irqs_gsi(void);
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 86af26091d6..0e9fe1d9d97 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_IOMAP_H
+#define _ASM_X86_IOMAP_H
+
/*
* Copyright © 2008 Ingo Molnar
*
@@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
void
iounmap_atomic(void *kvaddr, enum km_type type);
+
+#endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760fccb..f275e224450 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
#ifndef _ASM_X86_IRQ_REMAPPING_H
#define _ASM_X86_IRQ_REMAPPING_H
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47..e997be98c9b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,6 +34,7 @@
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
+# define IA32_SYSCALL_VECTOR 0x80
#else
# define IA32_SYSCALL_VECTOR 0x80
#endif
@@ -107,14 +108,14 @@
#define LOCAL_TIMER_VECTOR 0xef
/*
- * Performance monitoring interrupt vector:
+ * Generic system vector for platform specific use
*/
-#define LOCAL_PERF_VECTOR 0xee
+#define GENERIC_INTERRUPT_VECTOR 0xed
/*
- * Generic system vector for platform specific use
+ * Performance monitoring pending work vector:
*/
-#define GENERIC_INTERRUPT_VECTOR 0xed
+#define LOCAL_PENDING_VECTOR 0xec
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index 54c8cc53b24..c2d1f3b58e5 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -12,4 +12,17 @@ extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
extern int k8_scan_nodes(unsigned long start, unsigned long end);
+#ifdef CONFIG_K8_NB
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+ return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+}
+#else
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+ return NULL;
+}
+#endif
+
+
#endif /* _ASM_X86_K8_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c882664716c..ef51b501e22 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -9,20 +9,31 @@ struct cpu_signature {
struct device;
+enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+
struct microcode_ops {
- int (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
- int (*request_microcode_fw) (int cpu, struct device *device);
+ enum ucode_state (*request_microcode_user) (int cpu,
+ const void __user *buf, size_t size);
- void (*apply_microcode) (int cpu);
+ enum ucode_state (*request_microcode_fw) (int cpu,
+ struct device *device);
- int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
void (*microcode_fini_cpu) (int cpu);
+
+ /*
+ * The generic 'microcode_core' part guarantees that
+ * the callbacks below run on a target cpu when they
+ * are being called.
+ * See also the "Synchronization" section in microcode_core.c.
+ */
+ int (*apply_microcode) (int cpu);
+ int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
};
struct ucode_cpu_info {
- struct cpu_signature cpu_sig;
- int valid;
- void *mc;
+ struct cpu_signature cpu_sig;
+ int valid;
+ void *mc;
};
extern struct ucode_cpu_info ucode_cpu_info[];
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7fc8cd..e2a1bb6d71e 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -61,9 +61,11 @@ extern void get_smp_config(void);
#ifdef CONFIG_X86_MPPARSE
extern void find_smp_config(void);
extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
#else
static inline void find_smp_config(void) { }
static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
#endif
void __cpuinit generic_processor_info(int apicid, int version);
@@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi);
extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+ int active_high_low);
extern int acpi_probe_gsi(void);
#ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity);
extern int mp_find_ioapic(int gsi);
extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity)
-{
- return 0;
-}
#endif
#else /* !CONFIG_ACPI: */
static inline int acpi_probe_gsi(void)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc16c16..4d58d04fca8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,7 +121,6 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_K8_SYSCFG 0xc0010010
-#define MSR_K8_HWCR 0xc0010015
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c45a0a568df..c9726440993 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void)
* but since they are power of two we could use a
* cheaper way --cvg
*/
- return nmi_watchdog & 0x3;
+ return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
}
#endif
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6df4cb..c4ae822e415 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
extern void numa_init_array(void);
extern int numa_off;
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
extern s16 apicid_to_node[MAX_LOCAL_APIC];
extern unsigned long numa_free_all_bootmem(void);
@@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
#ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
extern void __init init_cpu_to_node(void);
extern void __cpuinit numa_set_node(int cpu, int node);
extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0f915ae649a..6f1b7331313 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE;
extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
extern void setup_bootmem_allocator(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d38c91b7024..8d382d3abf3 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -32,22 +32,14 @@
*/
#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
-#define __PHYSICAL_START CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN 0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
+#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \
+ (CONFIG_PHYSICAL_ALIGN - 1)) & \
+ ~(CONFIG_PHYSICAL_ALIGN - 1))
#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 48
@@ -71,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long);
#define vmemmap ((struct page *)VMEMMAP_START)
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 826ad37006a..6473f5ccff8 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+extern unsigned long init_memory_mapping(unsigned long start,
+ unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a53da004e08..4fb37c8a083 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
struct tss_struct;
struct mm_struct;
struct desc_struct;
+struct task_struct;
/*
* Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
void (*swapgs)(void);
- struct pv_lazy_ops lazy_mode;
+ void (*start_context_switch)(struct task_struct *prev);
+ void (*end_context_switch)(struct task_struct *next);
};
struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
};
enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
-#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
{
- PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+ PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
}
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(struct task_struct *next)
{
- PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+ PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
}
-void arch_flush_lazy_cpu_mode(void);
-
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 00000000000..876ed97147b
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,100 @@
+#ifndef _ASM_X86_PERF_COUNTER_H
+#define _ASM_X86_PERF_COUNTER_H
+
+/*
+ * Performance counter hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC 8
+#define X86_PMC_MAX_FIXED 3
+
+#define X86_PMC_IDX_GENERIC 0
+#define X86_PMC_IDX_FIXED 32
+#define X86_PMC_IDX_MAX 64
+
+#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
+
+/*
+ * Includes eventsel and unit mask as well:
+ */
+#define ARCH_PERFMON_EVENT_MASK 0xffff
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
+
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_edx {
+ struct {
+ unsigned int num_counters_fixed:4;
+ unsigned int reserved:28;
+ } split;
+ unsigned int full;
+};
+
+
+/*
+ * Fixed-purpose performance counters:
+ */
+
+/*
+ * All 3 fixed-mode PMCs are configured via this single MSR:
+ */
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
+
+/*
+ * The counts are available in three separate MSRs:
+ */
+
+/* Instr_Retired.Any: */
+#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+
+/* CPU_CLK_Unhalted.Core: */
+#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+
+/* CPU_CLK_Unhalted.Ref: */
+#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+
+extern void set_perf_counter_pending(void);
+
+#define clear_perf_counter_pending() do { } while (0)
+#define test_perf_counter_pending() (0)
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(void);
+#else
+static inline void init_hw_perf_counters(void) { }
+static inline void perf_counters_lapic_init(void) { }
+#endif
+
+#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d168bc..18ef7ebf263 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
#define pte_val(x) native_pte_val(x)
#define __pte(x) native_make_pte(x)
+#define arch_end_context_switch(prev) do {} while(0)
+
#endif /* CONFIG_PARAVIRT */
/*
@@ -503,6 +505,8 @@ static inline int pgd_none(pgd_t pgd)
#ifndef __ASSEMBLY__
+extern int direct_gbpages;
+
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 6b87bc6d501..abde308fdb0 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[];
extern void paging_init(void);
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
#define pte_ERROR(e) \
printk("%s:%d: bad pte %p(%016lx).\n", \
__FILE__, __LINE__, &(e), pte_val(e))
@@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define update_mmu_cache(vma, address, pte) do { } while (0)
-extern int direct_gbpages;
-
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index fbf42b8e038..766ea16fbbb 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START _AC(0xffffc20000000000, UL)
-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START _AC(0xffffe20000000000, UL)
+#define VMALLOC_START _AC(0xffffc90000000000, UL)
+#define VMALLOC_END _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START _AC(0xffffea0000000000, UL)
#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b8238dc8786..4d258ad76a0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern int nx_enabled;
-extern void set_nx(void);
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c2cceae709c..c7768269b1c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -135,7 +135,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cleared_cpu_caps[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS];
+extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -409,9 +410,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
struct thread_struct {
/* Cached TLS descriptors: */
@@ -427,8 +425,12 @@ struct thread_struct {
unsigned short fsindex;
unsigned short gsindex;
#endif
+#ifdef CONFIG_X86_32
unsigned long ip;
+#endif
+#ifdef CONFIG_X86_64
unsigned long fs;
+#endif
unsigned long gs;
/* Hardware debugging registers: */
unsigned long debugreg0;
@@ -460,14 +462,8 @@ struct thread_struct {
unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+ /* Debug Store context; see asm/ds.h */
struct ds_context *ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
- unsigned int bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
};
static inline unsigned long native_get_debugreg(int regno)
@@ -795,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void)
return debugctlmsr;
}
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+ u64 debugctlmsr = 0;
+ u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return 0;
+#endif
+ rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+ debugctlmsr = val1 | ((u64)val2 << 32);
+
+ return debugctlmsr;
+}
+
static inline void update_debugctlmsr(unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
@@ -804,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
+static inline void update_debugctlmsr_on_cpu(int cpu,
+ unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return;
+#endif
+ wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+ (u32)((u64)debugctlmsr),
+ (u32)((u64)debugctlmsr >> 32));
+}
+
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
@@ -814,6 +837,7 @@ extern unsigned int BIOS_revision;
/* Boot loader type from the setup header: */
extern int bootloader_type;
+extern int bootloader_version;
extern char ignore_fpu_irq;
@@ -874,7 +898,6 @@ static inline void spin_lock_prefetch(const void *x)
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
- .fs = __KERNEL_PERCPU, \
}
/*
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 624f133943e..0f0d908349a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
- unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index a4737dddfd5..64cf2d24fad 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
#endif
#ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
#define NEED_PGE 0
+#else
+#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
+#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
+#endif
+#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bdc2ada05ae..4093d1ed6db 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -33,7 +33,6 @@ struct x86_quirks {
int (*setup_ioapic_ids)(void);
};
-extern void x86_quirk_pre_intr_init(void);
extern void x86_quirk_intr_init(void);
extern void x86_quirk_trap_init(void);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19e0d88b966..6a84ed166ae 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void);
static inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+ return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
#endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index e3cc3c063ec..4517d6b9318 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */
+# define MAX_PHYSMEM_BITS 46
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408f690..372b76edd63 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,7 +1,7 @@
/*
* syscalls.h - Linux syscall interfaces (arch-specific)
*
- * Copyright (c) 2008 Jaswinder Singh
+ * Copyright (c) 2008 Jaswinder Singh Rajput
*
* This file is released under the GPLv2.
* See the file COPYING for more details.
@@ -12,50 +12,55 @@
#include <linux/compiler.h>
#include <linux/linkage.h>
-#include <linux/types.h>
#include <linux/signal.h>
+#include <linux/types.h>
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+
/* kernel/ldt.c */
asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+/* kernel/signal.c */
+long sys_rt_sigreturn(struct pt_regs *);
+
/* kernel/tls.c */
asmlinkage int sys_set_thread_area(struct user_desc __user *);
asmlinkage int sys_get_thread_area(struct user_desc __user *);
/* X86_32 only */
#ifdef CONFIG_X86_32
+/* kernel/ioport.c */
+long sys_iopl(struct pt_regs *);
+
/* kernel/process_32.c */
-int sys_fork(struct pt_regs *);
int sys_clone(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
int sys_execve(struct pt_regs *);
-/* kernel/signal_32.c */
+/* kernel/signal.c */
asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *);
int sys_sigaltstack(struct pt_regs *);
unsigned long sys_sigreturn(struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
/* kernel/sys_i386_32.c */
+struct mmap_arg_struct;
+struct sel_arg_struct;
+struct oldold_utsname;
+struct old_utsname;
+
asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long);
-struct mmap_arg_struct;
asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-struct sel_arg_struct;
asmlinkage int old_select(struct sel_arg_struct __user *);
asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
-struct old_utsname;
asmlinkage int sys_uname(struct old_utsname __user *);
-struct oldold_utsname;
asmlinkage int sys_olduname(struct oldold_utsname __user *);
/* kernel/vm86_32.c */
@@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *);
#else /* CONFIG_X86_32 */
/* X86_64 only */
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
/* kernel/process_64.c */
-asmlinkage long sys_fork(struct pt_regs *);
asmlinkage long sys_clone(unsigned long, unsigned long,
void __user *, void __user *,
struct pt_regs *);
-asmlinkage long sys_vfork(struct pt_regs *);
asmlinkage long sys_execve(char __user *, char __user * __user *,
char __user * __user *,
struct pt_regs *);
long sys_arch_prctl(int, unsigned long);
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
-/* kernel/signal_64.c */
+/* kernel/signal.c */
asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
/* kernel/sys_x86_64.c */
+struct new_utsname;
+
asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long);
-struct new_utsname;
asmlinkage long sys_uname(struct new_utsname __user *);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8820a73ae09..602c769fc98 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,7 +94,8 @@ struct thread_info {
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
-#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
+#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@ struct thread_info {
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
/* work to do in syscall_trace_enter() */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 16a5c84b032..a5ecc9c33e9 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
static inline void __native_flush_tlb(void)
{
- write_cr3(read_cr3());
+ native_write_cr3(native_read_cr3());
}
static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
- cr4 = read_cr4();
+ cr4 = native_read_cr4();
/* clear PGE */
- write_cr4(cr4 & ~X86_CR4_PGE);
+ native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */
- write_cr4(cr4);
+ native_write_cr4(cr4);
raw_local_irq_restore(flags);
}
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f44b49abca4..066ef590d7e 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -203,7 +203,8 @@ struct pci_bus;
void x86_pci_root_bus_res_quirks(struct pci_bus *b);
#ifdef CONFIG_SMP
-#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
+#define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \
+ (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids))
#define smt_capable() (smp_num_siblings > 1)
#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d5342515b8..bfd74c032fc 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -2,6 +2,7 @@
#define _ASM_X86_TRAPS_H
#include <asm/debugreg.h>
+#include <asm/siginfo.h> /* TRAP_TRACE, ... */
#ifdef CONFIG_X86_32
#define dotraplinkage
@@ -13,6 +14,9 @@ asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_stack_segment(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
@@ -74,7 +78,6 @@ static inline int get_si_code(unsigned long condition)
}
extern int panic_on_unrecovered_nmi;
-extern int kstack_depth_to_print;
void math_error(void __user *);
void math_emulate(struct math_emu_info *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74cf8d..732a3070615 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,8 @@
#define __NR_inotify_init1 332
#define __NR_preadv 333
#define __NR_pwritev 334
+#define __NR_rt_tgsigqueueinfo 335
+#define __NR_perf_counter_open 336
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f8182946232..900e1617e67 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,7 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
__SYSCALL(__NR_preadv, sys_preadv)
#define __NR_pwritev 296
__SYSCALL(__NR_pwritev, sys_pwritev)
-
+#define __NR_rt_tgsigqueueinfo 297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
+#define __NR_perf_counter_open 298
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 9b0e61bf7a8..bddd44f2f0a 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -37,7 +37,7 @@
#define UV_CPUS_PER_ACT_STATUS 32
#define UV_ACT_STATUS_MASK 0x3
#define UV_ACT_STATUS_SIZE 2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
+#define UV_ADP_SIZE 32
#define UV_DISTRIBUTION_SIZE 256
#define UV_SW_ACK_NPENDING 8
#define UV_NET_ENDPOINT_INTD 0x38
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3a98ea1062..341070f7ad5 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -133,6 +133,7 @@ struct uv_scir_s {
struct uv_hub_info_s {
unsigned long global_mmr_base;
unsigned long gpa_mask;
+ unsigned int gnode_extra;
unsigned long gnode_upper;
unsigned long lowmem_remap_top;
unsigned long lowmem_remap_base;
@@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
* p - PNODE (local part of nsids, right shifted 1)
*/
#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask)
-#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper)
+#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra)
+#define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1)
#define UV_LOCAL_MMR_BASE 0xf4000000UL
#define UV_GLOBAL_MMR32_BASE 0xf8000000UL
@@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
#define UV_GLOBAL_MMR64_PNODE_BITS(p) \
- ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+ ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
#define UV_APIC_PNODE_SHIFT 6
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88d1bfc847d..4f78bd68212 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y += setup.o i8259.o irqinit_$(BITS).o
+obj-y += setup.o i8259.o irqinit.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +44,7 @@ obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-$(CONFIG_X86_DS) += ds.o
+obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d7f80..631086159c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -33,6 +33,7 @@
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
+#include <linux/pci.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
* success: return IRQ number (>=0)
* failure: return < 0
*/
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
unsigned int irq;
unsigned int plat_gsi = gsi;
@@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
- if (triggering == ACPI_LEVEL_SENSITIVE)
+ if (trigger == ACPI_LEVEL_SENSITIVE)
eisa_set_level_irq(gsi);
}
#endif
#ifdef CONFIG_X86_IO_APIC
if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
- plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+ plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
}
#endif
acpi_gsi_to_irq(plat_gsi, &irq);
@@ -903,10 +904,8 @@ extern int es7000_plat;
#endif
static struct {
- int apic_id;
int gsi_base;
int gsi_end;
- DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
int mp_find_ioapic(int gsi)
@@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
- mp_ioapics[idx].apicver = 0;
-#endif
+
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
@@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void)
}
}
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
{
+#ifdef CONFIG_X86_MPPARSE
+ struct mpc_intsrc mp_irq;
+ struct pci_dev *pdev;
+ unsigned char number;
+ unsigned int devfn;
int ioapic;
- int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM 4096
-#define IRQ_COMPRESSION_START 64
+ u8 pin;
- static int pci_irq = IRQ_COMPRESSION_START;
- /*
- * Mapping between Global System Interrupts, which
- * represent all possible interrupts, and IRQs
- * assigned to actual devices.
- */
- static int gsi_to_irq[MAX_GSI_NUM];
-#else
+ if (!acpi_ioapic)
+ return 0;
+ if (!dev)
+ return 0;
+ if (dev->bus != &pci_bus_type)
+ return 0;
+
+ pdev = to_pci_dev(dev);
+ number = pdev->bus->number;
+ devfn = pdev->devfn;
+ pin = pdev->pin;
+ /* print the entry should happen on mptable identically */
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+ (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+ mp_irq.srcbus = number;
+ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+ ioapic = mp_find_ioapic(gsi);
+ mp_irq.dstapic = mp_ioapics[ioapic].apicid;
+ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+ save_mp_irq(&mp_irq);
+#endif
+ return 0;
+}
+
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+ int ioapic;
+ int ioapic_pin;
+ struct io_apic_irq_attr irq_attr;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
-#endif
/* Don't set up the ACPI SCI because it's already set up */
if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
gsi = ioapic_renumber_irq(ioapic, gsi);
#endif
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ "%d-%d\n", mp_ioapics[ioapic].apicid,
ioapic_pin);
return gsi;
}
- if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
- return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
- return gsi;
-#endif
- }
-
- set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
- /*
- * For GSI >= 64, use IRQ compression
- */
- if ((gsi >= IRQ_COMPRESSION_START)
- && (triggering == ACPI_LEVEL_SENSITIVE)) {
- /*
- * For PCI devices assign IRQs in order, avoiding gaps
- * due to unused I/O APIC pins.
- */
- int irq = gsi;
- if (gsi < MAX_GSI_NUM) {
- /*
- * Retain the VIA chipset work-around (gsi > 15), but
- * avoid a problem where the 8254 timer (IRQ0) is setup
- * via an override (so it's not on pin 0 of the ioapic),
- * and at the same time, the pin 0 interrupt is a PCI
- * type. The gsi > 15 test could cause these two pins
- * to be shared as IRQ0, and they are not shareable.
- * So test for this condition, and if necessary, avoid
- * the pin collision.
- */
- gsi = pci_irq++;
- /*
- * Don't assign IRQ used by ACPI SCI
- */
- if (gsi == acpi_gbl_FADT.sci_interrupt)
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
- } else {
- printk(KERN_ERR "GSI %u is too high\n", gsi);
- return gsi;
- }
- }
-#endif
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
- struct mpc_intsrc mp_irq;
- int ioapic;
+ if (enable_update_mptable)
+ mp_config_acpi_gsi(dev, gsi, trigger, polarity);
- if (!acpi_ioapic)
- return 0;
+ set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+ trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+ io_apic_set_pci_routing(dev, gsi, &irq_attr);
- /* print the entry should happen on mptable identically */
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
- (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
- mp_irq.srcbus = number;
- mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
- ioapic = mp_find_ioapic(gsi);
- mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
- mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
- save_mp_irq(&mp_irq);
-#endif
- return 0;
+ return gsi;
}
/*
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 1c31cc0e9de..167bc16ce0e 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -9,7 +9,7 @@
always := wakeup.bin
targets := wakeup.elf wakeup.lds
-wakeup-y += wakeup.o wakemain.o video-mode.o copy.o
+wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
# The link order of the video-*.o modules can matter. In particular,
# video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
new file mode 100644
index 00000000000..f51eb0bb56c
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/bioscall.S
@@ -0,0 +1 @@
+#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
new file mode 100644
index 00000000000..6206033ba20
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/regs.c
@@ -0,0 +1 @@
+#include "../../../boot/regs.c"
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99dad5..1c60554537c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,16 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+ unsigned long address, u64
+ **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+ unsigned long start_page,
+ unsigned int pages);
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
#ifdef CONFIG_AMD_IOMMU_STATS
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
{
struct amd_iommu *iommu;
- list_for_each_entry(iommu, &amd_iommu_list, list)
+ for_each_iommu(iommu)
iommu_poll_events(iommu);
return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
}
}
+void amd_iommu_flush_all_domains(void)
+{
+ int i;
+
+ for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+ if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+ continue;
+ iommu_flush_domain(i);
+ }
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+ struct amd_iommu *iommu;
+ int i;
+
+ for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+ if (amd_iommu_pd_table[i] == NULL)
+ continue;
+
+ iommu = amd_iommu_rlookup_table[i];
+ if (!iommu)
+ continue;
+
+ iommu_queue_inv_dev_entry(iommu, i);
+ iommu_completion_wait(iommu);
+ }
+}
+
/****************************************************************************
*
* The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
unsigned long phys_addr,
int prot)
{
- u64 __pte, *pte, *page;
+ u64 __pte, *pte;
bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
return -EINVAL;
- pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- *pte = IOMMU_L2_PDE(virt_to_phys(page));
- }
-
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- *pte = IOMMU_L1_PDE(virt_to_phys(page));
- }
-
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+ pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
* as allocated in the aperture
*/
if (addr < dma_dom->aperture_size)
- __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+ __set_bit(addr >> PAGE_SHIFT,
+ dma_dom->aperture[0]->bitmap);
}
return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
****************************************************************************/
/*
- * The address allocator core function.
+ * The address allocator core functions.
*
* called with domain->lock held
*/
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+ unsigned long address)
+{
+ u64 *pte;
+
+ pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+ return pte;
+}
+
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+ struct dma_ops_domain *dma_dom,
+ bool populate, gfp_t gfp)
+{
+ int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+ populate = false;
+#endif
+
+ if (index >= APERTURE_MAX_RANGES)
+ return -ENOMEM;
+
+ dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+ if (!dma_dom->aperture[index])
+ return -ENOMEM;
+
+ dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+ if (!dma_dom->aperture[index]->bitmap)
+ goto out_free;
+
+ dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+ if (populate) {
+ unsigned long address = dma_dom->aperture_size;
+ int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+ u64 *pte, *pte_page;
+
+ for (i = 0; i < num_ptes; ++i) {
+ pte = alloc_pte(&dma_dom->domain, address,
+ &pte_page, gfp);
+ if (!pte)
+ goto out_free;
+
+ dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+ address += APERTURE_RANGE_SIZE / 64;
+ }
+ }
+
+ dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+ /* Intialize the exclusion range if necessary */
+ if (iommu->exclusion_start &&
+ iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+ iommu->exclusion_start < dma_dom->aperture_size) {
+ unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length,
+ PAGE_SIZE);
+ dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ }
+
+ /*
+ * Check for areas already mapped as present in the new aperture
+ * range and mark those pages as reserved in the allocator. Such
+ * mappings may already exist as a result of requested unity
+ * mappings for devices.
+ */
+ for (i = dma_dom->aperture[index]->offset;
+ i < dma_dom->aperture_size;
+ i += PAGE_SIZE) {
+ u64 *pte = fetch_pte(&dma_dom->domain, i);
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ continue;
+
+ dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+ }
+
+ return 0;
+
+out_free:
+ free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+ kfree(dma_dom->aperture[index]);
+ dma_dom->aperture[index] = NULL;
+
+ return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+ struct dma_ops_domain *dom,
+ unsigned int pages,
+ unsigned long align_mask,
+ u64 dma_mask,
+ unsigned long start)
+{
+ unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
+ int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ int i = start >> APERTURE_RANGE_SHIFT;
+ unsigned long boundary_size;
+ unsigned long address = -1;
+ unsigned long limit;
+
+ next_bit >>= PAGE_SHIFT;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ for (;i < max_index; ++i) {
+ unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+ if (dom->aperture[i]->offset >= dma_mask)
+ break;
+
+ limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+ dma_mask >> PAGE_SHIFT);
+
+ address = iommu_area_alloc(dom->aperture[i]->bitmap,
+ limit, next_bit, pages, 0,
+ boundary_size, align_mask);
+ if (address != -1) {
+ address = dom->aperture[i]->offset +
+ (address << PAGE_SHIFT);
+ dom->next_address = address + (pages << PAGE_SHIFT);
+ break;
+ }
+
+ next_bit = 0;
+ }
+
+ return address;
+}
+
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
unsigned int pages,
unsigned long align_mask,
u64 dma_mask)
{
- unsigned long limit;
unsigned long address;
- unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
- limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
- dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+ dom->next_address = 0;
+ dom->need_flush = true;
+#endif
- if (dom->next_bit >= limit) {
- dom->next_bit = 0;
- dom->need_flush = true;
- }
+ address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+ dma_mask, dom->next_address);
- address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
- 0 , boundary_size, align_mask);
if (address == -1) {
- address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
- 0, boundary_size, align_mask);
+ dom->next_address = 0;
+ address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+ dma_mask, 0);
dom->need_flush = true;
}
- if (likely(address != -1)) {
- dom->next_bit = address + pages;
- address <<= PAGE_SHIFT;
- } else
+ if (unlikely(address == -1))
address = bad_dma_address;
WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
unsigned long address,
unsigned int pages)
{
- address >>= PAGE_SHIFT;
- iommu_area_free(dom->bitmap, address, pages);
+ unsigned i = address >> APERTURE_RANGE_SHIFT;
+ struct aperture_range *range = dom->aperture[i];
- if (address >= dom->next_bit)
+ BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+ if (i < 4)
+ return;
+#endif
+
+ if (address >= dom->next_address)
dom->need_flush = true;
+
+ address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+ iommu_area_free(range->bitmap, address, pages);
+
}
/****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages)
{
- unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+ unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
if (start_page + pages > last_page)
pages = last_page - start_page;
- iommu_area_reserve(dom->bitmap, start_page, pages);
+ for (i = start_page; i < start_page + pages; ++i) {
+ int index = i / APERTURE_RANGE_PAGES;
+ int page = i % APERTURE_RANGE_PAGES;
+ __set_bit(page, dom->aperture[index]->bitmap);
+ }
}
static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
*/
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
+ int i;
+
if (!dom)
return;
free_pagetable(&dom->domain);
- kfree(dom->pte_pages);
-
- kfree(dom->bitmap);
+ for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+ if (!dom->aperture[i])
+ continue;
+ free_page((unsigned long)dom->aperture[i]->bitmap);
+ kfree(dom->aperture[i]);
+ }
kfree(dom);
}
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
* It also intializes the page table and the address allocator data
* structures required for the dma_ops interface
*/
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
- unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
{
struct dma_ops_domain *dma_dom;
- unsigned i, num_pte_pages;
- u64 *l2_pde;
- u64 address;
-
- /*
- * Currently the DMA aperture must be between 32 MB and 1GB in size
- */
- if ((order < 25) || (order > 30))
- return NULL;
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
- dma_dom->aperture_size = (1ULL << order);
- dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
- GFP_KERNEL);
- if (!dma_dom->bitmap)
- goto free_dma_dom;
- /*
- * mark the first page as allocated so we never return 0 as
- * a valid dma-address. So we can use 0 as error value
- */
- dma_dom->bitmap[0] = 1;
- dma_dom->next_bit = 0;
dma_dom->need_flush = false;
dma_dom->target_dev = 0xffff;
- /* Intialize the exclusion range if necessary */
- if (iommu->exclusion_start &&
- iommu->exclusion_start < dma_dom->aperture_size) {
- unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = iommu_num_pages(iommu->exclusion_start,
- iommu->exclusion_length,
- PAGE_SIZE);
- dma_ops_reserve_addresses(dma_dom, startpage, pages);
- }
+ if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+ goto free_dma_dom;
/*
- * At the last step, build the page tables so we don't need to
- * allocate page table pages in the dma_ops mapping/unmapping
- * path.
+ * mark the first page as allocated so we never return 0 as
+ * a valid dma-address. So we can use 0 as error value
*/
- num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
- dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
- GFP_KERNEL);
- if (!dma_dom->pte_pages)
- goto free_dma_dom;
-
- l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (l2_pde == NULL)
- goto free_dma_dom;
+ dma_dom->aperture[0]->bitmap[0] = 1;
+ dma_dom->next_address = 0;
- dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
- for (i = 0; i < num_pte_pages; ++i) {
- dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!dma_dom->pte_pages[i])
- goto free_dma_dom;
- address = virt_to_phys(dma_dom->pte_pages[i]);
- l2_pde[i] = IOMMU_L1_PDE(address);
- }
return dma_dom;
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
unsigned long flags;
if (devid > amd_iommu_last_bdf)
@@ -1002,17 +1145,7 @@ static int device_change_notifier(struct notifier_block *nb,
"to a non-dma-ops domain\n", dev_name(dev));
switch (action) {
- case BUS_NOTIFY_BOUND_DRIVER:
- if (domain)
- goto out;
- dma_domain = find_protection_domain(devid);
- if (!dma_domain)
- dma_domain = iommu->default_dom;
- attach_device(iommu, &dma_domain->domain, devid);
- printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
- "device %s\n", dma_domain->domain.id, dev_name(dev));
- break;
- case BUS_NOTIFY_UNBIND_DRIVER:
+ case BUS_NOTIFY_UNBOUND_DRIVER:
if (!domain)
goto out;
detach_device(domain, devid);
@@ -1022,7 +1155,7 @@ static int device_change_notifier(struct notifier_block *nb,
dma_domain = find_protection_domain(devid);
if (dma_domain)
goto out;
- dma_domain = dma_ops_domain_alloc(iommu, order);
+ dma_domain = dma_ops_domain_alloc(iommu);
if (!dma_domain)
goto out;
dma_domain->target_dev = devid;
@@ -1133,8 +1266,8 @@ static int get_device_resources(struct device *dev,
dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
attach_device(*iommu, *domain, *bdf);
- printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
- "device %s\n", (*domain)->id, dev_name(dev));
+ DUMP_printk("Using protection domain %d for device %s\n",
+ (*domain)->id, dev_name(dev));
}
if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1277,66 @@ static int get_device_resources(struct device *dev,
}
/*
+ * If the pte_page is not yet allocated this function is called
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+ unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+ u64 *pte, *page;
+
+ pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = IOMMU_L2_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = IOMMU_L1_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+
+ if (pte_page)
+ *pte_page = pte;
+
+ pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+ return pte;
+}
+
+/*
+ * This function fetches the PTE for a given address in the aperture
+ */
+static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
+ unsigned long address)
+{
+ struct aperture_range *aperture;
+ u64 *pte, *pte_page;
+
+ aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+ if (!aperture)
+ return NULL;
+
+ pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+ if (!pte) {
+ pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+ aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+ } else
+ pte += IOMMU_PTE_L0_INDEX(address);
+
+ return pte;
+}
+
+/*
* This is the generic map function. It maps one 4kb page at paddr to
* the given address in the DMA address space for the domain.
*/
@@ -1159,8 +1352,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
paddr &= PAGE_MASK;
- pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
- pte += IOMMU_PTE_L0_INDEX(address);
+ pte = dma_ops_get_pte(dom, address);
+ if (!pte)
+ return bad_dma_address;
__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
@@ -1185,14 +1379,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address)
{
+ struct aperture_range *aperture;
u64 *pte;
if (address >= dom->aperture_size)
return;
- WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+ aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+ if (!aperture)
+ return;
+
+ pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+ if (!pte)
+ return;
- pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
WARN_ON(!*pte);
@@ -1216,7 +1416,7 @@ static dma_addr_t __map_single(struct device *dev,
u64 dma_mask)
{
dma_addr_t offset = paddr & ~PAGE_MASK;
- dma_addr_t address, start;
+ dma_addr_t address, start, ret;
unsigned int pages;
unsigned long align_mask = 0;
int i;
@@ -1232,14 +1432,33 @@ static dma_addr_t __map_single(struct device *dev,
if (align)
align_mask = (1UL << get_order(size)) - 1;
+retry:
address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
dma_mask);
- if (unlikely(address == bad_dma_address))
- goto out;
+ if (unlikely(address == bad_dma_address)) {
+ /*
+ * setting next_address here will let the address
+ * allocator only scan the new allocated range in the
+ * first run. This is a small optimization.
+ */
+ dma_dom->next_address = dma_dom->aperture_size;
+
+ if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+ goto out;
+
+ /*
+ * aperture was sucessfully enlarged by 128 MB, try
+ * allocation again
+ */
+ goto retry;
+ }
start = address;
for (i = 0; i < pages; ++i) {
- dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+ ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+ if (ret == bad_dma_address)
+ goto out_unmap;
+
paddr += PAGE_SIZE;
start += PAGE_SIZE;
}
@@ -1255,6 +1474,17 @@ static dma_addr_t __map_single(struct device *dev,
out:
return address;
+
+out_unmap:
+
+ for (--i; i >= 0; --i) {
+ start -= PAGE_SIZE;
+ dma_ops_domain_unmap(iommu, dma_dom, start);
+ }
+
+ dma_ops_free_addresses(dma_dom, address, pages);
+
+ return bad_dma_address;
}
/*
@@ -1537,8 +1767,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
size, DMA_BIDIRECTIONAL, true, dma_mask);
- if (*dma_addr == bad_dma_address)
+ if (*dma_addr == bad_dma_address) {
+ spin_unlock_irqrestore(&domain->lock, flags);
goto out_free;
+ }
iommu_completion_wait(iommu);
@@ -1625,7 +1857,6 @@ static void prealloc_protection_domains(void)
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1869,7 @@ static void prealloc_protection_domains(void)
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
continue;
- dma_dom = dma_ops_domain_alloc(iommu, order);
+ dma_dom = dma_ops_domain_alloc(iommu);
if (!dma_dom)
continue;
init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1895,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
int __init amd_iommu_init_dma_ops(void)
{
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
int ret;
/*
@@ -1672,8 +1902,8 @@ int __init amd_iommu_init_dma_ops(void)
* found in the system. Devices not assigned to any other
* protection domain will be assigned to the default one.
*/
- list_for_each_entry(iommu, &amd_iommu_list, list) {
- iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+ for_each_iommu(iommu) {
+ iommu->default_dom = dma_ops_domain_alloc(iommu);
if (iommu->default_dom == NULL)
return -ENOMEM;
iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1940,7 @@ int __init amd_iommu_init_dma_ops(void)
free_domains:
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
if (iommu->default_dom)
dma_ops_domain_free(iommu->default_dom);
}
@@ -1842,7 +2072,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
old_domain = domain_for_device(devid);
if (old_domain)
- return -EBUSY;
+ detach_device(old_domain, devid);
attach_device(iommu, domain, devid);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be0902da..238989ec077 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@ struct ivmd_header {
u64 range_length;
} __attribute__((packed));
+bool amd_iommu_dump;
+
static int __initdata amd_iommu_detected;
u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
bool amd_iommu_isolate = true; /* if true, device isolation is
enabled */
+#endif
+
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
static inline unsigned long tbl_size(int entry_size)
{
unsigned shift = PAGE_SHIFT +
- get_order(amd_iommu_last_bdf * entry_size);
+ get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
return 1UL << shift;
}
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
* This function set the exclusion range in the IOMMU. DMA accesses to the
* exclusion range are passed through untranslated
*/
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
u64 start = iommu->exclusion_start & PAGE_MASK;
u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
}
/* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
}
/* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void iommu_disable(struct amd_iommu *iommu)
{
- iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
- iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+ iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
}
/*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
{
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(CMD_BUFFER_SIZE));
- u64 entry;
if (cmd_buf == NULL)
return NULL;
iommu->cmd_buf_size = CMD_BUFFER_SIZE;
- entry = (u64)virt_to_phys(cmd_buf);
+ return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ BUG_ON(iommu->cmd_buf == NULL);
+
+ entry = (u64)virt_to_phys(iommu->cmd_buf);
entry |= MMIO_CMD_SIZE_512;
+
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
- &entry, sizeof(entry));
+ &entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
- return cmd_buf;
}
static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
/* allocates the memory where the IOMMU will log its events to */
static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
{
- u64 entry;
iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(EVT_BUFFER_SIZE));
if (iommu->evt_buf == NULL)
return NULL;
+ return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ BUG_ON(iommu->evt_buf == NULL);
+
entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
&entry, sizeof(entry));
- iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
- return iommu->evt_buf;
+ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
}
static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
p += sizeof(struct ivhd_header);
end += h->length;
+
while (p < end) {
e = (struct ivhd_entry *)p;
switch (e->type) {
case IVHD_DEV_ALL:
+
+ DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+ " last device %02x:%02x.%x flags: %02x\n",
+ PCI_BUS(iommu->first_device),
+ PCI_SLOT(iommu->first_device),
+ PCI_FUNC(iommu->first_device),
+ PCI_BUS(iommu->last_device),
+ PCI_SLOT(iommu->last_device),
+ PCI_FUNC(iommu->last_device),
+ e->flags);
+
for (dev_i = iommu->first_device;
dev_i <= iommu->last_device; ++dev_i)
set_dev_entry_from_acpi(iommu, dev_i,
e->flags, 0);
break;
case IVHD_DEV_SELECT:
+
+ DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+ "flags: %02x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags);
+
devid = e->devid;
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
break;
case IVHD_DEV_SELECT_RANGE_START:
+
+ DUMP_printk(" DEV_SELECT_RANGE_START\t "
+ "devid: %02x:%02x.%x flags: %02x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags);
+
devid_start = e->devid;
flags = e->flags;
ext_flags = 0;
alias = false;
break;
case IVHD_DEV_ALIAS:
+
+ DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+ "flags: %02x devid_to: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags,
+ PCI_BUS(e->ext >> 8),
+ PCI_SLOT(e->ext >> 8),
+ PCI_FUNC(e->ext >> 8));
+
devid = e->devid;
devid_to = e->ext >> 8;
- set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+ set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
amd_iommu_alias_table[devid] = devid_to;
break;
case IVHD_DEV_ALIAS_RANGE:
+
+ DUMP_printk(" DEV_ALIAS_RANGE\t\t "
+ "devid: %02x:%02x.%x flags: %02x "
+ "devid_to: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags,
+ PCI_BUS(e->ext >> 8),
+ PCI_SLOT(e->ext >> 8),
+ PCI_FUNC(e->ext >> 8));
+
devid_start = e->devid;
flags = e->flags;
devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
alias = true;
break;
case IVHD_DEV_EXT_SELECT:
+
+ DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+ "flags: %02x ext: %08x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags, e->ext);
+
devid = e->devid;
set_dev_entry_from_acpi(iommu, devid, e->flags,
e->ext);
break;
case IVHD_DEV_EXT_SELECT_RANGE:
+
+ DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
+ "%02x:%02x.%x flags: %02x ext: %08x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags, e->ext);
+
devid_start = e->devid;
flags = e->flags;
ext_flags = e->ext;
alias = false;
break;
case IVHD_DEV_RANGE_END:
+
+ DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid));
+
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
{
struct amd_iommu *iommu, *next;
- list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+ for_each_iommu_safe(iommu, next) {
list_del(&iommu->list);
free_iommu_one(iommu);
kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
- iommu_set_device_table(iommu);
iommu->cmd_buf = alloc_command_buffer(iommu);
if (!iommu->cmd_buf)
return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
h = (struct ivhd_header *)p;
switch (*p) {
case ACPI_IVHD_TYPE:
+
+ DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+ "seg: %d flags: %01x info %04x\n",
+ PCI_BUS(h->devid), PCI_SLOT(h->devid),
+ PCI_FUNC(h->devid), h->cap_ptr,
+ h->pci_seg, h->flags, h->info);
+ DUMP_printk(" mmio-addr: %016llx\n",
+ h->mmio_phys);
+
iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
if (iommu == NULL)
return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
*
****************************************************************************/
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
- struct amd_iommu *curr;
- struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
- int nvec = 0, i;
-
- list_for_each_entry(curr, &amd_iommu_list, list) {
- if (curr->dev == iommu->dev) {
- entries[nvec].entry = curr->evt_msi_num;
- entries[nvec].vector = 0;
- curr->int_enabled = true;
- nvec++;
- }
- }
-
- if (pci_enable_msix(iommu->dev, entries, nvec)) {
- pci_disable_msix(iommu->dev);
- return 1;
- }
-
- for (i = 0; i < nvec; ++i) {
- int r = request_irq(entries->vector, amd_iommu_int_handler,
- IRQF_SAMPLE_RANDOM,
- "AMD IOMMU",
- NULL);
- if (r)
- goto out_free;
- }
-
- return 0;
-
-out_free:
- for (i -= 1; i >= 0; --i)
- free_irq(entries->vector, NULL);
-
- pci_disable_msix(iommu->dev);
-
- return 1;
-}
-
static int __init iommu_setup_msi(struct amd_iommu *iommu)
{
int r;
- struct amd_iommu *curr;
-
- list_for_each_entry(curr, &amd_iommu_list, list) {
- if (curr->dev == iommu->dev)
- curr->int_enabled = true;
- }
-
if (pci_enable_msi(iommu->dev))
return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
return 1;
}
+ iommu->int_enabled = true;
+ iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
return 0;
}
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
{
if (iommu->int_enabled)
return 0;
- if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
- return iommu_setup_msix(iommu);
- else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+ if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
return iommu_setup_msi(iommu);
return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
static int __init init_unity_map_range(struct ivmd_header *m)
{
struct unity_map_entry *e = 0;
+ char *s;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
switch (m->type) {
default:
+ kfree(e);
+ return 0;
case ACPI_IVMD_TYPE:
+ s = "IVMD_TYPEi\t\t\t";
e->devid_start = e->devid_end = m->devid;
break;
case ACPI_IVMD_TYPE_ALL:
+ s = "IVMD_TYPE_ALL\t\t";
e->devid_start = 0;
e->devid_end = amd_iommu_last_bdf;
break;
case ACPI_IVMD_TYPE_RANGE:
+ s = "IVMD_TYPE_RANGE\t\t";
e->devid_start = m->devid;
e->devid_end = m->aux;
break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
+ DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+ " range_start: %016llx range_end: %016llx flags: %x\n", s,
+ PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+ PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+ PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+ e->address_start, e->address_end, m->flags);
+
list_add_tail(&e->list, &amd_iommu_unity_map);
return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
* This function finally enables all IOMMUs found in the system after
* they have been initialized
*/
-static void __init enable_iommus(void)
+static void enable_iommus(void)
{
struct amd_iommu *iommu;
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
+ iommu_set_device_table(iommu);
+ iommu_enable_command_buffer(iommu);
+ iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
- iommu_enable_event_logging(iommu);
iommu_enable(iommu);
}
}
+static void disable_iommus(void)
+{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu)
+ iommu_disable(iommu);
+}
+
/*
* Suspend/Resume support
* disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev)
{
+ /*
+ * Disable IOMMUs before reprogramming the hardware registers.
+ * IOMMU is still enabled from the resume kernel.
+ */
+ disable_iommus();
+
+ /* re-load the hardware */
+ enable_iommus();
+
+ /*
+ * we have to flush after the IOMMUs are enabled because a
+ * disabled IOMMU will never execute the commands we send
+ */
+ amd_iommu_flush_all_domains();
+ amd_iommu_flush_all_devices();
+
return 0;
}
static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
{
- return -EINVAL;
+ /* disable IOMMUs to go out of the way for BIOS */
+ disable_iommus();
+
+ return 0;
}
static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
enable_iommus();
- printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
- (1 << (amd_iommu_aperture_order-20)));
-
printk(KERN_INFO "AMD IOMMU: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
*
****************************************************************************/
+static int __init parse_amd_iommu_dump(char *str)
+{
+ amd_iommu_dump = true;
+
+ return 1;
+}
+
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
return 1;
}
-static int __init parse_amd_iommu_size_options(char *str)
-{
- unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
- if ((order > 24) && (order < 31))
- amd_iommu_aperture_order = order;
-
- return 1;
-}
-
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
__setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2870920f24..076d3881f3d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
* Mikael Pettersson : PM converted to driver model.
*/
+#include <linux/perf_counter.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
@@ -34,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/perf_counter.h>
#include <asm/pgalloc.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
@@ -98,6 +100,29 @@ early_param("lapic", parse_lapic);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go through APIC */
+ outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go directly to BSP */
+ outb(0x00, 0x23);
+}
#endif
#ifdef CONFIG_X86_64
@@ -111,13 +136,19 @@ static __init int setup_apicpmtimer(char *s)
__setup("apicpmtimer", setup_apicpmtimer);
#endif
+int x2apic_mode;
#ifdef CONFIG_X86_X2APIC
-int x2apic;
/* x2apic enabled before OS handover */
static int x2apic_preenabled;
static int disable_x2apic;
static __init int setup_nox2apic(char *str)
{
+ if (x2apic_enabled()) {
+ pr_warning("Bios already enabled x2apic, "
+ "can't enforce nox2apic");
+ return 0;
+ }
+
disable_x2apic = 1;
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
return 0;
@@ -209,6 +240,31 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
+/*
+ * bare function to substitute write operation
+ * and it's _that_ fast :)
+ */
+static void native_apic_write_dummy(u32 reg, u32 v)
+{
+ WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+static u32 native_apic_read_dummy(u32 reg)
+{
+ WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ return 0;
+}
+
+/*
+ * right after this call apic->write/read doesn't do anything
+ * note that there is no restore operation it works one way
+ */
+void apic_disable(void)
+{
+ apic->read = native_apic_read_dummy;
+ apic->write = native_apic_write_dummy;
+}
+
void native_apic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -348,7 +404,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
{
- unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+ unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
unsigned int v = (mask << 16) | (msg_type << 8) | vector;
apic_write(reg, v);
@@ -815,7 +871,7 @@ void clear_local_APIC(void)
u32 v;
/* APIC hasn't been mapped yet */
- if (!x2apic && !apic_phys)
+ if (!x2apic_mode && !apic_phys)
return;
maxlvt = lapic_get_maxlvt();
@@ -1133,6 +1189,7 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
+ perf_counters_lapic_init();
preempt_disable();
@@ -1287,7 +1344,7 @@ void check_x2apic(void)
{
if (x2apic_enabled()) {
pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
- x2apic_preenabled = x2apic = 1;
+ x2apic_preenabled = x2apic_mode = 1;
}
}
@@ -1295,7 +1352,7 @@ void enable_x2apic(void)
{
int msr, msr2;
- if (!x2apic)
+ if (!x2apic_mode)
return;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1304,6 +1361,7 @@ void enable_x2apic(void)
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
+#endif /* CONFIG_X86_X2APIC */
void __init enable_IR_x2apic(void)
{
@@ -1312,32 +1370,21 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
- if (!cpu_has_x2apic)
- return;
-
- if (!x2apic_preenabled && disable_x2apic) {
- pr_info("Skipped enabling x2apic and Interrupt-remapping "
- "because of nox2apic\n");
- return;
+ ret = dmar_table_init();
+ if (ret) {
+ pr_debug("dmar_table_init() failed with %d:\n", ret);
+ goto ir_failed;
}
- if (x2apic_preenabled && disable_x2apic)
- panic("Bios already enabled x2apic, can't enforce nox2apic");
-
- if (!x2apic_preenabled && skip_ioapic_setup) {
- pr_info("Skipped enabling x2apic and Interrupt-remapping "
- "because of skipping io-apic setup\n");
- return;
+ if (!intr_remapping_supported()) {
+ pr_debug("intr-remapping not supported\n");
+ goto ir_failed;
}
- ret = dmar_table_init();
- if (ret) {
- pr_info("dmar_table_init() failed with %d:\n", ret);
- if (x2apic_preenabled)
- panic("x2apic enabled by bios. But IR enabling failed");
- else
- pr_info("Not enabling x2apic,Intr-remapping\n");
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ pr_info("Skipped enabling intr-remap because of skipping "
+ "io-apic setup\n");
return;
}
@@ -1357,19 +1404,16 @@ void __init enable_IR_x2apic(void)
mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
- ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
- if (ret && x2apic_preenabled) {
- local_irq_restore(flags);
- panic("x2apic enabled by bios. But IR enabling failed");
- }
-
+ ret = enable_intr_remapping(x2apic_supported());
if (ret)
goto end_restore;
- if (!x2apic) {
- x2apic = 1;
+ pr_info("Enabled Interrupt-remapping\n");
+
+ if (x2apic_supported() && !x2apic_mode) {
+ x2apic_mode = 1;
enable_x2apic();
+ pr_info("Enabled x2apic\n");
}
end_restore:
@@ -1378,37 +1422,34 @@ end_restore:
* IR enabling failed
*/
restore_IO_APIC_setup(ioapic_entries);
- else
- reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
unmask_8259A();
local_irq_restore(flags);
end:
- if (!ret) {
- if (!x2apic_preenabled)
- pr_info("Enabled x2apic and interrupt-remapping\n");
- else
- pr_info("Enabled Interrupt-remapping\n");
- } else
- pr_err("Failed to enable Interrupt-remapping and x2apic\n");
if (ioapic_entries)
free_ioapic_entries(ioapic_entries);
+
+ if (!ret)
+ return;
+
+ir_failed:
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else if (cpu_has_x2apic)
+ pr_info("Not enabling x2apic,Intr-remapping\n");
#else
if (!cpu_has_x2apic)
return;
if (x2apic_preenabled)
panic("x2apic enabled prior OS handover,"
- " enable CONFIG_INTR_REMAP");
-
- pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
- " and x2apic\n");
+ " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
#endif
return;
}
-#endif /* CONFIG_X86_X2APIC */
+
#ifdef CONFIG_X86_64
/*
@@ -1425,7 +1466,6 @@ static int __init detect_init_APIC(void)
}
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_physical_apicid = 0;
return 0;
}
#else
@@ -1539,32 +1579,49 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
- if (x2apic) {
+ unsigned int new_apicid;
+
+ if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
return;
}
- /*
- * If no local APIC can be found then set up a fake all
- * zeroes page to simulate the local APIC and another
- * one for the IO-APIC.
- */
+ /* If no local APIC can be found return early */
if (!smp_found_config && detect_init_APIC()) {
- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- apic_phys = __pa(apic_phys);
- } else
+ /* lets NOP'ify apic operations */
+ pr_info("APIC: disable apic facility\n");
+ apic_disable();
+ } else {
apic_phys = mp_lapic_addr;
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
- APIC_BASE, apic_phys);
+ /*
+ * acpi lapic path already maps that address in
+ * acpi_register_lapic_address()
+ */
+ if (!acpi_lapic)
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+
+ apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+ APIC_BASE, apic_phys);
+ }
/*
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = read_apic_id();
+ new_apicid = read_apic_id();
+ if (boot_cpu_physical_apicid != new_apicid) {
+ boot_cpu_physical_apicid = new_apicid;
+ /*
+ * yeah -- we lie about apic_version
+ * in case if apic was disabled via boot option
+ * but it's not a problem for SMP compiled kernel
+ * since smp_sanity_check is prepared for such a case
+ * and disable smp mode
+ */
+ apic_version[new_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
}
/*
@@ -1733,8 +1790,7 @@ void __init connect_bsp_APIC(void)
*/
apic_printk(APIC_VERBOSE, "leaving PIC mode, "
"enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
+ imcr_pic_to_apic();
}
#endif
if (apic->enable_apic_mode)
@@ -1762,8 +1818,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
*/
apic_printk(APIC_VERBOSE, "disabling APIC mode, "
"entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
+ imcr_apic_to_pic();
return;
}
#endif
@@ -1969,10 +2024,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
local_irq_save(flags);
disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
if (intr_remapping_enabled)
disable_intr_remapping();
-#endif
+
local_irq_restore(flags);
return 0;
}
@@ -1982,42 +2037,34 @@ static int lapic_resume(struct sys_device *dev)
unsigned int l, h;
unsigned long flags;
int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
- int ret;
+ int ret = 0;
struct IO_APIC_route_entry **ioapic_entries = NULL;
if (!apic_pm_state.active)
return 0;
local_irq_save(flags);
- if (x2apic) {
+ if (intr_remapping_enabled) {
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
WARN(1, "Alloc ioapic_entries in lapic resume failed.");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto restore;
}
ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
WARN(1, "Saving IO-APIC state failed: %d\n", ret);
free_ioapic_entries(ioapic_entries);
- return ret;
+ goto restore;
}
mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
- enable_x2apic();
}
-#else
- if (!apic_pm_state.active)
- return 0;
- local_irq_save(flags);
- if (x2apic)
+ if (x2apic_mode)
enable_x2apic();
-#endif
-
else {
/*
* Make sure the APICBASE points to the right address
@@ -2055,21 +2102,16 @@ static int lapic_resume(struct sys_device *dev)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
-#ifdef CONFIG_INTR_REMAP
- if (intr_remapping_enabled)
- reenable_intr_remapping(EIM_32BIT_APIC_ID);
-
- if (x2apic) {
+ if (intr_remapping_enabled) {
+ reenable_intr_remapping(x2apic_mode);
unmask_8259A();
restore_IO_APIC_setup(ioapic_entries);
free_ioapic_entries(ioapic_entries);
}
-#endif
-
+restore:
local_irq_restore(flags);
-
- return 0;
+ return ret;
}
/*
@@ -2117,31 +2159,14 @@ static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
+
+static int __cpuinit apic_cluster_num(void)
{
int i, clusters, zeros;
unsigned id;
u16 *bios_cpu_apicid;
DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
- /*
- * there is not this kind of box with AMD CPU yet.
- * Some AMD box with quadcore cpu and 8 sockets apicid
- * will be [4, 0x23] or [8, 0x27] could be thought to
- * vsmp box still need checking...
- */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
- return 0;
-
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
@@ -2177,18 +2202,67 @@ __cpuinit int apic_is_clustered_box(void)
++zeros;
}
- /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (is_vsmp_box() && clusters > 1)
+ return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+ if (multi)
+ return 0;
+ pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+ multi = 1;
+ return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+ {
+ .callback = set_multi,
+ .ident = "IBM System Summit2",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+ },
+ },
+ {}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+ if (multi_checked)
+ return;
+
+ dmi_check_system(multi_dmi_table);
+ multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+ dmi_check_multi();
+ if (multi)
return 1;
+ if (!is_vsmp_box())
+ return 0;
+
/*
- * If clusters > 2, then should be multi-chassis.
- * May have to revisit this when multi-core + hyperthreaded CPUs come
- * out, but AFAIK this will work even for them.
+ * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+ * not guaranteed to be synced between boards
*/
- return (clusters > 2);
+ if (apic_cluster_num() > 1)
+ return 1;
+
+ return 0;
}
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 306e5e88fb6..d0c99abc26c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void)
static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
{
- return hard_smp_processor_id() >> index_msb;
+ return initial_apic_id >> index_msb;
}
struct apic apic_flat = {
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
* regardless of how many processors are present (x86_64 ES7000
* is an example).
*/
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "system APIC only can use physical flat");
return 1;
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 30294777557..69328ac8de9 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 30da617d18e..1946fac42ab 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
+#include <asm/hw_irq.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_irq.h>
@@ -129,12 +130,9 @@ struct irq_pin_list {
struct irq_pin_list *next;
};
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
{
struct irq_pin_list *pin;
- int node;
-
- node = cpu_to_node(cpu);
pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
@@ -148,9 +146,6 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- u8 move_desc_pending : 1;
-#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
return cfg;
}
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
{
struct irq_cfg *cfg;
- int node;
-
- node = cpu_to_node(cpu);
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
@@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(cpu);
+ desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
@@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
return 0;
}
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
{
struct irq_pin_list *old_entry, *head, *tail, *entry;
@@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
if (!old_entry)
return;
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry)
return;
@@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
tail = entry;
old_entry = old_entry->next;
while (old_entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
entry = head;
while (entry) {
@@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
}
void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int cpu)
+ struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
struct irq_cfg *old_cfg;
- cfg = get_one_free_irq_cfg(cpu);
+ cfg = get_one_free_irq_cfg(node);
if (!cfg)
return;
@@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
- init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ init_copy_irq_2_pin(old_cfg, cfg, node);
}
static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
old_desc->chip_data = NULL;
}
}
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg = desc->chip_data;
-
- if (!cfg->move_in_progress) {
- /* it means that domain is not changed */
- if (!cpumask_intersects(desc->affinity, mask))
- cfg->move_desc_pending = 1;
- }
-}
-#endif
+/* end for move_irq_desc */
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
#endif
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -518,132 +490,18 @@ static void ioapic_mask_entry(int apic, int pin)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
- cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- entry = cfg->irq_2_pin;
- for (;;) {
- unsigned int reg;
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(irq))
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = entry->next;
- }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
-
- irq = desc->irq;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return BAD_APICID;
-
- /* check that before desc->addinity get updated */
- set_extra_move_desc(desc, mask);
-
- cpumask_copy(desc->affinity, mask);
-
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int dest;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- dest = set_desc_affinity(desc, mask);
- if (dest != BAD_APICID) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, cfg);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list *entry;
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
apic, pin);
@@ -663,7 +521,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin(cpu);
+ entry->next = get_one_free_irq_2_pin(node);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
@@ -672,7 +530,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
int oldapic, int oldpin,
int newapic, int newpin)
{
@@ -692,7 +550,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
}
static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -850,7 +708,6 @@ static int __init ioapic_pirq_setup(char *str)
__setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_INTR_REMAP
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -948,20 +805,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
return 0;
}
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
- struct IO_APIC_route_entry **ioapic_entries)
-
-{
- /*
- * for now plain restore of previous settings.
- * TBD: In the case of OS enabling interrupt-remapping,
- * IO-APIC RTE's need to be setup to point to interrupt-remapping
- * table entries. for now, do a plain restore, and wait for
- * the setup_IO_APIC_irqs() to do proper initialization.
- */
- restore_IO_APIC_setup(ioapic_entries);
-}
-
void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
{
int apic;
@@ -971,7 +814,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
kfree(ioapic_entries);
}
-#endif
/*
* Find the IRQ entry number of a certain pin.
@@ -1032,54 +874,6 @@ static int __init find_isa_irq_apic(int irq, int type)
return -1;
}
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (test_bit(bus, mp_bus_not_pci)) {
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
* EISA Edge/Level control register, ELCR
@@ -1298,6 +1092,64 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG,
+ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_printk(APIC_VERBOSE,
+ "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL)
+ break;
+
+ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ return irq;
+ }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ best_guess = irq;
+ }
+ }
+ }
+ return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
void lock_vector_lock(void)
{
/* Used to the online set of cpus does not change
@@ -1628,58 +1480,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
ioapic_write_entry(apic_id, pin, entry);
}
+static struct {
+ DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id, pin, idx, irq;
+ int apic_id = 0, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
- for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
- idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ apic_id = mp_find_ioapic(0);
+ if (apic_id < 0)
+ apic_id = 0;
+ }
+#endif
- irq = pin_2_irq(idx, apic_id, pin);
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1) {
+ if (!notcon) {
+ notcon = 1;
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ } else
+ apic_printk(APIC_VERBOSE, " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ continue;
+ }
+ if (notcon) {
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ notcon = 0;
+ }
- /*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
- */
- if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
- continue;
+ irq = pin_2_irq(idx, apic_id, pin);
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
- continue;
- }
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(apic_id, irq))
+ continue;
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
- irq_trigger(idx), irq_polarity(idx));
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
}
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, apic_id, pin);
+ /*
+ * don't mark it in pin_programmed, so later acpi could
+ * set it correctly when irq < 16
+ */
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
}
if (notcon)
@@ -1869,7 +1733,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
__apicdebuginit(void) print_local_APIC(void *dummy)
{
- unsigned int v, ver, maxlvt;
+ unsigned int i, v, ver, maxlvt;
u64 icr;
if (apic_verbosity == APIC_QUIET)
@@ -1957,6 +1821,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
v = apic_read(APIC_TDCR);
printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ v = apic_read(APIC_EFEAT);
+ maxlvt = (v >> 16) & 0xff;
+ printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+ v = apic_read(APIC_ECTRL);
+ printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+ for (i = 0; i < maxlvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+ }
+ }
printk("\n");
}
@@ -2005,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void)
__apicdebuginit(int) print_all_ICs(void)
{
print_PIC();
+
+ /* don't print out if apic is not there */
+ if (!cpu_has_apic || disable_apic)
+ return 0;
+
print_all_local_APICs();
print_IO_APIC();
@@ -2360,6 +2241,118 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return BAD_APICID;
+
+ cpumask_copy(desc->affinity, mask);
+
+ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ unsigned int irq;
+ int ret = -1;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ return set_ioapic_affinity_irq_desc(desc, mask);
+}
#ifdef CONFIG_INTR_REMAP
@@ -2374,26 +2367,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
*/
-static void
+static int
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irte irte;
unsigned int dest;
unsigned int irq;
+ int ret = -1;
if (!cpumask_intersects(mask, cpu_online_mask))
- return;
+ return ret;
irq = desc->irq;
if (get_irte(irq, &irte))
- return;
+ return ret;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
+ return ret;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2409,27 +2401,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
send_cleanup_vector(cfg);
cpumask_copy(desc->affinity, mask);
+
+ return 0;
}
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
- migrate_ioapic_irq_desc(desc, mask);
+ return migrate_ioapic_irq_desc(desc, mask);
}
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- set_ir_ioapic_affinity_irq_desc(desc, mask);
+ return set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
+ return 0;
}
#endif
@@ -2491,86 +2486,19 @@ static void irq_complete_move(struct irq_desc **descp)
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- if (likely(!cfg->move_desc_pending))
- return;
-
- /* domain has not changed, but affinity did */
- me = smp_processor_id();
- if (cpumask_test_cpu(me, desc->affinity)) {
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
- cfg->move_desc_pending = 0;
- }
-#endif
+ if (likely(!cfg->move_in_progress))
return;
- }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
-#endif
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
- }
}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
-
- entry = cfg->irq_2_pin;
- for (;;) {
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- io_apic_eoi(apic, pin);
- entry = entry->next;
- }
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- ack_x2APIC_irq();
- eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
- ack_x2APIC_irq();
-}
-#endif
-
static void ack_apic_edge(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,9 +2562,6 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
- if (irq_remapped(irq))
- eoi_ioapic_irq(desc);
-
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2683,22 +2608,50 @@ static void ack_apic_level(unsigned int irq)
}
#ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ io_apic_eoi(apic, pin);
+ entry = entry->next;
+ }
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ir_ack_apic_edge(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_edge(irq);
-#endif
- return ack_apic_edge(irq);
+ ack_APIC_irq();
}
static void ir_ack_apic_level(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_level(irq);
-#endif
- return ack_apic_level(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ ack_APIC_irq();
+ eoi_ioapic_irq(desc);
}
#endif /* CONFIG_INTR_REMAP */
@@ -2903,7 +2856,7 @@ static inline void __init check_timer(void)
{
struct irq_desc *desc = irq_to_desc(0);
struct irq_cfg *cfg = desc->chip_data;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
int apic1, pin1, apic2, pin2;
unsigned long flags;
int no_pin1 = 0;
@@ -2969,7 +2922,7 @@ static inline void __init check_timer(void)
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+ add_pin_to_irq_node(cfg, node, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
} else {
/* for edge trigger, setup_IO_APIC_irq already
@@ -3006,7 +2959,7 @@ static inline void __init check_timer(void)
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
enable_8259A_irq(0);
if (timer_irq_works()) {
@@ -3218,14 +3171,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
{
/* Allocate an unused irq */
unsigned int irq;
unsigned int new;
unsigned long flags;
struct irq_cfg *cfg_new = NULL;
- int cpu = boot_cpu_id;
struct irq_desc *desc_new = NULL;
irq = 0;
@@ -3234,7 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ desc_new = irq_to_desc_alloc_node(new, node);
if (!desc_new) {
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
@@ -3243,6 +3195,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
if (cfg_new->vector != 0)
continue;
+
+ desc_new = move_irq_desc(desc_new, node);
+
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
break;
@@ -3260,11 +3215,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
int create_irq(void)
{
+ int node = cpu_to_node(boot_cpu_id);
unsigned int irq_want;
int irq;
irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
irq = -1;
@@ -3366,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3375,7 +3331,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3387,13 +3343,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg_desc(desc, &msg);
+
+ return 0;
}
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void
+static int
ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -3402,11 +3360,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
struct irte irte;
if (get_irte(irq, &irte))
- return;
+ return -1;
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3423,6 +3381,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
*/
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
+
+ return 0;
}
#endif
@@ -3518,15 +3478,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
unsigned int irq_want;
struct intel_iommu *iommu = NULL;
int index = 0;
+ int node;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
+ node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
return -1;
irq_want = irq + 1;
@@ -3576,7 +3538,7 @@ void arch_teardown_msi_irq(unsigned int irq)
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3585,7 +3547,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3597,6 +3559,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -3630,7 +3594,7 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3639,7 +3603,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3651,6 +3615,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -3707,7 +3673,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3715,11 +3681,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
target_ht_irq(irq, dest, cfg->vector);
+
+ return 0;
}
#endif
@@ -3794,6 +3762,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long flags;
int err;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3807,15 +3777,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3833,10 +3801,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
entry->mask = 1;
mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3900,6 +3868,71 @@ int __init arch_probe_nr_irqs(void)
}
#endif
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int node;
+ int ioapic, pin;
+ int trigger, polarity;
+
+ ioapic = irq_attr->ioapic;
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ if (dev)
+ node = dev_to_node(dev);
+ else
+ node = cpu_to_node(boot_cpu_id);
+
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
+ pin = irq_attr->ioapic_pin;
+ trigger = irq_attr->trigger;
+ polarity = irq_attr->polarity;
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, ioapic, pin);
+ }
+
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+ return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int ioapic, pin;
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ ioapic = irq_attr->ioapic;
+ pin = irq_attr->ioapic_pin;
+ if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[ioapic].apicid, pin);
+ return 0;
+ }
+ set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+ return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
@@ -3980,6 +4013,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
return apic_id;
}
+#endif
int __init io_apic_get_version(int ioapic)
{
@@ -3992,39 +4026,6 @@ int __init io_apic_get_version(int ioapic)
return reg_01.bits.version;
}
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
- struct irq_desc *desc;
- struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc %d\n", irq);
- return 0;
- }
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= NR_IRQS_LEGACY) {
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
- }
-
- setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
- return 0;
-}
-
int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
{
@@ -4055,51 +4056,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
- int pin, ioapic, irq, irq_entry;
+ int pin, ioapic = 0, irq, irq_entry;
struct irq_desc *desc;
- struct irq_cfg *cfg;
const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
-
- /* setup_IO_APIC_irqs could fail to get vector for some device
- * when you have too many devices, because at that time only boot
- * cpu is online.
- */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- if (!cfg->vector) {
- setup_IO_APIC_irq(ioapic, pin, irq, desc,
- irq_trigger(irq_entry),
- irq_polarity(irq_entry));
- continue;
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ ioapic = 0;
+ }
+#endif
- }
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
- /*
- * Honour affinities which have been set in early boot
- */
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
- else
- mask = apic->target_cpus();
+ desc = irq_to_desc(irq);
- if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq_desc(desc, mask);
- else
- set_ioapic_affinity_irq_desc(desc, mask);
- }
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = apic->target_cpus();
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
+ else
+ set_ioapic_affinity_irq_desc(desc, mask);
}
+
}
#endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index ce4fbfa315a..a691302dc3f 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-static void report_broken_nmi(int cpu, int *prev_nmi_count)
+static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
printk(KERN_CONT "\n");
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 01eda2ac65e..440a8bccd91 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -160,7 +160,6 @@ extern struct apic apic_summit;
extern struct apic apic_bigsmp;
extern struct apic apic_es7000;
extern struct apic apic_es7000_cluster;
-extern struct apic apic_default;
struct apic *apic = &apic_default;
EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652bb0e..bc3e880f9b8 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
void __init default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_X2APIC
- if (x2apic && (apic != &apic_x2apic_phys &&
+ if (x2apic_mode && (apic != &apic_x2apic_phys &&
#ifdef CONFIG_X86_UV
apic != &apic_x2apic_uv_x &&
#endif
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9cfe1f415d8..344eee4ac0a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){
rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
}
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT 4
-#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
static const struct cpumask *summit_target_cpus(void)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4a903e2f0d1..8e4cbb255c3 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -10,7 +10,7 @@
#include <asm/apic.h>
#include <asm/ipi.h>
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2bda6935297..ef0ae207a7c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -105,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
#ifdef CONFIG_SMP
unsigned long val;
@@ -562,7 +562,7 @@ void __init uv_system_init(void)
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
- int max_pnode = 0;
+ int gnode_extra, max_pnode = 0;
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
@@ -574,6 +574,13 @@ void __init uv_system_init(void)
mmr_base =
uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
~UV_MMR_ENABLE;
+ pnode_mask = (1 << n_val) - 1;
+ node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+ gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+ gnode_upper = ((unsigned long)gnode_extra << m_val);
+ printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
+ n_val, m_val, gnode_upper, gnode_extra);
+
printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
@@ -583,15 +590,18 @@ void __init uv_system_init(void)
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_blade_info);
get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_node_to_blade);
memset(uv_node_to_blade, 255, bytes);
bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_cpu_to_blade);
memset(uv_cpu_to_blade, 255, bytes);
blade = 0;
@@ -607,11 +617,6 @@ void __init uv_system_init(void)
}
}
- pnode_mask = (1 << n_val) - 1;
- node_id.v = uv_read_local_mmr(UVH_NODE_ID);
- gnode_upper = (((unsigned long)node_id.s.node_id) &
- ~((1 << n_val) - 1)) << m_val;
-
uv_bios_init();
uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
&sn_coherency_id, &sn_region_size);
@@ -634,6 +639,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+ uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 5a6aa1c1162..1a830cbd701 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -146,4 +146,5 @@ void foo(void)
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72f062fb4b..898ecc47e12 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -125,6 +125,7 @@ int main(void)
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
BLANK();
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9a06e..3efcb2b96a1 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
#
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
#
# Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_X86_MCE) += mcheck/
-obj-$(CONFIG_MTRR) += mtrr/
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE) += mcheck/
+obj-$(CONFIG_MTRR) += mtrr/
+obj-$(CONFIG_CPU_FREQ) += cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459daa6..e5b27d8f1b4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
+#include <asm/pci-direct.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
@@ -272,7 +273,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
int cpu = smp_processor_id();
int node;
- unsigned apicid = hard_smp_processor_id();
+ unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
node = c->phys_proc_id;
if (apicid_to_node[apicid] != NUMA_NO_NODE)
@@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
(c->x86_model == 8 && c->x86_mask >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+ /* check CPU config space for extended APIC ID */
+ if (c->x86 >= 0xf) {
+ unsigned int val;
+ val = read_pci_config(0, 24, 0, 0x68);
+ if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ }
+#endif
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 77848d9fca6..3ffdcfa9abd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -299,7 +300,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
void load_percpu_segment(int cpu)
{
@@ -768,6 +770,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_identify)
this_cpu->c_identify(c);
+ /* Clear/Set all flags overriden by options, after probe */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
@@ -813,6 +821,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#endif
init_hypervisor(c);
+
+ /*
+ * Clear/Set all flags overriden by options, need do it
+ * before following smp all cpus cap AND.
+ */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -825,10 +843,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
- /* Clear all flags overriden by options */
- for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
@@ -861,6 +875,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
+ init_hw_perf_counters();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab96c6..6b2a52dd040 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -32,9 +32,7 @@
static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
static DEFINE_MUTEX(cpu_debug_lock);
@@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = {
{ "value", CPU_REG_ALL, 1 },
};
-/* Intel Registers Range */
-static struct cpu_debug_range cpu_intel_range[] = {
- { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
- { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
- { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
- { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
- { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
- { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
-
- { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
- { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
- { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
- { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
-
- { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
- { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
- { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
- { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
-
- { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
- { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
- { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
- { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
-
- { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
- { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
- { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
- { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
-
- { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
- { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
- { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
- { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
- { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
-
- { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
- { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
- { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
- { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
- { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
- { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
- { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
- { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
-
- { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
- { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
- { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
- { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
- { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
- { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
- { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
- { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
-
- { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
- { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
- { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
- { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
- { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
- { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
-
- { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
- { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
- { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
- { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
- { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
- { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
- { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
- { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
- { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
- { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
- { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
- { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
-
- { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
- { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
- { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
- { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
- { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
- { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
- { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
- { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
- { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
- { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
- { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
-
- { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
- { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
- { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
-
- { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
-
- { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
- { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
- { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
- { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
+/* CPU Registers Range */
+static struct cpu_debug_range cpu_reg_range[] = {
+ { 0x00000000, 0x00000001, CPU_MC, },
+ { 0x00000006, 0x00000007, CPU_MONITOR, },
+ { 0x00000010, 0x00000010, CPU_TIME, },
+ { 0x00000011, 0x00000013, CPU_PMC, },
+ { 0x00000017, 0x00000017, CPU_PLATFORM, },
+ { 0x0000001B, 0x0000001B, CPU_APIC, },
+ { 0x0000002A, 0x0000002B, CPU_POWERON, },
+ { 0x0000002C, 0x0000002C, CPU_FREQ, },
+ { 0x0000003A, 0x0000003A, CPU_CONTROL, },
+ { 0x00000040, 0x00000047, CPU_LBRANCH, },
+ { 0x00000060, 0x00000067, CPU_LBRANCH, },
+ { 0x00000079, 0x00000079, CPU_BIOS, },
+ { 0x00000088, 0x0000008A, CPU_CACHE, },
+ { 0x0000008B, 0x0000008B, CPU_BIOS, },
+ { 0x0000009B, 0x0000009B, CPU_MONITOR, },
+ { 0x000000C1, 0x000000C4, CPU_PMC, },
+ { 0x000000CD, 0x000000CD, CPU_FREQ, },
+ { 0x000000E7, 0x000000E8, CPU_PERF, },
+ { 0x000000FE, 0x000000FE, CPU_MTRR, },
+
+ { 0x00000116, 0x0000011E, CPU_CACHE, },
+ { 0x00000174, 0x00000176, CPU_SYSENTER, },
+ { 0x00000179, 0x0000017B, CPU_MC, },
+ { 0x00000186, 0x00000189, CPU_PMC, },
+ { 0x00000198, 0x00000199, CPU_PERF, },
+ { 0x0000019A, 0x0000019A, CPU_TIME, },
+ { 0x0000019B, 0x0000019D, CPU_THERM, },
+ { 0x000001A0, 0x000001A0, CPU_MISC, },
+ { 0x000001C9, 0x000001C9, CPU_LBRANCH, },
+ { 0x000001D7, 0x000001D8, CPU_LBRANCH, },
+ { 0x000001D9, 0x000001D9, CPU_DEBUG, },
+ { 0x000001DA, 0x000001E0, CPU_LBRANCH, },
+
+ { 0x00000200, 0x0000020F, CPU_MTRR, },
+ { 0x00000250, 0x00000250, CPU_MTRR, },
+ { 0x00000258, 0x00000259, CPU_MTRR, },
+ { 0x00000268, 0x0000026F, CPU_MTRR, },
+ { 0x00000277, 0x00000277, CPU_PAT, },
+ { 0x000002FF, 0x000002FF, CPU_MTRR, },
+
+ { 0x00000300, 0x00000311, CPU_PMC, },
+ { 0x00000345, 0x00000345, CPU_PMC, },
+ { 0x00000360, 0x00000371, CPU_PMC, },
+ { 0x0000038D, 0x00000390, CPU_PMC, },
+ { 0x000003A0, 0x000003BE, CPU_PMC, },
+ { 0x000003C0, 0x000003CD, CPU_PMC, },
+ { 0x000003E0, 0x000003E1, CPU_PMC, },
+ { 0x000003F0, 0x000003F2, CPU_PMC, },
+
+ { 0x00000400, 0x00000417, CPU_MC, },
+ { 0x00000480, 0x0000048B, CPU_VMX, },
+
+ { 0x00000600, 0x00000600, CPU_DEBUG, },
+ { 0x00000680, 0x0000068F, CPU_LBRANCH, },
+ { 0x000006C0, 0x000006CF, CPU_LBRANCH, },
+
+ { 0x000107CC, 0x000107D3, CPU_PMC, },
+
+ { 0xC0000080, 0xC0000080, CPU_FEATURES, },
+ { 0xC0000081, 0xC0000084, CPU_CALL, },
+ { 0xC0000100, 0xC0000102, CPU_BASE, },
+ { 0xC0000103, 0xC0000103, CPU_TIME, },
+
+ { 0xC0010000, 0xC0010007, CPU_PMC, },
+ { 0xC0010010, 0xC0010010, CPU_CONF, },
+ { 0xC0010015, 0xC0010015, CPU_CONF, },
+ { 0xC0010016, 0xC001001A, CPU_MTRR, },
+ { 0xC001001D, 0xC001001D, CPU_MTRR, },
+ { 0xC001001F, 0xC001001F, CPU_CONF, },
+ { 0xC0010030, 0xC0010035, CPU_BIOS, },
+ { 0xC0010044, 0xC0010048, CPU_MC, },
+ { 0xC0010050, 0xC0010056, CPU_SMM, },
+ { 0xC0010058, 0xC0010058, CPU_CONF, },
+ { 0xC0010060, 0xC0010060, CPU_CACHE, },
+ { 0xC0010061, 0xC0010068, CPU_SMM, },
+ { 0xC0010069, 0xC001006B, CPU_SMM, },
+ { 0xC0010070, 0xC0010071, CPU_SMM, },
+ { 0xC0010111, 0xC0010113, CPU_SMM, },
+ { 0xC0010114, 0xC0010118, CPU_SVM, },
+ { 0xC0010140, 0xC0010141, CPU_OSVM, },
+ { 0xC0011022, 0xC0011023, CPU_CONF, },
};
-/* AMD Registers Range */
-static struct cpu_debug_range cpu_amd_range[] = {
- { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, },
- { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, },
- { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, },
- { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS },
- { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS },
- { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, },
-
- { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, },
- { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, },
- { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, },
- { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, },
-
- { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, },
- { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, },
- { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, },
- { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, },
- { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, },
- { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, },
-
- { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, },
-
- { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, },
- { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, },
- { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, },
- { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, },
-
- { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, },
- { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, },
- { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, },
- { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, },
- { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, },
- { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, },
- { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, },
- { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, },
- { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, },
- { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, },
- { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, },
- { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, },
- { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, },
- { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, },
- { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, },
- { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, },
- { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, },
- { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, },
-};
-
-
-/* Intel */
-static int get_intel_modelflag(unsigned model)
-{
- int flag;
-
- switch (model) {
- case 0x0501:
- case 0x0502:
- case 0x0504:
- flag = CPU_INTEL_PENTIUM;
- break;
- case 0x0601:
- case 0x0603:
- case 0x0605:
- case 0x0607:
- case 0x0608:
- case 0x060A:
- case 0x060B:
- flag = CPU_INTEL_P6;
- break;
- case 0x0609:
- case 0x060D:
- flag = CPU_INTEL_PENTIUM_M;
- break;
- case 0x060E:
- flag = CPU_INTEL_CORE;
- break;
- case 0x060F:
- case 0x0617:
- flag = CPU_INTEL_CORE2;
- break;
- case 0x061C:
- flag = CPU_INTEL_ATOM;
- break;
- case 0x0F00:
- case 0x0F01:
- case 0x0F02:
- case 0x0F03:
- case 0x0F04:
- flag = CPU_INTEL_XEON_P4;
- break;
- case 0x0F06:
- flag = CPU_INTEL_XEON_MP;
- break;
- default:
- flag = CPU_NONE;
- break;
- }
-
- return flag;
-}
-
-/* AMD */
-static int get_amd_modelflag(unsigned model)
-{
- int flag;
-
- switch (model >> 8) {
- case 0x6:
- flag = CPU_AMD_K6;
- break;
- case 0x7:
- flag = CPU_AMD_K7;
- break;
- case 0x8:
- flag = CPU_AMD_K8;
- break;
- case 0xf:
- flag = CPU_AMD_0F;
- break;
- case 0x10:
- flag = CPU_AMD_10;
- break;
- case 0x11:
- flag = CPU_AMD_11;
- break;
- default:
- flag = CPU_NONE;
- break;
- }
-
- return flag;
-}
-
-static int get_cpu_modelflag(unsigned cpu)
-{
- int flag;
-
- flag = per_cpu(cpu_model, cpu);
-
- switch (flag >> 16) {
- case X86_VENDOR_INTEL:
- flag = get_intel_modelflag(flag);
- break;
- case X86_VENDOR_AMD:
- flag = get_amd_modelflag(flag & 0xffff);
- break;
- default:
- flag = CPU_NONE;
- break;
- }
-
- return flag;
-}
-
-static int get_cpu_range_count(unsigned cpu)
-{
- int index;
-
- switch (per_cpu(cpu_model, cpu) >> 16) {
- case X86_VENDOR_INTEL:
- index = ARRAY_SIZE(cpu_intel_range);
- break;
- case X86_VENDOR_AMD:
- index = ARRAY_SIZE(cpu_amd_range);
- break;
- default:
- index = 0;
- break;
- }
-
- return index;
-}
-
static int is_typeflag_valid(unsigned cpu, unsigned flag)
{
- unsigned vendor, modelflag;
- int i, index;
+ int i;
/* Standard Registers should be always valid */
if (flag >= CPU_TSS)
return 1;
- modelflag = per_cpu(cpu_modelflag, cpu);
- vendor = per_cpu(cpu_model, cpu) >> 16;
- index = get_cpu_range_count(cpu);
-
- for (i = 0; i < index; i++) {
- switch (vendor) {
- case X86_VENDOR_INTEL:
- if ((cpu_intel_range[i].model & modelflag) &&
- (cpu_intel_range[i].flag & flag))
- return 1;
- break;
- case X86_VENDOR_AMD:
- if ((cpu_amd_range[i].model & modelflag) &&
- (cpu_amd_range[i].flag & flag))
- return 1;
- break;
- }
+ for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
+ if (cpu_reg_range[i].flag == flag)
+ return 1;
}
/* Invalid */
@@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag)
static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
int index, unsigned flag)
{
- unsigned modelflag;
-
- modelflag = per_cpu(cpu_modelflag, cpu);
- *max = 0;
- switch (per_cpu(cpu_model, cpu) >> 16) {
- case X86_VENDOR_INTEL:
- if ((cpu_intel_range[index].model & modelflag) &&
- (cpu_intel_range[index].flag & flag)) {
- *min = cpu_intel_range[index].min;
- *max = cpu_intel_range[index].max;
- }
- break;
- case X86_VENDOR_AMD:
- if ((cpu_amd_range[index].model & modelflag) &&
- (cpu_amd_range[index].flag & flag)) {
- *min = cpu_amd_range[index].min;
- *max = cpu_amd_range[index].max;
- }
- break;
- }
+ if (cpu_reg_range[index].flag == flag) {
+ *min = cpu_reg_range[index].min;
+ *max = cpu_reg_range[index].max;
+ } else
+ *max = 0;
return *max;
}
@@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
unsigned msr, msr_min, msr_max;
struct cpu_private *priv;
u32 low, high;
- int i, range;
+ int i;
if (seq) {
priv = seq->private;
@@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
}
}
- range = get_cpu_range_count(cpu);
-
- for (i = 0; i < range; i++) {
+ for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
continue;
@@ -588,8 +369,20 @@ static void print_apic(void *arg)
seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ unsigned int i, v, maxeilvt;
+
+ v = apic_read(APIC_EFEAT);
+ maxeilvt = (v >> 16) & 0xff;
+ seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+ seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
+ for (i = 0; i < maxeilvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+ }
+ }
+#endif /* CONFIG_X86_LOCAL_APIC */
seq_printf(seq, "\n MSR\t:\n");
}
@@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
{
struct dentry *cpu_dentry = NULL;
unsigned reg, reg_min, reg_max;
- int i, range, err = 0;
+ int i, err = 0;
char reg_dir[12];
u32 low, high;
- range = get_cpu_range_count(cpu);
-
- for (i = 0; i < range; i++) {
+ for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
cpu_base[type].flag))
continue;
@@ -850,10 +641,6 @@ static int cpu_init_cpu(void)
cpui = &cpu_data(cpu);
if (!cpu_has(cpui, X86_FEATURE_MSR))
continue;
- per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
- (cpui->x86 << 8) |
- (cpui->x86_model));
- per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
sprintf(cpu_dir, "cpu%d", cpu);
cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c83987547..f138c6c389b 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -220,11 +220,14 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver"
+ tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
select CPU_FREQ_TABLE
- depends on X86_32
+ depends on X86_32 && EXPERIMENTAL
help
- This adds the CPUFreq driver for VIA C7 processors.
+ This adds the CPUFreq driver for VIA C7 processors. However, this driver
+ does not have any safeguards to prevent operating the CPU out of spec
+ and is thus considered dangerous. Please use the regular ACPI cpufreq
+ driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
If in doubt, say N.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 752e8c6b2c7..ae9b503220c 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid)
{
struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
- if (cpu->x86_vendor != X86_VENDOR_INTEL ||
- !cpu_has(cpu, X86_FEATURE_EST))
- return 0;
-
- return 1;
+ return cpu_has(cpu, X86_FEATURE_EST);
}
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7437fa133c0..daed39ba261 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
}
#endif
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
unsigned node;
int cpu = smp_processor_id();
- int apicid = hard_smp_processor_id();
+ int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
@@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
/* Work around errata */
- srat_detect_node();
+ srat_detect_node(c);
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 483eda96e10..789efe217e1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/smp.h>
+#include <asm/k8.h>
#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -159,14 +160,6 @@ struct _cpuid4_info_regs {
unsigned long can_disable;
};
-#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
-static struct pci_device_id k8_nb_id[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
- {}
-};
-#endif
-
unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -207,10 +200,17 @@ union l3_cache {
};
static const unsigned short __cpuinitconst assocs[] = {
- [1] = 1, [2] = 2, [4] = 4, [6] = 8,
- [8] = 16, [0xa] = 32, [0xb] = 48,
+ [1] = 1,
+ [2] = 2,
+ [4] = 4,
+ [6] = 8,
+ [8] = 16,
+ [0xa] = 32,
+ [0xb] = 48,
[0xc] = 64,
- [0xf] = 0xffff // ??
+ [0xd] = 96,
+ [0xe] = 128,
+ [0xf] = 0xffff /* fully associative - no way to show this currently */
};
static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
@@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
if (leaf == 3)
- eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+ eax->split.num_threads_sharing =
+ current_cpu_data.x86_max_cores - 1;
else
eax->split.num_threads_sharing = 0;
eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
@@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
if (index < 3)
return;
+
+ if (boot_cpu_data.x86 == 0x11)
+ return;
+
+ /* see erratum #382 */
+ if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+ return;
+
this_leaf->can_disable = 1;
}
@@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
#define to_object(k) container_of(k, struct _index_kobject, kobj)
#define to_attr(a) container_of(a, struct _cache_attr, attr)
-#ifdef CONFIG_PCI
-static struct pci_dev *get_k8_northbridge(int node)
-{
- struct pci_dev *dev = NULL;
- int i;
-
- for (i = 0; i <= node; i++) {
- do {
- dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
- if (!dev)
- break;
- } while (!pci_match_id(&k8_nb_id[0], dev));
- if (!dev)
- break;
- }
- return dev;
-}
-#else
-static struct pci_dev *get_k8_northbridge(int node)
-{
- return NULL;
-}
-#endif
-
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
{
- const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
- int node = cpu_to_node(cpumask_first(mask));
- struct pci_dev *dev = NULL;
- ssize_t ret = 0;
- int i;
+ int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+ int node = cpu_to_node(cpu);
+ struct pci_dev *dev = node_to_k8_nb_misc(node);
+ unsigned int reg = 0;
if (!this_leaf->can_disable)
- return sprintf(buf, "Feature not enabled\n");
-
- dev = get_k8_northbridge(node);
- if (!dev) {
- printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
return -EINVAL;
- }
- for (i = 0; i < 2; i++) {
- unsigned int reg;
+ if (!dev)
+ return -EINVAL;
- pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
- ret += sprintf(buf, "%sEntry: %d\n", buf, i);
- ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
- buf,
- reg & 0x80000000 ? "Disabled" : "Allowed",
- reg & 0x40000000 ? "Disabled" : "Allowed");
- ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
- buf, (reg & 0x30000) >> 16, reg & 0xfff);
- }
- return ret;
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
-static ssize_t
-store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
- size_t count)
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+ const char *buf, size_t count, unsigned int index)
{
- const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
- int node = cpu_to_node(cpumask_first(mask));
- struct pci_dev *dev = NULL;
- unsigned int ret, index, val;
+ int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+ int node = cpu_to_node(cpu);
+ struct pci_dev *dev = node_to_k8_nb_misc(node);
+ unsigned long val = 0;
+ unsigned int scrubber = 0;
if (!this_leaf->can_disable)
- return 0;
-
- if (strlen(buf) > 15)
return -EINVAL;
- ret = sscanf(buf, "%x %x", &index, &val);
- if (ret != 2)
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!dev)
return -EINVAL;
- if (index > 1)
+
+ if (strict_strtoul(buf, 10, &val) < 0)
return -EINVAL;
val |= 0xc0000000;
- dev = get_k8_northbridge(node);
- if (!dev) {
- printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
- return -EINVAL;
- }
+
+ pci_read_config_dword(dev, 0x58, &scrubber);
+ scrubber &= ~0x1f000000;
+ pci_write_config_dword(dev, 0x58, scrubber);
pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
wbinvd();
pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return count;
+}
- return 1;
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
}
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
struct _cache_attr {
struct attribute attr;
@@ -808,7 +795,10 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
-static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+ show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+ show_cache_disable_1, store_cache_disable_1);
static struct attribute * default_attrs[] = {
&type.attr,
@@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = {
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
- &cache_disable.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index cef3ee30744..65a0fceedcd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -15,7 +15,6 @@
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
-#include <asm/apic.h>
asmlinkage void smp_thermal_interrupt(void)
{
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ce0fe4b5c04..1d584a18a50 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits)
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
return 0;
- rdmsr(MTRRdefType_MSR, def, dummy);
+ rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
@@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
*/
if (!is_cpu(INTEL) || disable_mtrr_trim)
return 0;
- rdmsr(MTRRdefType_MSR, def, dummy);
+ rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d21d4fb161f..0543f69f0b2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -20,9 +20,9 @@ struct fixed_range_block {
};
static struct fixed_range_block fixed_range_blocks[] = {
- { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */
- { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */
- { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */
+ { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
+ { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
+ { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
{}
};
@@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs)
k8_check_syscfg_dram_mod_en();
- rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+ rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
for (i = 0; i < 2; i++)
- rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+ rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
for (i = 0; i < 8; i++)
- rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+ rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
}
void mtrr_save_fixed_ranges(void *info)
@@ -310,7 +310,7 @@ void __init get_mtrr_state(void)
vrs = mtrr_state.var_ranges;
- rdmsr(MTRRcap_MSR, lo, dummy);
+ rdmsr(MSR_MTRRcap, lo, dummy);
mtrr_state.have_fixed = (lo >> 8) & 1;
for (i = 0; i < num_var_ranges; i++)
@@ -318,7 +318,7 @@ void __init get_mtrr_state(void)
if (mtrr_state.have_fixed)
get_fixed_ranges(mtrr_state.fixed_ranges);
- rdmsr(MTRRdefType_MSR, lo, dummy);
+ rdmsr(MSR_MTRRdefType, lo, dummy);
mtrr_state.def_type = (lo & 0xff);
mtrr_state.enabled = (lo & 0xc00) >> 10;
@@ -583,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
__flush_tlb();
/* Save MTRR state */
- rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+ rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Disable MTRRs, and set the default type to uncached */
- mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
}
static void post_set(void) __releases(set_atomicity_lock)
@@ -595,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock)
__flush_tlb();
/* Intel (P6) standard MTRRs */
- mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
@@ -707,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
static int generic_have_wrcomb(void)
{
unsigned long config, dummy;
- rdmsr(MTRRcap_MSR, config, dummy);
+ rdmsr(MSR_MTRRcap, config, dummy);
return (config & (1 << 10));
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 03cda01f57c..8fc248b5aea 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void)
unsigned long config = 0, dummy;
if (use_intel()) {
- rdmsr(MTRRcap_MSR, config, dummy);
+ rdmsr(MSR_MTRRcap, config, dummy);
} else if (is_cpu(AMD))
config = 2;
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 77f67f7b347..7538b767f20 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -5,21 +5,6 @@
#include <linux/types.h>
#include <linux/stddef.h>
-#define MTRRcap_MSR 0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
#define MTRR_CHANGE_MASK_FIXED 0x01
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 7f7e2753685..1f5fb1588d1 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
if (use_intel())
/* Save MTRR state */
- rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+ rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
else
/* Cyrix ARRs - everything else were excluded at the top */
ctxt->ccr3 = getCx86(CX86_CCR3);
@@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
{
if (use_intel())
/* Disable MTRRs, and set the default type to uncached */
- mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+ mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
ctxt->deftype_hi);
else if (is_cpu(CYRIX))
/* Cyrix ARRs - everything else were excluded at the top */
@@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
/* Restore MTRRdefType */
if (use_intel())
/* Intel (P6) standard MTRRs */
- mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+ mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
else
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, ctxt->ccr3);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 00000000000..895c82e7845
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1704 @@
+/*
+ * Performance counter x86 architecture code
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+
+static u64 perf_counter_mask __read_mostly;
+
+struct cpu_hw_counters {
+ struct perf_counter *counters[X86_PMC_IDX_MAX];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long interrupts;
+ int enabled;
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+ const char *name;
+ int version;
+ int (*handle_irq)(struct pt_regs *);
+ void (*disable_all)(void);
+ void (*enable_all)(void);
+ void (*enable)(struct hw_perf_counter *, int);
+ void (*disable)(struct hw_perf_counter *, int);
+ unsigned eventsel;
+ unsigned perfctr;
+ u64 (*event_map)(int);
+ u64 (*raw_event)(u64);
+ int max_events;
+ int num_counters;
+ int num_counters_fixed;
+ int counter_bits;
+ u64 counter_mask;
+ u64 max_period;
+ u64 intel_ctrl;
+};
+
+static struct x86_pmu x86_pmu __read_mostly;
+
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
+ .enabled = 1,
+};
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
+};
+
+static u64 intel_pmu_event_map(int event)
+{
+ return intel_perfmon_event_map[event];
+}
+
+/*
+ * Generalized hw caching related event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'event makes no sense on
+ * this CPU', any other value means the raw event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static u64 __read_mostly hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static const u64 nehalem_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
+ [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
+ [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
+ [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static const u64 core2_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static const u64 atom_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 event)
+{
+#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
+#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK \
+ (CORE_EVNTSEL_EVENT_MASK | \
+ CORE_EVNTSEL_UNIT_MASK | \
+ CORE_EVNTSEL_EDGE_MASK | \
+ CORE_EVNTSEL_INV_MASK | \
+ CORE_EVNTSEL_COUNTER_MASK)
+
+ return event & CORE_EVNTSEL_MASK;
+}
+
+static const u64 amd_0f_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
+ [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
+ [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
+ [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+};
+
+static u64 amd_pmu_event_map(int event)
+{
+ return amd_perfmon_event_map[event];
+}
+
+static u64 amd_pmu_raw_event(u64 event)
+{
+#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
+#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
+#define K7_EVNTSEL_INV_MASK 0x000800000ULL
+#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
+
+#define K7_EVNTSEL_MASK \
+ (K7_EVNTSEL_EVENT_MASK | \
+ K7_EVNTSEL_UNIT_MASK | \
+ K7_EVNTSEL_EDGE_MASK | \
+ K7_EVNTSEL_INV_MASK | \
+ K7_EVNTSEL_COUNTER_MASK)
+
+ return event & K7_EVNTSEL_MASK;
+}
+
+/*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static u64
+x86_perf_counter_update(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
+{
+ int shift = 64 - x86_pmu.counter_bits;