From fb3bbd6a663fe972611676381adc4c60ddfe61ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 22 May 2008 18:22:30 -0700 Subject: x86: fix APIC warning on 32bit v2 for http://bugzilla.kernel.org/show_bug.cgi?id=10613 BIOS bug, APIC version is 0 for CPU#0! fixing up to 0x10. (tell your hw vendor) v2: fix 64 bit compilation Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Gabriel C Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc6c41..33c5216fd3e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) static void __cpuinit acpi_register_lapic(int id, u8 enabled) { + unsigned int ver = 0; + if (!enabled) { ++disabled_cpus; return; } - generic_processor_info(id, 0); +#ifdef CONFIG_X86_32 + if (boot_cpu_physical_apicid != -1U) + ver = apic_version[boot_cpu_physical_apicid]; +#endif + + generic_processor_info(id, ver); } static int __init @@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address) mp_lapic_addr = address; set_fixmap_nocache(FIX_APIC_BASE, address); - if (boot_cpu_physical_apicid == -1U) + if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); +#ifdef CONFIG_X86_32 + apic_version[boot_cpu_physical_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); +#endif + } } static int __init early_acpi_parse_madt_lapic_addr_ovr(void) -- cgit v1.2.3 From deef325086c3897393b8f7d6bccd03405244fe18 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:44:38 +0200 Subject: x86: disable preemption in native_smp_prepare_cpus Priit Laes reported the following warning: Call Trace: [] warn_on_slowpath+0x51/0x63 [] sys_ioctl+0x2d/0x5d [] _spin_lock+0xe/0x24 [] task_rq_lock+0x3d/0x73 [] set_cpu_sibling_map+0x336/0x350 [] read_apic_id+0x30/0x62 [] verify_local_APIC+0x90/0x138 [] native_smp_prepare_cpus+0x1f9/0x305 [] kernel_init+0x59/0x2d9 [] _spin_unlock_irq+0x11/0x2b [] child_rip+0xa/0x12 [] kernel_init+0x0/0x2d9 [] child_rip+0x0/0x12 fix this by generally disabling preemption in native_smp_prepare_cpus(). Reported-and-bisected-by: Priit Laes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 38988491c62..56078d61c79 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void) */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + preempt_disable(); nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; @@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); - return; + goto out; } preempt_disable(); @@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); +out: + preempt_enable(); } /* * Early setup to make printk work. -- cgit v1.2.3 From 5c1ea08215f1f830dfaf4819a5f22efca41c3832 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 25 May 2008 11:13:32 -0400 Subject: x86: enable preemption in delay The RT team has been searching for a nasty latency. This latency shows up out of the blue and has been seen to be as big as 5ms! Using ftrace I found the cause of the latency. pcscd-2995 3dNh1 52360300us : irq_exit (smp_apic_timer_interrupt) pcscd-2995 3dN.2 52360301us : idle_cpu (irq_exit) pcscd-2995 3dN.2 52360301us : rcu_irq_exit (irq_exit) pcscd-2995 3dN.1 52360771us : smp_apic_timer_interrupt (apic_timer_interrupt ) pcscd-2995 3dN.1 52360771us : exit_idle (smp_apic_timer_interrupt) Here's an example of a 400 us latency. pcscd took a timer interrupt and returned with "need resched" enabled, but did not reschedule until after the next interrupt came in at 52360771us 400us later! At first I thought we somehow missed a preemption check in entry.S. But I also noticed that this always seemed to happen during a __delay call. pcscd-2995 3dN.2 52360836us : rcu_irq_exit (irq_exit) pcscd-2995 3.N.. 52361265us : preempt_schedule (__delay) Looking at the x86 delay, I found my problem. In git commit 35d5d08a085c56f153458c3f5d8ce24123617faf, Andrew Morton placed preempt_disable around the entire delay due to TSC's not working nicely on SMP. Unfortunately for those that care about latencies this is devastating! Especially when we have callers to mdelay(8). Here I enable preemption during the loop and account for anytime the task migrates to a new CPU. The delay asked for may be extended a bit by the migration, but delay only guarantees that it will delay for that minimum time. Delaying longer should not be an issue. [ Thanks to Thomas Gleixner for spotting that cpu wasn't updated, and to place the rep_nop between preempt_enabled/disable. ] Signed-off-by: Steven Rostedt Cc: akpm@osdl.org Cc: Clark Williams Cc: Peter Zijlstra Cc: "Luis Claudio R. Goncalves" Cc: Gregory Haskins Cc: Linus Torvalds Cc: Andi Kleen Signed-off-by: Thomas Gleixner --- arch/x86/lib/delay_32.c | 31 +++++++++++++++++++++++++++---- arch/x86/lib/delay_64.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 4535e6d147a..d710f2d167b 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -44,13 +44,36 @@ static void delay_loop(unsigned long loops) static void delay_tsc(unsigned long loops) { unsigned long bclock, now; + int cpu; - preempt_disable(); /* TSC's are per-cpu */ + preempt_disable(); + cpu = smp_processor_id(); rdtscl(bclock); - do { - rep_nop(); + for (;;) { rdtscl(now); - } while ((now-bclock) < loops); + if ((now - bclock) >= loops) + break; + + /* Allow RT tasks to run */ + preempt_enable(); + rep_nop(); + preempt_disable(); + + /* + * It is possible that we moved to another CPU, and + * since TSC's are per-cpu we need to calculate + * that. The delay must guarantee that we wait "at + * least" the amount of time. Being moved to another + * CPU could make the wait longer but we just need to + * make sure we waited long enough. Rebalance the + * counter for this CPU. + */ + if (unlikely(cpu != smp_processor_id())) { + loops -= (now - bclock); + cpu = smp_processor_id(); + rdtscl(bclock); + } + } preempt_enable(); } diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index bbc61051851..4c441be9264 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -31,14 +31,36 @@ int __devinit read_current_timer(unsigned long *timer_value) void __delay(unsigned long loops) { unsigned bclock, now; + int cpu; - preempt_disable(); /* TSC's are pre-cpu */ + preempt_disable(); + cpu = smp_processor_id(); rdtscl(bclock); - do { - rep_nop(); + for (;;) { rdtscl(now); + if ((now - bclock) >= loops) + break; + + /* Allow RT tasks to run */ + preempt_enable(); + rep_nop(); + preempt_disable(); + + /* + * It is possible that we moved to another CPU, and + * since TSC's are per-cpu we need to calculate + * that. The delay must guarantee that we wait "at + * least" the amount of time. Being moved to another + * CPU could make the wait longer but we just need to + * make sure we waited long enough. Rebalance the + * counter for this CPU. + */ + if (unlikely(cpu != smp_processor_id())) { + loops -= (now - bclock); + cpu = smp_processor_id(); + rdtscl(bclock); + } } - while ((now-bclock) < loops); preempt_enable(); } EXPORT_SYMBOL(__delay); -- cgit v1.2.3 From e8a496ac8cd00cabbdaa373db4818a9ad19a1c5a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 23 May 2008 16:26:37 -0700 Subject: x86: fix broken math-emu with lazy allocation of fpu area Fix the math emulation that got broken with the recent lazy allocation of FPU area. init_fpu() need to be added for the math-emulation path aswell for the FPU area allocation. math emulation enabled kernel booted fine with this, in the presence of "no387 nofxsr" boot param. Signed-off-by: Suresh Siddha Cc: hpa@zytor.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i387.c | 44 ++++++++++++++++++++++++++++--------------- arch/x86/math-emu/fpu_entry.c | 13 ++++++++----- include/asm-x86/i387.h | 2 ++ 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e03cc952f23..eb9ddd8efb8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void) void __init init_thread_xstate(void) { + if (!HAVE_HWFP) { + xstate_size = sizeof(struct i387_soft_struct); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -94,7 +99,7 @@ void __cpuinit fpu_init(void) int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { - if (tsk == current) + if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); return 0; } @@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk) return -ENOMEM; } +#ifdef CONFIG_X86_32 + if (!HAVE_HWFP) { + memset(tsk->thread.xstate, 0, xstate_size); + finit(); + set_stopped_child_used_math(tsk); + return 0; + } +#endif + if (cpu_has_fxsr) { struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; @@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; + if (!HAVE_HWFP) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, @@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; set_stopped_child_used_math(target); + if (!HAVE_HWFP) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, -1); @@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int restore_i387_ia32(struct _fpstate_ia32 __user *buf) { int err; + struct task_struct *tsk = current; - if (HAVE_HWFP) { - struct task_struct *tsk = current; - + if (HAVE_HWFP) clear_fpu(tsk); - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + if (HAVE_HWFP) { if (cpu_has_fxsr) err = restore_i387_fxsave(buf); else diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 6e38d877ea7..c7b06feb139 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "fpu_system.h" #include "fpu_emu.h" @@ -146,6 +147,13 @@ asmlinkage void math_emulate(long arg) unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; + if (!used_math()) { + if (init_fpu(current)) { + do_group_exit(SIGKILL); + return; + } + } + #ifdef RE_ENTRANT_CHECKING if (emulating) { printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); @@ -153,11 +161,6 @@ asmlinkage void math_emulate(long arg) RE_ENTRANT_CHECK_ON; #endif /* RE_ENTRANT_CHECKING */ - if (!used_math()) { - finit(); - set_used_math(); - } - SETUP_DATA_AREA(arg); FPU_ORIG_EIP = FPU_EIP; diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 6b722d31593..37672f79dcc 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -193,6 +193,8 @@ static inline int restore_i387(struct _fpstate __user *buf) #else /* CONFIG_X86_32 */ +extern void finit(void); + static inline void tolerant_fwait(void) { asm volatile("fnclex ; fwait"); -- cgit v1.2.3 From 226e9a93a253b7d8811b5ed9ac671c6c5a728022 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 May 2008 09:56:49 +0200 Subject: x86: ioremap fix failing nesting check Mika Kukkonen noticed that the nesting check in early_iounmap() is not actually done. Reported-by: Mika Kukkonen Signed-off-by: Ingo Molnar Cc: torvalds@linux-foundation.org Cc: arjan@linux.intel.com Cc: mikukkon@iki.fi Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031..2b2bb3f9b68 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -593,10 +593,11 @@ void __init early_iounmap(void *addr, unsigned long size) unsigned long offset; unsigned int nrpages; enum fixed_addresses idx; - unsigned int nesting; + int nesting; nesting = --early_ioremap_nested; - WARN_ON(nesting < 0); + if (WARN_ON(nesting < 0)) + return; if (early_ioremap_debug) { printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, -- cgit v1.2.3 From 2884f110d5409714f3a04eeb6d2ecd77da66b242 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 28 May 2008 19:36:07 +0100 Subject: x86: fix bad pmd ffff810000207xxx(9090909090909090) OGAWA Hirofumi and Fede have reported rare pmd_ERROR messages: mm/memory.c:127: bad pmd ffff810000207xxx(9090909090909090). Initialization's cleanup_highmap was leaving alignment filler behind in the pmd for MODULES_VADDR: when vmalloc's guard page would occupy a new page table, it's not allocated, and then module unload's vfree hits the bad 9090 pmd entry left over. Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b0f81..998a06ea5f7 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -206,7 +206,7 @@ void __init cleanup_highmap(void) pmd_t *last_pmd = pmd + PTRS_PER_PMD; for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) continue; if (vaddr < (unsigned long) _text || vaddr > end) set_pmd(pmd, __pmd(0)); -- cgit v1.2.3 From 511631011d39706ac81ee5e4c9084d61e5b4fd34 Mon Sep 17 00:00:00 2001 From: Kevin Winchester Date: Thu, 29 May 2008 21:14:35 -0300 Subject: x86: fix pointer type warning in arch/x86/mm/init_64.c:early_memtest Changed the call to find_e820_area_size to pass u64 instead of unsigned long. Signed-off-by: Kevin Winchester Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 998a06ea5f7..156e6d7b0e3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -506,7 +506,7 @@ early_param("memtest", parse_memtest); static void __init early_memtest(unsigned long start, unsigned long end) { - unsigned long t_start, t_size; + u64 t_start, t_size; unsigned pattern; if (!memtest_pattern) @@ -525,7 +525,7 @@ static void __init early_memtest(unsigned long start, unsigned long end) if (t_start + t_size > end) t_size = end - t_start; - printk(KERN_CONT "\n %016lx - %016lx pattern %d", + printk(KERN_CONT "\n %016llx - %016llx pattern %d", t_start, t_start + t_size, pattern); memtest(t_start, t_size, pattern); -- cgit v1.2.3 From 282c454cd3a7041f59a37112bb2f82263bc38f6c Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Thu, 29 May 2008 12:01:44 -0700 Subject: x86: fix Xorg crash with xf86MapVidMem error Clarify the usage of mtrr_lookup() in PAT code, and to make PAT code resilient to mtrr lookup problems. Specifically, pat_x_mtrr_type() is restructured to highlight, under what conditions we look for mtrr hint. pat_x_mtrr_type() uses a default type when there are any errors in mtrr lookup (still maintaining the pat consistency). And, reserve_memtype() highlights its usage ot mtrr_lookup for request type of '-1' and also defaults in a sane way on any mtrr lookup failure. pat.c looks at mtrr type of a range to get a hint on what mapping type to request when user/API: (1) hasn't specified any type (/dev/mem mapping) and we do not want to take performance hit by always mapping UC_MINUS. This will be the case for /dev/mem mappings used to map BIOS area or ACPI region which are WB'able. In this case, as long as MTRR is not WB, PAT will request UC_MINUS for such mappings. (2) user/API requests WB mapping while in reality MTRR may have UC or WC. In this case, PAT can map as WB (without checking MTRR) and still effective type will be UC or WC. But, a subsequent request to map same region as UC or WC may fail, as the region will get trackked as WB in PAT list. Looking at MTRR hint helps us to track based on effective type rather than what user requested. Again, here mtrr_lookup is only used as hint and we fallback to WB mapping (as requested by user) as default. In both cases, after using the mtrr hint, we still go through the memtype list to make sure there are no inconsistencies among multiple users. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Tested-by: Rufus & Azrael Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index de3a9981245..183fdd36d3b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -151,32 +151,33 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, unsigned long pat_type; u8 mtrr_type; - mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == 0xFF) { /* MTRR not enabled */ - *ret_prot = prot; - return 0; - } - if (mtrr_type == 0xFE) { /* MTRR match error */ - *ret_prot = _PAGE_CACHE_UC; - return -1; - } - if (mtrr_type != MTRR_TYPE_UNCACHABLE && - mtrr_type != MTRR_TYPE_WRBACK && - mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */ - *ret_prot = _PAGE_CACHE_UC; - return -1; - } - pat_type = prot & _PAGE_CACHE_MASK; prot &= (~_PAGE_CACHE_MASK); - /* Currently doing intersection by hand. Optimize it later. */ + /* + * We return the PAT request directly for types where PAT takes + * precedence with respect to MTRR and for UC_MINUS. + * Consistency checks with other PAT requests is done later + * while going through memtype list. + */ if (pat_type == _PAGE_CACHE_WC) { *ret_prot = prot | _PAGE_CACHE_WC; + return 0; } else if (pat_type == _PAGE_CACHE_UC_MINUS) { *ret_prot = prot | _PAGE_CACHE_UC_MINUS; - } else if (pat_type == _PAGE_CACHE_UC || - mtrr_type == MTRR_TYPE_UNCACHABLE) { + return 0; + } else if (pat_type == _PAGE_CACHE_UC) { + *ret_prot = prot | _PAGE_CACHE_UC; + return 0; + } + + /* + * Look for MTRR hint to get the effective type in case where PAT + * request is for WB. + */ + mtrr_type = mtrr_type_lookup(start, end); + + if (mtrr_type == MTRR_TYPE_UNCACHABLE) { *ret_prot = prot | _PAGE_CACHE_UC; } else if (mtrr_type == MTRR_TYPE_WRCOMB) { *ret_prot = prot | _PAGE_CACHE_WC; @@ -233,14 +234,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (req_type == -1) { /* - * Special case where caller wants to inherit from mtrr or - * existing pat mapping, defaulting to UC_MINUS in case of - * no match. + * Call mtrr_lookup to get the type hint. This is an + * optimization for /dev/mem mmap'ers into WB memory (BIOS + * tools and ACPI tools). Use WB request for WB memory and use + * UC_MINUS otherwise. */ u8 mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == 0xFE) { /* MTRR match error */ - err = -1; - } if (mtrr_type == MTRR_TYPE_WRBACK) { req_type = _PAGE_CACHE_WB; -- cgit v1.2.3 From be524fb96081e9e511d993ebf39b05a32b19476e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 29 May 2008 00:01:28 -0700 Subject: x86: section mismatch fix Fix this: WARNING: vmlinux.o(.text+0x114bb): Section mismatch in reference from the function nopat() to the function .cpuinit.text:pat_disable() The function nopat() references the function __cpuinit pat_disable(). This is often because nopat lacks a __cpuinit annotation or the annotation of pat_disable is wrong. Reported-by: "Fabio Comolli" Cc: Sam Ravnborg Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 183fdd36d3b..06b7a1c90fb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -34,7 +34,7 @@ void __cpuinit pat_disable(char *reason) printk(KERN_INFO "%s\n", reason); } -static int nopat(char *str) +static int __init nopat(char *str) { pat_disable("PAT support disabled."); return 0; -- cgit v1.2.3 From cd76374e9de4501acc74f833dc6cb5e7a5dca115 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 29 May 2008 00:30:21 -0700 Subject: suspend-vs-iommu: prevent suspend if we could not resume iommu/gart support misses suspend/resume code, which can do bad stuff, including memory corruption on resume. Prevent system suspend in case we would be unable to resume. Signed-off-by: Pavel Machek Tested-by: Patrick Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695..aa8ec928caa 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static int gart_resume(struct sys_device *dev) +{ + return 0; +} + +static int gart_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class gart_sysdev_class = { + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, + +}; + +static struct sys_device device_gart = { + .id = 0, + .cls = &gart_sysdev_class, +}; + /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. @@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i; + int i, error; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } + + error = sysdev_class_register(&gart_sysdev_class); + if (!error) + error = sysdev_register(&device_gart); + if (error) + panic("Could not register gart_sysdev -- would corrupt data on next suspend"); flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", -- cgit v1.2.3 From 870568b39064cab2dd971fe57969916036982862 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 2 Jun 2008 15:57:27 -0700 Subject: x86, fpu: fix CONFIG_PREEMPT=y corruption of application's FPU stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jürgen Mell reported an FPU state corruption bug under CONFIG_PREEMPT, and bisected it to commit v2.6.19-1363-gacc2076, "i386: add sleazy FPU optimization". Add tsk_used_math() checks to prevent calling math_state_restore() which can sleep in the case of !tsk_used_math(). This prevents making a blocking call in __switch_to(). Apparently "fpu_counter > 5" check is not enough, as in some signal handling and fork/exec scenarios, fpu_counter > 5 and !tsk_used_math() is possible. It's a side effect though. This is the failing scenario: process 'A' in save_i387_ia32() just after clear_used_math() Got an interrupt and pre-empted out. At the next context switch to process 'A' again, kernel tries to restore the math state proactively and sees a fpu_counter > 0 and !tsk_used_math() This results in init_fpu() during the __switch_to()'s math_state_restore() And resulting in fpu corruption which will be saved/restored (save_i387_fxsave and restore_i387_fxsave) during the remaining part of the signal handling after the context switch. Bisected-by: Jürgen Mell Signed-off-by: Suresh Siddha Tested-by: Jürgen Mell Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- arch/x86/kernel/process_32.c | 5 ++++- arch/x86/kernel/process_64.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60..6d5483356e7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -649,8 +649,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter > 5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..ac54ff56df8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -658,8 +658,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter>5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); return prev_p; } -- cgit v1.2.3