From 722bc6b16771ed80871e1fd81c86d3627dda2ac8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 5 Mar 2012 15:05:13 -0800 Subject: x86/mm: Fix the size calculation of mapping tables For machines that enable PSE, the first 2/4M memory region still uses 4K pages, so needs more PTEs in this case, but find_early_table_space() doesn't count this. This patch fixes it. The bug was found via code review, no misbehavior of the kernel was observed. Signed-off-by: WANG Cong Cc: Yinghai Lu Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/n/tip-kq6a00qe33h7c7ais2xsywnh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6cabf6570d64..2e92fdcbea86 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -30,8 +30,14 @@ int direct_gbpages #endif ; -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +static void __init find_early_table_space(struct map_range *mr, unsigned long end, + int use_pse, int use_gbpages) { unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; phys_addr_t base; @@ -56,6 +62,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse, #ifdef CONFIG_X86_32 extra += PMD_SIZE; #endif + /* The first 2/4M doesn't use large pages. */ + extra += mr->end - mr->start; + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -85,12 +94,6 @@ void __init native_pagetable_reserve(u64 start, u64 end) memblock_reserve(start, end - start); } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -262,7 +265,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); + find_early_table_space(&mr[0], end, use_pse, use_gbpages); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, -- cgit v1.2.3 From a6fca40f1d7f3e232c9de27c1cebbb9f787fbc4f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 22 Mar 2012 17:01:25 -0700 Subject: x86, tlb: Switch cr3 in leave_mm() only when needed Currently leave_mm() unconditionally switches the cr3 to swapper_pg_dir. But there is no need to change the cr3, if we already left that mm. intel_idle() for example calls leave_mm() on every deep c-state entry where the CPU flushes the TLB for us. Similarly flush_tlb_all() was also calling leave_mm() whenever the TLB is in LAZY state. Both these paths will be improved with this change. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1332460885.16101.147.camel@sbsiddha-desk.sc.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/tlb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index d6c0418c3e47..125bcad1b757 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -61,11 +61,13 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); */ void leave_mm(int cpu) { + struct mm_struct *active_mm = percpu_read(cpu_tlbstate.active_mm); if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpumask_clear_cpu(cpu, - mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); - load_cr3(swapper_pg_dir); + if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { + cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); + load_cr3(swapper_pg_dir); + } } EXPORT_SYMBOL_GPL(leave_mm); -- cgit v1.2.3 From 20167d3421a089a1bf1bd680b150dc69c9506810 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 16 May 2012 14:06:26 +0100 Subject: x86-64: Fix accounting in kernel_physical_mapping_init() When finding a present and acceptable 2M/1G mapping, the number of pages mapped this way shouldn't be incremented (as it was already incremented when the earlier part of the mapping was established). Instead, last_map_addr needs to be updated in this case. Further, address increments were wrong in one place each in both phys_pmd_init() and phys_pud_init() (lacking the aligning down to the respective page boundary). As we're now doing the same calculation several times, fold it into a single instance using a local variable (matching how kernel_physical_mapping_init() itself does it at the PGD level). Observed during code inspection, not because of an actual problem. Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FB3C27202000078000841A0@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 436a0309db33..f9476a0f8cb6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -408,12 +408,12 @@ static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, unsigned long page_size_mask, pgprot_t prot) { - unsigned long pages = 0; + unsigned long pages = 0, next; unsigned long last_map_addr = end; int i = pmd_index(address); - for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { + for (; i < PTRS_PER_PMD; i++, address = next) { unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); pte_t *pte; @@ -427,6 +427,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, break; } + next = (address & PMD_MASK) + PMD_SIZE; + if (pmd_val(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); @@ -450,7 +452,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_2M)) { - pages++; + last_map_addr = next; continue; } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); @@ -463,7 +465,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pfn_pte(address >> PAGE_SHIFT, __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); - last_map_addr = (address & PMD_MASK) + PMD_SIZE; + last_map_addr = next; continue; } @@ -483,11 +485,11 @@ static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, unsigned long page_size_mask) { - unsigned long pages = 0; + unsigned long pages = 0, next; unsigned long last_map_addr = end; int i = pud_index(addr); - for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { + for (; i < PTRS_PER_PUD; i++, addr = next) { unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; @@ -496,8 +498,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (addr >= end) break; - if (!after_bootmem && - !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { + next = (addr & PUD_MASK) + PUD_SIZE; + + if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { set_pud(pud, __pud(0)); continue; } @@ -524,7 +527,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_1G)) { - pages++; + last_map_addr = next; continue; } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); @@ -536,7 +539,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); spin_unlock(&init_mm.page_table_lock); - last_map_addr = (addr & PUD_MASK) + PUD_SIZE; + last_map_addr = next; continue; } -- cgit v1.2.3