diff options
author | Alex Shi <alex.shi@linaro.org> | 2016-06-29 10:16:07 +0800 |
---|---|---|
committer | Alex Shi <alex.shi@linaro.org> | 2016-06-29 10:16:07 +0800 |
commit | ff6d469851b0e93dacbca9b56f9d1b903180a3fd (patch) | |
tree | b638d769e1d2b5b7f7df4f43edc1da23fd0cfb8d | |
parent | 72838bb94b0ded15cadc0ecbf0c3e07428ec9320 (diff) | |
parent | 43c981bd2d8af8d91726be1fac389cdce47dce1e (diff) |
Merge branch 'linux-linaro-lsk-v4.1' into linux-linaro-lsk-v4.1-android
35 files changed, 326 insertions, 133 deletions
diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.txt index d60030a1b909..7f1bed8872f3 100644 --- a/Documentation/arm/uefi.txt +++ b/Documentation/arm/uefi.txt @@ -58,7 +58,5 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI -------------------------------------------------------------------------------- linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. -------------------------------------------------------------------------------- -linux,uefi-stub-kern-ver | string | Copy of linux_banner from build. --------------------------------------------------------------------------------- For verbose debug messages, specify 'uefi_debug' on the kernel command line. diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index 4692241789b1..603e3598a208 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -29,8 +29,7 @@ latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version of 5.0 or later. Currently KASAN works only with the SLUB memory allocator. -For better bug detection and nicer report, enable CONFIG_STACKTRACE and put -at least 'slub_debug=U' in the boot cmdline. +For better bug detection and nicer reporting, enable CONFIG_STACKTRACE. To disable instrumentation for specific files or directories, add a line similar to the following to the respective kernel Makefile: diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 7f9edcb1a332..c991d5b4c1b2 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -18,7 +18,6 @@ #ifdef __KERNEL__ -#include <linux/acpi.h> #include <linux/types.h> #include <linux/vmalloc.h> @@ -28,22 +27,16 @@ #include <asm/xen/hypervisor.h> #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops *dma_ops; extern struct dma_map_ops dummy_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (unlikely(!dev)) - return dma_ops; - else if (dev->archdata.dma_ops) + if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; - else if (acpi_disabled) - return dma_ops; /* - * When ACPI is enabled, if arch_set_dma_ops is not called, - * we will disable device DMA capability by setting it - * to dummy_dma_ops. + * We expect no ISA devices, and all other DMA masters are expected to + * have someone call arch_setup_dma_ops at device creation time. */ return &dummy_dma_ops; } diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 29de782126e8..3c1ba215c538 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -22,6 +22,14 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o +stub-obj := efi-stub.o efi-entry.o +extra-y := $(stub-obj) +stub-obj := $(patsubst %.o,%.stub.o,$(stub-obj)) + +OBJCOPYFLAGS := --prefix-symbols=__efistub_ +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o @@ -34,7 +42,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o +arm64-obj-$(CONFIG_EFI) += efi.o $(stub-obj) arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o @@ -43,7 +51,7 @@ arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o -extra-y := $(head-y) vmlinux.lds +extra-y += $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 8ce9b0577442..a773db92908b 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ * we want to be. The kernel image wants to be placed at TEXT_OFFSET * from start of RAM. */ -ENTRY(efi_stub_entry) +ENTRY(entry) /* * Create a stack frame to save FP/LR with extra space * for image_addr variable passed to efi_entry(). @@ -86,8 +86,8 @@ ENTRY(efi_stub_entry) * entries for the VA range of the current image, so no maintenance is * necessary. */ - adr x0, efi_stub_entry - adr x1, efi_stub_entry_end + adr x0, entry + adr x1, entry_end sub x1, x1, x0 bl __flush_dcache_area @@ -120,5 +120,5 @@ efi_load_fail: ldp x29, x30, [sp], #32 ret -efi_stub_entry_end: -ENDPROC(efi_stub_entry) +entry_end: +ENDPROC(entry) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index faed4a124eb5..f484684ff44a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -97,8 +97,8 @@ efi_head: #endif #ifdef CONFIG_EFI - .globl stext_offset - .set stext_offset, stext - efi_head + .globl __efistub_stext_offset + .set __efistub_stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -121,8 +121,8 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext_offset // BaseOfCode + .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -139,7 +139,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext_offset // SizeOfHeaders + .long __efistub_stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -184,9 +184,9 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext_offset // VirtualAddress + .long __efistub_stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext_offset // PointerToRawData + .long __efistub_stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 8fae0756e175..6eb8fee93321 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -59,4 +59,37 @@ _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; + +#endif + #endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..4f2369e36094 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -47,8 +47,8 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = fp + 0x10; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); return 0; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index fb81afa1f357..29f167605f9e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -18,6 +18,7 @@ */ #include <linux/gfp.h> +#include <linux/acpi.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -28,9 +29,6 @@ #include <asm/cacheflush.h> -struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -508,13 +506,7 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - int ret; - - dma_ops = &swiotlb_dma_ops; - - ret = atomic_pool_init(); - - return ret; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -981,8 +973,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, struct iommu_ops *iommu, bool coherent) { - if (!acpi_disabled && !dev->archdata.dma_ops) - dev->archdata.dma_ops = dma_ops; + if (!dev->archdata.dma_ops) + dev->archdata.dma_ops = &swiotlb_dma_ops; dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ecb966f0b340..d546087f81ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -264,6 +264,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b138b843..a7c9206b5bea 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,18 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove +#endif + #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 28d7a857f9d1..5daeca3d0f9e 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -22,9 +22,9 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" ".popsection \n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (key), "i" (branch) : : l_yes); return false; l_yes: @@ -38,9 +38,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool "2:\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" ".popsection \n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (key), "i" (branch) : : l_yes); return false; l_yes: diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 23ba6765b718..b3928c13f359 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -902,7 +902,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); * Return saved PC of a blocked thread. * What is this good for? it will be always the scheduler or ret_from_fork. */ -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) +#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index e4661196994e..ff8b9a17dc4b 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_KMEMCHECK -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 -extern void *memcpy(void *to, const void *from, size_t len); -#else +#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ diff --git a/block/blk-core.c b/block/blk-core.c index 7e14a19c1d1d..d9406f784311 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -577,7 +577,7 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_destroy(&q->backing_dev_info); + bdi_unregister(&q->backing_dev_info); /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6264b382d4d1..145ddb6c6d31 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -502,6 +502,7 @@ static void blk_release_queue(struct kobject *kobj) struct request_queue *q = container_of(kobj, struct request_queue, kobj); + bdi_exit(&q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 280bc0a63365..c0ac48c5d510 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -14,6 +14,8 @@ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic -mno-single-pic-base +cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt + KBUILD_CFLAGS := $(cflags-y) \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) @@ -22,7 +24,15 @@ GCOV_PROFILE := n KASAN_SANITIZE := n lib-y := efi-stub-helper.o -lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o + +# include the stub's generic dependencies from lib/ when building for ARM/arm64 +arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ + $(patsubst %.c,lib-%.o,$(arm-deps)) CFLAGS_fdt.o += -I$(srctree)/scripts/dtc/libfdt/ @@ -32,10 +42,27 @@ CFLAGS_fdt.o += -I$(srctree)/scripts/dtc/libfdt/ # So let's apply the __init annotations at the section level, by prefixing # the section names directly. This will ensure that even all the inline string # literals are covered. +# The fact that the stub and the kernel proper are essentially the same binary +# also means that we need to be extra careful to make sure that the stub does +# not rely on any absolute symbol references, considering that the virtual +# kernel mapping that the linker uses is not active yet when the stub is +# executing. So build all C dependencies of the EFI stub into libstub, and do +# a verification pass to see if any absolute relocations exist in any of the +# object files. # -extra-$(CONFIG_ARM64) := $(lib-y) -lib-$(CONFIG_ARM64) := $(patsubst %.o,%.init.o,$(lib-y)) +extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y) +lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y)) + +STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* -R *kcrctab* +STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ + --prefix-symbols=__efistub_ +STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS + +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,stubcopy) -OBJCOPYFLAGS := --prefix-alloc-sections=.init -$(obj)/%.init.o: $(obj)/%.o FORCE - $(call if_changed,objcopy) +quiet_cmd_stubcopy = STUBCPY $@ + cmd_stubcopy = if $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; then \ + $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y) \ + && (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \ + rm -f $@; /bin/false); else /bin/false; fi diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index e334a01cf92f..6b6548fda089 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,10 +5,6 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL) -#undef memcpy -#undef memset -#undef memmove - void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index ef5d764e2a27..b62e2f5dcab3 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -147,15 +147,6 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; - /* - * Add kernel version banner so stub/kernel match can be - * verified. - */ - status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver", - linux_banner); - if (status) - goto fdt_set_fail; - return EFI_SUCCESS; fdt_set_fail: diff --git a/drivers/firmware/efi/libstub/string.c b/drivers/firmware/efi/libstub/string.c new file mode 100644 index 000000000000..09d5a0894343 --- /dev/null +++ b/drivers/firmware/efi/libstub/string.c @@ -0,0 +1,57 @@ +/* + * Taken from: + * linux/lib/string.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/types.h> +#include <linux/string.h> + +#ifndef __HAVE_ARCH_STRSTR +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +#endif + +#ifndef __HAVE_ARCH_STRNCMP +/** + * strncmp - Compare two length-limited strings + * @cs: One string + * @ct: Another string + * @count: The maximum number of bytes to compare + */ +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} +#endif diff --git a/fs/block_dev.c b/fs/block_dev.c index e9134afc347c..5af3bc743af4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -44,7 +44,7 @@ static inline struct bdev_inode *BDEV_I(struct inode *inode) return container_of(inode, struct bdev_inode, vfs_inode); } -inline struct block_device *I_BDEV(struct inode *inode) +struct block_device *I_BDEV(struct inode *inode) { return &BDEV_I(inode)->bdev; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9dc893cc7140..4e6e5505a242 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -278,13 +278,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode) wb_get(wb); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); - wb_put(wb); /* not gonna deref it anymore */ /* i_wb may have changed inbetween, can't use inode_to_wb() */ - if (likely(wb == inode->i_wb)) - return wb; /* @inode already has ref */ + if (likely(wb == inode->i_wb)) { + wb_put(wb); /* @inode already has ref */ + return wb; + } spin_unlock(&wb->list_lock); + wb_put(wb); cpu_relax(); spin_lock(&inode->i_lock); } @@ -1311,10 +1313,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() * and does more profound writeback list handling in writeback_sb_inodes(). */ -static int -writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_single_inode(struct inode *inode, + struct writeback_control *wbc) { + struct bdi_writeback *wb; int ret = 0; spin_lock(&inode->i_lock); @@ -1352,7 +1354,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); - spin_lock(&wb->list_lock); + + wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* * If inode is clean, remove it from writeback lists. Otherwise don't @@ -1402,6 +1405,10 @@ static long writeback_chunk_size(struct bdi_writeback *wb, * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. + * + * NOTE! This is called with wb->list_lock held, and will + * unlock and relock that for each inode it ends up doing + * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, @@ -1420,11 +1427,10 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ - struct blk_plug plug; - blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); + struct bdi_writeback *tmp_wb; if (inode->i_sb != sb) { if (work->sb) { @@ -1515,15 +1521,23 @@ static long writeback_sb_inodes(struct super_block *sb, cond_resched(); } - - spin_lock(&wb->list_lock); + /* + * Requeue @inode if still dirty. Be careful as @inode may + * have been switched to another wb in the meantime. + */ + tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) wrote++; - requeue_inode(inode, wb, &wbc); + requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); + if (unlikely(tmp_wb != wb)) { + spin_unlock(&tmp_wb->list_lock); + spin_lock(&wb->list_lock); + } + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. @@ -1535,7 +1549,6 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } - blk_finish_plug(&plug); return wrote; } @@ -2294,7 +2307,6 @@ EXPORT_SYMBOL(sync_inodes_sb); */ int write_inode_now(struct inode *inode, int sync) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, @@ -2306,7 +2318,7 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - return writeback_single_inode(inode, wb, &wbc); + return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); @@ -2323,7 +2335,7 @@ EXPORT_SYMBOL(write_inode_now); */ int sync_inode(struct inode *inode, struct writeback_control *wbc) { - return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc); + return writeback_single_inode(inode, wbc); } EXPORT_SYMBOL(sync_inode); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3d122674717a..145b50032695 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -19,13 +19,17 @@ #include <linux/slab.h> int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_destroy(struct backing_dev_info *bdi); +void bdi_exit(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); +void bdi_unregister(struct backing_dev_info *bdi); + int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); +void bdi_destroy(struct backing_dev_info *bdi); + void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); void wb_start_background_writeback(struct bdi_writeback *wb); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 0a5cc7a1109b..c02e669945e9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -713,9 +713,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags, + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_rw, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_rw, 1); } rcu_read_unlock(); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d764e3d38597..501597f36721 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -245,7 +245,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f76e59793f7..f332649eca55 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1587,7 +1587,7 @@ static int cgroup_show_options(struct seq_file *seq, if (root != &cgrp_dfl_root) for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) - seq_show_option(seq, ss->name, NULL); + seq_show_option(seq, ss->legacy_name, NULL); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 3f34496244e9..20769e5fd57b 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -143,7 +143,11 @@ check_stack(unsigned long ip, unsigned long *stack) p = start; for (; p < top && i < max_stack_trace.nr_entries; p++) { - if (*p == stack_dump_trace[i]) { + /* + * The READ_ONCE_NOCHECK is used to let KASAN know that + * this is not a stack-out-of-bounds error. + */ + if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) { this_size = stack_dump_index[i++] = (top - p) * sizeof(unsigned long); found = 1; diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 777eda7d1ab4..0fee5acd5aa0 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -15,12 +15,7 @@ config KASAN global variables requires gcc 5.0 or later. This feature consumes about 1/8 of available memory and brings about ~x3 performance slowdown. - For better error detection enable CONFIG_STACKTRACE, - and add slub_debug=U to boot cmdline. - -config KASAN_SHADOW_OFFSET - hex - default 0xdffffc0000000000 if X86_64 + For better error detection enable CONFIG_STACKTRACE. choice prompt "Instrumentation type" diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 098c08eddfab..c1682523d79a 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -65,7 +65,7 @@ static noinline void __init kmalloc_node_oob_right(void) kfree(ptr); } -static noinline void __init kmalloc_large_oob_rigth(void) +static noinline void __init kmalloc_large_oob_right(void) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; @@ -114,7 +114,7 @@ static noinline void __init kmalloc_oob_krealloc_less(void) kfree(ptr1); return; } - ptr2[size1] = 'x'; + ptr2[size2] = 'x'; kfree(ptr2); } @@ -138,6 +138,71 @@ static noinline void __init kmalloc_oob_16(void) kfree(ptr2); } +static noinline void __init kmalloc_oob_memset_2(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset2\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+7, 0, 2); + kfree(ptr); +} + +static noinline void __init kmalloc_oob_memset_4(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset4\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+5, 0, 4); + kfree(ptr); +} + + +static noinline void __init kmalloc_oob_memset_8(void) +{ + char *ptr; + size_t size = 8; + + pr_info("out-of-bounds in memset8\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+1, 0, 8); + kfree(ptr); +} + +static noinline void __init kmalloc_oob_memset_16(void) +{ + char *ptr; + size_t size = 16; + + pr_info("out-of-bounds in memset16\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr+1, 0, 16); + kfree(ptr); +} + static noinline void __init kmalloc_oob_in_memset(void) { char *ptr; @@ -206,6 +271,8 @@ static noinline void __init kmalloc_uaf2(void) } ptr1[40] = 'x'; + if (ptr1 == ptr2) + pr_err("Could not detect use-after-free: ptr1 == ptr2\n"); kfree(ptr2); } @@ -259,11 +326,15 @@ static int __init kmalloc_tests_init(void) kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); - kmalloc_large_oob_rigth(); + kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); kmalloc_oob_krealloc_less(); kmalloc_oob_16(); kmalloc_oob_in_memset(); + kmalloc_oob_memset_2(); + kmalloc_oob_memset_4(); + kmalloc_oob_memset_8(); + kmalloc_oob_memset_16(); kmalloc_uaf(); kmalloc_uaf_memset(); kmalloc_uaf2(); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a1943cba8957..619984fc07ec 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -837,25 +837,8 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) synchronize_rcu_expedited(); } -/* - * Called when the device behind @bdi has been removed or ejected. - * - * We can't really do much here except for reducing the dirty ratio at - * the moment. In the future we should be able to set a flag so that - * the filesystem can handle errors at mark_inode_dirty time instead - * of only at writeback time. - */ void bdi_unregister(struct backing_dev_info *bdi) { - if (WARN_ON_ONCE(!bdi->dev)) - return; - - bdi_set_min_ratio(bdi, 0); -} -EXPORT_SYMBOL(bdi_unregister); - -void bdi_destroy(struct backing_dev_info *bdi) -{ /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); wb_shutdown(&bdi->wb); @@ -866,9 +849,19 @@ void bdi_destroy(struct backing_dev_info *bdi) device_unregister(bdi->dev); bdi->dev = NULL; } +} +void bdi_exit(struct backing_dev_info *bdi) +{ + WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); } + +void bdi_destroy(struct backing_dev_info *bdi) +{ + bdi_unregister(bdi); + bdi_exit(bdi); +} EXPORT_SYMBOL(bdi_destroy); /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9ece918cc23a..a860dfee7f0f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4195,7 +4195,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: diff --git a/mm/mempool.c b/mm/mempool.c index 2cc08de8b1db..70cccdcff860 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -135,8 +135,8 @@ static void *remove_element(mempool_t *pool) void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); - check_element(pool, element); kasan_unpoison_element(pool, element); + check_element(pool, element); return element; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c7e2df525b63..685b144d55b2 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -369,8 +369,9 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc); unsigned long bytes = vm_dirty_bytes; unsigned long bg_bytes = dirty_background_bytes; - unsigned long ratio = vm_dirty_ratio; - unsigned long bg_ratio = dirty_background_ratio; + /* convert ratios to per-PAGE_SIZE for higher precision */ + unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100; + unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100; unsigned long thresh; unsigned long bg_thresh; struct task_struct *tsk; @@ -382,26 +383,28 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) /* * The byte settings can't be applied directly to memcg * domains. Convert them to ratios by scaling against - * globally available memory. + * globally available memory. As the ratios are in + * per-PAGE_SIZE, they can be obtained by dividing bytes by + * number of pages. */ if (bytes) - ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 / - global_avail, 100UL); + ratio = min(DIV_ROUND_UP(bytes, global_avail), + PAGE_SIZE); if (bg_bytes) - bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 / - global_avail, 100UL); + bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail), + PAGE_SIZE); bytes = bg_bytes = 0; } if (bytes) thresh = DIV_ROUND_UP(bytes, PAGE_SIZE); else - thresh = (ratio * available_memory) / 100; + thresh = (ratio * available_memory) / PAGE_SIZE; if (bg_bytes) bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE); else - bg_thresh = (bg_ratio * available_memory) / 100; + bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; if (bg_thresh >= thresh) bg_thresh = thresh / 2; diff --git a/mm/slub.c b/mm/slub.c index 08342c523a85..993483e5f993 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -459,8 +459,10 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) /* * Debug settings: */ -#ifdef CONFIG_SLUB_DEBUG_ON +#if defined(CONFIG_SLUB_DEBUG_ON) static int slub_debug = DEBUG_DEFAULT_FLAGS; +#elif defined(CONFIG_KASAN) +static int slub_debug = SLAB_STORE_USER; #else static int slub_debug; #endif diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c index 02b6d687537f..067c7acff61b 100644 --- a/scripts/dtc/libfdt/fdt_ro.c +++ b/scripts/dtc/libfdt/fdt_ro.c @@ -164,7 +164,7 @@ int fdt_path_offset(const void *fdt, const char *path) /* see if we have an alias */ if (*path != '/') { - const char *q = strchr(path, '/'); + const char *q = memchr(path, '/', end - p); if (!q) q = end; @@ -184,7 +184,7 @@ int fdt_path_offset(const void *fdt, const char *path) p++; if (! *p) return offset; - q = strchr(p, '/'); + q = memchr(p, '/', end - p); if (! q) q = end; |