diff options
196 files changed, 2923 insertions, 1820 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 8384c681a4b2..476722b7b636 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1879,10 +1879,8 @@ following two functions. wbc_init_bio(@wbc, @bio) Should be called for each bio carrying writeback data and - associates the bio with the inode's owner cgroup and the - corresponding request queue. This must be called after - a queue (device) has been associated with the bio and - before submission. + associates the bio with the inode's owner cgroup. Can be + called anytime between bio allocation and submission. wbc_account_io(@wbc, @page, @bytes) Should be called for each data segment being written out. @@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if the writeback session is holding shared resources, e.g. a journal entry, may lead to priority inversion. There is no one easy solution for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_create_blkg() +cases by skipping wbc_init_bio() or using bio_associate_blkcg() directly. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt index a9b35265fa13..513f03466aba 100644 --- a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt +++ b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt @@ -1,20 +1,22 @@ -Innolux TV123WAM 12.3 inch eDP 2K display panel +Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel This binding is compatible with the simple-panel binding, which is specified in simple-panel.txt in this directory. Required properties: -- compatible: should be "innolux,tv123wam" +- compatible: should be "innolux,p120zdg-bf1" - power-supply: regulator to provide the supply voltage Optional properties: - enable-gpios: GPIO pin to enable or disable the panel - backlight: phandle of the backlight device attached to the panel +- no-hpd: If HPD isn't hooked up; add this property. Example: panel_edp: panel-edp { - compatible = "innolux,tv123wam"; + compatible = "innolux,p120zdg-bf1"; enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>; power-supply = <&pm8916_l2>; backlight = <&backlight>; + no-hpd; }; diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt index 45a457ad38f0..b2b872c710f2 100644 --- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt +++ b/Documentation/devicetree/bindings/display/panel/simple-panel.txt @@ -11,6 +11,9 @@ Optional properties: - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing - enable-gpios: GPIO pin to enable or disable the panel - backlight: phandle of the backlight device attached to the panel +- no-hpd: This panel is supposed to communicate that it's ready via HPD + (hot plug detect) signal, but the signal isn't hooked up so we should + hardcode the max delay from the panel spec when powering up the panel. Example: diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt index 06a363d9ccef..b9a1d7402128 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt @@ -7,6 +7,7 @@ Required properties: for da850 - compatible = "ti,da850-ecap", "ti,am3352-ecap", "ti,am33xx-ecap"; for dra746 - compatible = "ti,dra746-ecap", "ti,am3352-ecap"; for 66ak2g - compatible = "ti,k2g-ecap", "ti,am3352-ecap"; + for am654 - compatible = "ti,am654-ecap", "ti,am3352-ecap"; - #pwm-cells: should be 3. See pwm.txt in this directory for a description of the cells format. The PWM channel index ranges from 0 to 4. The only third cell flag supported by this binding is PWM_POLARITY_INVERTED. diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt index e1ef6afbe3a7..7f31fe7e2093 100644 --- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt +++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt @@ -3,7 +3,9 @@ Required Properties: - compatible: should be "renesas,pwm-rcar" and one of the following. - "renesas,pwm-r8a7743": for RZ/G1M + - "renesas,pwm-r8a7744": for RZ/G1N - "renesas,pwm-r8a7745": for RZ/G1E + - "renesas,pwm-r8a774a1": for RZ/G2M - "renesas,pwm-r8a7778": for R-Car M1A - "renesas,pwm-r8a7779": for R-Car H1 - "renesas,pwm-r8a7790": for R-Car H2 @@ -12,6 +14,8 @@ Required Properties: - "renesas,pwm-r8a7795": for R-Car H3 - "renesas,pwm-r8a7796": for R-Car M3-W - "renesas,pwm-r8a77965": for R-Car M3-N + - "renesas,pwm-r8a77970": for R-Car V3M + - "renesas,pwm-r8a77980": for R-Car V3H - "renesas,pwm-r8a77990": for R-Car E3 - "renesas,pwm-r8a77995": for R-Car D3 - reg: base address and length of the registers block for the PWM. diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt index d53a16715da6..848a92b53d81 100644 --- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt @@ -2,13 +2,19 @@ Required Properties: - - compatible: should be one of the following. + - compatible: must contain one or more of the following: - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller. - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller. - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller. + - "renesas,tpu-r8a7744": for R8A7744 (RZ/G1N) compatible PWM controller. - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller. - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller. - - "renesas,tpu": for generic R-Car and RZ/G1 TPU PWM controller. + - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM + controller. + - "renesas,tpu-r8a77980": for R8A77980 (R-Car V3H) compatible PWM + controller. + - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for + the entries listed above. - reg: Base address and length of each memory resource used by the PWM controller hardware module. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 321d74b73937..cf43bc4dbf31 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -623,6 +623,11 @@ in your dentry operations instead. On success you get a new struct file sharing the mount/dentry with the original, on failure - ERR_PTR(). -- +[mandatory] + ->clone_file_range() and ->dedupe_file_range have been replaced with + ->remap_file_range(). See Documentation/filesystems/vfs.txt for more + information. +-- [recommended] ->lookup() instances doing an equivalent of if (IS_ERR(inode)) diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index a6c6a8af48a2..5f71a252e2e0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -883,8 +883,9 @@ struct file_operations { unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); }; @@ -960,11 +961,18 @@ otherwise noted. copy_file_range: called by the copy_file_range(2) system call. - clone_file_range: called by the ioctl(2) system call for FICLONERANGE and - FICLONE commands. - - dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE - command. + remap_file_range: called by the ioctl(2) system call for FICLONERANGE and + FICLONE and FIDEDUPERANGE commands to remap file ranges. An + implementation should remap len bytes at pos_in of the source file into + the dest file at pos_out. Implementations must handle callers passing + in len == 0; this means "remap to the end of the source file". The + return value should the number of bytes remapped, or the usual + negative error code if errors occurred before any bytes were remapped. + The remap_flags parameter accepts REMAP_FILE_* flags. If + REMAP_FILE_DEDUP is set then the implementation must only remap if the + requested file ranges have identical contents. If REMAP_CAN_SHORTEN is + set, the caller is ok with the implementation shortening the request + length to satisfy alignment or EOF requirements (or any other reason). fadvise: possibly called by the fadvise64() system call. diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 7b6a2b2bdc98..8da26c6dd886 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -537,21 +537,6 @@ more details, with real examples. The third parameter may be a text as in this example, but it may also be an expanded variable or a macro. - cc-fullversion - cc-fullversion is useful when the exact version of gcc is needed. - One typical use-case is when a specific GCC version is broken. - cc-fullversion points out a more specific version than cc-version does. - - Example: - #arch/powerpc/Makefile - $(Q)if test "$(cc-fullversion)" = "040200" ; then \ - echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \ - false ; \ - fi - - In this example for a specific GCC version the build will error out - explaining to the user why it stops. - cc-cross-prefix cc-cross-prefix is used to check if there exists a $(CC) in path with one of the listed prefixes. The first prefix where there exist a diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst index 1e4948c9e989..4d118b827bbb 100644 --- a/Documentation/networking/ice.rst +++ b/Documentation/networking/ice.rst @@ -20,7 +20,7 @@ Enabling the driver The driver is enabled via the standard kernel configuration system, using the make command:: - make oldconfig/silentoldconfig/menuconfig/etc. + make oldconfig/menuconfig/etc. The driver is located in the menu structure at: @@ -485,7 +485,7 @@ ifneq ($(KBUILD_SRC),) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree) endif -ifeq ($(cc-name),clang) +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) @@ -702,7 +702,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 15a84cfd0719..68410490e12f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -128,7 +128,7 @@ cflags-y += -ffreestanding # clang's output will be based upon the build machine. So for clang we simply # unconditionally specify -EB or -EL as appropriate. # -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL else diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 34605ca21498..58a0315ad743 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -10,7 +10,7 @@ ccflags-vdso := \ $(filter -march=%,$(KBUILD_CFLAGS)) \ -D__VDSO__ -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d51b2bd4aa1..8be31261aec8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -930,10 +930,6 @@ config FSL_GTM help Freescale General-purpose Timers support -# Yes MCA RS/6000s exist but Linux-PPC does not currently support any -config MCA - bool - # Platforms that what PCI turned unconditionally just do select PCI # in their config node. Platforms that want to choose at config # time should select PPC_PCI_CHOICE @@ -944,7 +940,6 @@ config PCI bool "PCI support" if PPC_PCI_CHOICE default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \ && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON - default PCI_QSPAN if PPC_8xx select GENERIC_PCI_IOMAP help Find out whether your system includes a PCI bus. PCI is the name of @@ -958,14 +953,6 @@ config PCI_DOMAINS config PCI_SYSCALL def_bool PCI -config PCI_QSPAN - bool "QSpan PCI" - depends on PPC_8xx - select PPC_I8259 - help - Say Y here if you have a system based on a Motorola 8xx-series - embedded processor with a QSPAN PCI interface, otherwise say N. - config PCI_8260 bool depends on PCI && 8260 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 17be664dafa2..8a2ce14d68d0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -96,7 +96,7 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 endif -ifneq ($(cc-name),clang) +ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -175,7 +175,7 @@ endif # Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 -ifneq ($(cc-name),clang) +ifndef CONFIG_CC_IS_CLANG CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) endif endif diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts index 55c0210a771d..092a400740f8 100644 --- a/arch/powerpc/boot/dts/fsl/t2080rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts @@ -77,12 +77,12 @@ }; ethernet@f0000 { - phy-handle = <&xg_cs4315_phy1>; + phy-handle = <&xg_cs4315_phy2>; phy-connection-type = "xgmii"; }; ethernet@f2000 { - phy-handle = <&xg_cs4315_phy2>; + phy-handle = <&xg_cs4315_phy1>; phy-connection-type = "xgmii"; }; diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts index 5b037f51741d..3aa300afbbca 100644 --- a/arch/powerpc/boot/dts/mpc885ads.dts +++ b/arch/powerpc/boot/dts/mpc885ads.dts @@ -72,7 +72,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; - ranges = <0x0 0xff000000 0x4000>; + ranges = <0x0 0xff000000 0x28000>; bus-frequency = <0>; // Temporary -- will go away once kernel uses ranges for get_immrbase(). @@ -224,6 +224,17 @@ #size-cells = <0>; }; }; + + crypto@20000 { + compatible = "fsl,sec1.2", "fsl,sec1.0"; + reg = <0x20000 0x8000>; + interrupts = <1 1>; + interrupt-parent = <&PIC>; + fsl,num-channels = <1>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0x4c>; + fsl,descriptor-types-mask = <0x05000154>; + }; }; chosen { diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 31733a95bbd0..3d5acd2b113a 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -36,6 +36,11 @@ int raw_patch_instruction(unsigned int *addr, unsigned int instr); int patch_instruction_site(s32 *addr, unsigned int instr); int patch_branch_site(s32 *site, unsigned long target, int flags); +static inline unsigned long patch_site_addr(s32 *site) +{ + return (unsigned long)site + *site; +} + int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 4f547752ae79..fa05aa566ece 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -34,20 +34,12 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: - * When that bit is not set access is done iaw "all user" - * which means no access iaw page rules. - * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED - * 0x => No access => 11 (all accesses performed as user iaw page definition) - * 10 => No user => 01 (all accesses performed according to page definition) - * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * Therefore, we define 2 APG groups. lsb is _PMD_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ -#ifdef CONFIG_SWAP -#define MI_APG_INIT 0xf4f4f4f4 -#else #define MI_APG_INIT 0x44444444 -#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -115,20 +107,12 @@ * Supervisor and no access for user and NA for ALL. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: - * When that bit is not set access is done iaw "all user" - * which means no access iaw page rules. - * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED - * 0x => No access => 11 (all accesses performed as user iaw page definition) - * 10 => No user => 01 (all accesses performed according to page definition) - * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * Therefore, we define 2 APG groups. lsb is _PMD_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ -#ifdef CONFIG_SWAP -#define MD_APG_INIT 0xf4f4f4f4 -#else #define MD_APG_INIT 0x44444444 -#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -180,12 +164,6 @@ */ #define SPRN_M_TW 799 -/* APGs */ -#define M_APG0 0x00000000 -#define M_APG1 0x00000020 -#define M_APG2 0x00000040 -#define M_APG3 0x00000060 - #ifdef CONFIG_PPC_MM_SLICES #include <asm/nohash/32/slice.h> #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) @@ -251,6 +229,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) BUG(); } +/* patch sites */ +extern s32 patch__itlbmiss_linmem_top; +extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; +extern s32 patch__fixupdar_linmem_top; + +extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; +extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; +extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; + #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index bb38dd67d47d..1b06add4f092 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -5,6 +5,7 @@ #include <linux/spinlock.h> #include <asm/page.h> #include <linux/time.h> +#include <linux/cpumask.h> /* * Definitions for talking to the RTAS on CHRP machines. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 134a573a9f2d..3b67b9533c82 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -31,6 +31,7 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/export.h> +#include <asm/code-patching-asm.h> #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ @@ -318,8 +319,8 @@ InstructionTLBMiss: cmpli cr0, r11, PAGE_OFFSET@h #ifndef CONFIG_PIN_TLB_TEXT /* It is assumed that kernel code fits into the first 8M page */ -_ENTRY(ITLBMiss_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h +0: cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h + patch_site 0b, patch__itlbmiss_linmem_top #endif #endif #endif @@ -353,13 +354,14 @@ _ENTRY(ITLBMiss_cmp) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r12 #endif - -#ifdef CONFIG_SWAP - rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 -#endif /* Load the MI_TWC with the attributes for this "segment." */ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ +#ifdef CONFIG_SWAP + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +#endif li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. @@ -372,16 +374,17 @@ _ENTRY(ITLBMiss_cmp) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -_ENTRY(itlb_miss_exit_1) - mfspr r10, SPRN_SPRG_SCRATCH0 +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfspr r12, SPRN_SPRG_SCRATCH2 #endif rfi + patch_site 0b, patch__itlbmiss_exit_1 + #ifdef CONFIG_PERF_EVENTS -_ENTRY(itlb_miss_perf) - lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + patch_site 0f, patch__itlbmiss_perf +0: lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) addi r11, r11, 1 stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) @@ -435,11 +438,11 @@ DataStoreTLBMiss: #ifndef CONFIG_PIN_TLB_IMMR cmpli cr0, r11, VIRT_IMMR_BASE@h #endif -_ENTRY(DTLBMiss_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h +0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + patch_site 0b, patch__dtlbmiss_linmem_top #ifndef CONFIG_PIN_TLB_IMMR -_ENTRY(DTLBMiss_jmp) - beq- DTLBMissIMMR +0: beq- DTLBMissIMMR + patch_site 0b, patch__dtlbmiss_immr_jmp #endif blt cr7, DTLBMissLinear lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha @@ -470,14 +473,22 @@ _ENTRY(DTLBMiss_jmp) * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED -#ifdef CONFIG_SWAP - /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0 - * on that bit will represent a Non Access group - */ - rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 -#endif mtspr SPRN_MD_TWC, r11 + /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. + * We also need to know if the insn is a load/store, so: + * Clear _PAGE_PRESENT and load that which will + * trap into DTLB Error with store bit set accordinly. + */ + /* PRESENT=0x1, ACCESSED=0x20 + * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); + * r10 = (r10 & ~PRESENT) | r11; + */ +#ifdef CONFIG_SWAP + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -489,14 +500,16 @@ _ENTRY(DTLBMiss_jmp) /* Restore registers */ mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_1) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_1 + #ifdef CONFIG_PERF_EVENTS -_ENTRY(dtlb_miss_perf) - lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + patch_site 0f, patch__dtlbmiss_perf +0: lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) addi r11, r11, 1 stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) @@ -637,8 +650,8 @@ InstructionBreakpoint: */ DTLBMissIMMR: mtcr r12 - /* Set 512k byte guarded page and mark it valid and accessed */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2 + /* Set 512k byte guarded page and mark it valid */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ @@ -648,16 +661,17 @@ DTLBMissIMMR: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_2) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_2 DTLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid and accessed */ - li r11, MD_PS8MEG | MD_SVALID | M_APG2 + /* Set 8M byte page and mark it valid */ + li r11, MD_PS8MEG | MD_SVALID mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ @@ -666,28 +680,29 @@ DTLBMissLinear: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_3) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_3 #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid,accessed */ - li r11, MI_PS8MEG | MI_SVALID | M_APG2 + /* Set 8M byte page and mark it valid */ + li r11, MI_PS8MEG | MI_SVALID mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ -_ENTRY(itlb_miss_exit_2) - mfspr r10, SPRN_SPRG_SCRATCH0 +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__itlbmiss_exit_2 #endif /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions @@ -705,8 +720,10 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_M_TW /* Get level 1 table */ blt+ 3f rlwinm r11, r10, 16, 0xfff8 -_ENTRY(FixupDAR_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + +0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + patch_site 0b, patch__fixupdar_linmem_top + /* create physical page address from effective address */ tophys(r11, r10) blt- cr7, 201f @@ -960,7 +977,7 @@ initial_mmu: ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 li r8, MI_PS8MEG /* Set 8M byte page */ - ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */ + ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ @@ -987,7 +1004,7 @@ initial_mmu: ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */ + ori r8, r8, MD_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4d5322cfad25..96f34730010f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -590,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - save_all(tsk); - #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); #endif + save_all(tsk); preempt_enable(); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index bf8def2159c3..d65b961661fb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2337,8 +2337,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); return; } - dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC - / tb_ticks_per_sec; + dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index fa888bfc347e..9f5b8c01c4e1 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -61,11 +61,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) dec_time = vcpu->arch.dec; /* - * Guest timebase ticks at the same frequency as host decrementer. - * So use the host decrementer calculations for decrementer emulation. + * Guest timebase ticks at the same frequency as host timebase. + * So use the host timebase calculations for decrementer emulation. */ - dec_time = dec_time << decrementer_clockevent.shift; - do_div(dec_time, decrementer_clockevent.mult); + dec_time = tb_to_ns(dec_time); dec_nsec = do_div(dec_time, NSEC_PER_SEC); hrtimer_start(&vcpu->arch.dec_timer, ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 36484a2ef915..01b7f5107c3a 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -13,6 +13,7 @@ */ #include <linux/memblock.h> +#include <linux/mmu_context.h> #include <asm/fixmap.h> #include <asm/code-patching.h> @@ -79,7 +80,7 @@ void __init MMU_init_hw(void) for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { mtspr(SPRN_MD_CTR, ctr | (i << 8)); mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); addr += LARGE_PAGE_SIZE_8M; mem -= LARGE_PAGE_SIZE_8M; @@ -97,22 +98,13 @@ static void __init mmu_mapin_immr(void) map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } -/* Address of instructions to patch */ -#ifndef CONFIG_PIN_TLB_IMMR -extern unsigned int DTLBMiss_jmp; -#endif -extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; -#ifndef CONFIG_PIN_TLB_TEXT -extern unsigned int ITLBMiss_cmp; -#endif - -static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) +static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped) { - unsigned int instr = *addr; + unsigned int instr = *(unsigned int *)patch_site_addr(site); instr &= 0xffff0000; instr |= (unsigned long)__va(mapped) >> 16; - patch_instruction(addr, instr); + patch_instruction_site(site, instr); } unsigned long __init mmu_mapin_ram(unsigned long top) @@ -123,17 +115,17 @@ unsigned long __init mmu_mapin_ram(unsigned long top) mapped = 0; mmu_mapin_immr(); #ifndef CONFIG_PIN_TLB_IMMR - patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); + patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); #endif #ifndef CONFIG_PIN_TLB_TEXT - mmu_patch_cmp_limit(&ITLBMiss_cmp, 0); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); #endif } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); } - mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped); - mmu_patch_cmp_limit(&FixupDAR_cmp, mapped); + mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); + mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 8 MiB diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 6c0020d1c561..e38f74e9e7a4 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -31,9 +31,6 @@ extern unsigned long itlb_miss_counter, dtlb_miss_counter; extern atomic_t instruction_counter; -extern unsigned int itlb_miss_perf, dtlb_miss_perf; -extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2; -extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3; static atomic_t insn_ctr_ref; static atomic_t itlb_miss_ref; @@ -103,22 +100,22 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_inc_return(&itlb_miss_ref) == 1) { - unsigned long target = (unsigned long)&itlb_miss_perf; + unsigned long target = patch_site_addr(&patch__itlbmiss_perf); - patch_branch(&itlb_miss_exit_1, target, 0); + patch_branch_site(&patch__itlbmiss_exit_1, target, 0); #ifndef CONFIG_PIN_TLB_TEXT - patch_branch(&itlb_miss_exit_2, target, 0); + patch_branch_site(&patch__itlbmiss_exit_2, target, 0); #endif } val = itlb_miss_counter; break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_inc_return(&dtlb_miss_ref) == 1) { - unsigned long target = (unsigned long)&dtlb_miss_perf; + unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); - patch_branch(&dtlb_miss_exit_1, target, 0); - patch_branch(&dtlb_miss_exit_2, target, 0); - patch_branch(&dtlb_miss_exit_3, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_2, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_3, target, 0); } val = dtlb_miss_counter; break; @@ -180,17 +177,17 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { - patch_instruction(&itlb_miss_exit_1, insn); + patch_instruction_site(&patch__itlbmiss_exit_1, insn); #ifndef CONFIG_PIN_TLB_TEXT - patch_instruction(&itlb_miss_exit_2, insn); + patch_instruction_site(&patch__itlbmiss_exit_2, insn); #endif } break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_dec_return(&dtlb_miss_ref) == 0) { - patch_instruction(&dtlb_miss_exit_1, insn); - patch_instruction(&dtlb_miss_exit_2, insn); - patch_instruction(&dtlb_miss_exit_3, insn); + patch_instruction_site(&patch__dtlbmiss_exit_1, insn); + patch_instruction_site(&patch__dtlbmiss_exit_2, insn); + patch_instruction_site(&patch__dtlbmiss_exit_3, insn); } break; } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 2a9d66254ffc..5326ece36120 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -29,6 +29,7 @@ config KILAUEA select 405EX select PPC40x_SIMPLE select PPC4xx_PCI_EXPRESS + select PCI select PCI_MSI select PPC4xx_MSI help diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index f024efd5a4c2..9a85d350b1b6 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -21,6 +21,7 @@ config BLUESTONE depends on 44x select PPC44x_SIMPLE select APM821xx + select PCI select PCI_MSI select PPC4xx_MSI select PPC4xx_PCI_EXPRESS @@ -200,6 +201,7 @@ config AKEBONO select SWIOTLB select 476FPE select PPC4xx_PCI_EXPRESS + select PCI select PCI_MSI select PPC4xx_HSTA_MSI select I2C diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 8bd590af488a..794487313cc8 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -26,6 +26,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/hugetlb.h> #include <asm/lppaca.h> #include <asm/hvcall.h> #include <asm/firmware.h> @@ -36,6 +37,7 @@ #include <asm/vio.h> #include <asm/mmu.h> #include <asm/machdep.h> +#include <asm/drmem.h> #include "pseries.h" @@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m) seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); } +static void maxmem_data(struct seq_file *m) +{ + unsigned long maxmem = 0; + + maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += hugetlb_total_pages() * PAGE_SIZE; + + seq_printf(m, "MaxMem=%ld\n", maxmem); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); + maxmem_data(m); return 0; } diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 69e7fb47bcaa..878f9c1d3615 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -11,6 +11,12 @@ UBSAN_SANITIZE := n ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) +ifdef CONFIG_CC_IS_CLANG +# clang stores addresses on the stack causing the frame size to blow +# out. See https://github.com/ClangBuiltLinux/linux/issues/252 +KBUILD_CFLAGS += -Wframe-larger-than=4096 +endif + ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 36473d7dbaac..07fa9ea75fea 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -1,6 +1,3 @@ -CONFIG_SMP=y -CONFIG_PCI=y -CONFIG_PCIE_XILINX=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y @@ -11,10 +8,15 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y +CONFIG_SMP=y +CONFIG_PCI=y +CONFIG_PCIE_XILINX=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -59,6 +61,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y +CONFIG_SIFIVE_PLIC=y CONFIG_RAS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -72,8 +75,5 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_TRACE is not set CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_SIFIVE_PLIC=y +# CONFIG_RCU_TRACE is not set diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index d9a7916ff0ab..9fe5952d117d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 6075100f03a5..3a27d31fcda6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; diff --git a/block/bio.c b/block/bio.c index c27f77befbac..d5368a445561 100644 --- a/block/bio.c +++ b/block/bio.c @@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkg_association(bio, bio_src); - - blkcg_bio_issue_init(bio); + bio_clone_blkcg_association(bio, bio_src); } EXPORT_SYMBOL(__bio_clone_fast); @@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -/** - * bio_associate_blkg - associate a bio with the a blkg - * @bio: target bio - * @blkg: the blkg to associate - * - * This tries to associate @bio with the specified blkg. Association failure - * is handled by walking up the blkg tree. Therefore, the blkg associated can - * be anything between @blkg and the root_blkg. This situation only happens - * when a cgroup is dying and then the remaining bios will spill to the closest - * alive blkg. - * - * A reference will be taken on the @blkg and will be released when @bio is - * freed. - */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) -{ - if (unlikely(bio->bi_blkg)) - return -EBUSY; - bio->bi_blkg = blkg_tryget_closest(blkg); - return 0; -} - -/** - * __bio_associate_blkg_from_css - internal blkg association function - * - * This in the core association function that all association paths rely on. - * A blkg reference is taken which is released upon freeing of the bio. - */ -static int __bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ - struct request_queue *q = bio->bi_disk->queue; - struct blkcg_gq *blkg; - int ret; - - rcu_read_lock(); - - if (!css || !css->parent) - blkg = q->root_blkg; - else - blkg = blkg_lookup_create(css_to_blkcg(css), q); - - ret = bio_associate_blkg(bio, blkg); - - rcu_read_unlock(); - return ret; -} - -/** - * bio_associate_blkg_from_css - associate a bio with a specified css - * @bio: target bio - * @css: target css - * - * Associate @bio with the blkg found by combining the css's blkg and the - * request_queue of the @bio. This falls back to the queue's root_blkg if - * the association fails with the css. - */ -int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ - if (unlikely(bio->bi_blkg)) - return -EBUSY; - return __bio_associate_blkg_from_css(bio, css); -} -EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); - #ifdef CONFIG_MEMCG /** - * bio_associate_blkg_from_page - associate a bio with the page's blkg + * bio_associate_blkcg_from_page - associate a bio with the page's blkcg * @bio: target bio * @page: the page to lookup the blkcg from * - * Associate @bio with the blkg from @page's owning memcg and the respective - * request_queue. If cgroup_e_css returns NULL, fall back to the queue's - * root_blkg. - * - * Note: this must be called after bio has an associated device. + * Associate @bio with the blkcg from @page's owning memcg. This works like + * every other associate function wrt references. */ -int bio_associate_blkg_from_page(struct bio *bio, struct page *page) +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) { - struct cgroup_subsys_state *css; - int ret; + struct cgroup_subsys_state *blkcg_css; - if (unlikely(bio->bi_blkg)) + if (unlikely(bio->bi_css)) return -EBUSY; if (!page->mem_cgroup) return 0; - - rcu_read_lock(); - - css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - - ret = __bio_associate_blkg_from_css(bio, css); - - rcu_read_unlock(); - return ret; + blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, + &io_cgrp_subsys); + bio->bi_css = blkcg_css; + return 0; } #endif /* CONFIG_MEMCG */ /** - * bio_associate_create_blkg - associate a bio with a blkg from q - * @q: request_queue where bio is going + * bio_associate_blkcg - associate a bio with the specified blkcg * @bio: target bio + * @blkcg_css: css of the blkcg to associate + * + * Associate @bio with the blkcg specified by @blkcg_css. Block layer will + * treat @bio as if it were issued by a task which belongs to the blkcg. * - * Associate @bio with the blkg found from the bio's css and the request_queue. - * If one is not found, bio_lookup_blkg creates the blkg. This falls back to - * the queue's root_blkg if association fails. + * This function takes an extra reference of @blkcg_css which will be put + * when @bio is released. The caller must own @bio and is responsible for + * synchronizing calls to this function. */ -int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { - struct cgroup_subsys_state *css; - int ret = 0; - - /* someone has already associated this bio with a blkg */ - if (bio->bi_blkg) - return ret; - - rcu_read_lock(); - - css = blkcg_css(); - - ret = __bio_associate_blkg_from_css(bio, css); - - rcu_read_unlock(); - return ret; + if (unlikely(bio->bi_css)) + return -EBUSY; + css_get(blkcg_css); + bio->bi_css = blkcg_css; + return 0; } +EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_reassociate_blkg - reassociate a bio with a blkg from q - * @q: request_queue where bio is going + * bio_associate_blkg - associate a bio with the specified blkg * @bio: target bio + * @blkg: the blkg to associate * - * When submitting a bio, multiple recursive calls to make_request() may occur. - * This causes the initial associate done in blkcg_bio_issue_check() to be - * incorrect and reference the prior request_queue. This performs reassociation - * when this situation happens. + * Associate @bio with the blkg specified by @blkg. This is the queue specific + * blkcg information associated with the @bio, a reference will be taken on the + * @blkg and will be freed when the bio is freed. */ -int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { - if (bio->bi_blkg) { - blkg_put(bio->bi_blkg); - bio->bi_blkg = NULL; - } - - return bio_associate_create_blkg(q, bio); + if (unlikely(bio->bi_blkg)) + return -EBUSY; + if (!blkg_try_get(blkg)) + return -ENODEV; + bio->bi_blkg = blkg; + return 0; } /** @@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio) put_io_context(bio->bi_ioc); bio->bi_ioc = NULL; } + if (bio->bi_css) { + css_put(bio->bi_css); + bio->bi_css = NULL; + } if (bio->bi_blkg) { blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; @@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio) } /** - * bio_clone_blkg_association - clone blkg association from src to dst bio + * bio_clone_blkcg_association - clone blkcg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkg_association(struct bio *dst, struct bio *src) +void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) - bio_associate_blkg(dst, src->bi_blkg); + if (src->bi_css) + WARN_ON(bio_associate_blkcg(dst, src->bi_css)); } -EXPORT_SYMBOL_GPL(bio_clone_blkg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 992da5592c6e..c630e02836a8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } -static void __blkg_release(struct rcu_head *rcu) -{ - struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); - - percpu_ref_exit(&blkg->refcnt); - - /* release the blkcg and parent blkg refs this blkg has been holding */ - css_put(&blkg->blkcg->css); - if (blkg->parent) - blkg_put(blkg->parent); - - wb_congested_put(blkg->wb_congested); - - blkg_free(blkg); -} - -/* - * A group is RCU protected, but having an rcu lock does not mean that one - * can access all the fields of blkg and assume these are valid. For - * example, don't try to follow throtl_data and request queue links. - * - * Having a reference to blkg under an rcu allows accesses to only values - * local to groups like group stats and group rate limits. - */ -static void blkg_release(struct percpu_ref *ref) -{ - struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); - - call_rcu(&blkg->rcu_head, __blkg_release); -} - /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; + atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } - ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, - GFP_NOWAIT | __GFP_NOWARN); - if (ret) - goto err_cancel_ref; - /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); -err_cancel_ref: - percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -296,7 +259,7 @@ err_free_blkg: } /** - * __blkg_lookup_create - lookup blkg, try to create one if not there + * blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -305,11 +268,12 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns the blkg or the closest blkg if blkg_create fails as it walks - * down from root. + * Returns pointer to the looked up or created blkg on success, ERR_PTR() + * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not + * dead and bypassing, returns ERR_PTR(-EBUSY). */ -struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return q->root_blkg; + return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. Returns the closest - * blkg to the intended blkg should blkg_create() fail. + * non-root blkgs have access to their parents. */ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - struct blkcg_gq *ret_blkg = q->root_blkg; - - while (parent) { - blkg = __blkg_lookup(parent, q, false); - if (blkg) { - /* remember closest blkg */ - ret_blkg = blkg; - break; - } + + while (parent && !__blkg_lookup(parent, q, false)) { pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (IS_ERR(blkg)) - return ret_blkg; - if (pos == blkcg) + if (pos == blkcg || IS_ERR(blkg)) return blkg; } } -/** - * blkg_lookup_create - find or create a blkg - * @blkcg: target block cgroup - * @q: target request_queue - * - * This looks up or creates the blkg representing the unique pair - * of the blkcg and the request_queue. - */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) -{ - struct blkcg_gq *blkg = blkg_lookup(blkcg, q); - unsigned long flags; - - if (unlikely(!blkg)) { - spin_lock_irqsave(q->queue_lock, flags); - - blkg = __blkg_lookup_create(blkcg, q); - - spin_unlock_irqrestore(q->queue_lock, flags); - } - - return blkg; -} - static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; @@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - percpu_ref_kill(&blkg->refcnt); + blkg_put(blkg); } /** @@ -452,6 +381,29 @@ static void blkg_destroy_all(struct request_queue *q) } /* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. + */ +void __blkg_release_rcu(struct rcu_head *rcu_head) +{ + struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} +EXPORT_SYMBOL_GPL(__blkg_release_rcu); + +/* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. */ @@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - if (!blkg_tryget(blkg)) + blkg = blkg_try_get(blkg); + if (!blkg) goto out; rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index bc6ea87d10e0..ce12515f9b9b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ blk_freeze_queue(q); + + rq_qos_exit(q); + spin_lock_irq(lock); queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio) if (q) blk_queue_exit(q); q = bio->bi_disk->queue; - bio_reassociate_blkg(q, bio); flags = 0; if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 28f80d227528..38c35c32aff2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg_gq *blkg = bio->bi_blkg; + struct blkcg *blkcg; + struct blkcg_gq *blkg; + struct request_queue *q = rqos->q; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; + rcu_read_lock(); + blkcg = bio_blkcg(bio); + bio_associate_blkcg(bio, &blkcg->css); + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) { + if (!lock) + spin_lock_irq(q->queue_lock); + blkg = blkg_lookup_create(blkcg, q); + if (IS_ERR(blkg)) + blkg = NULL; + if (!lock) + spin_unlock_irq(q->queue_lock); + } + if (!blkg) + goto out; + + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); + bio_associate_blkg(bio, blkg); +out: + rcu_read_unlock(); while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { @@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. */ - if (!blkg_tryget(blkg)) + if (!blkg_try_get(blkg)) continue; iolat = blkg_to_lat(blkg); diff --git a/block/blk-merge.c b/block/blk-merge.c index 42a46744c11b..6b5ad275ed56 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req) part_stat_unlock(); } } +/* + * Two cases of handling DISCARD merge: + * If max_discard_segments > 1, the driver takes every bio + * as a range and send them to controller together. The ranges + * needn't to be contiguous. + * Otherwise, the bios/requests will be handled as same as + * others which should be contiguous. + */ +static inline bool blk_discard_mergable(struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD && + queue_max_discard_segments(req->q) > 1) + return true; + return false; +} + +enum elv_merge blk_try_req_merge(struct request *req, struct request *next) +{ + if (blk_discard_mergable(req)) + return ELEVATOR_DISCARD_MERGE; + else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) + return ELEVATOR_BACK_MERGE; + + return ELEVATOR_NO_MERGE; +} /* * For non-mq, this has to be called with the request spinlock acquired. @@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q, if (req_op(req) != req_op(next)) return NULL; - /* - * not contiguous - */ - if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) - return NULL; - if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || req_no_special_merge(next)) @@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q, * counts here. Handle DISCARDs separately, as they * have separate settings. */ - if (req_op(req) == REQ_OP_DISCARD) { + + switch (blk_try_req_merge(req, next)) { + case ELEVATOR_DISCARD_MERGE: if (!req_attempt_discard_merge(q, req, next)) return NULL; - } else if (!ll_merge_requests_fn(q, req, next)) + break; + case ELEVATOR_BACK_MERGE: + if (!ll_merge_requests_fn(q, req, next)) + return NULL; + break; + default: return NULL; + } /* * If failfast settings disagree or any of the two is already @@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (req_op(rq) == REQ_OP_DISCARD && - queue_max_discard_segments(rq->q) > 1) + if (blk_discard_mergable(rq)) return ELEVATOR_DISCARD_MERGE; else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0641533597f1..844a454a7b3a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - rq_qos_exit(q); - mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) elv_unregister_queue(q); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 4bda70e8db48..db1a3a2ae006 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif +static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) +{ +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + /* fallback to root_blkg if we fail to get a blkg ref */ + if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) + bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +#endif +} + bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { struct throtl_qnode *qn = NULL; - struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; @@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if (unlikely(blk_queue_bypass(q))) goto out_unlock; + blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/bounce.c b/block/bounce.c index cf49fe02f65c..36869afc258c 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkg_association(bio, bio_src); - - blkcg_bio_issue_init(bio); + bio_clone_blkcg_association(bio, bio_src); return bio; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6a3d87dd3c1a..ed41aa978c4a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* @@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct cfq_group *cfqg; rcu_read_lock(); - cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); + cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); if (!cfqg) { cfqq = &cfqd->oom_cfqq; goto out; diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index a7c2673ffd36..824ae985ad93 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -126,6 +126,7 @@ int acpi_device_get_power(struct acpi_device *device, int *state) return 0; } +EXPORT_SYMBOL(acpi_device_get_power); static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state) { diff --git a/drivers/block/brd.c b/drivers/block/brd.c index df8103dd40ac..c18586fccb6f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i) disk->first_minor = i * max_part; disk->fops = &brd_fops; disk->private_data = brd; - disk->queue = brd->brd_queue; disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "ram%d", i); set_capacity(disk, rd_size * 2); - disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; + brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; /* Tell the block layer that this is not a rotational device */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue); return brd; @@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new) brd = brd_alloc(i); if (brd) { + brd->brd_disk->queue = brd->brd_queue; add_disk(brd->brd_disk); list_add_tail(&brd->brd_list, &brd_devices); } @@ -503,8 +503,14 @@ static int __init brd_init(void) /* point of no return */ - list_for_each_entry(brd, &brd_devices, brd_list) + list_for_each_entry(brd, &brd_devices, brd_list) { + /* + * associate with queue just before adding disk for + * avoiding to mess up failure path + */ + brd->brd_disk->queue = brd->brd_queue; add_disk(brd->brd_disk); + } blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, THIS_MODULE, brd_probe, NULL, NULL); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e6273ae85246..cb0cc8685076 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,7 +77,6 @@ #include <linux/falloc.h> #include <linux/uio.h> #include <linux/ioprio.h> -#include <linux/blk-cgroup.h> #include "loop.h" @@ -1760,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { - cmd->css = &bio_blkcg(rq->bio)->css; + if (cmd->use_aio && rq->bio && rq->bio->bi_css) { + cmd->css = rq->bio->bi_css; css_get(cmd->css); } else #endif diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index dfc8de6ce525..a7daa8acbab3 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd, dev_warn(&dd->pdev->dev, "data movement but " "sect_count is 0\n"); - err = -EINVAL; - goto abort; + err = -EINVAL; + goto abort; } } } diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index df9467eef32a..41c9ccdd20d6 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -234,6 +234,7 @@ config EDAC_SKX depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y select DMI + select ACPI_ADXL if ACPI help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. If your diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index dd209e0dd9ab..a99ea61dad32 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -26,6 +26,7 @@ #include <linux/bitmap.h> #include <linux/math64.h> #include <linux/mod_devicetable.h> +#include <linux/adxl.h> #include <acpi/nfit.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> @@ -35,6 +36,7 @@ #include "edac_module.h" #define EDAC_MOD_STR "skx_edac" +#define MSG_SIZE 1024 /* * Debug macros @@ -54,6 +56,29 @@ static LIST_HEAD(skx_edac_list); static u64 skx_tolm, skx_tohm; +static char *skx_msg; +static unsigned int nvdimm_count; + +enum { + INDEX_SOCKET, + INDEX_MEMCTRL, + INDEX_CHANNEL, + INDEX_DIMM, + INDEX_MAX +}; + +static const char * const component_names[] = { + [INDEX_SOCKET] = "ProcessorSocketId", + [INDEX_MEMCTRL] = "MemoryControllerId", + [INDEX_CHANNEL] = "ChannelId", + [INDEX_DIMM] = "DimmSlotId", +}; + +static int component_indices[ARRAY_SIZE(component_names)]; +static int adxl_component_count; +static const char * const *adxl_component_names; +static u64 *adxl_values; +static char *adxl_msg; #define NUM_IMC 2 /* memory controllers per socket */ #define NUM_CHANNELS 3 /* channels per memory controller */ @@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, u16 flags; u64 size = 0; + nvdimm_count++; + dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, imc->src_id, 0); @@ -941,12 +968,46 @@ static void teardown_skx_debug(void) } #endif /*CONFIG_EDAC_DEBUG*/ +static bool skx_adxl_decode(struct decoded_addr *res) + +{ + int i, len = 0; + + if (res->addr >= skx_tohm || (res->addr >= skx_tolm && + res->addr < BIT_ULL(32))) { + edac_dbg(0, "Address 0x%llx out of range\n", res->addr); + return false; + } + + if (adxl_decode(res->addr, adxl_values)) { + edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); + return false; + } + + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + + for (i = 0; i < adxl_component_count; i++) { + if (adxl_values[i] == ~0x0ull) + continue; + + len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", + adxl_component_names[i], adxl_values[i]); + if (MSG_SIZE - len <= 0) + break; + } + + return true; +} + static void skx_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, struct decoded_addr *res) { enum hw_event_mc_err_type tp_event; - char *type, *optype, msg[256]; + char *type, *optype; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; } } + if (adxl_component_count) { + snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, adxl_msg); + } else { + snprintf(skx_msg, MSG_SIZE, + "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, + res->socket, res->imc, res->rank, + res->bank_group, res->bank_address, res->row, res->column); + } - snprintf(msg, sizeof(msg), - "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", - overflow ? " OVERFLOW" : "", - (uncorrected_error && recoverable) ? " recoverable" : "", - mscod, errcode, - res->socket, res->imc, res->rank, - res->bank_group, res->bank_address, res->row, res->column); - - edac_dbg(0, "%s\n", msg); + edac_dbg(0, "%s\n", skx_msg); /* Call the helper to output message */ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, res->channel, res->dimm, -1, - optype, msg); + optype, skx_msg); +} + +static struct mem_ctl_info *get_mci(int src_id, int lmc) +{ + struct skx_dev *d; + + if (lmc > NUM_IMC - 1) { + skx_printk(KERN_ERR, "Bad lmc %d\n", lmc); + return NULL; + } + + list_for_each_entry(d, &skx_edac_list, list) { + if (d->imc[0].src_id == src_id) + return d->imc[lmc].mci; + } + + skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc); + + return NULL; } static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, @@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; + memset(&res, 0, sizeof(res)); res.addr = mce->addr; - if (!skx_decode(&res)) + + if (adxl_component_count) { + if (!skx_adxl_decode(&res)) + return NOTIFY_DONE; + + mci = get_mci(res.socket, res.imc); + } else { + if (!skx_decode(&res)) + return NOTIFY_DONE; + + mci = res.dev->imc[res.imc].mci; + } + + if (!mci) return NOTIFY_DONE; - mci = res.dev->imc[res.imc].mci; if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; @@ -1094,6 +1193,62 @@ static void skx_remove(void) } } +static void __init skx_adxl_get(void) +{ + const char * const *names; + int i, j; + + names = adxl_get_component_names(); + if (!names) { + skx_printk(KERN_NOTICE, "No firmware support for address translation."); + skx_printk(KERN_CONT, " Only decoding DDR4 address!\n"); + return; + } + + for (i = 0; i < INDEX_MAX; i++) { + for (j = 0; names[j]; j++) { + if (!strcmp(component_names[i], names[j])) { + component_indices[i] = j; + break; + } + } + + if (!names[j]) + goto err; + } + + adxl_component_names = names; + while (*names++) + adxl_component_count++; + + adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), + GFP_KERNEL); + if (!adxl_values) { + adxl_component_count = 0; + return; + } + + adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); + if (!adxl_msg) { + adxl_component_count = 0; + kfree(adxl_values); + } + + return; +err: + skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", + component_names[i]); + for (j = 0; names[j]; j++) + skx_printk(KERN_CONT, "%s ", names[j]); + skx_printk(KERN_CONT, "\n"); +} + +static void __exit skx_adxl_put(void) +{ + kfree(adxl_values); + kfree(adxl_msg); +} + /* * skx_init: * make sure we are running on the correct cpu model @@ -1158,6 +1313,15 @@ static int __init skx_init(void) } } + skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL); + if (!skx_msg) { + rc = -ENOMEM; + goto fail; + } + + if (nvdimm_count) + skx_adxl_get(); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -1176,6 +1340,9 @@ static void __exit skx_exit(void) edac_dbg(2, "\n"); mce_unregister_decode_chain(&skx_mce_dec); skx_remove(); + if (nvdimm_count) + skx_adxl_put(); + kfree(skx_msg); teardown_skx_debug(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 297a5490ad8c..0a4fba196b84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -135,7 +135,8 @@ static int acp_poweroff(struct generic_pm_domain *genpd) * 2. power off the acp tiles * 3. check and enter ulv state */ - if (adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); } return 0; @@ -517,7 +518,8 @@ static int acp_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = state == AMD_PG_STATE_GATE ? true : false; - if (adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e4dd09a5072..30bc345d6fdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1493,8 +1493,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) } adev->powerplay.pp_feature = amdgpu_pp_feature_mask; - if (amdgpu_sriov_vf(adev)) - adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { @@ -1600,7 +1598,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) } } - if (adev->powerplay.pp_funcs->load_firmware) { + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); if (r) { pr_err("firmware loading failed\n"); @@ -3341,7 +3339,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, kthread_park(ring->sched.thread); - if (job && job->base.sched == &ring->sched) + if (job && job->base.sched != &ring->sched) continue; drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 28781414d71c..943dbf3c5da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -114,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -/* OverDrive(bit 14) disabled by default*/ -uint amdgpu_pp_feature_mask = 0xffffbfff; +/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ +uint amdgpu_pp_feature_mask = 0xfffd3fff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 790fd5408ddf..1a656b8657f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -392,7 +392,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK)) return; - if (!adev->powerplay.pp_funcs->set_powergating_by_smu) + if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 94055a485e01..59cc678de8c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -704,7 +704,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + + if (ret) + return -EINVAL; return count; } @@ -737,7 +740,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + + if (ret) + return -EINVAL; return count; } @@ -770,7 +776,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + + if (ret) + return -EINVAL; return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6904d794d60a..352b30409060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -542,7 +542,8 @@ static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor *cursor) { amdgpu_vm_pt_next(adev, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); + if (cursor->pfn != ~0ll) + while (amdgpu_vm_pt_descendant(adev, cursor)); } /** @@ -3234,8 +3235,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va.rb_root, rb) { + /* Don't remove the mapping here, we don't want to trigger a + * rebalance and the tree is about to be destroyed anyway. + */ list_del(&mapping->list); - amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); } list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 3d0f277a6523..617b0c8908a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4815,8 +4815,10 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) if (r) goto done; - /* Test KCQs */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { + /* Test KCQs - reversing the order of rings seems to fix ring test failure + * after GPU reset + */ + for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { ring = &adev->gfx.compute_ring[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 14649f8475f3..fd23ba1226a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -280,7 +280,7 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, return; if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - if (adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 04fa3d972636..7a8c9172d30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1366,7 +1366,8 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); sdma_v4_0_init_golden_registers(adev); @@ -1386,7 +1387,8 @@ static int sdma_v4_0_hw_fini(void *handle) sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e224f23e2215..b0df6dc9a775 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1524,6 +1524,13 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + /* + * PWM interperts 0 as 100% rather than 0% because of HW + * limitation for level 0.So limiting minimum brightness level + * to 1. + */ + if (bd->props.brightness < 1) + return 1; if (dc_link_set_backlight_level(dm->backlight_link, bd->props.brightness, 0, 0)) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 0fab64a2a915..12001a006b2d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -101,7 +101,7 @@ bool dm_pp_apply_display_requirements( adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1; } - if (adev->powerplay.pp_funcs->display_configuration_change) + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_configuration_change) adev->powerplay.pp_funcs->display_configuration_change( adev->powerplay.pp_handle, &adev->pm.pm_display_cfg); @@ -304,7 +304,7 @@ bool dm_pp_get_clock_levels_by_type( struct amd_pp_simple_clock_info validation_clks = { 0 }; uint32_t i; - if (adev->powerplay.pp_funcs->get_clock_by_type) { + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { /* Error in pplib. Provide default values. */ @@ -315,7 +315,7 @@ bool dm_pp_get_clock_levels_by_type( pp_to_dc_clock_levels(&pp_clks, dc_clks, clk_type); - if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks) { + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_display_mode_validation_clocks) { if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks( pp_handle, &validation_clks)) { /* Error in pplib. Provide default values. */ @@ -398,6 +398,9 @@ bool dm_pp_get_clock_levels_by_type_with_voltage( struct pp_clock_levels_with_voltage pp_clk_info = {0}; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + if (!pp_funcs || !pp_funcs->get_clock_by_type_with_voltage) + return false; + if (pp_funcs->get_clock_by_type_with_voltage(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clk_info)) @@ -438,7 +441,7 @@ bool dm_pp_apply_clock_for_voltage_request( if (!pp_clock_request.clock_type) return false; - if (adev->powerplay.pp_funcs->display_clock_voltage_request) + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_clock_voltage_request) ret = adev->powerplay.pp_funcs->display_clock_voltage_request( adev->powerplay.pp_handle, &pp_clock_request); @@ -455,7 +458,7 @@ bool dm_pp_get_static_clocks( struct amd_pp_clock_info pp_clk_info = {0}; int ret = 0; - if (adev->powerplay.pp_funcs->get_current_clocks) + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_current_clocks) ret = adev->powerplay.pp_funcs->get_current_clocks( adev->powerplay.pp_handle, &pp_clk_info); @@ -505,6 +508,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets; wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets; + if (!pp_funcs || !pp_funcs->set_watermarks_for_clocks_ranges) + return; + for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) { if (ranges->reader_wm_sets[i].wm_inst > 3) wm_dce_clocks[i].wm_set_id = WM_SET_A; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index de190935f0a4..e3624ca24574 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -568,7 +568,7 @@ static struct input_pixel_processor *dce110_ipp_create( static const struct encoder_feature_support link_enc_feature = { .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 594000, + .max_hdmi_pixel_clock = 300000, .flags.bits.IS_HBR2_CAPABLE = true, .flags.bits.IS_TPS3_CAPABLE = true }; diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index e8964cae6b93..d6aa1d414320 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -723,11 +723,14 @@ static int pp_dpm_force_clock_level(void *handle, pr_info("%s was not implemented.\n", __func__); return 0; } + + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { + pr_info("force clock level is for dpm manual mode only.\n"); + return -EINVAL; + } + mutex_lock(&hwmgr->smu_lock); - if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) - ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); - else - ret = -EINVAL; + ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); mutex_unlock(&hwmgr->smu_lock); return ret; } @@ -963,6 +966,7 @@ static int pp_dpm_switch_power_profile(void *handle, static int pp_set_power_limit(void *handle, uint32_t limit) { struct pp_hwmgr *hwmgr = handle; + uint32_t max_power_limit; if (!hwmgr || !hwmgr->pm_en) return -EINVAL; @@ -975,7 +979,13 @@ static int pp_set_power_limit(void *handle, uint32_t limit) if (limit == 0) limit = hwmgr->default_power_limit; - if (limit > hwmgr->default_power_limit) + max_power_limit = hwmgr->default_power_limit; + if (hwmgr->od_enabled) { + max_power_limit *= (100 + hwmgr->platform_descriptor.TDPODLimit); + max_power_limit /= 100; + } + + if (limit > max_power_limit) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -994,8 +1004,13 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) mutex_lock(&hwmgr->smu_lock); - if (default_limit) + if (default_limit) { *limit = hwmgr->default_power_limit; + if (hwmgr->od_enabled) { + *limit *= (100 + hwmgr->platform_descriptor.TDPODLimit); + *limit /= 100; + } + } else *limit = hwmgr->power_limit; @@ -1303,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle) { struct pp_hwmgr *hwmgr = handle; - if (!hwmgr || !hwmgr->pm_en) + if (!hwmgr) return -EINVAL; - if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) { + if (!hwmgr->pm_en || + hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) return 0; - } mutex_lock(&hwmgr->smu_lock); hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 6c99cbf51c08..ed35ec0341e6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3588,9 +3588,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons break; } - if (i >= sclk_table->count) + if (i >= sclk_table->count) { data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; - else { + sclk_table->dpm_levels[i-1].value = sclk; + } else { /* TODO: Check SCLK in DAL's minimum clocks * in case DeepSleep divider update is required. */ @@ -3605,9 +3606,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons break; } - if (i >= mclk_table->count) + if (i >= mclk_table->count) { data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - + mclk_table->dpm_levels[i-1].value = mclk; + } if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 4714b5b59825..99a33c33a32c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -718,7 +718,7 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table, table->WatermarkRow[1][i].MaxClock = cpu_to_le16((uint16_t) (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) / - 100); + 1000); table->WatermarkRow[1][i].MinUclk = cpu_to_le16((uint16_t) (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) / diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 419a1d77d661..8c4db86bb4b7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) hwmgr->platform_descriptor.overdriveLimit.memoryClock = dpm_table->dpm_levels[dpm_table->count-1].value; - vega10_init_dpm_state(&(dpm_table->dpm_state)); data->dpm_table.eclk_table.count = 0; @@ -3249,6 +3248,37 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input) { struct vega10_hwmgr *data = hwmgr->backend; + const struct phm_set_power_state_input *states = + (const struct phm_set_power_state_input *)input; + const struct vega10_power_state *vega10_ps = + cast_const_phw_vega10_power_state(states->pnew_state); + struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); + uint32_t sclk = vega10_ps->performance_levels + [vega10_ps->performance_level_count - 1].gfx_clock; + struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); + uint32_t mclk = vega10_ps->performance_levels + [vega10_ps->performance_level_count - 1].mem_clock; + uint32_t i; + + for (i = 0; i < sclk_table->count; i++) { + if (sclk == sclk_table->dpm_levels[i].value) + break; + } + + if (i >= sclk_table->count) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + sclk_table->dpm_levels[i-1].value = sclk; + } + + for (i = 0; i < mclk_table->count; i++) { + if (mclk == mclk_table->dpm_levels[i].value) + break; + } + + if (i >= mclk_table->count) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + mclk_table->dpm_levels[i-1].value = mclk; + } if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; @@ -4529,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value) if (vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].gfx_clock > - hwmgr->platform_descriptor.overdriveLimit.engineClock) + hwmgr->platform_descriptor.overdriveLimit.engineClock) { vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].gfx_clock = hwmgr->platform_descriptor.overdriveLimit.engineClock; - + pr_warn("max sclk supported by vbios is %d\n", + hwmgr->platform_descriptor.overdriveLimit.engineClock); + } return 0; } @@ -4581,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value) if (vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].mem_clock > - hwmgr->platform_descriptor.overdriveLimit.memoryClock) + hwmgr->platform_descriptor.overdriveLimit.memoryClock) { vega10_ps->performance_levels [vega10_ps->performance_level_count - 1].mem_clock = hwmgr->platform_descriptor.overdriveLimit.memoryClock; + pr_warn("max mclk supported by vbios is %d\n", + hwmgr->platform_descriptor.overdriveLimit.memoryClock); + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 9600e2f226e9..74bc37308dc0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -2356,6 +2356,13 @@ static int vega12_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable) return vega12_disable_gfx_off(hwmgr); } +static int vega12_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + PHM_PerformanceLevelDesignation designation, uint32_t index, + PHM_PerformanceLevel *level) +{ + return 0; +} + static const struct pp_hwmgr_func vega12_hwmgr_funcs = { .backend_init = vega12_hwmgr_backend_init, .backend_fini = vega12_hwmgr_backend_fini, @@ -2406,6 +2413,7 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = { .register_irq_handlers = smu9_register_irq_handlers, .start_thermal_controller = vega12_start_thermal_controller, .powergate_gfx = vega12_gfx_off_control, + .get_performance_level = vega12_get_performance_level, }; int vega12_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index b4dbbb7c334c..57143d51e3ee 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1875,38 +1875,20 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, return ret; } -static int vega20_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq) +static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr, + PPCLK_e clk_id, uint32_t *clk_freq) { - uint32_t gfx_clk = 0; int ret = 0; - *gfx_freq = 0; + *clk_freq = 0; PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16))) == 0, - "[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!", + PPSMC_MSG_GetDpmClockFreq, (clk_id << 16))) == 0, + "[GetCurrentClkFreq] Attempt to get Current Frequency Failed!", return ret); - gfx_clk = smum_get_argument(hwmgr); + *clk_freq = smum_get_argument(hwmgr); - *gfx_freq = gfx_clk * 100; - - return 0; -} - -static int vega20_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_freq) -{ - uint32_t mem_clk = 0; - int ret = 0; - - *mclk_freq = 0; - - PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16))) == 0, - "[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!", - return ret); - mem_clk = smum_get_argument(hwmgr); - - *mclk_freq = mem_clk * 100; + *clk_freq = *clk_freq * 100; return 0; } @@ -1937,12 +1919,16 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = vega20_get_current_gfx_clk_freq(hwmgr, (uint32_t *)value); + ret = vega20_get_current_clk_freq(hwmgr, + PPCLK_GFXCLK, + (uint32_t *)value); if (!ret) *size = 4; break; case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = vega20_get_current_mclk_freq(hwmgr, (uint32_t *)value); + ret = vega20_get_current_clk_freq(hwmgr, + PPCLK_UCLK, + (uint32_t *)value); if (!ret) *size = 4; break; @@ -2012,7 +1998,6 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr, if (data->smu_features[GNLD_DPM_DCEFCLK].enabled) { switch (clk_type) { case amd_pp_dcef_clock: - clk_freq = clock_req->clock_freq_in_khz / 100; clk_select = PPCLK_DCEFCLK; break; case amd_pp_disp_clock: @@ -2041,11 +2026,20 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr, return result; } +static int vega20_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + PHM_PerformanceLevelDesignation designation, uint32_t index, + PHM_PerformanceLevel *level) +{ + return 0; +} + static int vega20_notify_smc_display_config_after_ps_adjustment( struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); + struct vega20_single_dpm_table *dpm_table = + &data->dpm_table.mem_table; struct PP_Clocks min_clocks = {0}; struct pp_display_clock_request clock_req; int ret = 0; @@ -2063,7 +2057,7 @@ static int vega20_notify_smc_display_config_after_ps_adjustment( if (data->smu_features[GNLD_DPM_DCEFCLK].supported) { clock_req.clock_type = amd_pp_dcef_clock; - clock_req.clock_freq_in_khz = min_clocks.dcefClock; + clock_req.clock_freq_in_khz = min_clocks.dcefClock * 10; if (!vega20_display_clock_voltage_request(hwmgr, &clock_req)) { if (data->smu_features[GNLD_DS_DCEFCLK].supported) PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter( @@ -2076,6 +2070,15 @@ static int vega20_notify_smc_display_config_after_ps_adjustment( } } + if (data->smu_features[GNLD_DPM_UCLK].enabled) { + dpm_table->dpm_state.hard_min_level = min_clocks.memoryClock / 100; + PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinByFreq, + (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)), + "[SetHardMinFreq] Set hard min uclk failed!", + return ret); + } + return 0; } @@ -2353,7 +2356,7 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr, for (i = 0; i < count; i++) { clocks->data[i].clocks_in_khz = - dpm_table->dpm_levels[i].value * 100; + dpm_table->dpm_levels[i].value * 1000; clocks->data[i].latency_in_us = 0; } @@ -2383,7 +2386,7 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr, for (i = 0; i < count; i++) { clocks->data[i].clocks_in_khz = data->mclk_latency_table.entries[i].frequency = - dpm_table->dpm_levels[i].value * 100; + dpm_table->dpm_levels[i].value * 1000; clocks->data[i].latency_in_us = data->mclk_latency_table.entries[i].latency = vega20_get_mem_latency(hwmgr, dpm_table->dpm_levels[i].value); @@ -2408,7 +2411,7 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr, for (i = 0; i < count; i++) { clocks->data[i].clocks_in_khz = - dpm_table->dpm_levels[i].value * 100; + dpm_table->dpm_levels[i].value * 1000; clocks->data[i].latency_in_us = 0; } @@ -2431,7 +2434,7 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr, for (i = 0; i < count; i++) { clocks->data[i].clocks_in_khz = - dpm_table->dpm_levels[i].value * 100; + dpm_table->dpm_levels[i].value * 1000; clocks->data[i].latency_in_us = 0; } @@ -2582,11 +2585,11 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, return -EINVAL; } - if (input_clk < clocks.data[0].clocks_in_khz / 100 || + if (input_clk < clocks.data[0].clocks_in_khz / 1000 || input_clk > od8_settings[OD8_SETTING_UCLK_FMAX].max_value) { pr_info("clock freq %d is not within allowed range [%d - %d]\n", input_clk, - clocks.data[0].clocks_in_khz / 100, + clocks.data[0].clocks_in_khz / 1000, od8_settings[OD8_SETTING_UCLK_FMAX].max_value); return -EINVAL; } @@ -2726,7 +2729,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, switch (type) { case PP_SCLK: - ret = vega20_get_current_gfx_clk_freq(hwmgr, &now); + ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now); PP_ASSERT_WITH_CODE(!ret, "Attempt to get current gfx clk Failed!", return ret); @@ -2738,12 +2741,12 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", - i, clocks.data[i].clocks_in_khz / 100, + i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now) ? "*" : ""); break; case PP_MCLK: - ret = vega20_get_current_mclk_freq(hwmgr, &now); + ret = vega20_get_current_clk_freq(hwmgr, PPCLK_UCLK, &now); PP_ASSERT_WITH_CODE(!ret, "Attempt to get current mclk freq Failed!", return ret); @@ -2755,7 +2758,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", - i, clocks.data[i].clocks_in_khz / 100, + i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now) ? "*" : ""); break; @@ -2820,7 +2823,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", - clocks.data[0].clocks_in_khz / 100, + clocks.data[0].clocks_in_khz / 1000, od8_settings[OD8_SETTING_UCLK_FMAX].max_value); } @@ -3476,6 +3479,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { vega20_set_watermarks_for_clocks_ranges, .display_clock_voltage_request = vega20_display_clock_voltage_request, + .get_performance_level = + vega20_get_performance_level, /* UMD pstate, profile related */ .force_dpm_level = vega20_dpm_force_dpm_level, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c index e5f7f8230065..97f8a1a970c3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c @@ -642,8 +642,14 @@ static int check_powerplay_tables( "Unsupported PPTable format!", return -1); PP_ASSERT_WITH_CODE(powerplay_table->sHeader.structuresize > 0, "Invalid PowerPlay Table!", return -1); - PP_ASSERT_WITH_CODE(powerplay_table->smcPPTable.Version == PPTABLE_V20_SMU_VERSION, - "Unmatch PPTable version, vbios update may be needed!", return -1); + + if (powerplay_table->smcPPTable.Version != PPTABLE_V20_SMU_VERSION) { + pr_info("Unmatch PPTable version: " + "pptable from VBIOS is V%d while driver supported is V%d!", + powerplay_table->smcPPTable.Version, + PPTABLE_V20_SMU_VERSION); + return -EINVAL; + } //dump_pptable(&powerplay_table->smcPPTable); @@ -716,10 +722,6 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable "[appendVbiosPPTable] Failed to retrieve Smc Dpm Table from VBIOS!", return -1); - memset(ppsmc_pptable->Padding32, - 0, - sizeof(struct atom_smc_dpm_info_v4_4) - - sizeof(struct atom_common_table_header)); ppsmc_pptable->MaxVoltageStepGfx = smc_dpm_table->maxvoltagestepgfx; ppsmc_pptable->MaxVoltageStepSoc = smc_dpm_table->maxvoltagestepsoc; @@ -778,22 +780,19 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable ppsmc_pptable->FllGfxclkSpreadPercent = smc_dpm_table->fllgfxclkspreadpercent; ppsmc_pptable->FllGfxclkSpreadFreq = smc_dpm_table->fllgfxclkspreadfreq; - if ((smc_dpm_table->table_header.format_revision == 4) && - (smc_dpm_table->table_header.content_revision == 4)) { - for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) { - ppsmc_pptable->I2cControllers[i].Enabled = - smc_dpm_table->i2ccontrollers[i].enabled; - ppsmc_pptable->I2cControllers[i].SlaveAddress = - smc_dpm_table->i2ccontrollers[i].slaveaddress; - ppsmc_pptable->I2cControllers[i].ControllerPort = - smc_dpm_table->i2ccontrollers[i].controllerport; - ppsmc_pptable->I2cControllers[i].ThermalThrottler = - smc_dpm_table->i2ccontrollers[i].thermalthrottler; - ppsmc_pptable->I2cControllers[i].I2cProtocol = - smc_dpm_table->i2ccontrollers[i].i2cprotocol; - ppsmc_pptable->I2cControllers[i].I2cSpeed = - smc_dpm_table->i2ccontrollers[i].i2cspeed; - } + for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) { + ppsmc_pptable->I2cControllers[i].Enabled = + smc_dpm_table->i2ccontrollers[i].enabled; + ppsmc_pptable->I2cControllers[i].SlaveAddress = + smc_dpm_table->i2ccontrollers[i].slaveaddress; + ppsmc_pptable->I2cControllers[i].ControllerPort = + smc_dpm_table->i2ccontrollers[i].controllerport; + ppsmc_pptable->I2cControllers[i].ThermalThrottler = + smc_dpm_table->i2ccontrollers[i].thermalthrottler; + ppsmc_pptable->I2cControllers[i].I2cProtocol = + smc_dpm_table->i2ccontrollers[i].i2cprotocol; + ppsmc_pptable->I2cControllers[i].I2cSpeed = + smc_dpm_table->i2ccontrollers[i].i2cspeed; } return 0; @@ -882,15 +881,10 @@ static int init_powerplay_table_information( if (pptable_information->smc_pptable == NULL) return -ENOMEM; - if (powerplay_table->smcPPTable.Version <= 2) - memcpy(pptable_information->smc_pptable, - &(powerplay_table->smcPPTable), - sizeof(PPTable_t) - - sizeof(I2cControllerConfig_t) * I2C_CONTROLLER_NAME_COUNT); - else - memcpy(pptable_information->smc_pptable, - &(powerplay_table->smcPPTable), - sizeof(PPTable_t)); + memcpy(pptable_information->smc_pptable, + &(powerplay_table->smcPPTable), + sizeof(PPTable_t)); + result = append_vbios_pptable(hwmgr, (pptable_information->smc_pptable)); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index 2998a49960ed..63d5cf691549 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -29,7 +29,7 @@ // any structure is changed in this file #define SMU11_DRIVER_IF_VERSION 0x12 -#define PPTABLE_V20_SMU_VERSION 2 +#define PPTABLE_V20_SMU_VERSION 3 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_VCLK_DPM_LEVELS 8 diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c index f836d30fdd44..09b844ec3eab 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c @@ -71,7 +71,11 @@ static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg) result = PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMU_MP1_SRBM2P_RESP_0, CONTENT, 0); if (result != 0) { + /* Read the last message to SMU, to report actual cause */ + uint32_t val = cgs_read_register(hwmgr->device, + mmSMU_MP1_SRBM2P_MSG_0); pr_err("smu8_send_msg_to_smc_async (0x%04x) failed\n", msg); + pr_err("SMU still servicing msg (0x%04x)\n", val); return result; } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f8a931cf3665..680566d97adc 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -458,18 +458,6 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge) unsigned int val; int ret; - /* - * FIXME: - * This 70ms was found necessary by experimentation. If it's not - * present, link training fails. It seems like it can go anywhere from - * pre_enable() up to semi-auto link training initiation below. - * - * Neither the datasheet for the bridge nor the panel tested mention a - * delay of this magnitude in the timing requirements. So for now, add - * the mystery delay until someone figures out a better fix. - */ - msleep(70); - /* DSI_A lane config */ val = CHA_DSI_LANES(4 - pdata->dsi->lanes); regmap_update_bits(pdata->regmap, SN_DSI_LANES_REG, @@ -536,7 +524,22 @@ static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge) /* configure bridge ref_clk */ ti_sn_bridge_set_refclk_freq(pdata); - /* in case drm_panel is connected then HPD is not supported */ + /* + * HPD on this bridge chip is a bit useless. This is an eDP bridge + * so the HPD is an internal signal that's only there to signal that + * the panel is done powering up. ...but the bridge chip debounces + * this signal by between 100 ms and 400 ms (depending on process, + * voltage, and temperate--I measured it at about 200 ms). One + * particular panel asserted HPD 84 ms after it was powered on meaning + * that we saw HPD 284 ms after power on. ...but the same panel said + * that instead of looking at HPD you could just hardcode a delay of + * 200 ms. We'll assume that the panel driver will have the hardcoded + * delay in its prepare and always disable HPD. + * + * If HPD somehow makes sense on some future panel we'll have to + * change this to be conditional on someone specifying that HPD should + * be used. + */ regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, HPD_DISABLE); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 701cb334e1ea..d8b526b7932c 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -308,6 +308,26 @@ update_connector_routing(struct drm_atomic_state *state, return 0; } + crtc_state = drm_atomic_get_new_crtc_state(state, + new_connector_state->crtc); + /* + * For compatibility with legacy users, we want to make sure that + * we allow DPMS On->Off modesets on unregistered connectors. Modesets + * which would result in anything else must be considered invalid, to + * avoid turning on new displays on dead connectors. + * + * Since the connector can be unregistered at any point during an + * atomic check or commit, this is racy. But that's OK: all we care + * about is ensuring that userspace can't do anything but shut off the + * display on a connector that was destroyed after its been notified, + * not before. + */ + if (drm_connector_is_unregistered(connector) && crtc_state->active) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", + connector->base.id, connector->name); + return -EINVAL; + } + funcs = connector->helper_private; if (funcs->atomic_best_encoder) @@ -352,7 +372,6 @@ update_connector_routing(struct drm_atomic_state *state, set_best_encoder(state, new_connector_state, new_encoder); - crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc); crtc_state->connectors_changed = true; DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 1e40e5decbe9..4943cef178be 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -379,7 +379,8 @@ void drm_connector_cleanup(struct drm_connector *connector) /* The connector should have been removed from userspace long before * it is finally destroyed. */ - if (WARN_ON(connector->registered)) + if (WARN_ON(connector->registration_state == + DRM_CONNECTOR_REGISTERED)) drm_connector_unregister(connector); if (connector->tile_group) { @@ -436,7 +437,7 @@ int drm_connector_register(struct drm_connector *connector) return 0; mutex_lock(&connector->mutex); - if (connector->registered) + if (connector->registration_state != DRM_CONNECTOR_INITIALIZING) goto unlock; ret = drm_sysfs_connector_add(connector); @@ -456,7 +457,7 @@ int drm_connector_register(struct drm_connector *connector) drm_mode_object_register(connector->dev, &connector->base); - connector->registered = true; + connector->registration_state = DRM_CONNECTOR_REGISTERED; goto unlock; err_debugfs: @@ -478,7 +479,7 @@ EXPORT_SYMBOL(drm_connector_register); void drm_connector_unregister(struct drm_connector *connector) { mutex_lock(&connector->mutex); - if (!connector->registered) { + if (connector->registration_state != DRM_CONNECTOR_REGISTERED) { mutex_unlock(&connector->mutex); return; } @@ -489,7 +490,7 @@ void drm_connector_unregister(struct drm_connector *connector) drm_sysfs_connector_remove(connector); drm_debugfs_connector_remove(connector); - connector->registered = false; + connector->registration_state = DRM_CONNECTOR_UNREGISTERED; mutex_unlock(&connector->mutex); } EXPORT_SYMBOL(drm_connector_unregister); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ff0bfc65a8c1..b506e3622b08 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -122,6 +122,9 @@ static const struct edid_quirk { /* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */ { "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC }, + /* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3fae4dab295f..13f9b56a9ce7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5102,19 +5102,13 @@ intel_dp_long_pulse(struct intel_connector *connector, */ status = connector_status_disconnected; goto out; - } else { - /* - * If display is now connected check links status, - * there has been known issues of link loss triggering - * long pulse. - * - * Some sinks (eg. ASUS PB287Q) seem to perform some - * weird HPD ping pong during modesets. So we can apparently - * end up with HPD going low during a modeset, and then - * going back up soon after. And once that happens we must - * retrain the link to get a picture. That's in case no - * userspace component reacted to intermittent HPD dip. - */ + } + + /* + * Some external monitors do not signal loss of link synchronization + * with an IRQ_HPD, so force a link status check. + */ + if (!intel_dp_is_edp(intel_dp)) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; intel_dp_retrain_link(encoder, ctx); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 7f155b4f1a7d..1b00f8ea145b 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->pbn = mst_pbn; /* Zombie connectors can't have VCPI slots */ - if (READ_ONCE(connector->registered)) { + if (!drm_connector_is_unregistered(connector)) { slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, port, @@ -313,7 +313,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) struct edid *edid; int ret; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return intel_connector_update_modes(connector, NULL); edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -329,7 +329,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return connector_status_disconnected; return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -372,7 +372,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int bpp = 24; /* MST uses fixed bpp */ int max_rate, mode_rate, max_lanes, max_link_clock; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return MODE_ERROR; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 6bb78076b5b5..6cbbae3f438b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -881,22 +881,16 @@ nv50_mstc_atomic_best_encoder(struct drm_connector *connector, { struct nv50_head *head = nv50_head(connector_state->crtc); struct nv50_mstc *mstc = nv50_mstc(connector); - if (mstc->port) { - struct nv50_mstm *mstm = mstc->mstm; - return &mstm->msto[head->base.index]->encoder; - } - return NULL; + + return &mstc->mstm->msto[head->base.index]->encoder; } static struct drm_encoder * nv50_mstc_best_encoder(struct drm_connector *connector) { struct nv50_mstc *mstc = nv50_mstc(connector); - if (mstc->port) { - struct nv50_mstm *mstm = mstc->mstm; - return &mstm->msto[0]->encoder; - } - return NULL; + + return &mstc->mstm->msto[0]->encoder; } static enum drm_mode_status diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 97964f7f2ace..a04ffb3b2174 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -56,6 +56,8 @@ struct panel_desc { /** * @prepare: the time (in milliseconds) that it takes for the panel to * become ready and start receiving video data + * @hpd_absent_delay: Add this to the prepare delay if we know Hot + * Plug Detect isn't used. * @enable: the time (in milliseconds) that it takes for the panel to * display the first valid frame after starting to receive * video data @@ -66,6 +68,7 @@ struct panel_desc { */ struct { unsigned int prepare; + unsigned int hpd_absent_delay; unsigned int enable; unsigned int disable; unsigned int unprepare; @@ -79,6 +82,7 @@ struct panel_simple { struct drm_panel base; bool prepared; bool enabled; + bool no_hpd; const struct panel_desc *desc; @@ -202,6 +206,7 @@ static int panel_simple_unprepare(struct drm_panel *panel) static int panel_simple_prepare(struct drm_panel *panel) { struct panel_simple *p = to_panel_simple(panel); + unsigned int delay; int err; if (p->prepared) @@ -215,8 +220,11 @@ static int panel_simple_prepare(struct drm_panel *panel) gpiod_set_value_cansleep(p->enable_gpio, 1); - if (p->desc->delay.prepare) - msleep(p->desc->delay.prepare); + delay = p->desc->delay.prepare; + if (p->no_hpd) + delay += p->desc->delay.hpd_absent_delay; + if (delay) + msleep(delay); p->prepared = true; @@ -305,6 +313,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) panel->prepared = false; panel->desc = desc; + panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd"); + panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) return PTR_ERR(panel->supply); @@ -1363,7 +1373,7 @@ static const struct panel_desc innolux_n156bge_l21 = { }, }; -static const struct drm_display_mode innolux_tv123wam_mode = { +static const struct drm_display_mode innolux_p120zdg_bf1_mode = { .clock = 206016, .hdisplay = 2160, .hsync_start = 2160 + 48, @@ -1377,15 +1387,16 @@ static const struct drm_display_mode innolux_tv123wam_mode = { .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, }; -static const struct panel_desc innolux_tv123wam = { - .modes = &innolux_tv123wam_mode, +static const struct panel_desc innolux_p120zdg_bf1 = { + .modes = &innolux_p120zdg_bf1_mode, .num_modes = 1, .bpc = 8, .size = { - .width = 259, - .height = 173, + .width = 254, + .height = 169, }, .delay = { + .hpd_absent_delay = 200, .unprepare = 500, }, }; @@ -2445,8 +2456,8 @@ static const struct of_device_id platform_of_match[] = { .compatible = "innolux,n156bge-l21", .data = &innolux_n156bge_l21, }, { - .compatible = "innolux,tv123wam", - .data = &innolux_tv123wam, + .compatible = "innolux,p120zdg-bf1", + .data = &innolux_p120zdg_bf1, }, { .compatible = "innolux,zj070na-01p", .data = &innolux_zj070na_01p, diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f3fb5bb8c82a..ac1cffd2a09b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkg_association(discard_bio, bio); + bio_clone_blkcg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e52b9d3c0bd6..0b70c8bab045 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; op->rq = rq; @@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, if (res) return res; op->op.fcp_req.first_sgl = &op->sgl[0]; + op->op.fcp_req.private = &op->priv[0]; return res; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f30031945ee4..c33bb201b884 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev) struct pci_dev *pdev = to_pci_dev(dev->dev); int bar; + if (dev->cmb_size) + return; + dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!dev->cmbsz) return; @@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - nvme_release_cmb(dev); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { @@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_stop_ctrl(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); + nvme_release_cmb(dev); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 504d252716f2..27e5dd47a01f 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -447,10 +447,9 @@ config PWM_TEGRA config PWM_TIECAP tristate "ECAP PWM support" - depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE + depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3 help - PWM driver support for the ECAP APWM controller found on AM33XX - TI SOC + PWM driver support for the ECAP APWM controller found on TI SOCs To compile this driver as a module, choose M here: the module will be called pwm-tiecap. diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c index 5561b9e190f8..757230e1f575 100644 --- a/drivers/pwm/pwm-lpss-platform.c +++ b/drivers/pwm/pwm-lpss-platform.c @@ -30,6 +30,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { .clk_rate = 19200000, .npwm = 1, .base_unit_bits = 16, + .other_devices_aml_touches_pwm_regs = true, }; /* Broxton */ @@ -60,6 +61,7 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev) platform_set_drvdata(pdev, lpwm); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -74,13 +76,29 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev) return pwm_lpss_remove(lpwm); } -static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops, - pwm_lpss_suspend, - pwm_lpss_resume); +static int pwm_lpss_prepare(struct device *dev) +{ + struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev); + + /* + * If other device's AML code touches the PWM regs on suspend/resume + * force runtime-resume the PWM controller to allow this. + */ + if (lpwm->info->other_devices_aml_touches_pwm_regs) + return 0; /* Force runtime-resume */ + + return 1; /* If runtime-suspended leave as is */ +} + +static const struct dev_pm_ops pwm_lpss_platform_pm_ops = { + .prepare = pwm_lpss_prepare, + SET_SYSTEM_SLEEP_PM_OPS(pwm_lpss_suspend, pwm_lpss_resume) +}; static const struct acpi_device_id pwm_lpss_acpi_match[] = { { "80860F09", (unsigned long)&pwm_lpss_byt_info }, { "80862288", (unsigned long)&pwm_lpss_bsw_info }, + { "80862289", (unsigned long)&pwm_lpss_bsw_info }, { "80865AC8", (unsigned long)&pwm_lpss_bxt_info }, { }, }; diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4721a264bac2..2ac3a2aa9e53 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -32,15 +32,6 @@ /* Size of each PWM register space if multiple */ #define PWM_SIZE 0x400 -#define MAX_PWMS 4 - -struct pwm_lpss_chip { - struct pwm_chip chip; - void __iomem *regs; - const struct pwm_lpss_boardinfo *info; - u32 saved_ctrl[MAX_PWMS]; -}; - static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip) { return container_of(chip, struct pwm_lpss_chip, chip); @@ -97,7 +88,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, unsigned long long on_time_div; unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; - u32 ctrl; + u32 orig_ctrl, ctrl; do_div(freq, period_ns); @@ -114,13 +105,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, do_div(on_time_div, period_ns); on_time_div = 255ULL - on_time_div; - ctrl = pwm_lpss_read(pwm); + orig_ctrl = ctrl = pwm_lpss_read(pwm); ctrl &= ~PWM_ON_TIME_DIV_MASK; ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT); base_unit &= base_unit_range; ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; - pwm_lpss_write(pwm, ctrl); + + if (orig_ctrl != ctrl) { + pwm_lpss_write(pwm, ctrl); + pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE); + } } static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) @@ -144,7 +139,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false); ret = pwm_lpss_wait_for_update(pwm); if (ret) { @@ -157,7 +151,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (ret) return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); return pwm_lpss_wait_for_update(pwm); } } else if (pwm_is_enabled(pwm)) { @@ -168,8 +161,42 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } +/* This function gets called once from pwmchip_add to get the initial state */ +static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct pwm_lpss_chip *lpwm = to_lpwm(chip); + unsigned long base_unit_range; + unsigned long long base_unit, freq, on_time_div; + u32 ctrl; + + base_unit_range = BIT(lpwm->info->base_unit_bits); + + ctrl = pwm_lpss_read(pwm); + on_time_div = 255 - (ctrl & PWM_ON_TIME_DIV_MASK); + base_unit = (ctrl >> PWM_BASE_UNIT_SHIFT) & (base_unit_range - 1); + + freq = base_unit * lpwm->info->clk_rate; + do_div(freq, base_unit_range); + if (freq == 0) + state->period = NSEC_PER_SEC; + else + state->period = NSEC_PER_SEC / (unsigned long)freq; + + on_time_div *= state->period; + do_div(on_time_div, 255); + state->duty_cycle = on_time_div; + + state->polarity = PWM_POLARITY_NORMAL; + state->enabled = !!(ctrl & PWM_ENABLE); + + if (state->enabled) + pm_runtime_get(chip->dev); +} + static const struct pwm_ops pwm_lpss_ops = { .apply = pwm_lpss_apply, + .get_state = pwm_lpss_get_state, .owner = THIS_MODULE, }; @@ -214,6 +241,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe); int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) { + int i; + + for (i = 0; i < lpwm->info->npwm; i++) { + if (pwm_is_enabled(&lpwm->chip.pwms[i])) + pm_runtime_put(lpwm->chip.dev); + } return pwmchip_remove(&lpwm->chip); } EXPORT_SYMBOL_GPL(pwm_lpss_remove); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h index 7a4238ad1fcb..3236be835bd9 100644 --- a/drivers/pwm/pwm-lpss.h +++ b/drivers/pwm/pwm-lpss.h @@ -16,13 +16,25 @@ #include <linux/device.h> #include <linux/pwm.h> -struct pwm_lpss_chip; +#define MAX_PWMS 4 + +struct pwm_lpss_chip { + struct pwm_chip chip; + void __iomem *regs; + const struct pwm_lpss_boardinfo *info; + u32 saved_ctrl[MAX_PWMS]; +}; struct pwm_lpss_boardinfo { unsigned long clk_rate; unsigned int npwm; unsigned long base_unit_bits; bool bypass; + /* + * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device + * messes with the PWM0 controllers state, + */ + bool other_devices_aml_touches_pwm_regs; }; struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 748f614d5375..a41812fc6f95 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car PWM Timer driver * * Copyright (C) 2015 Renesas Electronics Corporation - * - * This is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. */ #include <linux/clk.h> diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index 29267d12fb4c..4a855a21b782 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Mobile TPU PWM driver * * Copyright (C) 2012 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/clk.h> diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index f8ebbece57b7..48c4595a0ffc 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -300,7 +300,6 @@ static const struct of_device_id tegra_pwm_of_match[] = { { .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc }, { } }; - MODULE_DEVICE_TABLE(of, tegra_pwm_of_match); static const struct dev_pm_ops tegra_pwm_pm_ops = { diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 7c71cdb8a9d8..ceb233dd6048 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -249,6 +249,7 @@ static void pwm_export_release(struct device *child) static int pwm_export_child(struct device *parent, struct pwm_device *pwm) { struct pwm_export *export; + char *pwm_prop[2]; int ret; if (test_and_set_bit(PWMF_EXPORTED, &pwm->flags)) @@ -263,7 +264,6 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm) export->pwm = pwm; mutex_init(&export->lock); - export->child.class = parent->class; export->child.release = pwm_export_release; export->child.parent = parent; export->child.devt = MKDEV(0, 0); @@ -277,6 +277,10 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm) export = NULL; return ret; } + pwm_prop[0] = kasprintf(GFP_KERNEL, "EXPORT=pwm%u", pwm->hwpwm); + pwm_prop[1] = NULL; + kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop); + kfree(pwm_prop[0]); return 0; } @@ -289,6 +293,7 @@ static int pwm_unexport_match(struct device *child, void *data) static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm) { struct device *child; + char *pwm_prop[2]; if (!test_and_clear_bit(PWMF_EXPORTED, &pwm->flags)) return -ENODEV; @@ -297,6 +302,11 @@ static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm) if (!child) return -ENODEV; + pwm_prop[0] = kasprintf(GFP_KERNEL, "UNEXPORT=pwm%u", pwm->hwpwm); + pwm_prop[1] = NULL; + kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop); + kfree(pwm_prop[0]); + /* for device_find_child() */ put_device(child); device_unregister(child); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 05293babb031..2d655a97b959 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -143,7 +143,9 @@ static int twa_poll_status_gone(TW_Device_Extension *tw_dev, u32 flag, int secon static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal); static int twa_reset_device_extension(TW_Device_Extension *tw_dev); static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset); -static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg); +static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, + unsigned char *cdb, int use_sg, + TW_SG_Entry *sglistarg); static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id); static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code); @@ -278,7 +280,7 @@ out: static int twa_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset) { int request_id = 0; - char cdb[TW_MAX_CDB_LEN]; + unsigned char cdb[TW_MAX_CDB_LEN]; TW_SG_Entry sglist[1]; int finished = 0, count = 0; TW_Command_Full *full_command_packet; @@ -423,7 +425,7 @@ static void twa_aen_queue_event(TW_Device_Extension *tw_dev, TW_Command_Apache_H /* This function will read the aen queue from the isr */ static int twa_aen_read_queue(TW_Device_Extension *tw_dev, int request_id) { - char cdb[TW_MAX_CDB_LEN]; + unsigned char cdb[TW_MAX_CDB_LEN]; TW_SG_Entry sglist[1]; TW_Command_Full *full_command_packet; int retval = 1; @@ -1798,7 +1800,9 @@ out: static DEF_SCSI_QCMD(twa_scsi_queue) /* This function hands scsi cdb's to the firmware */ -static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg) +static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, + unsigned char *cdb, int use_sg, + TW_SG_Entry *sglistarg) { TW_Command_Full *full_command_packet; TW_Command_Apache *command_packet; diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 266bdac75304..480cf82700e9 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -287,7 +287,9 @@ static int twl_post_command_packet(TW_Device_Extension *tw_dev, int request_id) } /* End twl_post_command_packet() */ /* This function hands scsi cdb's to the firmware */ -static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry_ISO *sglistarg) +static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, + unsigned char *cdb, int use_sg, + TW_SG_Entry_ISO *sglistarg) { TW_Command_Full *full_command_packet; TW_Command_Apache *command_packet; @@ -372,7 +374,7 @@ out: /* This function will read the aen queue from the isr */ static int twl_aen_read_queue(TW_Device_Extension *tw_dev, int request_id) { - char cdb[TW_MAX_CDB_LEN]; + unsigned char cdb[TW_MAX_CDB_LEN]; TW_SG_Entry_ISO sglist[1]; TW_Command_Full *full_command_packet; int retval = 1; @@ -554,7 +556,7 @@ out: static int twl_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset) { int request_id = 0; - char cdb[TW_MAX_CDB_LEN]; + unsigned char cdb[TW_MAX_CDB_LEN]; TW_SG_Entry_ISO sglist[1]; int finished = 0, count = 0; TW_Command_Full *full_command_packet; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 70988c381268..f07444d30b21 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -538,7 +538,7 @@ config SCSI_HPTIOP config SCSI_BUSLOGIC tristate "BusLogic SCSI support" - depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API && VIRT_TO_BUS + depends on (PCI || ISA) && SCSI && ISA_DMA_API && VIRT_TO_BUS ---help--- This is support for BusLogic MultiMaster and FlashPoint SCSI Host Adapters. Consult the SCSI-HOWTO, available from @@ -1175,12 +1175,12 @@ config SCSI_LPFC_DEBUG_FS config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" - depends on (EISA || MCA) && SCSI + depends on EISA && SCSI select SCSI_SPI_ATTRS ---help--- This driver is for NCR53c710 based SCSI host adapters. - It currently supports Compaq EISA cards and NCR MCA cards + It currently supports Compaq EISA cards. config SCSI_DC395x tristate "Tekram DC395(U/UW/F) and DC315(U) SCSI support" diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 4d7b0e0adbf7..301b3cad15f8 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -269,7 +269,7 @@ static LIST_HEAD(aha152x_host_list); /* DEFINES */ /* For PCMCIA cards, always use AUTOCONF */ -#if defined(PCMCIA) || defined(MODULE) +#if defined(AHA152X_PCMCIA) || defined(MODULE) #if !defined(AUTOCONF) #define AUTOCONF #endif @@ -297,7 +297,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc) #define DELAY_DEFAULT 1000 -#if defined(PCMCIA) +#if defined(AHA152X_PCMCIA) #define IRQ_MIN 0 #define IRQ_MAX 16 #else @@ -328,7 +328,7 @@ MODULE_AUTHOR("Jürgen Fischer"); MODULE_DESCRIPTION(AHA152X_REVID); MODULE_LICENSE("GPL"); -#if !defined(PCMCIA) +#if !defined(AHA152X_PCMCIA) #if defined(MODULE) static int io[] = {0, 0}; module_param_hw_array(io, int, ioport, NULL, 0); @@ -391,7 +391,7 @@ static struct isapnp_device_id id_table[] = { MODULE_DEVICE_TABLE(isapnp, id_table); #endif /* ISAPNP */ -#endif /* !PCMCIA */ +#endif /* !AHA152X_PCMCIA */ static struct scsi_host_template aha152x_driver_template; @@ -863,7 +863,7 @@ void aha152x_release(struct Scsi_Host *shpnt) if (shpnt->irq) free_irq(shpnt->irq, shpnt); -#if !defined(PCMCIA) +#if !defined(AHA152X_PCMCIA) if (shpnt->io_port) release_region(shpnt->io_port, IO_RANGE); #endif @@ -2924,7 +2924,7 @@ static struct scsi_host_template aha152x_driver_template = { .slave_alloc = aha152x_adjust_queue, }; -#if !defined(PCMCIA) +#if !defined(AHA152X_PCMCIA) static int setup_count; static struct aha152x_setup setup[2]; @@ -3392,4 +3392,4 @@ static int __init aha152x_setup(char *str) __setup("aha152x=", aha152x_setup); #endif -#endif /* !PCMCIA */ +#endif /* !AHA152X_PCMCIA */ diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 3df1428df317..311d23c727ce 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -790,12 +790,11 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf slot->n_elem = n_elem; slot->slot_tag = tag; - slot->buf = dma_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma); + slot->buf = dma_pool_zalloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma); if (!slot->buf) { rc = -ENOMEM; goto err_out_tag; } - memset(slot->buf, 0, MVS_SLOT_BUF_SZ); tei.task = task; tei.hdr = &mvi->slot[tag]; @@ -1906,8 +1905,7 @@ static void mvs_work_queue(struct work_struct *work) if (phy->phy_event & PHY_PLUG_OUT) { u32 tmp; - struct sas_identify_frame *id; - id = (struct sas_identify_frame *)phy->frame_rcvd; + tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no); phy->phy_event &= ~PHY_PLUG_OUT; if (!(tmp & PHY_READY_MASK)) { diff --git a/drivers/scsi/pcmcia/aha152x_core.c b/drivers/scsi/pcmcia/aha152x_core.c index dba3716511c5..24b89228b241 100644 --- a/drivers/scsi/pcmcia/aha152x_core.c +++ b/drivers/scsi/pcmcia/aha152x_core.c @@ -1,3 +1,3 @@ -#define PCMCIA 1 +#define AHA152X_PCMCIA 1 #define AHA152X_STAT 1 #include "aha152x.c" diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index b28f159fdaee..0bb9ac6ece92 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -218,7 +218,7 @@ qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj, mutex_lock(&ha->optrom_mutex); if (qla2x00_chip_is_down(vha)) { - mutex_unlock(&vha->hw->optrom_mutex); + mutex_unlock(&ha->optrom_mutex); return -EAGAIN; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index c72d8012fe2a..6fe20c27acc1 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -425,7 +425,7 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea) __qla24xx_handle_gpdb_event(vha, ea); } -int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport) +static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport) { struct qla_work_evt *e; @@ -680,7 +680,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, fcport); break; } - /* drop through */ + /* fall through */ default: if (fcport_is_smaller(fcport)) { /* local adapter is bigger */ @@ -1551,7 +1551,8 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, } -void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea) +static void qla_handle_els_plogi_done(scsi_qla_host_t *vha, + struct event_arg *ea) { ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC post PRLI\n", diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 86fb8b21aa71..032635321ad6 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1195,8 +1195,8 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, * @sp: SRB command to process * @cmd_pkt: Command type 3 IOCB * @tot_dsds: Total number of segments to transfer - * @tot_prot_dsds: - * @fw_prot_opts: + * @tot_prot_dsds: Total number of segments with protection information + * @fw_prot_opts: Protection options to be passed to firmware */ inline int qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index d73b04e40590..30d3090842f8 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -25,7 +25,7 @@ static int qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *, /** * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200. - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. @@ -144,7 +144,7 @@ qla2x00_check_reg16_for_disconnect(scsi_qla_host_t *vha, uint16_t reg) /** * qla2300_intr_handler() - Process interrupts for the ISP23xx and ISP63xx. - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. @@ -3109,7 +3109,7 @@ done: /** * qla24xx_intr_handler() - Process interrupts for the ISP23xx and ISP24xx. - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 2f3e5075ae76..191b6b7c8747 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3478,9 +3478,9 @@ qla8044_read_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t *data) /** * qla2x00_set_serdes_params() - * @vha: HA context - * @sw_em_1g: - * @sw_em_2g: - * @sw_em_4g: + * @sw_em_1g: serial link options + * @sw_em_2g: serial link options + * @sw_em_4g: serial link options * * Returns */ diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 521a51370554..60f964c53c01 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -2212,7 +2212,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req, struct bsg_job *bsg_job; struct fc_bsg_reply *bsg_reply; struct srb_iocb *iocb_job; - int res; + int res = 0; struct qla_mt_iocb_rsp_fx00 fstatus; uint8_t *fw_sts_ptr; @@ -2624,7 +2624,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) * qlafx00_multistatus_entry() - Process Multi response queue entries. * @vha: SCSI driver HA context * @rsp: response queue - * @pkt: + * @pkt: received packet */ static void qlafx00_multistatus_entry(struct scsi_qla_host *vha, @@ -2681,12 +2681,10 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha, * @vha: SCSI driver HA context * @rsp: response queue * @pkt: Entry pointer - * @estatus: - * @etype: */ static void qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, - struct sts_entry_fx00 *pkt, uint8_t estatus, uint8_t etype) + struct sts_entry_fx00 *pkt) { srb_t *sp; struct qla_hw_data *ha = vha->hw; @@ -2695,9 +2693,6 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, struct req_que *req = NULL; int res = DID_ERROR << 16; - ql_dbg(ql_dbg_async, vha, 0x507f, - "type of error status in response: 0x%x\n", estatus); - req = ha->req_q_map[que]; sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); @@ -2745,9 +2740,11 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha, if (pkt->entry_status != 0 && pkt->entry_type != IOCTL_IOSB_TYPE_FX00) { + ql_dbg(ql_dbg_async, vha, 0x507f, + "type of error status in response: 0x%x\n", + pkt->entry_status); qlafx00_error_entry(vha, rsp, - (struct sts_entry_fx00 *)pkt, pkt->entry_status, - pkt->entry_type); + (struct sts_entry_fx00 *)pkt); continue; } @@ -2867,7 +2864,7 @@ qlafx00_async_event(scsi_qla_host_t *vha) /** * qlafx00x_mbx_completion() - Process mailbox command completions. * @vha: SCSI driver HA context - * @mb0: + * @mb0: value to be written into mailbox register 0 */ static void qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0) @@ -2893,7 +2890,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0) /** * qlafx00_intr_handler() - Process interrupts for the ISPFX00. - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 121e18b3b9f8..f2f54806f4da 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -2010,7 +2010,7 @@ qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) /** * qla82xx_intr_handler() - Process interrupts for the ISP23xx and ISP63xx. - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c index 3a2b0282df14..fe856b602e03 100644 --- a/drivers/scsi/qla2xxx/qla_nx2.c +++ b/drivers/scsi/qla2xxx/qla_nx2.c @@ -3878,7 +3878,7 @@ out: #define PF_BITS_MASK (0xF << 16) /** * qla8044_intr_handler() - Process interrupts for the ISP8044 - * @irq: + * @irq: interrupt number * @dev_id: SCSI driver HA context * * Called by system whenever the host adapter generates an interrupt. diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8794e54f43a9..518f15141170 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1749,7 +1749,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha) static void __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) { - int cnt, status; + int cnt; unsigned long flags; srb_t *sp; scsi_qla_host_t *vha = qp->vha; @@ -1799,8 +1799,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) if (!sp_get(sp)) { spin_unlock_irqrestore (qp->qp_lock_ptr, flags); - status = qla2xxx_eh_abort( - GET_CMD_SP(sp)); + qla2xxx_eh_abort( + GET_CMD_SP(sp)); spin_lock_irqsave (qp->qp_lock_ptr, flags); } diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 4499c787165f..2a3055c799fb 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -2229,7 +2229,7 @@ qla2x00_erase_flash_sector(struct qla_hw_data *ha, uint32_t addr, /** * qla2x00_get_flash_manufacturer() - Read manufacturer ID from flash chip. - * @ha: + * @ha: host adapter * @man_id: Flash manufacturer ID * @flash_id: Flash ID */ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 39828207bc1d..c4504740f0e2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4540,7 +4540,7 @@ static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, case QLA_TGT_CLEAR_TS: case QLA_TGT_ABORT_TS: abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); - /* drop through */ + /* fall through */ case QLA_TGT_CLEAR_ACA: h = qlt_find_qphint(vha, mcmd->unpacked_lun); mcmd->qpair = h->qpair; @@ -6598,9 +6598,9 @@ static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn, * qla_tgt_lport_register - register lport with external module * * @target_lport_ptr: pointer for tcm_qla2xxx specific lport data - * @phys_wwpn: - * @npiv_wwpn: - * @npiv_wwnn: + * @phys_wwpn: physical port WWPN + * @npiv_wwpn: NPIV WWPN + * @npiv_wwnn: NPIV WWNN * @callback: lport initialization callback for tcm_qla2xxx code */ int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn, diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index e46ca968009c..4f134b0c3e29 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -268,7 +268,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) } transport_kunmap_data_sg(cmd); - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, rd_len + 4); return 0; } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4cf33e2cc705..e31e4fc31aa1 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -205,19 +205,19 @@ void transport_subsystem_check_init(void) if (sub_api_initialized) return; - ret = request_module("target_core_iblock"); + ret = IS_ENABLED(CONFIG_TCM_IBLOCK) && request_module("target_core_iblock"); if (ret != 0) pr_err("Unable to load target_core_iblock\n"); - ret = request_module("target_core_file"); + ret = IS_ENABLED(CONFIG_TCM_FILEIO) && request_module("target_core_file"); if (ret != 0) pr_err("Unable to load target_core_file\n"); - ret = request_module("target_core_pscsi"); + ret = IS_ENABLED(CONFIG_TCM_PSCSI) && request_module("target_core_pscsi"); if (ret != 0) pr_err("Unable to load target_core_pscsi\n"); - ret = request_module("target_core_user"); + ret = IS_ENABLED(CONFIG_TCM_USER2) && request_module("target_core_user"); if (ret != 0) pr_err("Unable to load target_core_user\n"); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9a69392f1fb3..d81c148682e7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; - if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { + if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || + le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { printf("Superblock is corrupted\n"); goto out1; } @@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; imap_len = (info->si_lasti / 8) + 1; - info->si_imap = kzalloc(imap_len, GFP_KERNEL); - if (!info->si_imap) + info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); + if (!info->si_imap) { + printf("Cannot allocate %u bytes\n", imap_len); goto out1; + } for (i = 0; i < BFS_ROOT_INO; i++) set_bit(i, info->si_imap); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68ca41dbbef3..80953528572d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen); /* file.c */ int __init btrfs_auto_defrag_init(void); @@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); +loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97c7a086f7bd..a3c22e16509b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, #endif - .clone_file_range = btrfs_clone_file_range, - .dedupe_file_range = btrfs_dedupe_file_range, + .remap_file_range = btrfs_remap_file_range, }; void __cold btrfs_auto_defrag_exit(void) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a990a9045139..3ca6943827ef 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3629,26 +3629,6 @@ out_unlock: return ret; } -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen) -{ - struct inode *src = file_inode(src_file); - struct inode *dst = file_inode(dst_file); - u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; - - if (WARN_ON_ONCE(bs < PAGE_SIZE)) { - /* - * Btrfs does not support blocksize < page_size. As a - * result, btrfs_cmp_data() won't correctly handle - * this situation without an update. - */ - return -EINVAL; - } - - return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); -} - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, @@ -4350,10 +4330,34 @@ out_unlock: return ret; } -int btrfs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, + unsigned int remap_flags) { - return btrfs_clone_files(dst_file, src_file, off, len, destoff); + int ret; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) { + struct inode *src = file_inode(src_file); + struct inode *dst = file_inode(dst_file); + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + return -EINVAL; + } + + ret = btrfs_extent_same(src, off, len, dst, destoff); + } else { + ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); + } + return ret < 0 ? ret : len; } static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) diff --git a/fs/buffer.c b/fs/buffer.c index d60d61e8ed7d..1286c2b95498 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); + if (wbc) { + wbc_init_bio(wbc, bio); + wbc_account_io(wbc, bh->b_page, bh->b_size); + } + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; @@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, op_flags |= REQ_PRIO; bio_set_op_attrs(bio, op, op_flags); - if (wbc) { - wbc_init_bio(wbc, bio); - wbc_account_io(wbc, bh->b_page, bh->b_size); - } - submit_bio(bio); return 0; } diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 3e812428ac8d..ba178b09de0b 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -145,6 +145,58 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr); } +static int cifs_debug_files_proc_show(struct seq_file *m, void *v) +{ + struct list_head *stmp, *tmp, *tmp1, *tmp2; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; + + seq_puts(m, "# Version:1\n"); + seq_puts(m, "# Format:\n"); + seq_puts(m, "# <tree id> <persistent fid> <flags> <count> <pid> <uid>"); +#ifdef CONFIG_CIFS_DEBUG2 + seq_printf(m, " <filename> <mid>\n"); +#else + seq_printf(m, " <filename>\n"); +#endif /* CIFS_DEBUG2 */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(stmp, &cifs_tcp_ses_list) { + server = list_entry(stmp, struct TCP_Server_Info, + tcp_ses_list); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + spin_lock(&tcon->open_file_lock); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, + tlist); + seq_printf(m, + "0x%x 0x%llx 0x%x %d %d %d %s", + tcon->tid, + cfile->fid.persistent_fid, + cfile->f_flags, + cfile->count, + cfile->pid, + from_kuid(&init_user_ns, cfile->uid), + cfile->dentry->d_name.name); +#ifdef CONFIG_CIFS_DEBUG2 + seq_printf(m, " 0x%llx\n", cfile->fid.mid); +#else + seq_printf(m, "\n"); +#endif /* CIFS_DEBUG2 */ + } + spin_unlock(&tcon->open_file_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + seq_putc(m, '\n'); + return 0; +} + static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct list_head *tmp1, *tmp2, *tmp3; @@ -565,6 +617,9 @@ cifs_proc_init(void) proc_create_single("DebugData", 0, proc_fs_cifs, cifs_debug_data_proc_show); + proc_create_single("open_files", 0400, proc_fs_cifs, + cifs_debug_files_proc_show); + proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops); proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops); proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops); @@ -601,6 +656,7 @@ cifs_proc_clean(void) return; remove_proc_entry("DebugData", proc_fs_cifs); + remove_proc_entry("open_files", proc_fs_cifs); remove_proc_entry("cifsFYI", proc_fs_cifs); remove_proc_entry("traceSMB", proc_fs_cifs); remove_proc_entry("Stats", proc_fs_cifs); diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index b611fc2e8984..7f01c6e60791 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) sprintf(dp, ";sec=mskrb5"); - else - goto out; + else { + cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); + sprintf(dp, ";sec=krb5"); + } dp = description + strlen(description); sprintf(dp, ";uid=0x%x", diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 7de9603c54f1..865706edb307 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -992,17 +992,21 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; -static int cifs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, + unsigned int remap_flags) { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); struct cifsFileInfo *smb_file_src = src_file->private_data; - struct cifsFileInfo *smb_file_target = dst_file->private_data; - struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *target_tcon; unsigned int xid; int rc; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + cifs_dbg(FYI, "clone range\n"); xid = get_xid(); @@ -1013,6 +1017,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, goto out; } + smb_file_target = dst_file->private_data; + target_tcon = tlink_tcon(smb_file_target->tlink); + /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants @@ -1042,7 +1049,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, unlock_two_nondirectories(src_inode, target_inode); out: free_xid(xid); - return rc; + return rc < 0 ? rc : len; } ssize_t cifs_file_copychunk_range(unsigned int xid, @@ -1151,7 +1158,7 @@ const struct file_operations cifs_file_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1170,15 +1177,14 @@ const struct file_operations cifs_file_strict_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_ops = { - /* BB reevaluate whether they can be done with directio, no cache */ - .read_iter = cifs_user_readv, - .write_iter = cifs_user_writev, + .read_iter = cifs_direct_readv, + .write_iter = cifs_direct_writev, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -1189,7 +1195,7 @@ const struct file_operations cifs_file_direct_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1208,7 +1214,7 @@ const struct file_operations cifs_file_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1226,15 +1232,14 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_nobrl_ops = { - /* BB reevaluate whether they can be done with directio, no cache */ - .read_iter = cifs_user_readv, - .write_iter = cifs_user_writev, + .read_iter = cifs_direct_readv, + .write_iter = cifs_direct_writev, .open = cifs_open, .release = cifs_close, .fsync = cifs_fsync, @@ -1244,7 +1249,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1256,7 +1261,7 @@ const struct file_operations cifs_dir_ops = { .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = generic_file_llseek, .fsync = cifs_dir_fsync, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 24e265a51874..4c3b5cfccc49 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -101,8 +101,10 @@ extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); +extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ed1e0fcb69e3..38ab0fca49e1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1125,6 +1125,9 @@ struct cifs_fid { __u8 create_guid[16]; struct cifs_pending_open *pending_open; unsigned int epoch; +#ifdef CONFIG_CIFS_DEBUG2 + __u64 mid; +#endif /* CIFS_DEBUG2 */ bool purge_cache; }; @@ -1183,6 +1186,11 @@ struct cifs_aio_ctx { unsigned int len; unsigned int total_len; bool should_dirty; + /* + * Indicates if this aio_ctx is for direct_io, + * If yes, iter is a copy of the user passed iov_iter + */ + bool direct_io; }; struct cifs_readdata; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 1ce733f3582f..79d842e7240c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1539,6 +1539,9 @@ struct reparse_symlink_data { char PathBuffer[0]; } __attribute__((packed)); +/* Flag above */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + /* For IO_REPARSE_TAG_NFS */ #define NFS_SPECFILE_LNK 0x00000000014B4E4C #define NFS_SPECFILE_CHR 0x0000000000524843 diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e262a05a98bf..74c33d5fafc8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1005,7 +1005,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) * Set the byte-range lock (mandatory style). Returns: * 1) 0, if we set the lock and don't need to request to the server; * 2) 1, if no locks prevent us but we need to request to the server; - * 3) -EACCESS, if there is a lock that prevents us and wait is false. + * 3) -EACCES, if there is a lock that prevents us and wait is false. */ static int cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, @@ -2538,6 +2538,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, } static int +cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, + struct cifs_aio_ctx *ctx) +{ + int wait_retry = 0; + unsigned int wsize, credits; + int rc; + struct TCP_Server_Info *server = + tlink_tcon(wdata->cfile->tlink)->ses->server; + + /* + * Try to resend this wdata, waiting for credits up to 3 seconds. + * Note: we are attempting to resend the whole wdata not in segments + */ + do { + rc = server->ops->wait_mtu_credits( + server, wdata->bytes, &wsize, &credits); + + if (rc) + break; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, credits, 0); + msleep(1000); + wait_retry++; + } + } while (wsize < wdata->bytes && wait_retry < 3); + + if (wsize < wdata->bytes) { + rc = -EBUSY; + goto out; + } + + rc = -EAGAIN; + while (rc == -EAGAIN) { + rc = 0; + if (wdata->cfile->invalidHandle) + rc = cifs_reopen_file(wdata->cfile, false); + if (!rc) + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); + } + + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } + + add_credits_and_wake_if(server, wdata->credits, 0); +out: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); + + return rc; +} + +static int cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, @@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, loff_t saved_offset = offset; pid_t pid; struct TCP_Server_Info *server; + struct page **pagevec; + size_t start; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -2567,38 +2624,79 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (rc) break; - nr_pages = get_numpages(wsize, len, &cur_len); - wdata = cifs_writedata_alloc(nr_pages, + if (ctx->direct_io) { + ssize_t result; + + result = iov_iter_get_pages_alloc( + from, &pagevec, wsize, &start); + if (result < 0) { + cifs_dbg(VFS, + "direct_writev couldn't get user pages " + "(rc=%zd) iter type %d iov_offset %zd " + "count %zd\n", + result, from->type, + from->iov_offset, from->count); + dump_stack(); + break; + } + cur_len = (size_t)result; + iov_iter_advance(from, cur_len); + + nr_pages = + (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; + + wdata = cifs_writedata_direct_alloc(pagevec, cifs_uncached_writev_complete); - if (!wdata) { - rc = -ENOMEM; - add_credits_and_wake_if(server, credits, 0); - break; - } + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; + } - rc = cifs_write_allocate_pages(wdata->pages, nr_pages); - if (rc) { - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } - num_pages = nr_pages; - rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); - if (rc) { - for (i = 0; i < nr_pages; i++) - put_page(wdata->pages[i]); - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } + wdata->page_offset = start; + wdata->tailsz = + nr_pages > 1 ? + cur_len - (PAGE_SIZE - start) - + (nr_pages - 2) * PAGE_SIZE : + cur_len; + } else { + nr_pages = get_numpages(wsize, len, &cur_len); + wdata = cifs_writedata_alloc(nr_pages, + cifs_uncached_writev_complete); + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; + } - /* - * Bring nr_pages down to the number of pages we actually used, - * and free any pages that we didn't use. - */ - for ( ; nr_pages > num_pages; nr_pages--) - put_page(wdata->pages[nr_pages - 1]); + rc = cifs_write_allocate_pages(wdata->pages, nr_pages); + if (rc) { + kfree(wdata); + add_credits_and_wake_if(server, credits, 0); + break; + } + + num_pages = nr_pages; + rc = wdata_fill_from_iovec( + wdata, from, &cur_len, &num_pages); + if (rc) { + for (i = 0; i < nr_pages; i++) + put_page(wdata->pages[i]); + kfree(wdata); + add_credits_and_wake_if(server, credits, 0); + break; + } + + /* + * Bring nr_pages down to the number of pages we + * actually used, and free any pages that we didn't use. + */ + for ( ; nr_pages > num_pages; nr_pages--) + put_page(wdata->pages[nr_pages - 1]); + + wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); + } wdata->sync_mode = WB_SYNC_ALL; wdata->nr_pages = nr_pages; @@ -2607,7 +2705,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->pid = pid; wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; - wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); wdata->credits = credits; wdata->ctx = ctx; kref_get(&ctx->refcount); @@ -2682,13 +2779,18 @@ restart_loop: INIT_LIST_HEAD(&tmp_list); list_del_init(&wdata->list); - iov_iter_advance(&tmp_from, + if (ctx->direct_io) + rc = cifs_resend_wdata( + wdata, &tmp_list, ctx); + else { + iov_iter_advance(&tmp_from, wdata->offset - ctx->pos); - rc = cifs_write_from_iter(wdata->offset, + rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, ctx->cfile, cifs_sb, &tmp_list, ctx); + } list_splice(&tmp_list, &ctx->list); @@ -2701,8 +2803,9 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - for (i = 0; i < ctx->npages; i++) - put_page(ctx->bv[i].bv_page); + if (!ctx->direct_io) + for (i = 0; i < ctx->npages; i++) + put_page(ctx->bv[i].bv_page); cifs_stats_bytes_written(tcon, ctx->total_len); set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); @@ -2717,7 +2820,8 @@ restart_loop: complete(&ctx->done); } -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +static ssize_t __cifs_writev( + struct kiocb *iocb, struct iov_iter *from, bool direct) { struct file *file = iocb->ki_filp; ssize_t total_written = 0; @@ -2726,13 +2830,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) struct cifs_sb_info *cifs_sb; struct cifs_aio_ctx *ctx; struct iov_iter saved_from = *from; + size_t len = iov_iter_count(from); int rc; /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. + * iov_iter_get_pages_alloc doesn't work with ITER_KVEC. + * In this case, fall back to non-direct write function. + * this could be improved by getting pages directly in ITER_KVEC */ + if (direct && from->type & ITER_KVEC) { + cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n"); + direct = false; + } rc = generic_write_checks(iocb, from); if (rc <= 0) @@ -2756,10 +2865,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) ctx->pos = iocb->ki_pos; - rc = setup_aio_ctx_iter(ctx, from, WRITE); - if (rc) { - kref_put(&ctx->refcount, cifs_aio_ctx_release); - return rc; + if (direct) { + ctx->direct_io = true; + ctx->iter = *from; + ctx->len = len; + } else { + rc = setup_aio_ctx_iter(ctx, from, WRITE); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } } /* grab a lock here due to read response handlers can access ctx */ @@ -2809,6 +2924,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) return total_written; } +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) +{ + return __cifs_writev(iocb, from, true); +} + +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +{ + return __cifs_writev(iocb, from, false); +} + static ssize_t cifs_writev(struct kiocb *iocb, struct iov_iter *from) { @@ -2979,7 +3104,6 @@ cifs_uncached_readdata_release(struct kref *refcount) kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); for (i = 0; i < rdata->nr_pages; i++) { put_page(rdata->pages[i]); - rdata->pages[i] = NULL; } cifs_readdata_release(refcount); } @@ -3106,6 +3230,67 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server, return uncached_fill_pages(server, rdata, iter, iter->count); } +static int cifs_resend_rdata(struct cifs_readdata *rdata, + struct list_head *rdata_list, + struct cifs_aio_ctx *ctx) +{ + int wait_retry = 0; + unsigned int rsize, credits; + int rc; + struct TCP_Server_Info *server = + tlink_tcon(rdata->cfile->tlink)->ses->server; + + /* + * Try to resend this rdata, waiting for credits up to 3 seconds. + * Note: we are attempting to resend the whole rdata not in segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, + &rsize, &credits); + + if (rc) + break; + + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, credits, 0); + msleep(1000); + wait_retry++; + } + } while (rsize < rdata->bytes && wait_retry < 3); + + /* + * If we can't find enough credits to send this rdata + * release the rdata and return failure, this will pass + * whatever I/O amount we have finished to VFS. + */ + if (rsize < rdata->bytes) { + rc = -EBUSY; + goto out; + } + + rc = -EAGAIN; + while (rc == -EAGAIN) { + rc = 0; + if (rdata->cfile->invalidHandle) + rc = cifs_reopen_file(rdata->cfile, true); + if (!rc) + rc = server->ops->async_readv(rdata); + } + + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } + + add_credits_and_wake_if(server, rdata->credits, 0); +out: + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + + return rc; +} + static int cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, @@ -3117,6 +3302,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, int rc; pid_t pid; struct TCP_Server_Info *server; + struct page **pagevec; + size_t start; + struct iov_iter direct_iov = ctx->iter; server = tlink_tcon(open_file->tlink)->ses->server; @@ -3125,6 +3313,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, else pid = current->tgid; + if (ctx->direct_io) + iov_iter_advance(&direct_iov, offset - ctx->pos); + do { rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, &rsize, &credits); @@ -3132,20 +3323,59 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; cur_len = min_t(const size_t, len, rsize); - npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); - /* allocate a readdata struct */ - rdata = cifs_readdata_alloc(npages, + if (ctx->direct_io) { + ssize_t result; + + result = iov_iter_get_pages_alloc( + &direct_iov, &pagevec, + cur_len, &start); + if (result < 0) { + cifs_dbg(VFS, + "couldn't get user pages (cur_len=%zd)" + " iter type %d" + " iov_offset %zd count %zd\n", + result, direct_iov.type, + direct_iov.iov_offset, + direct_iov.count); + dump_stack(); + break; + } + cur_len = (size_t)result; + iov_iter_advance(&direct_iov, cur_len); + + rdata = cifs_readdata_direct_alloc( + pagevec, cifs_uncached_readv_complete); + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; + } + + npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE; + rdata->page_offset = start; + rdata->tailsz = npages > 1 ? + cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE : + cur_len; + + } else { + + npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); + /* allocate a readdata struct */ + rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete); - if (!rdata) { - add_credits_and_wake_if(server, credits, 0); - rc = -ENOMEM; - break; - } + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; + } - rc = cifs_read_allocate_pages(rdata, npages); - if (rc) - goto error; + rc = cifs_read_allocate_pages(rdata, npages); + if (rc) + goto error; + + rdata->tailsz = PAGE_SIZE; + } rdata->cfile = cifsFileInfo_get(open_file); rdata->nr_pages = npages; @@ -3153,7 +3383,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->bytes = cur_len; rdata->pid = pid; rdata->pagesz = PAGE_SIZE; - rdata->tailsz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; rdata->copy_into_pages = cifs_uncached_copy_into_pages; rdata->credits = credits; @@ -3167,9 +3396,11 @@ error: if (rc) { add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, - cifs_uncached_readdata_release); - if (rc == -EAGAIN) + cifs_uncached_readdata_release); + if (rc == -EAGAIN) { + iov_iter_revert(&direct_iov, cur_len); continue; + } break; } @@ -3225,45 +3456,62 @@ again: * reading. */ if (got_bytes && got_bytes < rdata->bytes) { - rc = cifs_readdata_to_iov(rdata, to); + rc = 0; + if (!ctx->direct_io) + rc = cifs_readdata_to_iov(rdata, to); if (rc) { kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + cifs_uncached_readdata_release); continue; } } - rc = cifs_send_async_read( + if (ctx->direct_io) { + /* + * Re-use rdata as this is a + * direct I/O + */ + rc = cifs_resend_rdata( + rdata, + &tmp_list, ctx); + } else { + rc = cifs_send_async_read( rdata->offset + got_bytes, rdata->bytes - got_bytes, rdata->cfile, cifs_sb, &tmp_list, ctx); + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + } + list_splice(&tmp_list, &ctx->list); - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); goto again; } else if (rdata->result) rc = rdata->result; - else + else if (!ctx->direct_io) rc = cifs_readdata_to_iov(rdata, to); /* if there was a short read -- discard anything left */ if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) rc = -ENODATA; + + ctx->total_len += rdata->got_bytes; } list_del_init(&rdata->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - for (i = 0; i < ctx->npages; i++) { - if (ctx->should_dirty) - set_page_dirty(ctx->bv[i].bv_page); - put_page(ctx->bv[i].bv_page); - } + if (!ctx->direct_io) { + for (i = 0; i < ctx->npages; i++) { + if (ctx->should_dirty) + set_page_dirty(ctx->bv[i].bv_page); + put_page(ctx->bv[i].bv_page); + } - ctx->total_len = ctx->len - iov_iter_count(to); + ctx->total_len = ctx->len - iov_iter_count(to); + } cifs_stats_bytes_read(tcon, ctx->total_len); @@ -3281,18 +3529,28 @@ again: complete(&ctx->done); } -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +static ssize_t __cifs_readv( + struct kiocb *iocb, struct iov_iter *to, bool direct) { - struct file *file = iocb->ki_filp; - ssize_t rc; size_t len; - ssize_t total_read = 0; - loff_t offset = iocb->ki_pos; + struct file *file = iocb->ki_filp; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; struct cifsFileInfo *cfile; + struct cifs_tcon *tcon; + ssize_t rc, total_read = 0; + loff_t offset = iocb->ki_pos; struct cifs_aio_ctx *ctx; + /* + * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, + * fall back to data copy read path + * this could be improved by getting pages directly in ITER_KVEC + */ + if (direct && to->type & ITER_KVEC) { + cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n"); + direct = false; + } + len = iov_iter_count(to); if (!len) return 0; @@ -3319,14 +3577,20 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (iter_is_iovec(to)) ctx->should_dirty = true; - rc = setup_aio_ctx_iter(ctx, to, READ); - if (rc) { - kref_put(&ctx->refcount, cifs_aio_ctx_release); - return rc; + if (direct) { + ctx->pos = offset; + ctx->direct_io = true; + ctx->iter = *to; + ctx->len = len; + } else { + rc = setup_aio_ctx_iter(ctx, to, READ); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + len = ctx->len; } - len = ctx->len; - /* grab a lock here due to read response handlers can access ctx */ mutex_lock(&ctx->aio_mutex); @@ -3368,6 +3632,16 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) return rc; } +ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, true); +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, false); +} + ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1023d78673fb..a81a9df997c1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1320,8 +1320,8 @@ cifs_drop_nlink(struct inode *inode) /* * If d_inode(dentry) is null (usually meaning the cached dentry * is a negative dentry) then we would attempt a standard SMB delete, but - * if that fails we can not attempt the fall back mechanisms on EACCESS - * but will return the EACCESS to the caller. Note that the VFS does not call + * if that fails we can not attempt the fall back mechanisms on EACCES + * but will return the EACCES to the caller. Note that the VFS does not call * unlink on negative dentries currently. */ int cifs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 23c0a21a66f8..225fec1cfa67 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -747,6 +747,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, int rc = 0; unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0; char *name, *value; + size_t buf_size = dst_size; size_t name_len, value_len, user_name_len; while (src_size > 0) { @@ -782,9 +783,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, /* 'user.' plus a terminating null */ user_name_len = 5 + 1 + name_len; - rc += user_name_len; - - if (dst_size >= user_name_len) { + if (buf_size == 0) { + /* skip copy - calc size only */ + rc += user_name_len; + } else if (dst_size >= user_name_len) { dst_size -= user_name_len; memcpy(dst, "user.", 5); dst += 5; @@ -792,8 +794,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, dst += name_len; *dst = 0; ++dst; - } else if (dst_size == 0) { - /* skip copy - calc size only */ + rc += user_name_len; } else { /* stop before overrun buffer */ rc = -ERANGE; @@ -1078,6 +1079,9 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; +#ifdef CONFIG_CIFS_DEBUG2 + cfile->fid.mid = fid->mid; +#endif /* CIFS_DEBUG2 */ server->ops->set_oplock_level(cinode, oplock, fid->epoch, &fid->purge_cache); cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7d7b016fe8bb..27f86537a5d1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1512,7 +1512,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base; - + trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc); if (rc != 0) { if (tcon) { cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE); @@ -1559,6 +1559,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if (tcon->ses->server->ops->validate_negotiate) rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); tcon_exit: + free_rsp_buf(resp_buftype, rsp); kfree(unc_path); return rc; @@ -2308,6 +2309,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, atomic_inc(&tcon->num_remote_opens); oparms->fid->persistent_fid = rsp->PersistentFileId; oparms->fid->volatile_fid = rsp->VolatileFileId; +#ifdef CONFIG_CIFS_DEBUG2 + oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId); +#endif /* CIFS_DEBUG2 */ if (buf) { memcpy(buf, &rsp->CreationTime, 32); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f753f424d7f1..5671d5ee7f58 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -842,6 +842,41 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* Reparse structures - see MS-FSCC 2.1.2 */ + +/* struct fsctl_reparse_info_req is empty, only response structs (see below) */ + +struct reparse_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __u8 DataBuffer[0]; /* Variable Length */ +} __packed; + +struct reparse_guid_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __u8 ReparseGuid[16]; + __u8 DataBuffer[0]; /* Variable Length */ +} __packed; + +struct reparse_mount_point_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; + +/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ + +/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ + + /* See MS-DFSC 2.2.2 */ struct fsctl_get_dfs_referral_req { __le16 MaxReferralLevel; diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index cce8414fe7ec..fb049809555f 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -374,6 +374,48 @@ DEFINE_SMB3_ENTER_EXIT_EVENT(enter); DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done); /* + * For SMB2/SMB3 tree connect + */ + +DECLARE_EVENT_CLASS(smb3_tcon_class, + TP_PROTO(unsigned int xid, + __u32 tid, + __u64 sesid, + const char *unc_name, + int rc), + TP_ARGS(xid, tid, sesid, unc_name, rc), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u32, tid) + __field(__u64, sesid) + __field(const char *, unc_name) + __field(int, rc) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->unc_name = unc_name; + __entry->rc = rc; + ), + TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d", + __entry->xid, __entry->sesid, __entry->tid, + __entry->unc_name, __entry->rc) +) + +#define DEFINE_SMB3_TCON_EVENT(name) \ +DEFINE_EVENT(smb3_tcon_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u32 tid, \ + __u64 sesid, \ + const char *unc_name, \ + int rc), \ + TP_ARGS(xid, tid, sesid, unc_name, rc)) + +DEFINE_SMB3_TCON_EVENT(tcon); + + +/* * For smb2/smb3 open call */ DECLARE_EVENT_CLASS(smb3_open_err_class, diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 2aa62d58d8dd..db7590178dfc 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io, bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; + wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; io->io_next_block = bh->b_blocknr; - wbc_init_bio(io->io_wbc, bio); return 0; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 0400297c8d72..d64f622cac8b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { struct fd src_file = fdget(srcfd); + loff_t cloned; int ret; if (!src_file.file) @@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, + olen, 0); + if (cloned < 0) + ret = cloned; + else if (olen && cloned != olen) + ret = -EINVAL; + else + ret = 0; fdput: fdput(src_file); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4288a6ecaf75..46d691ba04bc 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count) +static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, loff_t count, + unsigned int remap_flags) { struct inode *dst_inode = file_inode(dst_file); struct nfs_server *server = NFS_SERVER(dst_inode); @@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, bool same_inode = false; int ret; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + /* check alignment w.r.t. clone_blksize */ ret = -EINVAL; if (bs) { @@ -240,7 +244,7 @@ out_unlock: inode_unlock(src_inode); } out: - return ret; + return ret < 0 ? ret : count; } #endif /* CONFIG_NFS_V4_2 */ @@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = { .copy_file_range = nfs4_copy_file_range, .llseek = nfs4_file_llseek, .fallocate = nfs42_fallocate, - .clone_file_range = nfs42_clone_file_range, + .remap_file_range = nfs42_remap_file_range, #else .llseek = nfs_file_llseek, #endif diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fb28be653014..eb67098117b4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + loff_t cloned; + + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); + if (count && cloned != count) + cloned = -EINVAL; + return nfserrno(cloned < 0 ? cloned : 0); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 1d098c3c00e0..4ebbd57cbf84 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -99,25 +99,34 @@ out: return ret; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]) { int status = 0; unsigned int i; struct buffer_head *bh; + int new_bh = 0; trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); if (!nr) goto bail; + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { status = -ENOMEM; mlog_errno(status); - goto bail; + break; } } bh = bhs[i]; @@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, submit_bh(REQ_OP_READ, 0, bh); } +read_failure: for (i = nr; i > 0; i--) { bh = bhs[i - 1]; + if (unlikely(status)) { + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i - 1] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } + continue; + } + /* No need to wait on the buffer if it's managed by JBD. */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, * so we can safely record this and loop back * to cleanup the other buffers. */ status = -EIO; - put_bh(bh); - bhs[i - 1] = NULL; + goto read_failure; } } @@ -179,6 +204,9 @@ bail: return status; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, struct buffer_head *bhs[], int flags, int (*validate)(struct super_block *sb, @@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, int i, ignore_cache = 0; struct buffer_head *bh; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); + int new_bh = 0; trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); @@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, goto bail; } + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + ocfs2_metadata_cache_io_lock(ci); for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { @@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ocfs2_metadata_cache_io_unlock(ci); status = -ENOMEM; mlog_errno(status); - goto bail; + /* Don't forget to put previous bh! */ + break; } } bh = bhs[i]; @@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, } } - status = 0; - +read_failure: for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; if (!(flags & OCFS2_BH_READAHEAD)) { - if (status) { - /* Clear the rest of the buffers on error */ - put_bh(bh); - bhs[i] = NULL; + if (unlikely(status)) { + /* Clear the buffers on error including those + * ever succeeded in reading + */ + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } continue; } /* We know this can't have changed as we hold the @@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, * uptodate. */ status = -EIO; clear_buffer_needs_validate(bh); - put_bh(bh); - bhs[i] = NULL; - continue; + goto read_failure; } if (buffer_needs_validate(bh)) { @@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, BUG_ON(buffer_jbd(bh)); clear_buffer_needs_validate(bh); status = validate(sb, bh); - if (status) { - put_bh(bh); - bhs[i] = NULL; - continue; - } + if (status) + goto read_failure; } } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b048d4fa3959..c121abbdfc7d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; - brelse(bh); - continue; + break; } if (le64_to_cpu(de->inode)) { unsigned char d_type = DT_UNKNOWN; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 933aac5da193..7c835824247e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, /* LVB only has room for 64 bits of time here so we pack it for * now. */ -static u64 ocfs2_pack_timespec(struct timespec *spec) +static u64 ocfs2_pack_timespec(struct timespec64 *spec) { u64 res; - u64 sec = spec->tv_sec; + u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull); u32 nsec = spec->tv_nsec; res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); @@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; - struct timespec ts; lvb = ocfs2_dlm_lvb(&lockres->l_lksb); @@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); - ts = timespec64_to_timespec(inode->i_atime); lvb->lvb_iatime_packed = - cpu_to_be64(ocfs2_pack_timespec(&ts)); - ts = timespec64_to_timespec(inode->i_ctime); + cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); lvb->lvb_ictime_packed = - cpu_to_be64(ocfs2_pack_timespec(&ts)); - ts = timespec64_to_timespec(inode->i_mtime); + cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); lvb->lvb_imtime_packed = - cpu_to_be64(ocfs2_pack_timespec(&ts)); + cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); @@ -2180,7 +2176,7 @@ out: mlog_meta_lvb(0, lockres); } -static void ocfs2_unpack_timespec(struct timespec *spec, +static void ocfs2_unpack_timespec(struct timespec64 *spec, u64 packed_time) { spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; @@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec, static void ocfs2_refresh_inode_from_lvb(struct inode *inode) { - struct timespec ts; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; @@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); inode->i_mode = be16_to_cpu(lvb->lvb_imode); set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); - ocfs2_unpack_timespec(&ts, + ocfs2_unpack_timespec(&inode->i_atime, be64_to_cpu(lvb->lvb_iatime_packed)); - inode->i_atime = timespec_to_timespec64(ts); - ocfs2_unpack_timespec(&ts, + ocfs2_unpack_timespec(&inode->i_mtime, be64_to_cpu(lvb->lvb_imtime_packed)); - inode->i_mtime = timespec_to_timespec64(ts); - ocfs2_unpack_timespec(&ts, + ocfs2_unpack_timespec(&inode->i_ctime, be64_to_cpu(lvb->lvb_ictime_packed)); - inode->i_ctime = timespec_to_timespec64(ts); spin_unlock(&oi->ip_lock); } @@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, * we can recover correctly from node failure. Otherwise, we may get * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. */ - if (!ocfs2_is_o2cb_active() && + if (ocfs2_userspace_stack(osb) && lockres->l_ops->flags & LOCK_TYPE_USES_LVB) lvb = 1; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9fa35cb6f6e0..d640c5f8a85d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, written = __generic_file_write_iter(iocb, from); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); + BUG_ON(written == -EIOCBQUEUED && !direct_io); /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io @@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, trace_generic_file_read_iter_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !direct_io); /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { @@ -2527,24 +2527,79 @@ out: return offset; } -static int ocfs2_file_clone_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) +static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); + struct buffer_head *in_bh = NULL, *out_bh = NULL; + bool same_inode = (inode_in == inode_out); + loff_t remapped = 0; + ssize_t ret; -static int ocfs2_file_dedupe_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + if (!ocfs2_refcount_tree(osb)) + return -EOPNOTSUPP; + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + /* Lock both files against IO */ + ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); + if (ret) + return ret; + + /* Check file eligibility and prepare for block sharing. */ + ret = -EINVAL; + if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || + (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) + goto out_unlock; + + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, + &len, remap_flags); + if (ret < 0 || len == 0) + goto out_unlock; + + /* Lock out changes to the allocation maps and remap. */ + down_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, + SINGLE_DEPTH_NESTING); + + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, + round_down(pos_out, PAGE_SIZE), + round_up(pos_out + len, PAGE_SIZE) - 1); + + remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, + inode_out, out_bh, pos_out, len); + up_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + up_write(&OCFS2_I(inode_out)->ip_alloc_sem); + if (remapped < 0) { + ret = remapped; + mlog_errno(ret); + goto out_unlock; + } + + /* + * Empty the extent map so that we may get the right extent + * record from the disk. + */ + ocfs2_extent_map_trunc(inode_in, 0); + ocfs2_extent_map_trunc(inode_out, 0); + + ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + +out_unlock: + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); + return remapped > 0 ? remapped : ret; } const struct inode_operations ocfs2_file_iops = { @@ -2586,8 +2641,7 @@ const struct file_operations ocfs2_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops = { @@ -2633,8 +2687,7 @@ const struct file_operations ocfs2_fops_no_plocks = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops_no_plocks = { diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index bd3475694e83..b63c97f4318e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg) int rm_quota_used = 0, i; struct ocfs2_quota_recovery *qrec; + /* Whether the quota supported. */ + int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA) + || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA); + status = ocfs2_wait_on_mount(osb); if (status < 0) { goto bail; } - rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); - if (!rm_quota) { - status = -ENOMEM; - goto bail; + if (quota_enabled) { + rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); + if (!rm_quota) { + status = -ENOMEM; + goto bail; + } } restart: status = ocfs2_super_lock(osb, 1); @@ -1422,9 +1430,14 @@ restart: * then quota usage would be out of sync until some node takes * the slot. So we remember which nodes need quota recovery * and when everything else is done, we recover quotas. */ - for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); - if (i == rm_quota_used) - rm_quota[rm_quota_used++] = slot_num; + if (quota_enabled) { + for (i = 0; i < rm_quota_used + && rm_quota[i] != slot_num; i++) + ; + + if (i == rm_quota_used) + rm_quota[rm_quota_used++] = slot_num; + } status = ocfs2_recover_node(osb, node_num, slot_num); skip_recovery: @@ -1452,16 +1465,19 @@ skip_recovery: /* Now it is right time to recover quotas... We have to do this under * superblock lock so that no one can start using the slot (and crash) * before we recover it */ - for (i = 0; i < rm_quota_used; i++) { - qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); - if (IS_ERR(qrec)) { - status = PTR_ERR(qrec); - mlog_errno(status); - continue; + if (quota_enabled) { + for (i = 0; i < rm_quota_used; i++) { + qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); + if (IS_ERR(qrec)) { + status = PTR_ERR(qrec); + mlog_errno(status); + continue; + } + ocfs2_queue_recovery_completion(osb->journal, + rm_quota[i], + NULL, NULL, qrec, + ORPHAN_NEED_TRUNCATE); } - ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], - NULL, NULL, qrec, - ORPHAN_NEED_TRUNCATE); } ocfs2_super_unlock(osb, 1); @@ -1483,7 +1499,8 @@ bail: mutex_unlock(&osb->recovery_lock); - kfree(rm_quota); + if (quota_enabled) + kfree(rm_quota); /* no one is callint kthread_stop() for us so the kthread() api * requires that we call do_exit(). And it isn't exported, but diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 7eb3b0a6347e..3f1685d7d43b 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -25,6 +25,7 @@ #include "ocfs2_ioctl.h" #include "alloc.h" +#include "localalloc.h" #include "aops.h" #include "dlmglue.h" #include "extent_map.h" @@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, struct ocfs2_refcount_tree *ref_tree = NULL; u32 new_phys_cpos, new_len; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + int need_free = 0; if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { BUG_ON(!ocfs2_is_refcount_inode(inode)); @@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, if (!partial) { context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; ret = -ENOSPC; + need_free = 1; goto out_commit; } } @@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, mlog_errno(ret); out_commit: + if (need_free && context->data_ac) { + struct ocfs2_alloc_context *data_ac = context->data_ac; + + if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + new_phys_cpos, new_len); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), + new_len); + } + ocfs2_commit_trans(osb, handle); out_unlock_mutex: diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1114ef02e780..a35259eebc56 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4466,9 +4466,9 @@ out: } /* Update destination inode size, if necessary. */ -static int ocfs2_reflink_update_dest(struct inode *dest, - struct buffer_head *d_bh, - loff_t newlen) +int ocfs2_reflink_update_dest(struct inode *dest, + struct buffer_head *d_bh, + loff_t newlen) { handle_t *handle; int ret; @@ -4505,14 +4505,14 @@ out_commit: } /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */ -static int ocfs2_reflink_remap_extent(struct inode *s_inode, - struct buffer_head *s_bh, - loff_t pos_in, - struct inode *t_inode, - struct buffer_head *t_bh, - loff_t pos_out, - loff_t len, - struct ocfs2_cached_dealloc_ctxt *dealloc) +static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len, + struct ocfs2_cached_dealloc_ctxt *dealloc) { struct ocfs2_extent_tree s_et; struct ocfs2_extent_tree t_et; @@ -4520,8 +4520,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode, struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; struct ocfs2_super *osb; + loff_t remapped_bytes = 0; loff_t pstart, plen; - u32 p_cluster, num_clusters, slast, spos, tpos; + u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0; unsigned int ext_flags; int ret = 0; @@ -4603,30 +4604,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode, next_loop: spos += num_clusters; tpos += num_clusters; + remapped_clus += num_clusters; } -out: - return ret; + goto out; out_unlock_refcount: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); - return ret; +out: + remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus); + remapped_bytes = min_t(loff_t, len, remapped_bytes); + + return remapped_bytes > 0 ? remapped_bytes : ret; } /* Set up refcount tree and remap s_inode to t_inode. */ -static int ocfs2_reflink_remap_blocks(struct inode *s_inode, - struct buffer_head *s_bh, - loff_t pos_in, - struct inode *t_inode, - struct buffer_head *t_bh, - loff_t pos_out, - loff_t len) +loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len) { struct ocfs2_cached_dealloc_ctxt dealloc; struct ocfs2_super *osb; struct ocfs2_dinode *dis; struct ocfs2_dinode *dit; - int ret; + loff_t ret; osb = OCFS2_SB(s_inode->i_sb); dis = (struct ocfs2_dinode *)s_bh->b_data; @@ -4698,7 +4703,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode, /* Actually remap extents now. */ ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, pos_out, len, &dealloc); - if (ret) { + if (ret < 0) { mlog_errno(ret); goto out; } @@ -4713,10 +4718,10 @@ out: } /* Lock an inode and grab a bh pointing to the inode. */ -static int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, - struct inode *t_inode, - struct buffer_head **bh2) +int ocfs2_reflink_inodes_lock(struct inode *s_inode, + struct buffer_head **bh1, + struct inode *t_inode, + struct buffer_head **bh2) { struct inode *inode1; struct inode *inode2; @@ -4801,10 +4806,10 @@ out_i1: } /* Unlock both inodes and release buffers. */ -static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, - struct buffer_head *s_bh, - struct inode *t_inode, - struct buffer_head *t_bh) +void ocfs2_reflink_inodes_unlock(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh) { ocfs2_inode_unlock(s_inode, 1); ocfs2_rw_unlock(s_inode, 1); @@ -4816,82 +4821,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, } unlock_two_nondirectories(s_inode, t_inode); } - -/* Link a range of blocks from one file to another. */ -int ocfs2_reflink_remap_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe) -{ - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); - struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); - struct buffer_head *in_bh = NULL, *out_bh = NULL; - bool same_inode = (inode_in == inode_out); - ssize_t ret; - - if (!ocfs2_refcount_tree(osb)) - return -EOPNOTSUPP; - if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) - return -EROFS; - - /* Lock both files against IO */ - ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); - if (ret) - return ret; - - /* Check file eligibility and prepare for block sharing. */ - ret = -EINVAL; - if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || - (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) - goto out_unlock; - - ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, - &len, is_dedupe); - if (ret <= 0) - goto out_unlock; - - /* Lock out changes to the allocation maps and remap. */ - down_write(&OCFS2_I(inode_in)->ip_alloc_sem); - if (!same_inode) - down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, - SINGLE_DEPTH_NESTING); - - ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out, - out_bh, pos_out, len); - - /* Zap any page cache for the destination file's range. */ - if (!ret) - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - - up_write(&OCFS2_I(inode_in)->ip_alloc_sem); - if (!same_inode) - up_write(&OCFS2_I(inode_out)->ip_alloc_sem); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - - /* - * Empty the extent map so that we may get the right extent - * record from the disk. - */ - ocfs2_extent_map_trunc(inode_in, 0); - ocfs2_extent_map_trunc(inode_out, 0); - - ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - - ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); - return 0; - -out_unlock: - ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); - return ret; -} diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 4af55bf4b35b..e9e862be4a1e 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -115,11 +115,23 @@ int ocfs2_reflink_ioctl(struct inode *inode, const char __user *oldname, const char __user *newname, bool preserve); -int ocfs2_reflink_remap_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe); +loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len); +int ocfs2_reflink_inodes_lock(struct inode *s_inode, + struct buffer_head **bh1, + struct inode *t_inode, + struct buffer_head **bh2); +void ocfs2_reflink_inodes_unlock(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh); +int ocfs2_reflink_update_dest(struct inode *dest, + struct buffer_head *d_bh, + loff_t newlen); #endif /* OCFS2_REFCOUNTTREE_H */ diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index d6c350ba25b9..c4b029c43464 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; -inline int ocfs2_is_o2cb_active(void) -{ - return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); -} -EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); - static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index e3036e1790e8..f2dce10fae54 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ -int ocfs2_is_o2cb_active(void); - extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index d6a3346e2672..9e62dcf06fc4 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) struct file *new_file; loff_t old_pos = 0; loff_t new_pos = 0; + loff_t cloned; int error = 0; if (len == 0) @@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = do_clone_file_range(old_file, 0, new_file, 0, len); - if (!error) + cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); + if (cloned == len) goto out; /* Couldn't clone, so now we try to copy the data */ - error = 0; /* FIXME: copy up sparse files efficiently */ while (len) { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 986313da0c88..84dd957efa24 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -434,14 +434,14 @@ enum ovl_copyop { OVL_DEDUPE, }; -static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, +static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, unsigned int flags, enum ovl_copyop op) + loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); struct fd real_in, real_out; const struct cred *old_cred; - ssize_t ret; + loff_t ret; ret = ovl_real_fdget(file_out, &real_out); if (ret) @@ -462,12 +462,13 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, case OVL_CLONE: ret = vfs_clone_file_range(real_in.file, pos_in, - real_out.file, pos_out, len); + real_out.file, pos_out, len, flags); break; case OVL_DEDUPE: ret = vfs_dedupe_file_range_one(real_in.file, pos_in, - real_out.file, pos_out, len); + real_out.file, pos_out, len, + flags); break; } revert_creds(old_cred); @@ -489,26 +490,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, OVL_COPY); } -static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_CLONE); -} + enum ovl_copyop op; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) + op = OVL_DEDUPE; + else + op = OVL_CLONE; -static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) -{ /* * Don't copy up because of a dedupe request, this wouldn't make sense * most of the time (data would be duplicated instead of deduplicated). */ - if (!ovl_inode_upper(file_inode(file_in)) || - !ovl_inode_upper(file_inode(file_out))) + if (op == OVL_DEDUPE && + (!ovl_inode_upper(file_inode(file_in)) || + !ovl_inode_upper(file_inode(file_out)))) return -EPERM; - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_DEDUPE); + return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, + remap_flags, op); } const struct file_operations ovl_file_operations = { @@ -525,6 +531,5 @@ const struct file_operations ovl_file_operations = { .compat_ioctl = ovl_compat_ioctl, .copy_file_range = ovl_copy_file_range, - .clone_file_range = ovl_clone_file_range, - .dedupe_file_range = ovl_dedupe_file_range, + .remap_file_range = ovl_remap_file_range, }; diff --git a/fs/read_write.c b/fs/read_write.c index 5a2ee488c5d2..bfcb4ced5664 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1587,11 +1587,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). */ - if (file_in->f_op->clone_file_range) { - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); - if (ret == 0) { - ret = len; + if (file_in->f_op->remap_file_range) { + loff_t cloned; + + cloned = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, + min_t(loff_t, MAX_RW_COUNT, len), + REMAP_FILE_CAN_SHORTEN); + if (cloned > 0) { + ret = cloned; goto done; } } @@ -1685,11 +1689,12 @@ out2: return ret; } -static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) +static int remap_verify_area(struct file *file, loff_t pos, loff_t len, + bool write) { struct inode *inode = file_inode(file); - if (unlikely(pos < 0)) + if (unlikely(pos < 0 || len < 0)) return -EINVAL; if (unlikely((loff_t) (pos + len) < 0)) @@ -1707,22 +1712,150 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) return security_file_permission(file, write ? MAY_WRITE : MAY_READ); } +/* + * Ensure that we don't remap a partial EOF block in the middle of something + * else. Assume that the offsets have already been checked for block + * alignment. + * + * For deduplication we always scale down to the previous block because we + * can't meaningfully compare post-EOF contents. + * + * For clone we only link a partial EOF block above the destination file's EOF. + * + * Shorten the request if possible. + */ +static int generic_remap_check_len(struct inode *inode_in, + struct inode *inode_out, + loff_t pos_out, + loff_t *len, + unsigned int remap_flags) +{ + u64 blkmask = i_blocksize(inode_in) - 1; + loff_t new_len = *len; + + if ((*len & blkmask) == 0) + return 0; + + if ((remap_flags & REMAP_FILE_DEDUP) || + pos_out + *len < i_size_read(inode_out)) + new_len &= ~blkmask; + + if (new_len == *len) + return 0; + + if (remap_flags & REMAP_FILE_CAN_SHORTEN) { + *len = new_len; + return 0; + } + + return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; +} + +/* + * Read a page's worth of file data into the page cache. Return the page + * locked. + */ +static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) +{ + struct page *page; + + page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL); + if (IS_ERR(page)) + return page; + if (!PageUptodate(page)) { + put_page(page); + return ERR_PTR(-EIO); + } + lock_page(page); + return page; +} + +/* + * Compare extents of two files to see if they are the same. + * Caller must have locked both inodes to prevent write races. + */ +static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same) +{ + loff_t src_poff; + loff_t dest_poff; + void *src_addr; + void *dest_addr; + struct page *src_page; + struct page *dest_page; + loff_t cmp_len; + bool same; + int error; + + error = -EINVAL; + same = true; + while (len) { + src_poff = srcoff & (PAGE_SIZE - 1); + dest_poff = destoff & (PAGE_SIZE - 1); + cmp_len = min(PAGE_SIZE - src_poff, + PAGE_SIZE - dest_poff); + cmp_len = min(cmp_len, len); + if (cmp_len <= 0) + goto out_error; + + src_page = vfs_dedupe_get_page(src, srcoff); + if (IS_ERR(src_page)) { + error = PTR_ERR(src_page); + goto out_error; + } + dest_page = vfs_dedupe_get_page(dest, destoff); + if (IS_ERR(dest_page)) { + error = PTR_ERR(dest_page); + unlock_page(src_page); + put_page(src_page); + goto out_error; + } + src_addr = kmap_atomic(src_page); + dest_addr = kmap_atomic(dest_page); + + flush_dcache_page(src_page); + flush_dcache_page(dest_page); + + if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) + same = false; + + kunmap_atomic(dest_addr); + kunmap_atomic(src_addr); + unlock_page(dest_page); + unlock_page(src_page); + put_page(dest_page); + put_page(src_page); + + if (!same) + break; + + srcoff += cmp_len; + destoff += cmp_len; + len -= cmp_len; + } + + *is_same = same; + return 0; + +out_error: + return error; +} /* * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. * - * Returns: 0 for "nothing to clone", 1 for "something to clone", or - * the usual negative error code. + * If there's an error, then the usual negative error code is returned. + * Otherwise returns 0 with *len set to the request length. */ -int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, - struct inode *inode_out, loff_t pos_out, - u64 *len, bool is_dedupe) +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags) { - loff_t bs = inode_out->i_sb->s_blocksize; - loff_t blen; - loff_t isize; + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); bool same_inode = (inode_in == inode_out); int ret; @@ -1739,50 +1872,24 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; - /* Are we going all the way to the end? */ - isize = i_size_read(inode_in); - if (isize == 0) - return 0; - /* Zero length dedupe exits immediately; reflink goes to EOF. */ if (*len == 0) { - if (is_dedupe || pos_in == isize) + loff_t isize = i_size_read(inode_in); + + if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) return 0; if (pos_in > isize) return -EINVAL; *len = isize - pos_in; + if (*len == 0) + return 0; } - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + *len < pos_in || pos_out + *len < pos_out || - pos_in + *len > isize) - return -EINVAL; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + *len > disize) - return -EINVAL; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + *len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = *len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - return -EINVAL; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode) { - if (pos_out + blen > pos_in && pos_out < pos_in + blen) - return -EINVAL; - } + /* Check that we don't violate system file offset limits. */ + ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, + remap_flags); + if (ret) + return ret; /* Wait for the completion of any pending IOs on both files */ inode_dio_wait(inode_in); @@ -1802,7 +1909,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, /* * Check that the extents are the same. */ - if (is_dedupe) { + if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; ret = vfs_dedupe_file_range_compare(inode_in, pos_in, @@ -1813,16 +1920,43 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, return -EBADE; } - return 1; + ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, + remap_flags); + if (ret) + return ret; + + /* If can't alter the file contents, we're done. */ + if (!(remap_flags & REMAP_FILE_DEDUP)) { + /* Update the timestamps, since we can alter file contents. */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + return ret; + } + + /* + * Clear the security bits if the process is not being run by + * root. This keeps people from modifying setuid and setgid + * binaries. + */ + ret = file_remove_privs(file_out); + if (ret) + return ret; + } + + return 0; } -EXPORT_SYMBOL(vfs_clone_file_prep_inodes); +EXPORT_SYMBOL(generic_remap_file_range_prep); -int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); - int ret; + loff_t ret; + + WARN_ON_ONCE(remap_flags); if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; @@ -1842,140 +1976,43 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - if (!file_in->f_op->clone_file_range) + if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; - ret = clone_verify_area(file_in, pos_in, len, false); + ret = remap_verify_area(file_in, pos_in, len, false); if (ret) return ret; - ret = clone_verify_area(file_out, pos_out, len, true); + ret = remap_verify_area(file_out, pos_out, len, true); if (ret) return ret; - if (pos_in + len > i_size_read(inode_in)) - return -EINVAL; - - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); - if (!ret) { - fsnotify_access(file_in); - fsnotify_modify(file_out); - } + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, remap_flags); + if (ret < 0) + return ret; + fsnotify_access(file_in); + fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(do_clone_file_range); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { - int ret; + loff_t ret; file_start_write(file_out); - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, + remap_flags); file_end_write(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. - */ -static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) -{ - struct address_space *mapping; - struct page *page; - pgoff_t n; - - n = offset >> PAGE_SHIFT; - mapping = inode->i_mapping; - page = read_mapping_page(mapping, n, NULL); - if (IS_ERR(page)) - return page; - if (!PageUptodate(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - lock_page(page); - return page; -} - -/* - * Compare extents of two files to see if they are the same. - * Caller must have locked both inodes to prevent write races. - */ -int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same) -{ - loff_t src_poff; - loff_t dest_poff; - void *src_addr; - void *dest_addr; - struct page *src_page; - struct page *dest_page; - loff_t cmp_len; - bool same; - int error; - - error = -EINVAL; - same = true; - while (len) { - src_poff = srcoff & (PAGE_SIZE - 1); - dest_poff = destoff & (PAGE_SIZE - 1); - cmp_len = min(PAGE_SIZE - src_poff, - PAGE_SIZE - dest_poff); - cmp_len = min(cmp_len, len); - if (cmp_len <= 0) - goto out_error; - - src_page = vfs_dedupe_get_page(src, srcoff); - if (IS_ERR(src_page)) { - error = PTR_ERR(src_page); - goto out_error; - } - dest_page = vfs_dedupe_get_page(dest, destoff); - if (IS_ERR(dest_page)) { - error = PTR_ERR(dest_page); - unlock_page(src_page); - put_page(src_page); - goto out_error; - } - src_addr = kmap_atomic(src_page); - dest_addr = kmap_atomic(dest_page); - - flush_dcache_page(src_page); - flush_dcache_page(dest_page); - - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) - same = false; - - kunmap_atomic(dest_addr); - kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); - put_page(dest_page); - put_page(src_page); - - if (!same) - break; - - srcoff += cmp_len; - destoff += cmp_len; - len -= cmp_len; - } - - *is_same = same; - return 0; - -out_error: - return error; -} -EXPORT_SYMBOL(vfs_dedupe_file_range_compare); - /* Check whether we are allowed to dedupe the destination file */ static bool allow_file_dedupe(struct file *file) { @@ -1990,16 +2027,20 @@ static bool allow_file_dedupe(struct file *file) return false; } -int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, u64 len) +loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len, unsigned int remap_flags) { - s64 ret; + loff_t ret; + + WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | + REMAP_FILE_CAN_SHORTEN)); ret = mnt_want_write_file(dst_file); if (ret) return ret; - ret = clone_verify_area(dst_file, dst_pos, len, true); + ret = remap_verify_area(dst_file, dst_pos, len, true); if (ret < 0) goto out_drop_write; @@ -2016,11 +2057,16 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, goto out_drop_write; ret = -EINVAL; - if (!dst_file->f_op->dedupe_file_range) + if (!dst_file->f_op->remap_file_range) goto out_drop_write; - ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, - dst_file, dst_pos, len); + if (len == 0) { + ret = 0; + goto out_drop_write; + } + + ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, + dst_pos, len, remap_flags | REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); @@ -2037,7 +2083,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) int i; int ret; u16 count = same->dest_count; - int deduped; + loff_t deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; @@ -2056,7 +2102,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) if (!S_ISREG(src->i_mode)) goto out; - ret = clone_verify_area(file, off, len, false); + ret = remap_verify_area(file, off, len, false); if (ret < 0) goto out; ret = 0; @@ -2088,7 +2134,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) } deduped = vfs_dedupe_file_range_one(file, off, dst_file, - info->dest_offset, len); + info->dest_offset, len, + REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 61a5ad2600e8..53c9ab8fb777 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -919,28 +919,67 @@ out_unlock: return error; } -STATIC int -xfs_file_clone_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} -STATIC int -xfs_file_dedupe_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) +loff_t +xfs_file_remap_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + loff_t len, + unsigned int remap_flags) { - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + struct xfs_mount *mp = src->i_mount; + loff_t remapped = 0; + xfs_extlen_t cowextsize; + int ret; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Prepare and then clone file data. */ + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, + &len, remap_flags); + if (ret < 0 || len == 0) + return ret; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + + ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, + &remapped); + if (ret) + goto out_unlock; + + /* + * Carry the cowextsize hint from src to dest if we're sharing the + * entire source file to the entire destination file, the source file + * has a cowextsize hint, and the destination file does not. + */ + cowextsize = 0; + if (pos_in == 0 && len == i_size_read(inode_in) && + (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && + pos_out == 0 && len >= i_size_read(inode_out) && + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + cowextsize = src->i_d.di_cowextsize; + + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, + remap_flags); + +out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return remapped > 0 ? remapped : ret; } STATIC int @@ -1175,8 +1214,7 @@ const struct file_operations xfs_file_operations = { .fsync = xfs_file_fsync, .get_unmapped_area = thp_get_unmapped_area, .fallocate = xfs_file_fallocate, - .clone_file_range = xfs_file_clone_range, - .dedupe_file_range = xfs_file_dedupe_range, + .remap_file_range = xfs_file_remap_range, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 8eaeec9d58ed..ecdb086bc23e 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -913,18 +913,18 @@ out_error: /* * Update destination inode size & cowextsize hint, if necessary. */ -STATIC int +int xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, xfs_extlen_t cowextsize, - bool is_dedupe) + unsigned int remap_flags) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -945,10 +945,6 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } - if (!is_dedupe) { - xfs_trans_ichgtime(tp, dest, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); @@ -1112,19 +1108,28 @@ out: /* * Iteratively remap one file's extents (and holes) to another's. */ -STATIC int +int xfs_reflink_remap_blocks( struct xfs_inode *src, - xfs_fileoff_t srcoff, + loff_t pos_in, struct xfs_inode *dest, - xfs_fileoff_t destoff, - xfs_filblks_t len, - xfs_off_t new_isize) + loff_t pos_out, + loff_t remap_len, + loff_t *remapped) { struct xfs_bmbt_irec imap; + xfs_fileoff_t srcoff; + xfs_fileoff_t destoff; + xfs_filblks_t len; + xfs_filblks_t range_len; + xfs_filblks_t remapped_len = 0; + xfs_off_t new_isize = pos_out + remap_len; int nimaps; int error = 0; - xfs_filblks_t range_len; + + destoff = XFS_B_TO_FSBT(src->i_mount, pos_out); + srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in); + len = XFS_B_TO_FSB(src->i_mount, remap_len); /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ while (len) { @@ -1139,7 +1144,7 @@ xfs_reflink_remap_blocks( error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); xfs_iunlock(src, lock_mode); if (error) - goto err; + break; ASSERT(nimaps == 1); trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, @@ -1153,23 +1158,24 @@ xfs_reflink_remap_blocks( error = xfs_reflink_remap_extent(dest, &imap, destoff, new_isize); if (error) - goto err; + break; if (fatal_signal_pending(current)) { error = -EINTR; - goto err; + break; } /* Advance drange/srange */ srcoff += range_len; destoff += range_len; len -= range_len; + remapped_len += range_len; } - return 0; - -err: - trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); + if (error) + trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); + *remapped = min_t(loff_t, remap_len, + XFS_FSB_TO_B(src->i_mount, remapped_len)); return error; } @@ -1218,7 +1224,7 @@ retry: } /* Unlock both inodes after they've been prepped for a range clone. */ -STATIC void +void xfs_reflink_remap_unlock( struct file *file_in, struct file *file_out) @@ -1286,21 +1292,20 @@ xfs_reflink_zero_posteof( * stale data in the destination file. Hence we reject these clone attempts with * -EINVAL in this case. */ -STATIC int +int xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, - bool is_dedupe) + loff_t *len, + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); bool same_inode = (inode_in == inode_out); - u64 blkmask = i_blocksize(inode_in) - 1; ssize_t ret; /* Lock both files against IO */ @@ -1323,29 +1328,11 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, - len, is_dedupe); - if (ret <= 0) + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, + len, remap_flags); + if (ret < 0 || *len == 0) goto out_unlock; - /* - * If the dedupe data matches, chop off the partial EOF block - * from the source file so we don't try to dedupe the partial - * EOF block. - */ - if (is_dedupe) { - *len &= ~blkmask; - } else if (*len & blkmask) { - /* - * The user is attempting to share a partial EOF block, - * if it's inside the destination EOF then reject it. - */ - if (pos_out + *len < i_size_read(inode_out)) { - ret = -EINVAL; - goto out_unlock; - } - } - /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) @@ -1365,31 +1352,9 @@ xfs_reflink_remap_prep( goto out_unlock; /* Zap any page cache for the destination file's range. */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + *len) - 1); - - /* If we're altering the file contents... */ - if (!is_dedupe) { - /* - * ...update the timestamps (which will grab the ilock again - * from xfs_fs_dirty_inode, so we have to call it before we - * take the ilock). - */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - goto out_unlock; - } - - /* - * ...clear the security bits if the process is not being run - * by root. This keeps people from modifying setuid and setgid - * binaries. - */ - ret = file_remove_privs(file_out); - if (ret) - goto out_unlock; - } + truncate_inode_pages_range(&inode_out->i_data, + round_down(pos_out, PAGE_SIZE), + round_up(pos_out + *len, PAGE_SIZE) - 1); return 1; out_unlock: @@ -1398,72 +1363,6 @@ out_unlock: } /* - * Link a range of blocks from one file to another. - */ -int -xfs_reflink_remap_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe) -{ - struct inode *inode_in = file_inode(file_in); - struct xfs_inode *src = XFS_I(inode_in); - struct inode *inode_out = file_inode(file_out); - struct xfs_inode *dest = XFS_I(inode_out); - struct xfs_mount *mp = src->i_mount; - xfs_fileoff_t sfsbno, dfsbno; - xfs_filblks_t fsblen; - xfs_extlen_t cowextsize; - ssize_t ret; - - if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return -EOPNOTSUPP; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - /* Prepare and then clone file data. */ - ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); - if (ret <= 0) - return ret; - - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - - dfsbno = XFS_B_TO_FSBT(mp, pos_out); - sfsbno = XFS_B_TO_FSBT(mp, pos_in); - fsblen = XFS_B_TO_FSB(mp, len); - ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, - pos_out + len); - if (ret) - goto out_unlock; - - /* - * Carry the cowextsize hint from src to dest if we're sharing the - * entire source file to the entire destination file, the source file - * has a cowextsize hint, and the destination file does not. - */ - cowextsize = 0; - if (pos_in == 0 && len == i_size_read(inode_in) && - (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && - pos_out == 0 && len >= i_size_read(inode_out) && - !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) - cowextsize = src->i_d.di_cowextsize; - - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, - is_dedupe); - -out_unlock: - xfs_reflink_remap_unlock(file_in, file_out); - if (ret) - trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); - return ret; -} - -/* * The user wants to preemptively CoW all shared blocks in this file, * which enables us to turn off the reflink flag. Iterate all * extents which are not prealloc/delalloc to see which ranges are diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 7f47202b5639..6d73daef1f13 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -27,13 +27,24 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); -extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); +extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t len, + unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); +extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t *len, + unsigned int remap_flags); +extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in, + struct xfs_inode *dest, loff_t pos_out, loff_t remap_len, + loff_t *remapped); +extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen, + xfs_extlen_t cowextsize, unsigned int remap_flags); +extern void xfs_reflink_remap_unlock(struct file *file_in, + struct file *file_out); #endif /* __XFS_REFLINK_H */ diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 91a877fa00cb..9ccad6b062f2 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -82,6 +82,53 @@ enum drm_connector_status { connector_status_unknown = 3, }; +/** + * enum drm_connector_registration_status - userspace registration status for + * a &drm_connector + * + * This enum is used to track the status of initializing a connector and + * registering it with userspace, so that DRM can prevent bogus modesets on + * connectors that no longer exist. + */ +enum drm_connector_registration_state { + /** + * @DRM_CONNECTOR_INITIALIZING: The connector has just been created, + * but has yet to be exposed to userspace. There should be no + * additional restrictions to how the state of this connector may be + * modified. + */ + DRM_CONNECTOR_INITIALIZING = 0, + + /** + * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized + * and registered with sysfs, as such it has been exposed to + * userspace. There should be no additional restrictions to how the + * state of this connector may be modified. + */ + DRM_CONNECTOR_REGISTERED = 1, + + /** + * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed + * to userspace and has since been unregistered and removed from + * userspace, or the connector was unregistered before it had a chance + * to be exposed to userspace (e.g. still in the + * @DRM_CONNECTOR_INITIALIZING state). When a connector is + * unregistered, there are additional restrictions to how its state + * may be modified: + * + * - An unregistered connector may only have its DPMS changed from + * On->Off. Once DPMS is changed to Off, it may not be switched back + * to On. + * - Modesets are not allowed on unregistered connectors, unless they + * would result in disabling its assigned CRTCs. This means + * disabling a CRTC on an unregistered connector is OK, but enabling + * one is not. + * - Removing a CRTC from an unregistered connector is OK, but new + * CRTCs may never be assigned to an unregistered connector. + */ + DRM_CONNECTOR_UNREGISTERED = 2, +}; + enum subpixel_order { SubPixelUnknown = 0, SubPixelHorizontalRGB, @@ -853,10 +900,12 @@ struct drm_connector { bool ycbcr_420_allowed; /** - * @registered: Is this connector exposed (registered) with userspace? + * @registration_state: Is this connector initializing, exposed + * (registered) with userspace, or unregistered? + * * Protected by @mutex. */ - bool registered; + enum drm_connector_registration_state registration_state; /** * @modes: @@ -1166,6 +1215,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector) drm_connector_put(connector); } +/** + * drm_connector_is_unregistered - has the connector been unregistered from + * userspace? + * @connector: DRM connector + * + * Checks whether or not @connector has been unregistered from userspace. + * + * Returns: + * True if the connector was unregistered, false if the connector is + * registered or has not yet been registered with userspace. + */ +static inline bool +drm_connector_is_unregistered(struct drm_connector *connector) +{ + return READ_ONCE(connector->registration_state) == + DRM_CONNECTOR_UNREGISTERED; +} + const char *drm_get_connector_status_name(enum drm_connector_status status); const char *drm_get_subpixel_order_name(enum subpixel_order order); const char *drm_get_dpms_name(int val); diff --git a/include/linux/adxl.h b/include/linux/adxl.h index 2a629acb4c3f..2d29f55923e3 100644 --- a/include/linux/adxl.h +++ b/include/linux/adxl.h @@ -7,7 +7,12 @@ #ifndef _LINUX_ADXL_H #define _LINUX_ADXL_H +#ifdef CONFIG_ACPI_ADXL const char * const *adxl_get_component_names(void); int adxl_decode(u64 addr, u64 component_values[]); +#else +static inline const char * const *adxl_get_component_names(void) { return NULL; } +static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; } +#endif #endif /* _LINUX_ADXL_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index b47c7f716731..056fb627edb3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -503,31 +503,23 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkg_from_page(struct bio *bio, struct page *page); +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline int bio_associate_blkcg_from_page(struct bio *bio, + struct page *page) { return 0; } #endif #ifdef CONFIG_BLK_CGROUP +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); -int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css); -int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); -int bio_reassociate_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkg_association(struct bio *dst, struct bio *src); +void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ return 0; } -static inline int bio_associate_create_blkg(struct request_queue *q, - struct bio *bio) { return 0; } -static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) -{ return 0; } +static inline int bio_associate_blkcg(struct bio *bio, + struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkcg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1e76ceebeb5d..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -126,7 +126,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - struct percpu_ref refcnt; + atomic_t refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); -struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -/** - * __bio_blkcg - internal version of bio_blkcg for bfq and cfq - * - * DO NOT USE. - * There is a flaw using this version of the function. In particular, this was - * used in a broken paradigm where association was called on the given css. It - * is possible though that the returned css from task_css() is in the process - * of dying due to migration of the current task. So it is improper to assume - * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will - * take additional work to handle more gracefully. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - -/** - * bio_blkcg - grab the blkcg associated with a bio - * @bio: target bio - * - * This returns the blkcg associated with a bio, NULL if not associated. - * Callers are expected to either handle NULL or know association has been - * done prior to calling this. - */ static inline struct blkcg *bio_blkcg(struct bio *bio) { - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return NULL; + struct cgroup_subsys_state *css; + + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + css = kthread_blkcg(); + if (css) + return css_to_blkcg(css); + return css_to_blkcg(task_css(current, io_cgrp_id)); } static inline bool blk_cgroup_congested(void) @@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - percpu_ref_get(&blkg->refcnt); + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + atomic_inc(&blkg->refcnt); } /** - * blkg_tryget - try and get a blkg reference + * blkg_try_get - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) +static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) { - return percpu_ref_tryget(&blkg->refcnt); + if (atomic_inc_not_zero(&blkg->refcnt)) + return blkg; + return NULL; } -/** - * blkg_tryget_closest - try and get a blkg ref on the closet blkg - * @blkg: blkg to get - * - * This walks up the blkg tree to find the closest non-dying blkg and returns - * the blkg that it did association with as it may not be the passed in blkg. - */ -static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) -{ - while (!percpu_ref_tryget(&blkg->refcnt)) - blkg = blkg->parent; - return blkg; -} +void __blkg_release_rcu(struct rcu_head *rcu); /** * blkg_put - put a blkg reference @@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) */ static inline void blkg_put(struct blkcg_gq *blkg) { - percpu_ref_put(&blkg->refcnt); + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + if (atomic_dec_and_test(&blkg->refcnt)) + call_rcu(&blkg->rcu_head, __blkg_release_rcu); } /** @@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); - if (bio && bio->bi_blkg) { - blkcg = bio->bi_blkg->blkcg; - if (blkcg == &blkcg_root) - goto rl_use_root; - - blkg_get(bio->bi_blkg); - rcu_read_unlock(); - return &bio->bi_blkg->rl; - } + blkcg = bio_blkcg(bio); - blkcg = css_to_blkcg(blkcg_css()); + /* bypass blkg lookup and use @q->root_rl directly for root */ if (blkcg == &blkcg_root) - goto rl_use_root; + goto root_rl; + /* + * Try to use blkg->rl. blkg lookup may fail under memory pressure + * or if either the blkcg or queue is going away. Fall back to + * root_rl in such cases. + */ blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) - blkg = __blkg_lookup_create(blkcg, q); - - if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg)) - goto rl_use_root; + goto root_rl; + blkg_get(blkg); rcu_read_unlock(); return &blkg->rl; - - /* - * Each blkg has its own request_list, however, the root blkcg - * uses the request_queue's root_rl. This is to avoid most - * overhead for the root blkcg. - */ -rl_use_root: +root_rl: rcu_read_unlock(); return &q->root_rl; } @@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { + struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); + blkcg = bio_blkcg(bio); + + /* associate blkcg if bio hasn't attached one */ + bio_associate_blkcg(bio, &blkcg->css); - bio_associate_create_blkg(q, bio); - blkg = bio->bi_blkg; + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) { + spin_lock_irq(q->queue_lock); + blkg = blkg_lookup_create(blkcg, q); + if (IS_ERR(blkg)) + blkg = NULL; + spin_unlock_irq(q->queue_lock); + } throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { + blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } - blkcg_bio_issue_init(bio); - rcu_read_unlock(); return !throtl; } @@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, @@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { } static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 093a818c5b68..1dcf652ba0aa 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -178,6 +178,7 @@ struct bio { * release. Read comment on top of bio_associate_current(). */ struct io_context *bi_ioc; + struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9968332cceed..9d12757a65b0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,8 +93,6 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); -struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, - struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/include/linux/fs.h b/include/linux/fs.h index 8252df30b9a1..c95c0807471f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1752,6 +1752,25 @@ struct block_device_operations; #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) +/* + * These flags control the behavior of the remap_file_range function pointer. + * If it is called with len == 0 that means "remap to end of source file". + * See Documentation/filesystems/vfs.txt for more details about this call. + * + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request + */ +#define REMAP_FILE_DEDUP (1 << 0) +#define REMAP_FILE_CAN_SHORTEN (1 << 1) + +/* + * These flags signal that the caller is ok with altering various aspects of + * the behavior of the remap operation. The changes must be made by the + * implementation; the vfs remap helper functions can take advantage of them. + * Flags in this category exist to preserve the quirky behavior of the hoisted + * btrfs clone/dedupe ioctls. + */ +#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) struct iov_iter; @@ -1790,10 +1809,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -1856,21 +1874,21 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, - struct inode *inode_out, loff_t pos_out, - u64 *len, bool is_dedupe); -extern int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same); +extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, + unsigned int remap_flags); +extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); +extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, - u64 len); +extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len, unsigned int remap_flags); struct super_operations { @@ -2998,6 +3016,9 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +extern int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 24bcc5eec6b4..76f8db0b0e71 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + int node); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ - alloc_pages(gfp_mask, order) -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) + alloc_pages_vma(gfp_mask, 0, vma, addr, node) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..bac395f1d00a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index f35c7bf76143..0096a05395e3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #ifdef CONFIG_TREE_SRCU #define _SRCU_NOTIFIER_HEAD(name, mod) \ - static DEFINE_PER_CPU(struct srcu_data, \ - name##_head_srcu_data); \ + static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 738a0c24874f..fdfd04e348f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. Must be called after the bio has been associated with - * a device. + * writeback context. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); + bio_associate_blkcg(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8b79318810ad..6aaf5dd5383b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, } /** - * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss + * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * @@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. */ -static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); @@ -524,35 +524,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, } /** - * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem - * @cgrp: the cgroup of interest - * @ss: the subsystem of interest - * - * Find and get the effective css of @cgrp for @ss. The effective css is - * defined as the matching css of the nearest ancestor including self which - * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, - * the root css is returned, so this function always returns a valid css. - * - * The returned css is not guaranteed to be online, and therefore it is the - * callers responsiblity to tryget a reference for it. - */ -struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) -{ - struct cgroup_subsys_state *css; - - do { - css = cgroup_css(cgrp, ss); - - if (css) - return css; - cgrp = cgroup_parent(cgrp); - } while (cgrp); - - return init_css_set.subsys[ss->id]; -} - -/** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest @@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css); * * Should be called under cgroup_[tree_]mutex. */ -#define for_each_e_css(css, ssid, cgrp) \ - for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ - if (!((css) = cgroup_e_css_by_mask(cgrp, \ - cgroup_subsys[(ssid)]))) \ - ; \ +#define for_each_e_css(css, ssid, cgrp) \ + for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ + if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ + ; \ else /** @@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ - template[i] = cgroup_e_css_by_mask(cgrp, ss); + template[i] = cgroup_e_css(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want @@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) return ret; /* - * At this point, cgroup_e_css_by_mask() results reflect the new csses + * At this point, cgroup_e_css() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. */ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index c6a3b6851372..35cf0ad29718 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -25,8 +25,6 @@ #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/kernel.h> -#include <linux/kexec.h> -#include <linux/slab.h> #include <linux/syscalls.h> #include <linux/vmalloc.h> #include "kexec_internal.h" diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3ae223f7b5df..5fc724e4e454 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -66,7 +66,6 @@ #include <linux/kexec.h> #include <linux/bpf.h> #include <linux/mount.h> -#include <linux/pipe_fs_i.h> #include <linux/uaccess.h> #include <asm/processor.h> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fac0ddf8a8e2..2868d85f1fb1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; - if (!bio->bi_blkg) + if (!bio->bi_css) return NULL; - return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); + return cgroup_get_kernfs_id(bio->bi_css->cgroup); } #else static union kernfs_node_id * diff --git a/mm/filemap.c b/mm/filemap.c index 1fe6c4c37a35..81adec8ee02c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2825,6 +2825,42 @@ struct page *read_cache_page_gfp(struct address_space *mapping, EXPORT_SYMBOL(read_cache_page_gfp); /* + * Don't operate on ranges the page cache doesn't support, and don't exceed the + * LFS limits. If pos is under the limit it becomes a short access. If it + * exceeds the limit we return -EFBIG. + */ +static int generic_access_check_limits(struct file *file, loff_t pos, + loff_t *count) +{ + struct inode *inode = file->f_mapping->host; + loff_t max_size = inode->i_sb->s_maxbytes; + + if (!(file->f_flags & O_LARGEFILE)) + max_size = MAX_NON_LFS; + + if (unlikely(pos >= max_size)) + return -EFBIG; + *count = min(*count, max_size - pos); + return 0; +} + +static int generic_write_check_limits(struct file *file, loff_t pos, + loff_t *count) +{ + loff_t limit = rlimit(RLIMIT_FSIZE); + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + *count = min(*count, limit - pos); + } + + return generic_access_check_limits(file, pos, count); +} + +/* * Performs necessary checks before doing a write * * Can adjust writing position or amount of bytes to write. @@ -2835,8 +2871,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - unsigned long limit = rlimit(RLIMIT_FSIZE); - loff_t pos; + loff_t count; + int ret; if (!iov_iter_count(from)) return 0; @@ -2845,43 +2881,99 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_APPEND) iocb->ki_pos = i_size_read(inode); - pos = iocb->ki_pos; - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) return -EINVAL; - if (limit != RLIM_INFINITY) { - if (iocb->ki_pos >= limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - iov_iter_truncate(from, limit - (unsigned long)pos); - } + count = iov_iter_count(from); + ret = generic_write_check_limits(file, iocb->ki_pos, &count); + if (ret) + return ret; + + iov_iter_truncate(from, count); + return iov_iter_count(from); +} +EXPORT_SYMBOL(generic_write_checks); + +/* + * Performs necessary checks before doing a clone. + * + * Can adjust amount of bytes to clone. + * Returns appropriate error code that caller should return or + * zero in case the clone should be allowed. + */ +int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *req_count, unsigned int remap_flags) +{ + struct inode *inode_in = file_in->f_mapping->host; + struct inode *inode_out = file_out->f_mapping->host; + uint64_t count = *req_count; + uint64_t bcount; + loff_t size_in, size_out; + loff_t bs = inode_out->i_sb->s_blocksize; + int ret; + + /* The start of both ranges must be aligned to an fs block. */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs)) + return -EINVAL; + + /* Ensure offsets don't wrap. */ + if (pos_in + count < pos_in || pos_out + count < pos_out) + return -EINVAL; + + size_in = i_size_read(inode_in); + size_out = i_size_read(inode_out); + + /* Dedupe requires both ranges to be within EOF. */ + if ((remap_flags & REMAP_FILE_DEDUP) && + (pos_in >= size_in || pos_in + count > size_in || + pos_out >= size_out || pos_out + count > size_out)) + return -EINVAL; + + /* Ensure the infile range is within the infile. */ + if (pos_in >= size_in) + return -EINVAL; + count = min(count, size_in - (uint64_t)pos_in); + + ret = generic_access_check_limits(file_in, pos_in, &count); + if (ret) + return ret; + + ret = generic_write_check_limits(file_out, pos_out, &count); + if (ret) + return ret; /* - * LFS rule + * If the user wanted us to link to the infile's EOF, round up to the + * next block boundary for this check. + * + * Otherwise, make sure the count is also block-aligned, having + * already confirmed the starting offsets' block alignment. */ - if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS && - !(file->f_flags & O_LARGEFILE))) { - if (pos >= MAX_NON_LFS) - return -EFBIG; - iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos); + if (pos_in + count == size_in) { + bcount = ALIGN(size_in, bs) - pos_in; + } else { + if (!IS_ALIGNED(count, bs)) + count = ALIGN_DOWN(count, bs); + bcount = count; } + /* Don't allow overlapped cloning within the same file. */ + if (inode_in == inode_out && + pos_out + bcount > pos_in && + pos_out < pos_in + bcount) + return -EINVAL; + /* - * Are we about to exceed the fs block limit ? - * - * If we have written data it becomes a short write. If we have - * exceeded without writing data we send a signal and return EFBIG. - * Linus frestrict idea will clean these up nicely.. + * We shortened the request but the caller can't deal with that, so + * bounce the request back to userspace. */ - if (unlikely(pos >= inode->i_sb->s_maxbytes)) - return -EFBIG; + if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) + return -EINVAL; - iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos); - return iov_iter_count(from); + *req_count = count; + return 0; } -EXPORT_SYMBOL(generic_write_checks); int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4e4ef8fa479d..55478ab3c83b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -629,21 +629,40 @@ release: * available * never: never stall for any thp allocation */ -static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) +static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr) { const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); + gfp_t this_node = 0; + +#ifdef CONFIG_NUMA + struct mempolicy *pol; + /* + * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not + * specified, to express a general desire to stay on the current + * node for optimistic allocation attempts. If the defrag mode + * and/or madvise hint requires the direct reclaim then we prefer + * to fallback to other node rather than node reclaim because that + * can lead to excessive reclaim even though there is free memory + * on other nodes. We expect that NUMA preferences are specified + * by memory policies. + */ + pol = get_vma_policy(vma, addr); + if (pol->mode != MPOL_BIND) + this_node = __GFP_THISNODE; + mpol_cond_put(pol); +#endif if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; + return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node; if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : - __GFP_KSWAPD_RECLAIM); + __GFP_KSWAPD_RECLAIM | this_node); if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : - 0); - return GFP_TRANSHUGE_LIGHT; + this_node); + return GFP_TRANSHUGE_LIGHT | this_node; } /* Caller must hold page table lock. */ @@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) pte_free(vma->vm_mm, pgtable); return ret; } - gfp = alloc_hugepage_direct_gfpmask(vma); - page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); + gfp = alloc_hugepage_direct_gfpmask(vma, haddr); + page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id()); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) alloc: if (transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) { - huge_gfp = alloc_hugepage_direct_gfpmask(vma); - new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); + huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr); + new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma, + haddr, numa_node_id()); } else new_page = NULL; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 54920cbc46bf..6e1469b80cb7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) struct mem_cgroup *memcg; int ret = 0; - if (memcg_kmem_bypass()) + if (mem_cgroup_disabled() || memcg_kmem_bypass()) return 0; memcg = get_mem_cgroup_from_current(); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 61972da38d93..2b2b3ccbbfb5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + cond_resched(); ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, altmap); map_offset = 0; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cfd26d7e61a1..5837a067124d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start) } else if (PageTransHuge(page)) { struct page *thp; - thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); + thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, + address, numa_node_id()); if (!thp) return NULL; prep_transhuge_page(thp); @@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. */ -static struct mempolicy *get_vma_policy(struct vm_area_struct *vma, +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = __get_vma_policy(vma, addr); @@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual Address of the allocation. Must be inside the VMA. * @node: Which node to prefer for allocation (modulo policy). - * @hugepage: for hugepages try only the preferred node if possible * * This function allocates a page from the kernel page pool and applies * a NUMA policy associated with the VMA or the current process. @@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, */ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node, bool hugepage) + unsigned long addr, int node) { struct mempolicy *pol; struct page *page; @@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, goto out; } - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { - int hpage_node = node; - - /* - * For hugepage allocation and non-interleave policy which - * allows the current node (or other explicitly preferred - * node) we only try to allocate from the current/preferred - * node and don't fall back to other nodes, as the cost of - * remote accesses would likely offset THP benefits. - * - * If the policy is interleave, or does not allow the current - * node in its nodemask, we allocate the standard way. - */ - if (pol->mode == MPOL_PREFERRED && - !(pol->flags & MPOL_F_LOCAL)) - hpage_node = pol->v.preferred_node; - - nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(hpage_node, *nmask)) { - mpol_cond_put(pol); - page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); - goto out; - } - } - nmask = policy_nodemask(gfp, pol); preferred_nid = policy_node(gfp, pol, node); page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); diff --git a/mm/page_io.c b/mm/page_io.c index 27b835728442..d4d1c89bcddd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, goto out; } bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); - bio_associate_blkg_from_page(bio, page); + bio_associate_blkcg_from_page(bio, page); count_swpout_vm_event(page); set_page_writeback(page); unlock_page(page); diff --git a/mm/shmem.c b/mm/shmem.c index 56bf122e0bb4..ea26d7a0342d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, shmem_pseudo_vma_init(&pvma, info, hindex); page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, - HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); + HPAGE_PMD_ORDER, &pvma, 0, numa_node_id()); shmem_pseudo_vma_destroy(&pvma); if (page) prep_transhuge_page(page); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 91981970f542..b1d39cabf125 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -329,7 +329,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, if (!iov_iter_count(data)) return 0; - if (iov_iter_is_kvec(data)) { + if (!iov_iter_is_kvec(data)) { int n; /* * We allow only p9_max_pages pinned. We wait for the diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index ca21a35fa244..bb015551c2d9 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -140,17 +140,9 @@ cc-option-yn = $(call try-run,\ cc-disable-warning = $(call try-run,\ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -# cc-name -# Expands to either gcc or clang -cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc) - # cc-version cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC)) -# cc-fullversion -cc-fullversion = $(shell $(CONFIG_SHELL) \ - $(srctree)/scripts/gcc-version.sh -p $(CC)) - # cc-ifversion # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index cf6cd0ef6975..768306add591 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -65,7 +65,7 @@ endif KBUILD_CFLAGS += $(warning) else -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) KBUILD_CFLAGS += $(call cc-disable-warning, format) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 67ed9f6ccdf8..63b609243d03 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -68,21 +68,7 @@ PHONY += $(simple-targets) $(simple-targets): $(obj)/conf $< $(silent) --$@ $(Kconfig) -PHONY += oldnoconfig silentoldconfig savedefconfig defconfig - -# oldnoconfig is an alias of olddefconfig, because people already are dependent -# on its behavior (sets new symbols to their default value but not 'n') with the -# counter-intuitive name. -oldnoconfig: olddefconfig - @echo " WARNING: \"oldnoconfig\" target will be removed after Linux 4.19" - @echo " Please use \"olddefconfig\" instead, which is an alias." - -# We do not expect manual invokcation of "silentoldcofig" (or "syncconfig"). -silentoldconfig: syncconfig - @echo " WARNING: \"silentoldconfig\" has been renamed to \"syncconfig\"" - @echo " and is now an internal implementation detail." - @echo " What you want is probably \"oldconfig\"." - @echo " \"silentoldconfig\" will be removed after Linux 4.19" +PHONY += savedefconfig defconfig savedefconfig: $(obj)/conf $< $(silent) --$@=defconfig $(Kconfig) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 7b2b37260669..98e0c7a34699 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -460,12 +460,6 @@ static struct option long_opts[] = { {"randconfig", no_argument, NULL, randconfig}, {"listnewconfig", no_argument, NULL, listnewconfig}, {"olddefconfig", no_argument, NULL, olddefconfig}, - /* - * oldnoconfig is an alias of olddefconfig, because people already - * are dependent on its behavior(sets new symbols to their default - * value but not 'n') with the counter-intuitive name. - */ - {"oldnoconfig", no_argument, NULL, olddefconfig}, {NULL, 0, NULL, 0} }; @@ -480,7 +474,6 @@ static void conf_usage(const char *progname) printf(" --syncconfig Similar to oldconfig but generates configuration in\n" " include/{generated/,config/}\n"); printf(" --olddefconfig Same as oldconfig but sets new symbols to their default value\n"); - printf(" --oldnoconfig An alias of olddefconfig\n"); printf(" --defconfig <file> New config with default defined in <file>\n"); printf(" --savedefconfig <file> Save the minimal current configuration to <file>\n"); printf(" --allnoconfig New config where all options are answered with no\n"); diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 67d131447631..da66e7742282 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -33,12 +33,15 @@ usage() { echo " -n use allnoconfig instead of alldefconfig" echo " -r list redundant entries when merging fragments" echo " -O dir to put generated output files. Consider setting \$KCONFIG_CONFIG instead." + echo + echo "Used prefix: '$CONFIG_PREFIX'. You can redefine it with \$CONFIG_ environment variable." } RUNMAKE=true ALLTARGET=alldefconfig WARNREDUN=false OUTPUT=. +CONFIG_PREFIX=${CONFIG_-CONFIG_} while true; do case $1 in @@ -99,7 +102,8 @@ if [ ! -r "$INITFILE" ]; then fi MERGE_LIST=$* -SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(CONFIG_[a-zA-Z0-9_]*\)[= ].*/\2/p" +SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p" + TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX) echo "Using $INITFILE as base" diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index e09fe4d7307c..8963203319ea 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1742,7 +1742,7 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry) if (error) return error; - parent = aa_get_ns(dir->i_private); + parent = aa_get_ns(dir->i_private); /* rmdir calls the generic securityfs functions to remove files * from the apparmor dir. It is up to the apparmor ns locking * to avoid races. diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 4285943f7260..d0afed9ebd0e 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -496,7 +496,7 @@ static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label, /* update caching of label on file_ctx */ spin_lock(&fctx->lock); old = rcu_dereference_protected(fctx->label, - spin_is_locked(&fctx->lock)); + lockdep_is_held(&fctx->lock)); l = aa_label_merge(old, label, GFP_ATOMIC); if (l) { if (l != old) { diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index e287b7d0d4be..265ae6641a06 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -151,6 +151,8 @@ static inline struct aa_label *begin_current_label_crit_section(void) { struct aa_label *label = aa_current_raw_label(); + might_sleep(); + if (label_is_stale(label)) { label = aa_get_newest_label(label); if (aa_replace_current_label(label) == 0) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index ec7228e857a9..7334ac966d01 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -83,6 +83,13 @@ struct aa_sk_ctx { __e; \ }) +struct aa_secmark { + u8 audit; + u8 deny; + u32 secid; + char *label; +}; + extern struct aa_sfs_entry aa_sfs_entry_network[]; void audit_net_cb(struct audit_buffer *ab, void *va); @@ -103,4 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk); int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, struct socket *sock); +int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, + u32 secid, struct sock *sk); + #endif /* __AA_NET_H */ diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index ab64c6b5db5a..8e6707c837be 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -155,6 +155,9 @@ struct aa_profile { struct aa_rlimit rlimits; + int secmark_count; + struct aa_secmark *secmark; + struct aa_loaddata *rawdata; unsigned char *hash; char *dirname; diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h index dee6fa3b6081..fa2062711b63 100644 --- a/security/apparmor/include/secid.h +++ b/security/apparmor/include/secid.h @@ -22,6 +22,9 @@ struct aa_label; /* secid value that will not be allocated */ #define AA_SECID_INVALID 0 +/* secid value that matches any other secid */ +#define AA_SECID_WILDCARD 1 + struct aa_label *aa_secid_to_label(u32 secid); int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 974affe50531..76491e7f4177 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); - if (!name) - return NULL; *ns_name = NULL; *ns_len = 0; + + if (!name) + return NULL; + if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index aa35939443c4..42446a216f3b 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -23,6 +23,8 @@ #include <linux/sysctl.h> #include <linux/audit.h> #include <linux/user_namespace.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> #include <net/sock.h> #include "include/apparmor.h" @@ -114,13 +116,13 @@ static int apparmor_ptrace_access_check(struct task_struct *child, struct aa_label *tracer, *tracee; int error; - tracer = begin_current_label_crit_section(); + tracer = __begin_current_label_crit_section(); tracee = aa_get_task_label(child); error = aa_may_ptrace(tracer, tracee, (mode & PTRACE_MODE_READ) ? AA_PTRACE_READ : AA_PTRACE_TRACE); aa_put_label(tracee); - end_current_label_crit_section(tracer); + __end_current_label_crit_section(tracer); return error; } @@ -130,11 +132,11 @@ static int apparmor_ptrace_traceme(struct task_struct *parent) struct aa_label *tracer, *tracee; int error; - tracee = begin_current_label_crit_section(); + tracee = __begin_current_label_crit_section(); tracer = aa_get_task_label(parent); error = aa_may_ptrace(tracer, tracee, AA_PTRACE_TRACE); aa_put_label(tracer); - end_current_label_crit_section(tracee); + __end_current_label_crit_section(tracee); return error; } @@ -1020,6 +1022,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how) return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); } +#ifdef CONFIG_NETWORK_SECMARK /** * apparmor_socket_sock_recv_skb - check perms before associating skb to sk * @@ -1030,8 +1033,15 @@ static int apparmor_socket_shutdown(struct socket *sock, int how) */ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { - return 0; + struct aa_sk_ctx *ctx = SK_CTX(sk); + + if (!skb->secmark) + return 0; + + return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, + skb->secmark, sk); } +#endif static struct aa_label *sk_peer_label(struct sock *sk) @@ -1126,6 +1136,20 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent) ctx->label = aa_get_current_label(); } +#ifdef CONFIG_NETWORK_SECMARK +static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct aa_sk_ctx *ctx = SK_CTX(sk); + + if (!skb->secmark) + return 0; + + return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT, + skb->secmark, sk); +} +#endif + static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -1177,12 +1201,17 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), +#ifdef CONFIG_NETWORK_SECMARK LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), +#endif LSM_HOOK_INIT(socket_getpeersec_stream, apparmor_socket_getpeersec_stream), LSM_HOOK_INIT(socket_getpeersec_dgram, apparmor_socket_getpeersec_dgram), LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), +#ifdef CONFIG_NETWORK_SECMARK + LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request), +#endif LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), @@ -1538,6 +1567,97 @@ static inline int apparmor_init_sysctl(void) } #endif /* CONFIG_SYSCTL */ +#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK) +static unsigned int apparmor_ip_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct aa_sk_ctx *ctx; + struct sock *sk; + + if (!skb->secmark) + return NF_ACCEPT; + + sk = skb_to_full_sk(skb); + if (sk == NULL) + return NF_ACCEPT; + + ctx = SK_CTX(sk); + if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND, + skb->secmark, sk)) + return NF_ACCEPT; + + return NF_DROP_ERR(-ECONNREFUSED); + +} + +static unsigned int apparmor_ipv4_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return apparmor_ip_postroute(priv, skb, state); +} + +static unsigned int apparmor_ipv6_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return apparmor_ip_postroute(priv, skb, state); +} + +static const struct nf_hook_ops apparmor_nf_ops[] = { + { + .hook = apparmor_ipv4_postroute, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_SELINUX_FIRST, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .hook = apparmor_ipv6_postroute, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_SELINUX_FIRST, + }, +#endif +}; + +static int __net_init apparmor_nf_register(struct net *net) +{ + int ret; + + ret = nf_register_net_hooks(net, apparmor_nf_ops, + ARRAY_SIZE(apparmor_nf_ops)); + return ret; +} + +static void __net_exit apparmor_nf_unregister(struct net *net) +{ + nf_unregister_net_hooks(net, apparmor_nf_ops, + ARRAY_SIZE(apparmor_nf_ops)); +} + +static struct pernet_operations apparmor_net_ops = { + .init = apparmor_nf_register, + .exit = apparmor_nf_unregister, +}; + +static int __init apparmor_nf_ip_init(void) +{ + int err; + + if (!apparmor_enabled) + return 0; + + err = register_pernet_subsys(&apparmor_net_ops); + if (err) + panic("Apparmor: register_pernet_subsys: error %d\n", err); + + return 0; +} +__initcall(apparmor_nf_ip_init); +#endif + static int __init apparmor_init(void) { int error; diff --git a/security/apparmor/net.c b/security/apparmor/net.c index bb24cfa0a164..c07fde444792 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -18,6 +18,7 @@ #include "include/label.h" #include "include/net.h" #include "include/policy.h" +#include "include/secid.h" #include "net_names.h" @@ -146,17 +147,20 @@ int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, struct sock *sk) { - struct aa_profile *profile; - DEFINE_AUDIT_SK(sa, op, sk); + int error = 0; AA_BUG(!label); AA_BUG(!sk); - if (unconfined(label)) - return 0; + if (!unconfined(label)) { + struct aa_profile *profile; + DEFINE_AUDIT_SK(sa, op, sk); - return fn_for_each_confined(label, profile, - aa_profile_af_sk_perm(profile, &sa, request, sk)); + error = fn_for_each_confined(label, profile, + aa_profile_af_sk_perm(profile, &sa, request, sk)); + } + + return error; } int aa_sk_perm(const char *op, u32 request, struct sock *sk) @@ -185,3 +189,70 @@ int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, return aa_label_sk_perm(label, op, request, sock->sk); } + +#ifdef CONFIG_NETWORK_SECMARK +static int apparmor_secmark_init(struct aa_secmark *secmark) +{ + struct aa_label *label; + + if (secmark->label[0] == '*') { + secmark->secid = AA_SECID_WILDCARD; + return 0; + } + + label = aa_label_strn_parse(&root_ns->unconfined->label, + secmark->label, strlen(secmark->label), + GFP_ATOMIC, false, false); + + if (IS_ERR(label)) + return PTR_ERR(label); + + secmark->secid = label->secid; + + return 0; +} + +static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, + struct common_audit_data *sa, struct sock *sk) +{ + int i, ret; + struct aa_perms perms = { }; + + if (profile->secmark_count == 0) + return 0; + + for (i = 0; i < profile->secmark_count; i++) { + if (!profile->secmark[i].secid) { + ret = apparmor_secmark_init(&profile->secmark[i]); + if (ret) + return ret; + } + + if (profile->secmark[i].secid == secid || + profile->secmark[i].secid == AA_SECID_WILDCARD) { + if (profile->secmark[i].deny) + perms.deny = ALL_PERMS_MASK; + else + perms.allow = ALL_PERMS_MASK; + + if (profile->secmark[i].audit) + perms.audit = ALL_PERMS_MASK; + } + } + + aa_apply_modes_to_perms(profile, &perms); + + return aa_check_perms(profile, &perms, request, sa, audit_net_cb); +} + +int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, + u32 secid, struct sock *sk) +{ + struct aa_profile *profile; + DEFINE_AUDIT_SK(sa, op, sk); + + return fn_for_each_confined(label, profile, + aa_secmark_perm(profile, request, secid, + &sa, sk)); +} +#endif diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 1590e2de4e84..df9c5890a878 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -231,6 +231,9 @@ void aa_free_profile(struct aa_profile *profile) for (i = 0; i < profile->xattr_count; i++) kzfree(profile->xattrs[i]); kzfree(profile->xattrs); + for (i = 0; i < profile->secmark_count; i++) + kzfree(profile->secmark[i].label); + kzfree(profile->secmark); kzfree(profile->dirname); aa_put_dfa(profile->xmatch); aa_put_dfa(profile->policy.dfa); diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 21cb384d712a..379682e2a8d5 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -292,6 +292,19 @@ fail: return 0; } +static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name) +{ + if (unpack_nameX(e, AA_U8, name)) { + if (!inbounds(e, sizeof(u8))) + return 0; + if (data) + *data = get_unaligned((u8 *)e->pos); + e->pos += sizeof(u8); + return 1; + } + return 0; +} + static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) { if (unpack_nameX(e, AA_U32, name)) { @@ -529,6 +542,49 @@ fail: return 0; } +static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile) +{ + void *pos = e->pos; + int i, size; + + if (unpack_nameX(e, AA_STRUCT, "secmark")) { + size = unpack_array(e, NULL); + + profile->secmark = kcalloc(size, sizeof(struct aa_secmark), + GFP_KERNEL); + if (!profile->secmark) + goto fail; + + profile->secmark_count = size; + + for (i = 0; i < size; i++) { + if (!unpack_u8(e, &profile->secmark[i].audit, NULL)) + goto fail; + if (!unpack_u8(e, &profile->secmark[i].deny, NULL)) + goto fail; + if (!unpack_strdup(e, &profile->secmark[i].label, NULL)) + goto fail; + } + if (!unpack_nameX(e, AA_ARRAYEND, NULL)) + goto fail; + if (!unpack_nameX(e, AA_STRUCTEND, NULL)) + goto fail; + } + + return 1; + +fail: + if (profile->secmark) { + for (i = 0; i < size; i++) + kfree(profile->secmark[i].label); + kfree(profile->secmark); + profile->secmark_count = 0; + } + + e->pos = pos; + return 0; +} + static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) { void *pos = e->pos; @@ -727,6 +783,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } + if (!unpack_secmark(e, profile)) { + info = "failed to unpack profile secmark rules"; + goto fail; + } + if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c index 4ccec1bcf6f5..05373d9a3d6a 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -32,8 +32,7 @@ * secids - do not pin labels with a refcount. They rely on the label * properly updating/freeing them */ - -#define AA_FIRST_SECID 1 +#define AA_FIRST_SECID 2 static DEFINE_IDR(aa_secids); static DEFINE_SPINLOCK(secid_lock); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index fcd965f1d69e..9be76c808fcc 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -146,53 +146,22 @@ static int apply_constraint_to_size(struct snd_pcm_hw_params *params, struct snd_interval *s = hw_param_interval(params, rule->var); const struct snd_interval *r = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); - struct snd_interval t = { - .min = s->min, .max = s->max, .integer = 1, - }; + struct snd_interval t = {0}; + unsigned int step = 0; int i; for (i = 0; i < CIP_SFC_COUNT; ++i) { - unsigned int rate = amdtp_rate_table[i]; - unsigned int step = amdtp_syt_intervals[i]; - - if (!snd_interval_test(r, rate)) - continue; - - t.min = roundup(t.min, step); - t.max = rounddown(t.max, step); + if (snd_interval_test(r, amdtp_rate_table[i])) + step = max(step, amdtp_syt_intervals[i]); } - if (snd_interval_checkempty(&t)) - return -EINVAL; + t.min = roundup(s->min, step); + t.max = rounddown(s->max, step); + t.integer = 1; return snd_interval_refine(s, &t); } -static int apply_constraint_to_rate(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_interval *r = - hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); - const struct snd_interval *s = hw_param_interval_c(params, rule->deps[0]); - struct snd_interval t = { - .min = UINT_MAX, .max = 0, .integer = 1, - }; - int i; - - for (i = 0; i < CIP_SFC_COUNT; ++i) { - unsigned int step = amdtp_syt_intervals[i]; - unsigned int rate = amdtp_rate_table[i]; - - if (s->min % step || s->max % step) - continue; - - t.min = min(t.min, rate); - t.max = max(t.max, rate); - } - - return snd_interval_refine(r, &t); -} - /** * amdtp_stream_add_pcm_hw_constraints - add hw constraints for PCM substream * @s: the AMDTP stream, which must be initialized. @@ -250,24 +219,16 @@ int amdtp_stream_add_pcm_hw_constraints(struct amdtp_stream *s, */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, apply_constraint_to_size, NULL, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto end; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - apply_constraint_to_rate, NULL, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); - if (err < 0) - goto end; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, apply_constraint_to_size, NULL, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto end; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - apply_constraint_to_rate, NULL, - SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1); - if (err < 0) - goto end; end: return err; } diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c index 0f6dbcffe711..ed50b222d36e 100644 --- a/sound/firewire/dice/dice.c +++ b/sound/firewire/dice/dice.c @@ -240,8 +240,8 @@ static void dice_remove(struct fw_unit *unit) cancel_delayed_work_sync(&dice->dwork); if (dice->registered) { - /* No need to wait for releasing card object in this context. */ - snd_card_free_when_closed(dice->card); + // Block till all of ALSA character devices are released. + snd_card_free(dice->card); } mutex_destroy(&dice->mutex); diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h index 04402c14cb23..9847b669cf3c 100644 --- a/sound/pci/ca0106/ca0106.h +++ b/sound/pci/ca0106/ca0106.h @@ -582,7 +582,7 @@ #define SPI_PL_BIT_R_R (2<<7) /* right channel = right */ #define SPI_PL_BIT_R_C (3<<7) /* right channel = (L+R)/2 */ #define SPI_IZD_REG 2 -#define SPI_IZD_BIT (1<<4) /* infinite zero detect */ +#define SPI_IZD_BIT (0<<4) /* infinite zero detect */ #define SPI_FMT_REG 3 #define SPI_FMT_BIT_RJ (0<<0) /* right justified mode */ diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index ede4d3dae750..689f6c8ebcd8 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -1,12 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := cache_shape - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c +TEST_GEN_PROGS := cache_shape top_srcdir = ../../../../.. include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index bd5dfa509272..23f4caf48ffc 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -5,6 +5,9 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 +# Toolchains may build PIE by default which breaks the assembly +LDFLAGS += -no-pie + TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 9b35ca8e8f13..8d3f006c98cc 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ +TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ perf-hwbreak ptrace-syscall @@ -7,14 +7,9 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_PROGS) - CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie -ptrace-pkey core-pkey: child.h -ptrace-pkey core-pkey: LDLIBS += -pthread - -$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h +$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h +$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 327fa943c7f3..dbdffa2e2c82 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -67,8 +67,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4), - [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a), - [flt_2] "r" (&b), [flt_4] "r" (&d) + [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a), + [flt_4] "b" (&d) : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 44690f1bb26a..85861c46b445 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ TEST_GEN_PROGS := rfi_flush +top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 564ed45bbf73..0a7d0afb26b8 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -49,6 +49,7 @@ int rfi_flush_test(void) struct perf_event_read v; __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; int rfi_flush_org, rfi_flush; SKIP_IF(geteuid() != 0); @@ -71,6 +72,12 @@ int rfi_flush_test(void) iter = repetitions; + /* + * We expect to see l1d miss for each cacheline access when rfi_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + again: FAIL_IF(perf_event_reset(fd)); @@ -78,10 +85,9 @@ again: FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); - /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */ - if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush) + if (rfi_flush && v.l1d_misses >= l1d_misses_expected) passes++; - else if (v.l1d_misses < iterations && !rfi_flush) + else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2)) passes++; l1d_misses_total += v.l1d_misses; @@ -92,13 +98,15 @@ again: if (passes < repetitions) { printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n", rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>', - rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations, + rfi_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, repetitions - passes, repetitions); rc = 1; } else printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n", rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<', - rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations, + rfi_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, passes, repetitions); if (rfi_flush == rfi_flush_org) { diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 1fca25c6ace0..209a958dca12 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,15 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := signal signal_tm - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c signal.S +TEST_GEN_PROGS := signal signal_tm CFLAGS += -maltivec -signal_tm: CFLAGS += -mhtm +$(OUTPUT)/signal_tm: CFLAGS += -mhtm top_srcdir = ../../../../.. include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index fcd2dcb8972b..bdc081afedb0 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -8,6 +8,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 43c342845be0..ed62f4153d3e 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -25,7 +25,6 @@ #include "utils.h" static char auxv[4096]; -extern unsigned int dscr_insn[]; int read_auxv(char *buf, ssize_t buf_size) { @@ -247,7 +246,8 @@ static void sigill_handler(int signr, siginfo_t *info, void *unused) ucontext_t *ctx = (ucontext_t *)unused; unsigned long *pc = &UCONTEXT_NIA(ctx); - if (*pc == (unsigned long)&dscr_insn) { + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { if (!warned++) printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); *pc += 4; @@ -271,5 +271,5 @@ void set_dscr(unsigned long val) init = 1; } - asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); } |