From 4e55d28084cc1c94c62b63ece1eeeeb29dc4941e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 14 Jul 2018 12:52:10 -0400 Subject: net: 6lowpan: fix reserved space for single frames commit ac74f87c789af40936a80131c4759f3e72579c3a upstream. This patch fixes patch add handling to take care tail and headroom for single 6lowpan frames. We need to be sure we have a skb with the right head and tailroom for single frames. This patch do it by using skb_copy_expand() if head and tailroom is not enough allocated by upper layer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195059 Reported-by: David Palma Reported-by: Rabi Narayan Sahoo Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/tx.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index d4353faced35..a10db45b2e1e 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -265,9 +265,24 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) /* We must take a copy of the skb before we modify/replace the ipv6 * header as the header could be used elsewhere */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; + if (unlikely(skb_headroom(skb) < ldev->needed_headroom || + skb_tailroom(skb) < ldev->needed_tailroom)) { + struct sk_buff *nskb; + + nskb = skb_copy_expand(skb, ldev->needed_headroom, + ldev->needed_tailroom, GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + kfree_skb(skb); + return NET_XMIT_DROP; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + } ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset); if (ret < 0) { -- cgit v1.2.3 From aeca800e562ddca7449f8d39732f4cd2b5a41dce Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Jul 2018 16:32:03 -0400 Subject: net: mac802154: tx: expand tailroom if necessary commit f9c52831133050c6b82aa8b6831c92da2bbf2a0b upstream. This patch is necessary if case of AF_PACKET or other socket interface which I am aware of it and didn't allocated the necessary room. Reported-by: David Palma Reported-by: Rabi Narayan Sahoo Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/mac802154/tx.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 3827f359b336..9e1ff9d4cf2d 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -72,8 +72,21 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) int ret; if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) { - u16 crc = crc_ccitt(0, skb->data, skb->len); + struct sk_buff *nskb; + u16 crc; + + if (unlikely(skb_tailroom(skb) < IEEE802154_FCS_LEN)) { + nskb = skb_copy_expand(skb, 0, IEEE802154_FCS_LEN, + GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + goto err_tx; + } + } + crc = crc_ccitt(0, skb->data, skb->len); put_unaligned_le16(crc, skb_put(skb, 2)); } -- cgit v1.2.3 From c9f7c99fc92dc8a36a7d8f2a8af0457bc9aa20a0 Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Mon, 16 Jul 2018 17:35:29 -0700 Subject: 9p/net: Fix zero-copy path in the 9p virtio transport commit d28c756caee6e414d9ba367d0b92da24145af2a8 upstream. The zero-copy optimization when reading or writing large chunks of data is quite useful. However, the 9p messages created through the zero-copy write path have an incorrect message size: it should be the size of the header + size of the data being written but instead it's just the size of the header. This only works if the server ignores the size field of the message and otherwise breaks the framing of the protocol. Fix this by re-writing the message size field with the correct value. Tested by running `dd if=/dev/zero of=out bs=4k count=1` inside a virtio-9p mount. Link: http://lkml.kernel.org/r/20180717003529.114368-1-chirantan@chromium.org Signed-off-by: Chirantan Ekbote Reviewed-by: Greg Kurz Tested-by: Greg Kurz Cc: Dylan Reid Cc: Guenter Roeck Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2ddeecca5b12..cb2276b91b3c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -409,6 +409,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, p9_debug(P9_DEBUG_TRANS, "virtio request\n"); if (uodata) { + __le32 sz; int n = p9_get_mapped_pages(chan, &out_pages, uodata, outlen, &offs, &need_drop); if (n < 0) @@ -419,6 +420,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4); outlen = n; } + /* The size field of the message must include the length of the + * header and the length of the data. We didn't actually know + * the length of the data until this point so add it in now. + */ + sz = cpu_to_le32(req->tc->size + outlen); + memcpy(&req->tc->sdata[0], &sz, sizeof(sz)); } else if (uidata) { int n = p9_get_mapped_pages(chan, &in_pages, uidata, inlen, &offs, &need_drop); -- cgit v1.2.3 From b4de9ac2c750127c2d8bc9a737eca611405e1171 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 7 Sep 2018 01:13:40 +0100 Subject: net: lan78xx: Fix misplaced tasklet_schedule() call Commit 136f55f66019 ("net: lan78xx: fix rx handling before first packet is send") was not correctly backported to 4.4. The call to tasklet_schedule() belongs in lan78xx_link_reset(). Fixes: d1fc12d8475c ("net: lan78xx: fix rx handling before first packet is send") Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index acec4b565511..1aede726052c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -902,6 +902,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv); netif_carrier_on(dev->net); + + tasklet_schedule(&dev->bh); } return ret; @@ -1361,8 +1363,6 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } - - tasklet_schedule(&dev->bh); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); -- cgit v1.2.3 From c2b736ff27b4c5bdfb7d66559383f851e40ff495 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 10 Aug 2018 11:13:52 +0200 Subject: spi: davinci: fix a NULL pointer dereference commit 563a53f3906a6b43692498e5b3ae891fac93a4af upstream. On non-OF systems spi->controlled_data may be NULL. This causes a NULL pointer derefence on dm365-evm. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index c872a2e54c4b..2603bee2ce07 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -220,7 +220,7 @@ static void davinci_spi_chipselect(struct spi_device *spi, int value) pdata = &dspi->pdata; /* program delay transfers if tx_delay is non zero */ - if (spicfg->wdelay) + if (spicfg && spicfg->wdelay) spidat1 |= SPIDAT1_WDEL; /* -- cgit v1.2.3 From 182e963432d867384f2e55487ec60ca7a9f99cd1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 2 May 2018 20:50:21 +0100 Subject: drm/i915/userptr: reject zero user_size commit c11c7bfd213495784b22ef82a69b6489f8d0092f upstream. Operating on a zero sized GEM userptr object will lead to explosions. Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") Testcase: igt/gem_userptr_blits/input-checking Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180502195021.30900-1-matthew.auld@intel.com Cc: Loic Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 19fb0bddc1cd..359fe2b8bb8a 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -842,6 +842,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; -- cgit v1.2.3 From c9fadf27006ba098fcd46e7e1f0fb1daedce33d4 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 7 Aug 2018 02:12:45 +0530 Subject: powerpc/fadump: handle crash memory ranges array index overflow commit 1bd6a1c4b80a28d975287630644e6b47d0f977a5 upstream. Crash memory ranges is an array of memory ranges of the crashing kernel to be exported as a dump via /proc/vmcore file. The size of the array is set based on INIT_MEMBLOCK_REGIONS, which works alright in most cases where memblock memory regions count is less than INIT_MEMBLOCK_REGIONS value. But this count can grow beyond INIT_MEMBLOCK_REGIONS value since commit 142b45a72e22 ("memblock: Add array resizing support"). On large memory systems with a few DLPAR operations, the memblock memory regions count could be larger than INIT_MEMBLOCK_REGIONS value. On such systems, registering fadump results in crash or other system failures like below: task: c00007f39a290010 ti: c00000000b738000 task.ti: c00000000b738000 NIP: c000000000047df4 LR: c0000000000f9e58 CTR: c00000000010f180 REGS: c00000000b73b570 TRAP: 0300 Tainted: G L X (4.4.140+) MSR: 8000000000009033 CR: 22004484 XER: 20000000 CFAR: c000000000008500 DAR: 000007a450000000 DSISR: 40000000 SOFTE: 0 ... NIP [c000000000047df4] smp_send_reschedule+0x24/0x80 LR [c0000000000f9e58] resched_curr+0x138/0x160 Call Trace: resched_curr+0x138/0x160 (unreliable) check_preempt_curr+0xc8/0xf0 ttwu_do_wakeup+0x38/0x150 try_to_wake_up+0x224/0x4d0 __wake_up_common+0x94/0x100 ep_poll_callback+0xac/0x1c0 __wake_up_common+0x94/0x100 __wake_up_sync_key+0x70/0xa0 sock_def_readable+0x58/0xa0 unix_stream_sendmsg+0x2dc/0x4c0 sock_sendmsg+0x68/0xa0 ___sys_sendmsg+0x2cc/0x2e0 __sys_sendmsg+0x5c/0xc0 SyS_socketcall+0x36c/0x3f0 system_call+0x3c/0x100 as array index overflow is not checked for while setting up crash memory ranges causing memory corruption. To resolve this issue, dynamically allocate memory for crash memory ranges and resize it incrementally, in units of pagesize, on hitting array size limit. Fixes: 2df173d9e85d ("fadump: Initialize elfcore header and add PT_LOAD program headers.") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar [mpe: Just use PAGE_SIZE directly, fixup variable placement] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/fadump.h | 3 -- arch/powerpc/kernel/fadump.c | 92 +++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 493e72f64b35..5768ec3c1781 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -194,9 +194,6 @@ struct fadump_crash_info_header { struct cpumask cpu_online_mask; }; -/* Crash memory ranges */ -#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) - struct fad_crash_memory_ranges { unsigned long long base; unsigned long long size; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 791d4c3329c3..c3c835290131 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -48,8 +49,10 @@ static struct fadump_mem_struct fdm; static const struct fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); -struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +struct fad_crash_memory_ranges *crash_memory_ranges; +int crash_memory_ranges_size; int crash_mem_ranges; +int max_crash_mem_ranges; /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -726,38 +729,88 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return 0; } -static inline void fadump_add_crash_memory(unsigned long long base, - unsigned long long end) +static void free_crash_memory_ranges(void) +{ + kfree(crash_memory_ranges); + crash_memory_ranges = NULL; + crash_memory_ranges_size = 0; + max_crash_mem_ranges = 0; +} + +/* + * Allocate or reallocate crash memory ranges array in incremental units + * of PAGE_SIZE. + */ +static int allocate_crash_memory_ranges(void) +{ + struct fad_crash_memory_ranges *new_array; + u64 new_size; + + new_size = crash_memory_ranges_size + PAGE_SIZE; + pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", + new_size); + + new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); + if (new_array == NULL) { + pr_err("Insufficient memory for setting up crash memory ranges\n"); + free_crash_memory_ranges(); + return -ENOMEM; + } + + crash_memory_ranges = new_array; + crash_memory_ranges_size = new_size; + max_crash_mem_ranges = (new_size / + sizeof(struct fad_crash_memory_ranges)); + return 0; +} + +static inline int fadump_add_crash_memory(unsigned long long base, + unsigned long long end) { if (base == end) - return; + return 0; + + if (crash_mem_ranges == max_crash_mem_ranges) { + int ret; + + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", crash_mem_ranges, base, end - 1, (end - base)); crash_memory_ranges[crash_mem_ranges].base = base; crash_memory_ranges[crash_mem_ranges].size = end - base; crash_mem_ranges++; + return 0; } -static void fadump_exclude_reserved_area(unsigned long long start, +static int fadump_exclude_reserved_area(unsigned long long start, unsigned long long end) { unsigned long long ra_start, ra_end; + int ret = 0; ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; if ((ra_start < end) && (ra_end > start)) { if ((start < ra_start) && (end > ra_end)) { - fadump_add_crash_memory(start, ra_start); - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(start, ra_start); + if (ret) + return ret; + + ret = fadump_add_crash_memory(ra_end, end); } else if (start < ra_start) { - fadump_add_crash_memory(start, ra_start); + ret = fadump_add_crash_memory(start, ra_start); } else if (ra_end < end) { - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(ra_end, end); } } else - fadump_add_crash_memory(start, end); + ret = fadump_add_crash_memory(start, end); + + return ret; } static int fadump_init_elfcore_header(char *bufp) @@ -793,10 +846,11 @@ static int fadump_init_elfcore_header(char *bufp) * Traverse through memblock structure and setup crash memory ranges. These * ranges will be used create PT_LOAD program headers in elfcore header. */ -static void fadump_setup_crash_memory_ranges(void) +static int fadump_setup_crash_memory_ranges(void) { struct memblock_region *reg; unsigned long long start, end; + int ret; pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; @@ -807,7 +861,9 @@ static void fadump_setup_crash_memory_ranges(void) * specified during fadump registration. We need to create a separate * program header for this chunk with the correct offset. */ - fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + if (ret) + return ret; for_each_memblock(memory, reg) { start = (unsigned long long)reg->base; @@ -816,8 +872,12 @@ static void fadump_setup_crash_memory_ranges(void) start = fw_dump.boot_memory_size; /* add this range excluding the reserved dump area. */ - fadump_exclude_reserved_area(start, end); + ret = fadump_exclude_reserved_area(start, end); + if (ret) + return ret; } + + return 0; } /* @@ -941,6 +1001,7 @@ static void register_fadump(void) { unsigned long addr; void *vaddr; + int ret; /* * If no memory is reserved then we can not register for firmware- @@ -949,7 +1010,9 @@ static void register_fadump(void) if (!fw_dump.reserve_dump_area_size) return; - fadump_setup_crash_memory_ranges(); + ret = fadump_setup_crash_memory_ranges(); + if (ret) + return ret; addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); /* Initialize fadump crash info header. */ @@ -1028,6 +1091,7 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fadump_unregister_dump(&fdm); + free_crash_memory_ranges(); } } -- cgit v1.2.3 From fa4cd57290cf0f227e82473550868ddde0d1f074 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 7 Aug 2018 19:46:46 +0530 Subject: powerpc/pseries: Fix endianness while restoring of r3 in MCE handler. commit cd813e1cd7122f2c261dce5b54d1e0c97f80e1a5 upstream. During Machine Check interrupt on pseries platform, register r3 points RTAS extended event log passed by hypervisor. Since hypervisor uses r3 to pass pointer to rtas log, it stores the original r3 value at the start of the memory (first 8 bytes) pointed by r3. Since hypervisor stores this info and rtas log is in BE format, linux should make sure to restore r3 value in correct endian format. Without this patch when MCE handler, after recovery, returns to code that that caused the MCE may end up with Data SLB access interrupt for invalid address followed by kernel panic or hang. Severe Machine check interrupt [Recovered] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel] Initiator: CPU Error type: SLB [Multihit] Effective address: d00000000ca70000 cpu 0xa: Vector: 380 (Data SLB Access) at [c0000000fc7775b0] pc: c0000000009694c0: vsnprintf+0x80/0x480 lr: c0000000009698e0: vscnprintf+0x20/0x60 sp: c0000000fc777830 msr: 8000000002009033 dar: a803a30c000000d0 current = 0xc00000000bc9ef00 paca = 0xc00000001eca5c00 softe: 3 irq_happened: 0x01 pid = 8860, comm = insmod vscnprintf+0x20/0x60 vprintk_emit+0xb4/0x4b0 vprintk_func+0x5c/0xd0 printk+0x38/0x4c init_module+0x1c0/0x338 [bork_kernel] do_one_initcall+0x54/0x230 do_init_module+0x8c/0x248 load_module+0x12b8/0x15b0 sys_finit_module+0xa8/0x110 system_call+0x58/0x6c --- Exception: c00 (System Call) at 00007fff8bda0644 SP (7fffdfbfe980) is in userspace This patch fixes this issue. Fixes: a08a53ea4c97 ("powerpc/le: Enable RTAS events support") Cc: stable@vger.kernel.org # v3.15+ Reviewed-by: Nicholas Piggin Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 3b6647e574b6..f5313a78e5d6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -300,7 +300,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) } savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; -- cgit v1.2.3 From 2c9ffc9d440d31efa23136f5a64eebccfc2ec553 Mon Sep 17 00:00:00 2001 From: piaojun Date: Wed, 25 Jul 2018 11:13:16 +0800 Subject: fs/9p/xattr.c: catch the error of p9_client_clunk when setting xattr failed commit 3111784bee81591ea2815011688d28b65df03627 upstream. In my testing, v9fs_fid_xattr_set will return successfully even if the backend ext4 filesystem has no space to store xattr key-value. That will cause inconsistent behavior between front end and back end. The reason is that lsetxattr will be triggered by p9_client_clunk, and unfortunately we did not catch the error. This patch will catch the error to notify upper caller. p9_client_clunk (in 9p) p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); v9fs_clunk (in qemu) put_fid free_fid v9fs_xattr_fid_clunk v9fs_co_lsetxattr s->ops->lsetxattr ext4_xattr_user_set (in host ext4 filesystem) Link: http://lkml.kernel.org/r/5B57EACC.2060900@huawei.com Signed-off-by: Jun Piao Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index e3d026ac382e..f35168ce426b 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, { struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len}; struct iov_iter from; - int retval; + int retval, err; iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); @@ -128,7 +128,9 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, retval); else p9_client_write(fid, 0, &from, &retval); - p9_client_clunk(fid); + err = p9_client_clunk(fid); + if (!retval && err) + retval = err; return retval; } -- cgit v1.2.3 From 1d2e1e399f86ca085eb85a9f68c20cf4fbf2c79d Mon Sep 17 00:00:00 2001 From: jiangyiwen Date: Fri, 3 Aug 2018 12:11:34 +0800 Subject: 9p/virtio: fix off-by-one error in sg list bounds check commit 23cba9cbde0bba05d772b335fe5f66aa82b9ad19 upstream. Because the value of limit is VIRTQUEUE_NUM, if index is equal to limit, it will cause sg array out of bounds, so correct the judgement of BUG_ON. Link: http://lkml.kernel.org/r/5B63D5F6.6080109@huawei.com Signed-off-by: Yiwen Jiang Reported-By: Dan Carpenter Acked-by: Jun Piao Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cb2276b91b3c..669198ac73db 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -192,7 +192,7 @@ static int pack_sg_list(struct scatterlist *sg, int start, s = rest_of_page(data); if (s > count) s = count; - BUG_ON(index > limit); + BUG_ON(index >= limit); /* Make sure we don't terminate early. */ sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); @@ -237,6 +237,7 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, s = PAGE_SIZE - data_off; if (s > count) s = count; + BUG_ON(index >= limit); /* Make sure we don't terminate early. */ sg_unmark_end(&sg[index]); sg_set_page(&sg[index++], pdata[i++], s, data_off); -- cgit v1.2.3 From 34cc7cf15e16a34447581a5956bf1a434fcb190f Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Tue, 10 Jul 2018 00:29:43 +0200 Subject: net/9p/client.c: version pointer uninitialized commit 7913690dcc5e18e235769fd87c34143072f5dbea upstream. The p9_client_version() does not initialize the version pointer. If the call to p9pdu_readf() returns an error and version has not been allocated in p9pdu_readf(), then the program will jump to the "error" label and will try to free the version pointer. If version is not initialized, free() will be called with uninitialized, garbage data and will provoke a crash. Link: http://lkml.kernel.org/r/20180709222943.19503-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+65c6b72f284a39d416b4@syzkaller.appspotmail.com Reviewed-by: Jun Piao Reviewed-by: Yiwen Jiang Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 3ff26eb1ea20..ed8738c4dc09 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -931,7 +931,7 @@ static int p9_client_version(struct p9_client *c) { int err = 0; struct p9_req_t *req; - char *version; + char *version = NULL; int msize; p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", -- cgit v1.2.3 From 06d7a39a9c397a62615122d21dafa4c16506e7e2 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 20 Jul 2018 11:27:30 +0200 Subject: net/9p/trans_fd.c: fix race-condition by flushing workqueue before the kfree() commit 430ac66eb4c5b5c4eb846b78ebf65747510b30f1 upstream. The patch adds the flush in p9_mux_poll_stop() as it the function used by p9_conn_destroy(), in turn called by p9_fd_close() to stop the async polling associated with the data regarding the connection. Link: http://lkml.kernel.org/r/20180720092730.27104-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+39749ed7d9ef6dfb23f6@syzkaller.appspotmail.com To: Eric Van Hensbergen To: Ron Minnich To: Latchesar Ionkov Cc: Yiwen Jiang Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index bced8c074c12..c923221bb8b9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -185,6 +185,8 @@ static void p9_mux_poll_stop(struct p9_conn *m) spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); + + flush_work(&p9_poll_work); } /** -- cgit v1.2.3 From 15898df477269c981dc1ae5afa39e1bb65e1db0a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 7 Sep 2018 11:13:07 +0200 Subject: x86/mm/pat: Fix L1TF stable backport for CPA, 2nd call Mostly recycling the commit log from adaba23ccd7d which fixed populate_pmd, but did not fix populate_pud. The same problem exists there. Stable trees reverted the following patch: Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers" This reverts commit 87e2bd898d3a79a8c609f183180adac47879a2a4 which is commit edc3b9129cecd0f0857112136f5b8b1bc1d45918 upstream. but the L1TF patch 02ff2769edbc backported here x86/mm/pat: Make set_memory_np() L1TF safe commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream set_memory_np() is used to mark kernel mappings not present, but it has it's own open coded mechanism which does not have the L1TF protection of inverting the address bits. assumed that cpa->pfn contains a PFN. With the above patch reverted it does not, which causes the PUD to be set to an incorrect address shifted by 12 bits, which can cause various failures. Convert the address to a PFN before passing it to pud_pfn(). This is a 4.4 stable only patch to fix the L1TF patches backport there. Cc: stable@vger.kernel.org # 4.4-only Cc: Andi Kleen Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1007fa80f5a6..0e1dd7d47f05 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1079,7 +1079,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, + set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn >> PAGE_SHIFT, canon_pgprot(pud_pgprot)))); start += PUD_SIZE; -- cgit v1.2.3 From 75ae059e856946a47f600c9ee1cd60dba006c6d3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 2 Aug 2018 16:08:52 -0400 Subject: dm cache metadata: save in-core policy_hint_size to on-disk superblock commit fd2fa95416188a767a63979296fa3e169a9ef5ec upstream. policy_hint_size starts as 0 during __write_initial_superblock(). It isn't until the policy is loaded that policy_hint_size is set in-core (cmd->policy_hint_size). But it never got recorded in the on-disk superblock because __commit_transaction() didn't deal with transfering the in-core cmd->policy_hint_size to the on-disk superblock. The in-core cmd->policy_hint_size gets initialized by metadata_open()'s __begin_transaction_flags() which re-reads all superblock fields. Because the superblock's policy_hint_size was never properly stored, when the cache was created, hints_array_available() would always return false when re-activating a previously created cache. This means __load_mappings() always considered the hints invalid and never made use of the hints (these hints served to optimize). Another detremental side-effect of this oversight is the cache_check utility would fail with: "invalid hint width: 0" Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index d3c55d7754af..905badc6cb17 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -337,7 +337,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); - disk_super->policy_hint_size = 0; + disk_super->policy_hint_size = cpu_to_le32(0); __copy_sm_root(cmd, disk_super); @@ -652,6 +652,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); + disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); -- cgit v1.2.3 From ac14c5d1a5d14df3d5dffdde9fb1ec42abf38ed8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 25 Jun 2018 11:03:07 +0300 Subject: iio: ad9523: Fix displayed phase commit 5a4e33c1c53ae7d4425f7d94e60e4458a37b349e upstream. Fix the displayed phase for the ad9523 driver. Currently the most significant decimal place is dropped and all other digits are shifted one to the left. This is due to a multiplication by 10, which is not necessary, so remove it. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes: cd1678f9632 ("iio: frequency: New driver for AD9523 SPI Low Jitter Clock Generator") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/ad9523.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index 44a30f286de1..adc86aa30409 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -641,7 +641,7 @@ static int ad9523_read_raw(struct iio_dev *indio_dev, code = (AD9523_CLK_DIST_DIV_PHASE_REV(ret) * 3141592) / AD9523_CLK_DIST_DIV_REV(ret); *val = code / 1000000; - *val2 = (code % 1000000) * 10; + *val2 = code % 1000000; return IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; -- cgit v1.2.3 From 0785d7aedf80d0d36f6ec259477369125a491edb Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 27 Jul 2018 09:42:45 +0300 Subject: iio: ad9523: Fix return value for ad952x_store() commit 9a5094ca29ea9b1da301b31fd377c0c0c4c23034 upstream. A sysfs write callback function needs to either return the number of consumed characters or an error. The ad952x_store() function currently returns 0 if the input value was "0", this will signal that no characters have been consumed and the function will be called repeatedly in a loop indefinitely. Fix this by returning number of supplied characters to indicate that the whole input string has been consumed. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes: cd1678f96329 ("iio: frequency: New driver for AD9523 SPI Low Jitter Clock Generator") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/ad9523.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index adc86aa30409..57b1812a5a18 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -507,7 +507,7 @@ static ssize_t ad9523_store(struct device *dev, return ret; if (!state) - return 0; + return len; mutex_lock(&indio_dev->mlock); switch ((u32)this_attr->address) { -- cgit v1.2.3 From 244ce5c9b32a62626367f5159d1557c815029da4 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:24 -0700 Subject: vmw_balloon: fix inflation of 64-bit GFNs commit 09755690c6b7c1eabdc4651eb3b276f8feb1e447 upstream. When balloon batching is not supported by the hypervisor, the guest frame number (GFN) must fit in 32-bit. However, due to a bug, this check was mistakenly ignored. In practice, when total RAM is greater than 16TB, the balloon does not work currently, making this bug unlikely to happen. Fixes: ef0f8f112984 ("VMware balloon: partially inline vmballoon_reserve_page.") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 5e047bfc0cc4..b0b6f99c8f0f 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -450,7 +450,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, pfn32 = (u32)pfn; if (pfn32 != pfn) - return -1; + return -EINVAL; STATS_INC(b->stats.lock[false]); @@ -460,7 +460,7 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); STATS_INC(b->stats.lock_fail[false]); - return 1; + return -EIO; } static int vmballoon_send_batched_lock(struct vmballoon *b, @@ -597,11 +597,12 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, target); - if (locked > 0) { + if (locked) { STATS_INC(b->stats.refused_alloc[false]); - if (hv_status == VMW_BALLOON_ERROR_RESET || - hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { + if (locked == -EIO && + (hv_status == VMW_BALLOON_ERROR_RESET || + hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) { vmballoon_free_page(page, false); return -EIO; } @@ -617,7 +618,7 @@ static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, } else { vmballoon_free_page(page, false); } - return -EIO; + return locked; } /* track allocated page */ -- cgit v1.2.3 From 059766538c16541bf56764869757a29b85840312 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:25 -0700 Subject: vmw_balloon: do not use 2MB without batching commit 5081efd112560d3febb328e627176235b250d59d upstream. If the hypervisor sets 2MB batching is on, while batching is cleared, the balloon code breaks. In this case the legacy mechanism is used with 2MB page. The VM would report a 2MB page is ballooned, and the hypervisor would only take the first 4KB. While the hypervisor should not report such settings, make the code more robust by not enabling 2MB support without batching. Fixes: 365bd7ef7ec8e ("VMware balloon: Support 2m page ballooning.") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index b0b6f99c8f0f..b6ccd551c00e 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -341,7 +341,13 @@ static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) success = false; } - if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) + /* + * 2MB pages are only supported with batching. If batching is for some + * reason disabled, do not use 2MB pages, since otherwise the legacy + * mechanism is used with 2MB pages, causing a failure. + */ + if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && + (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) b->supported_page_sizes = 2; else b->supported_page_sizes = 1; -- cgit v1.2.3 From cae45e44dc46f3f793b974307955c5fe3eac0170 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:26 -0700 Subject: vmw_balloon: VMCI_DOORBELL_SET does not check status commit ce664331b2487a5d244a51cbdd8cb54f866fbe5d upstream. When vmballoon_vmci_init() sets a doorbell using VMCI_DOORBELL_SET, for some reason it does not consider the status and looks at the result. However, the hypervisor does not update the result - it updates the status. This might cause VMCI doorbell not to be enabled, resulting in degraded performance. Fixes: 48e3d668b790 ("VMware balloon: Enable notification via VMCI") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index b6ccd551c00e..8e739b29079e 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1036,29 +1036,30 @@ static void vmballoon_vmci_cleanup(struct vmballoon *b) */ static int vmballoon_vmci_init(struct vmballoon *b) { - int error = 0; + unsigned long error, dummy; - if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) { - error = vmci_doorbell_create(&b->vmci_doorbell, - VMCI_FLAG_DELAYED_CB, - VMCI_PRIVILEGE_FLAG_RESTRICTED, - vmballoon_doorbell, b); - - if (error == VMCI_SUCCESS) { - VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, - b->vmci_doorbell.context, - b->vmci_doorbell.resource, error); - STATS_INC(b->stats.doorbell_set); - } - } + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) + return 0; - if (error != 0) { - vmballoon_vmci_cleanup(b); + error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); - return -EIO; - } + if (error != VMCI_SUCCESS) + goto fail; + + error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context, + b->vmci_doorbell.resource, dummy); + + STATS_INC(b->stats.doorbell_set); + + if (error != VMW_BALLOON_SUCCESS) + goto fail; return 0; +fail: + vmballoon_vmci_cleanup(b); + return -EIO; } /* -- cgit v1.2.3 From 3328bbe314e6d46aff6c074441a7a500209ca345 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Jun 2018 16:00:27 -0700 Subject: vmw_balloon: fix VMCI use when balloon built into kernel commit c3cc1b0fc27508da53fe955a3b23d03964410682 upstream. Currently, when all modules, including VMCI and VMware balloon are built into the kernel, the initialization of the balloon happens before the VMCI is probed. As a result, the balloon fails to initialize the VMCI doorbell, which it uses to get asynchronous requests for balloon size changes. The problem can be seen in the logs, in the form of the following message: "vmw_balloon: failed to initialize vmci doorbell" The driver would work correctly but slightly less efficiently, probing for requests periodically. This patch changes the balloon to be initialized using late_initcall() instead of module_init() to address this issue. It does not address a situation in which VMCI is built as a module and the balloon is built into the kernel. Fixes: 48e3d668b790 ("VMware balloon: Enable notification via VMCI") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 8e739b29079e..518e2dec2aa2 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1297,7 +1297,14 @@ static int __init vmballoon_init(void) return 0; } -module_init(vmballoon_init); + +/* + * Using late_initcall() instead of module_init() allows the balloon to use the + * VMCI doorbell even when the balloon is built into the kernel. Otherwise the + * VMCI is probed only after the balloon is initialized. If the balloon is used + * as a module, late_initcall() is equivalent to module_init(). + */ +late_initcall(vmballoon_init); static void __exit vmballoon_exit(void) { -- cgit v1.2.3 From 0943ce7b7e066a88210ca3ea53db5515d21b3312 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 1 Aug 2018 15:40:57 -0400 Subject: tracing: Do not call start/stop() functions when tracing_on does not change commit f143641bfef9a4a60c57af30de26c63057e7e695 upstream. Currently, when one echo's in 1 into tracing_on, the current tracer's "start()" function is executed, even if tracing_on was already one. This can lead to strange side effects. One being that if the hwlat tracer is enabled, and someone does "echo 1 > tracing_on" into tracing_on, the hwlat tracer's start() function is called again which will recreate another kernel thread, and make it unable to remove the old one. Link: http://lkml.kernel.org/r/1533120354-22923-1-git-send-email-erica.bugden@linutronix.de Cc: stable@vger.kernel.org Fixes: 2df8f8a6a897e ("tracing: Fix regression with irqsoff tracer and tracing_on file") Reported-by: Erica Bugden Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 11761b3dd7ba..e409ddce8754 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6496,7 +6496,9 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); - if (val) { + if (!!val == tracer_tracing_is_on(tr)) { + val = 0; /* do nothing */ + } else if (val) { tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); -- cgit v1.2.3 From 34324394f9fa3e92e355aba40ac1b0b1d1d8d3c3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 16 Aug 2018 16:08:37 -0400 Subject: tracing/blktrace: Fix to allow setting same value commit 757d9140072054528b13bbe291583d9823cde195 upstream. Masami Hiramatsu reported: Current trace-enable attribute in sysfs returns an error if user writes the same setting value as current one, e.g. # cat /sys/block/sda/trace/enable 0 # echo 0 > /sys/block/sda/trace/enable bash: echo: write error: Invalid argument # echo 1 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable bash: echo: write error: Device or resource busy But this is not a preferred behavior, it should ignore if new setting is same as current one. This fixes the problem as below. # cat /sys/block/sda/trace/enable 0 # echo 0 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable # echo 1 > /sys/block/sda/trace/enable Link: http://lkml.kernel.org/r/20180816103802.08678002@gandalf.local.home Cc: Ingo Molnar Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: stable@vger.kernel.org Fixes: cd649b8bb830d ("blktrace: remove sysfs_blk_trace_enable_show/store()") Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- kernel/trace/blktrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7ab5eafea8b2..210b8e726a97 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1716,6 +1716,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&bdev->bd_mutex); if (attr == &dev_attr_enable) { + if (!!value == !!q->blk_trace) { + ret = 0; + goto out_unlock_bdev; + } if (value) ret = blk_trace_setup_queue(q, bdev); else -- cgit v1.2.3 From f6db350c9af9814d200188e94ae8682ca995ec84 Mon Sep 17 00:00:00 2001 From: Snild Dolkow Date: Thu, 26 Jul 2018 09:15:39 +0200 Subject: kthread, tracing: Don't expose half-written comm when creating kthreads commit 3e536e222f2930534c252c1cc7ae799c725c5ff9 upstream. There is a window for racing when printing directly to task->comm, allowing other threads to see a non-terminated string. The vsnprintf function fills the buffer, counts the truncated chars, then finally writes the \0 at the end. creator other vsnprintf: fill (not terminated) count the rest trace_sched_waking(p): ... memcpy(comm, p->comm, TASK_COMM_LEN) write \0 The consequences depend on how 'other' uses the string. In our case, it was copied into the tracing system's saved cmdlines, a buffer of adjacent TASK_COMM_LEN-byte buffers (note the 'n' where 0 should be): crash-arm64> x/1024s savedcmd->saved_cmdlines | grep 'evenk' 0xffffffd5b3818640: "irq/497-pwr_evenkworker/u16:12" ...and a strcpy out of there would cause stack corruption: [224761.522292] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffff9bf9783c78 crash-arm64> kbt | grep 'comm\|trace_print_context' #6 0xffffff9bf9783c78 in trace_print_context+0x18c(+396) comm (char [16]) = "irq/497-pwr_even" crash-arm64> rd 0xffffffd4d0e17d14 8 ffffffd4d0e17d14: 2f71726900000000 5f7277702d373934 ....irq/497-pwr_ ffffffd4d0e17d24: 726f776b6e657665 3a3631752f72656b evenkworker/u16: ffffffd4d0e17d34: f9780248ff003231 cede60e0ffffff9b 12..H.x......`.. ffffffd4d0e17d44: cede60c8ffffffd4 00000fffffffffd4 .....`.......... The workaround in e09e28671 (use strlcpy in __trace_find_cmdline) was likely needed because of this same bug. Solved by vsnprintf:ing to a local buffer, then using set_task_comm(). This way, there won't be a window where comm is not terminated. Link: http://lkml.kernel.org/r/20180726071539.188015-1-snild@sony.com Cc: stable@vger.kernel.org Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure") Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Snild Dolkow Signed-off-by: Steven Rostedt (VMware) [backported to 3.18 / 4.4 by Snild] Signed-off-by: Greg Kroah-Hartman --- kernel/kthread.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 850b255649a2..ac6849ee3057 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -313,10 +313,16 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), task = create->result; if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; + char name[TASK_COMM_LEN]; va_list args; va_start(args, namefmt); - vsnprintf(task->comm, sizeof(task->comm), namefmt, args); + /* + * task is already visible to other tasks, so updating + * COMM must be protected. + */ + vsnprintf(name, sizeof(name), namefmt, args); + set_task_comm(task, name); va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. -- cgit v1.2.3 From 6977074c573e63619dbef40ab36d75fe5713b714 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 9 Aug 2018 15:37:59 -0400 Subject: uprobes: Use synchronize_rcu() not synchronize_sched() commit 016f8ffc48cb01d1e7701649c728c5d2e737d295 upstream. While debugging another bug, I was looking at all the synchronize*() functions being used in kernel/trace, and noticed that trace_uprobes was using synchronize_sched(), with a comment to synchronize with {u,ret}_probe_trace_func(). When looking at those functions, the data is protected with "rcu_read_lock()" and not with "rcu_read_lock_sched()". This is using the wrong synchronize_*() function. Link: http://lkml.kernel.org/r/20180809160553.469e1e32@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 70ed91c6ec7f8 ("tracing/uprobes: Support ftrace_event_file base multibuffer") Acked-by: Oleg Nesterov Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 68bb89ad9d28..1dc887bab085 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -969,7 +969,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) list_del_rcu(&link->list); /* synchronize with u{,ret}probe_trace_func */ - synchronize_sched(); + synchronize_rcu(); kfree(link); if (!list_empty(&tu->tp.files)) -- cgit v1.2.3 From 6f9c611f0765f4caff557deefc5a63c0f688891e Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 27 Jul 2018 13:05:58 +0200 Subject: 9p: fix multiple NULL-pointer-dereferences commit 10aa14527f458e9867cf3d2cc6b8cb0f6704448b upstream. Added checks to prevent GPFs from raising. Link: http://lkml.kernel.org/r/20180727110558.5479-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+1a262da37d3bead15c39@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 5 ++++- net/9p/trans_rdma.c | 3 +++ net/9p/trans_virtio.c | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c923221bb8b9..2f68ffda3715 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -935,7 +935,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) if (err < 0) return err; - if (valid_ipaddr4(addr) < 0) + if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; @@ -983,6 +983,9 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; + if (addr == NULL) + return -EINVAL; + if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 52b4a2f993f2..f42550dd3560 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -644,6 +644,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct ib_qp_init_attr qp_attr; struct ib_cq_init_attr cq_attr = {}; + if (addr == NULL) + return -EINVAL; + /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 669198ac73db..6018a1c0dc28 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -654,6 +654,9 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) int ret = -ENOENT; int found = 0; + if (devname == NULL) + return -EINVAL; + mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { if (!strncmp(devname, chan->tag, chan->tag_len) && -- cgit v1.2.3 From bd3a83160c0d9ef4c0901ebd14ed77bdab93df2e Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 14 Aug 2018 10:34:42 +0800 Subject: PM / sleep: wakeup: Fix build error caused by missing SRCU support commit 3df6f61fff49632492490fb6e42646b803a9958a upstream. Commit ea0212f40c6 (power: auto select CONFIG_SRCU) made the code in drivers/base/power/wakeup.c use SRCU instead of RCU, but it forgot to select CONFIG_SRCU in Kconfig, which leads to the following build error if CONFIG_SRCU is not selected somewhere else: drivers/built-in.o: In function `wakeup_source_remove': (.text+0x3c6fc): undefined reference to `synchronize_srcu' drivers/built-in.o: In function `pm_print_active_wakeup_sources': (.text+0x3c7a8): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `pm_print_active_wakeup_sources': (.text+0x3c84c): undefined reference to `__srcu_read_unlock' drivers/built-in.o: In function `device_wakeup_arm_wake_irqs': (.text+0x3d1d8): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `device_wakeup_arm_wake_irqs': (.text+0x3d228): undefined reference to `__srcu_read_unlock' drivers/built-in.o: In function `device_wakeup_disarm_wake_irqs': (.text+0x3d24c): undefined reference to `__srcu_read_lock' drivers/built-in.o: In function `device_wakeup_disarm_wake_irqs': (.text+0x3d29c): undefined reference to `__srcu_read_unlock' drivers/built-in.o:(.data+0x4158): undefined reference to `process_srcu' Fix this error by selecting CONFIG_SRCU when PM_SLEEP is enabled. Fixes: ea0212f40c6 (power: auto select CONFIG_SRCU) Cc: 4.2+ # 4.2+ Signed-off-by: zhangyi (F) [ rjw: Minor subject/changelog fixups ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 02e8dfaa1ce2..9d76184279fe 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -105,6 +105,7 @@ config PM_SLEEP def_bool y depends on SUSPEND || HIBERNATE_CALLBACKS select PM + select SRCU config PM_SLEEP_SMP def_bool y -- cgit v1.2.3 From f5fa2009e3a9159e99fe6b11d51796008883cdf5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:59:58 +0300 Subject: pnfs/blocklayout: off by one in bl_map_stripe() commit 0914bb965e38a055e9245637aed117efbe976e91 upstream. "dev->nr_children" is the number of children which were parsed successfully in bl_parse_stripe(). It could be all of them and then, in that case, it is equal to v->stripe.volumes_count. Either way, the > should be >= so that we don't go beyond the end of what we're supposed to. Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/blocklayout/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index a861bbdfe577..fa8b484d035d 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -162,7 +162,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, chunk = div_u64(offset, dev->chunk_size); div_u64_rem(chunk, dev->nr_children, &chunk_idx); - if (chunk_idx > dev->nr_children) { + if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", __func__, chunk_idx, offset, dev->chunk_size); /* error, should not happen */ -- cgit v1.2.3 From 2f04971a962949099881dfc7be23392ad14aa8c9 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 3 Jul 2018 09:59:47 +0100 Subject: ARM: tegra: Fix Tegra30 Cardhu PCA954x reset commit 6e1811900b6fe6f2b4665dba6bd6ed32c6b98575 upstream. On all versions of Tegra30 Cardhu, the reset signal to the NXP PCA9546 I2C mux is connected to the Tegra GPIO BB0. Currently, this pin on the Tegra is not configured as a GPIO but as a special-function IO (SFIO) that is multiplexing the pin to an I2S controller. On exiting system suspend, I2C commands sent to the PCA9546 are failing because there is no ACK. Although it is not possible to see exactly what is happening to the reset during suspend, by ensuring it is configured as a GPIO and driven high, to de-assert the reset, the failures are no longer seen. Please note that this GPIO is also used to drive the reset signal going to the camera connector on the board. However, given that there is no camera support currently for Cardhu, this should not have any impact. Fixes: 40431d16ff11 ("ARM: tegra: enable PCA9546 on Cardhu") Cc: stable@vger.kernel.org Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/tegra30-cardhu.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index bb1ca158273c..1922e7a93e40 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -201,6 +201,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x70>; + reset-gpio = <&gpio TEGRA_GPIO(BB, 0) GPIO_ACTIVE_LOW>; }; }; -- cgit v1.2.3 From 70201a4e368833c15625d8dc32fd9c0286a12b58 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Aug 2018 17:30:14 +0200 Subject: mm/tlb: Remove tlb_remove_table() non-concurrent condition commit a6f572084fbee8b30f91465f4a085d7a90901c57 upstream. Will noted that only checking mm_users is incorrect; we should also check mm_count in order to cover CPUs that have a lazy reference to this mm (and could do speculative TLB operations). If removing this turns out to be a performance issue, we can re-instate a more complete check, but in tlb_table_flush() eliding the call_rcu_sched(). Fixes: 267239116987 ("mm, powerpc: move the RCU page-table freeing into generic code") Reported-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rik van Riel Acked-by: Will Deacon Cc: Nicholas Piggin Cc: David Miller Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 42db644f5ec4..5aee9ec8b8c6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -361,15 +361,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; - /* - * When there's less then two users of this mm there cannot be a - * concurrent page-table walk. - */ - if (atomic_read(&tlb->mm->mm_users) < 2) { - __tlb_remove_table(table); - return; - } - if (*batch == NULL) { *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { -- cgit v1.2.3 From d25b6212cc955482eefef191b02975c1fb87d65c Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:56:59 -0700 Subject: iommu/vt-d: Add definitions for PFSID commit 0f725561e168485eff7277d683405c05b192f537 upstream. When SRIOV VF device IOTLB is invalidated, we need to provide the PF source ID such that IOMMU hardware can gauge the depth of invalidation queue which is shared among VFs. This is needed when device invalidation throttle (DIT) capability is supported. This patch adds bit definitions for checking and tracking PFSID. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 1 + include/linux/intel-iommu.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 4efec2db4ee2..652548ba6dcf 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -419,6 +419,7 @@ struct device_domain_info { struct list_head global; /* link to global list */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ + u16 pfsid; /* SRIOV physical function source ID */ u8 pasid_supported:3; u8 pasid_enabled:1; u8 pri_supported:1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 23e129ef6726..0892615ce93d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -125,6 +125,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) @@ -294,6 +295,7 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -318,6 +320,7 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -- cgit v1.2.3 From d792799caa81f9b0a850380a9eacafa4922b3990 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:57:00 -0700 Subject: iommu/vt-d: Fix dev iotlb pfsid use commit 1c48db44924298ad0cb5a6386b88017539be8822 upstream. PFSID should be used in the invalidation descriptor for flushing device IOTLBs on SRIOV VFs. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 6 +++--- drivers/iommu/intel-iommu.c | 17 ++++++++++++++++- include/linux/intel-iommu.h | 5 ++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e913a930ac80..5a63e32a4a6b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1315,8 +1315,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } -void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask) +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask) { struct qi_desc desc; @@ -1331,7 +1331,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, qdep = 0; desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE; + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); qi_submit_sync(&desc, iommu); } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 652548ba6dcf..49b266433f4c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1480,6 +1480,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); + /* For IOMMU that supports device IOTLB throttling (DIT), we assign + * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge + * queue depth at PF level. If DIT is not set, PFSID will be treated as + * reserved, which should be set to 0. + */ + if (!ecap_dit(info->iommu->ecap)) + info->pfsid = 0; + else { + struct pci_dev *pf_pdev; + + /* pdev will be returned if device is not a vf */ + pf_pdev = pci_physfn(pdev); + info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn); + } #ifdef CONFIG_INTEL_IOMMU_SVM /* The PCIe spec, in its wisdom, declares that the behaviour of @@ -1538,7 +1552,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0892615ce93d..e353f6600b0b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -466,9 +466,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask); - +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -- cgit v1.2.3 From 1a9bc340b8d3f450bfda0b94465823548b1c677c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 May 2017 21:39:49 -0400 Subject: osf_getdomainname(): use copy_to_user() commit 9ba3eb5103cf56f0daaf07de4507df76e7813ed7 upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 63f06a2b1f7f..e0903215982d 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -561,25 +561,20 @@ SYSCALL_DEFINE0(getdtablesize) */ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) { - unsigned len; - int i; + int len, err = 0; + char *kname; - if (!access_ok(VERIFY_WRITE, name, namelen)) - return -EFAULT; - - len = namelen; - if (len > 32) - len = 32; + if (namelen > 32) + namelen = 32; down_read(&uts_sem); - for (i = 0; i < len; ++i) { - __put_user(utsname()->domainname[i], name + i); - if (utsname()->domainname[i] == '\0') - break; - } + kname = utsname()->domainname; + len = strnlen(kname, namelen); + if (copy_to_user(name, kname, min(len + 1, namelen))) + err = -EFAULT; up_read(&uts_sem); - return 0; + return err; } /* -- cgit v1.2.3 From 5c16a16fcf03789baddd43fe4ca734b4c2877db3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 18:34:10 +0200 Subject: sys: don't hold uts_sem while accessing userspace memory commit 42a0cc3478584d4d63f68f2f5af021ddbea771fa upstream. Holding uts_sem as a writer while accessing userspace memory allows a namespace admin to stall all processes that attempt to take uts_sem. Instead, move data through stack buffers and don't access userspace memory while uts_sem is held. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 51 ++++++++++----------- arch/sparc/kernel/sys_sparc_32.c | 22 ++++++---- arch/sparc/kernel/sys_sparc_64.c | 20 +++++---- kernel/sys.c | 95 +++++++++++++++++++--------------------- kernel/utsname_sysctl.c | 41 ++++++++++------- 5 files changed, 119 insertions(+), 110 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index e0903215982d..bbc7cb9faa01 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -526,24 +526,19 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, SYSCALL_DEFINE1(osf_utsname, char __user *, name) { int error; + char tmp[5 * 32]; down_read(&uts_sem); - error = -EFAULT; - if (copy_to_user(name + 0, utsname()->sysname, 32)) - goto out; - if (copy_to_user(name + 32, utsname()->nodename, 32)) - goto out; - if (copy_to_user(name + 64, utsname()->release, 32)) - goto out; - if (copy_to_user(name + 96, utsname()->version, 32)) - goto out; - if (copy_to_user(name + 128, utsname()->machine, 32)) - goto out; + memcpy(tmp + 0 * 32, utsname()->sysname, 32); + memcpy(tmp + 1 * 32, utsname()->nodename, 32); + memcpy(tmp + 2 * 32, utsname()->release, 32); + memcpy(tmp + 3 * 32, utsname()->version, 32); + memcpy(tmp + 4 * 32, utsname()->machine, 32); + up_read(&uts_sem); - error = 0; - out: - up_read(&uts_sem); - return error; + if (copy_to_user(name, tmp, sizeof(tmp))) + return -EFAULT; + return 0; } SYSCALL_DEFINE0(getpagesize) @@ -563,18 +558,21 @@ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) { int len, err = 0; char *kname; + char tmp[32]; - if (namelen > 32) + if (namelen < 0 || namelen > 32) namelen = 32; down_read(&uts_sem); kname = utsname()->domainname; len = strnlen(kname, namelen); - if (copy_to_user(name, kname, min(len + 1, namelen))) - err = -EFAULT; + len = min(len + 1, namelen); + memcpy(tmp, kname, len); up_read(&uts_sem); - return err; + if (copy_to_user(name, tmp, len)) + return -EFAULT; + return 0; } /* @@ -736,13 +734,14 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) }; unsigned long offset; const char *res; - long len, err = -EINVAL; + long len; + char tmp[__NEW_UTS_LEN + 1]; offset = command-1; if (offset >= ARRAY_SIZE(sysinfo_table)) { /* Digital UNIX has a few unpublished interfaces here */ printk("sysinfo(%d)", command); - goto out; + return -EINVAL; } down_read(&uts_sem); @@ -750,13 +749,11 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) len = strlen(res)+1; if ((unsigned long)len > (unsigned long)count) len = count; - if (copy_to_user(buf, res, len)) - err = -EFAULT; - else - err = 0; + memcpy(tmp, res, len); up_read(&uts_sem); - out: - return err; + if (copy_to_user(buf, tmp, len)) + return -EFAULT; + return 0; } SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 646988d4c1a3..740f43b9b541 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -201,23 +201,27 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, asmlinkage long sys_getdomainname(char __user *name, int len) { - int nlen, err; - + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; + if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + up_read(&uts_sem); -out: + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; + +out_unlock: up_read(&uts_sem); return err; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 7f0f7c01b297..f63cd2ea8470 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -524,23 +524,27 @@ extern void check_pending(int signum); SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) { - int nlen, err; + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); + + up_read(&uts_sem); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; -out: +out_unlock: up_read(&uts_sem); return err; } diff --git a/kernel/sys.c b/kernel/sys.c index f718742e55e6..e2446ade79ba 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1142,18 +1142,19 @@ static int override_release(char __user *release, size_t len) SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { - int errno = 0; + struct new_utsname tmp; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof *name)) - errno = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!errno && override_release(name->release, sizeof(name->release))) - errno = -EFAULT; - if (!errno && override_architecture(name)) - errno = -EFAULT; - return errno; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } #ifdef __ARCH_WANT_SYS_OLD_UNAME @@ -1162,55 +1163,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) */ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) { - int error = 0; + struct old_utsname tmp; if (!name) return -EFAULT; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - error = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - if (!error && override_architecture(name)) - error = -EFAULT; - return error; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - int error; + struct oldold_utsname tmp = {}; if (!name) return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, name->machine + __OLD_UTS_LEN); + memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); + memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); + memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN); + memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN); + memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_architecture(name)) - error = -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - return error ? -EFAULT : 0; + if (override_architecture(name)) + return -EFAULT; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + return 0; } #endif @@ -1224,17 +1216,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->nodename, tmp, len); memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; uts_proc_notify(UTS_PROC_HOSTNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } @@ -1242,8 +1235,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { - int i, errno; + int i; struct new_utsname *u; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; @@ -1252,11 +1246,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) i = 1 + strlen(u->nodename); if (i > len) i = len; - errno = 0; - if (copy_to_user(name, u->nodename, i)) - errno = -EFAULT; + memcpy(tmp, u->nodename, i); up_read(&uts_sem); - return errno; + if (copy_to_user(name, tmp, i)) + return -EFAULT; + return 0; } #endif @@ -1275,17 +1269,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->domainname, tmp, len); memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; uts_proc_notify(UTS_PROC_DOMAINNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index c8eac43267e9..d2b3b2973456 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -17,7 +17,7 @@ #ifdef CONFIG_PROC_SYSCTL -static void *get_uts(struct ctl_table *table, int write) +static void *get_uts(struct ctl_table *table) { char *which = table->data; struct uts_namespace *uts_ns; @@ -25,21 +25,9 @@ static void *get_uts(struct ctl_table *table, int write) uts_ns = current->nsproxy->uts_ns; which = (which - (char *)&init_uts_ns) + (char *)uts_ns; - if (!write) - down_read(&uts_sem); - else - down_write(&uts_sem); return which; } -static void put_uts(struct ctl_table *table, int write, void *which) -{ - if (!write) - up_read(&uts_sem); - else - up_write(&uts_sem); -} - /* * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? @@ -49,13 +37,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write, { struct ctl_table uts_table; int r; + char tmp_data[__NEW_UTS_LEN + 1]; + memcpy(&uts_table, table, sizeof(uts_table)); - uts_table.data = get_uts(table, write); + uts_table.data = tmp_data; + + /* + * Buffer the value in tmp_data so that proc_dostring() can be called + * without holding any locks. + * We also need to read the original value in the write==1 case to + * support partial writes. + */ + down_read(&uts_sem); + memcpy(tmp_data, get_uts(table), sizeof(tmp_data)); + up_read(&uts_sem); r = proc_dostring(&uts_table, write, buffer, lenp, ppos); - put_uts(table, write, uts_table.data); - if (write) + if (write) { + /* + * Write back the new value. + * Note that, since we dropped uts_sem, the result can + * theoretically be incorrect if there are two parallel writes + * at non-zero offsets to the same sysctl. + */ + down_write(&uts_sem); + memcpy(get_uts(table), tmp_data, sizeof(tmp_data)); + up_write(&uts_sem); proc_sys_poll_notify(table->poll); + } return r; } -- cgit v1.2.3 From 3d4c43c8f0fb00ad5f58c06b382dc2dc769a63e1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 18:34:19 +0200 Subject: userns: move user access out of the mutex commit 5820f140edef111a9ea2ef414ab2428b8cb805b1 upstream. The old code would hold the userns_state_mutex indefinitely if memdup_user_nul stalled due to e.g. a userfault region. Prevent that by moving the memdup_user_nul in front of the mutex_lock(). Note: This changes the error precedence of invalid buf/count/*ppos vs map already written / capabilities missing. Fixes: 22d917d80e84 ("userns: Rework the user_namespace adding uid/gid...") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Christian Brauner Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 88fefa68c516..a965df4b54f5 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -602,9 +602,26 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent *extent = NULL; - unsigned long page = 0; + unsigned long page; char *kbuf, *pos, *next_line; - ssize_t ret = -EINVAL; + ssize_t ret; + + /* Only allow < page size writes at the beginning of the file */ + if ((*ppos != 0) || (count >= PAGE_SIZE)) + return -EINVAL; + + /* Get a buffer */ + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!page) + return -ENOMEM; + + /* Slurp in the user data */ + if (copy_from_user(kbuf, buf, count)) { + free_page(page); + return -EFAULT; + } + kbuf[count] = '\0'; /* * The userns_state_mutex serializes all writes to any given map. @@ -638,24 +655,6 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; - /* Get a buffer */ - ret = -ENOMEM; - page = __get_free_page(GFP_TEMPORARY); - kbuf = (char *) page; - if (!page) - goto out; - - /* Only allow < page size writes at the beginning of the file */ - ret = -EINVAL; - if ((*ppos != 0) || (count >= PAGE_SIZE)) - goto out; - - /* Slurp in the user data */ - ret = -EFAULT; - if (copy_from_user(kbuf, buf, count)) - goto out; - kbuf[count] = '\0'; - /* Parse the user data */ ret = -EINVAL; pos = kbuf; -- cgit v1.2.3 From 20da15a755e8c68194ed777813277daf4931147e Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 20:49:45 +0200 Subject: ubifs: Fix memory leak in lprobs self-check commit eef19816ada3abd56d9f20c88794cc2fea83ebb2 upstream. Allocate the buffer after we return early. Otherwise memory is being leaked. Cc: Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/lprops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index a0011aa3a779..f43f162e36f4 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1091,10 +1091,6 @@ static int scan_check_cb(struct ubifs_info *c, } } - buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); - if (!buf) - return -ENOMEM; - /* * After an unclean unmount, empty and freeable LEBs * may contain garbage - do not scan them. @@ -1113,6 +1109,10 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; } + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) + return -ENOMEM; + sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { ret = PTR_ERR(sleb); -- cgit v1.2.3 From d90fea0256ce702abdfcbc0c0ff888002a13599c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 1 Jul 2018 23:20:50 +0200 Subject: Revert "UBIFS: Fix potential integer overflow in allocation" commit 08acbdd6fd736b90f8d725da5a0de4de2dd6de62 upstream. This reverts commit 353748a359f1821ee934afc579cf04572406b420. It bypassed the linux-mtd review process and fixes the issue not as it should. Cc: Kees Cook Cc: Silvio Cesare Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 22dba8837a86..0b9da5b6e0f9 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1107,7 +1107,7 @@ static int recomp_data_node(const struct ubifs_info *c, int err, len, compr_type, out_len; out_len = le32_to_cpu(dn->size); - buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS); + buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); if (!buf) return -ENOMEM; -- cgit v1.2.3 From 2e052c5081e5f56b46ad28ce57b6ea721b4887b1 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 1 Jul 2018 23:20:51 +0200 Subject: ubifs: Check data node size before truncate commit 95a22d2084d72ea067d8323cc85677dba5d97cae upstream. Check whether the size is within bounds before using it. If the size is not correct, abort and dump the bad data node. Cc: Kees Cook Cc: Silvio Cesare Cc: stable@vger.kernel.org Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system") Reported-by: Silvio Cesare Signed-off-by: Richard Weinberger Reviewed-by: Kees Cook Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 0b9da5b6e0f9..9887c03a631b 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1186,7 +1186,16 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, else if (err) goto out_free; else { - if (le32_to_cpu(dn->size) <= dlen) + int dn_len = le32_to_cpu(dn->size); + + if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { + ubifs_err(c, "bad data node (block %u, inode %lu)", + blk, inode->i_ino); + ubifs_dump_node(c, dn); + goto out_free; + } + + if (dn_len <= dlen) dlen = 0; /* Nothing to do */ else { int compr_type = le16_to_cpu(dn->compr_type); -- cgit v1.2.3 From dfd7543a6734b982b007734826add041158618ef Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 00:52:28 +0200 Subject: ubifs: Fix synced_i_size calculation for xattr inodes commit 59965593205fa4044850d35ee3557cf0b7edcd14 upstream. In ubifs_jnl_update() we sync parent and child inodes to the flash, in case of xattrs, the parent inode (AKA host inode) has a non-zero data_len. Therefore we need to adjust synced_i_size too. This issue was reported by ubifs self tests unter a xattr related work load. UBIFS error (ubi0:0 pid 1896): dbg_check_synced_i_size: ui_size is 4, synced_i_size is 0, but inode is clean UBIFS error (ubi0:0 pid 1896): dbg_check_synced_i_size: i_ino 65, i_mode 0x81a4, i_size 4 Cc: Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 9887c03a631b..539fa934ed93 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -661,6 +661,11 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; spin_unlock(&ui->ui_lock); + if (xent) { + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + } mark_inode_clean(c, ui); mark_inode_clean(c, host_ui); return 0; -- cgit v1.2.3 From 57d78f3e00b9877cdeea6b44a6f0c070e7b0d3a7 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 11 Jun 2018 11:39:56 +0530 Subject: pwm: tiehrpwm: Fix disabling of output of PWMs commit 38dabd91ff0bde33352ca3cc65ef515599b77a05 upstream. pwm-tiehrpwm driver disables PWM output by putting it in low output state via active AQCSFRC register in ehrpwm_pwm_disable(). But, the AQCSFRC shadow register is not updated. Therefore, when shadow AQCSFRC register is re-enabled in ehrpwm_pwm_enable() (say to enable second PWM output), previous settings are lost as shadow register value is loaded into active register. This results in things like PWMA getting enabled automatically, when PWMB is enabled and vice versa. Fix this by updating AQCSFRC shadow register as well during ehrpwm_pwm_disable(). Fixes: 19891b20e7c2 ("pwm: pwm-tiehrpwm: PWM driver support for EHRPWM") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-tiehrpwm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 6a41e66015b6..062dff1c902d 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -384,6 +384,8 @@ static void ehrpwm_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) aqcsfrc_mask = AQCSFRC_CSFA_MASK; } + /* Update shadow register first before modifying active register */ + ehrpwm_modify(pc->mmio_base, AQCSFRC, aqcsfrc_mask, aqcsfrc_val); /* * Changes to immediate action on Action Qualifier. This puts * Action Qualifier control on PWM output from next TBCLK -- cgit v1.2.3 From c602af2b76af159cc3ad0828d247484f99b4945c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 25 Jul 2018 15:41:54 +0200 Subject: fb: fix lost console when the user unplugs a USB adapter commit 8c5b044299951acd91e830a688dd920477ea1eda upstream. I have a USB display adapter using the udlfb driver and I use it on an ARM board that doesn't have any graphics card. When I plug the adapter in, the console is properly displayed, however when I unplug and re-plug the adapter, the console is not displayed and I can't access it until I reboot the board. The reason is this: When the adapter is unplugged, dlfb_usb_disconnect calls unlink_framebuffer, then it waits until the reference count drops to zero and then it deallocates the framebuffer. However, the console that is attached to the framebuffer device keeps the reference count non-zero, so the framebuffer device is never destroyed. When the USB adapter is plugged again, it creates a new device /dev/fb1 and the console is not attached to it. This patch fixes the bug by unbinding the console from unlink_framebuffer. The code to unbind the console is moved from do_unregister_framebuffer to a function unbind_console. When the console is unbound, the reference count drops to zero and the udlfb driver frees the framebuffer. When the adapter is plugged back, a new framebuffer is created and the console is attached to it. Signed-off-by: Mikulas Patocka Cc: Dave Airlie Cc: Bernie Thompson Cc: Ladislav Michl Cc: stable@vger.kernel.org [b.zolnierkie: preserve old behavior for do_unregister_framebuffer()] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 0705d8883ede..8a29ec5992fd 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1687,12 +1687,12 @@ static int do_register_framebuffer(struct fb_info *fb_info) return 0; } -static int do_unregister_framebuffer(struct fb_info *fb_info) +static int unbind_console(struct fb_info *fb_info) { struct fb_event event; - int i, ret = 0; + int ret; + int i = fb_info->node; - i = fb_info->node; if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) return -EINVAL; @@ -1707,17 +1707,29 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) unlock_fb_info(fb_info); console_unlock(); + return ret; +} + +static int __unlink_framebuffer(struct fb_info *fb_info); + +static int do_unregister_framebuffer(struct fb_info *fb_info) +{ + struct fb_event event; + int ret; + + ret = unbind_console(fb_info); + if (ret) return -EINVAL; pm_vt_switch_unregister(fb_info->dev); - unlink_framebuffer(fb_info); + __unlink_framebuffer(fb_info); if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); fb_destroy_modelist(&fb_info->modelist); - registered_fb[i] = NULL; + registered_fb[fb_info->node] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); event.info = fb_info; @@ -1730,7 +1742,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) return 0; } -int unlink_framebuffer(struct fb_info *fb_info) +static int __unlink_framebuffer(struct fb_info *fb_info) { int i; @@ -1742,6 +1754,20 @@ int unlink_framebuffer(struct fb_info *fb_info) device_destroy(fb_class, MKDEV(FB_MAJOR, i)); fb_info->dev = NULL; } + + return 0; +} + +int unlink_framebuffer(struct fb_info *fb_info) +{ + int ret; + + ret = __unlink_framebuffer(fb_info); + if (ret) + return ret; + + unbind_console(fb_info); + return 0; } EXPORT_SYMBOL(unlink_framebuffer); -- cgit v1.2.3 From 3130702ac3a6c71a1196dfa0560d6ec9e54bf7aa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 25 Jul 2018 15:41:55 +0200 Subject: udlfb: set optimal write delay commit bb24153a3f13dd0dbc1f8055ad97fe346d598f66 upstream. The default delay 5 jiffies is too much when the kernel is compiled with HZ=100 - it results in jumpy cursor in Xwindow. In order to find out the optimal delay, I benchmarked the driver on 1280x720x30fps video. I found out that with HZ=1000, 10ms is acceptable, but with HZ=250 or HZ=300, we need 4ms, so that the video is played without any frame skips. This patch changes the delay to this value. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- include/video/udlfb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/udlfb.h b/include/video/udlfb.h index f9466fa54ba4..2ad9a6d37ff4 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -87,7 +87,7 @@ struct dlfb_data { #define MIN_RAW_PIX_BYTES 2 #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES) -#define DL_DEFIO_WRITE_DELAY 5 /* fb_deferred_io.delay in jiffies */ +#define DL_DEFIO_WRITE_DELAY msecs_to_jiffies(HZ <= 300 ? 4 : 10) /* optimal value for 720p video */ #define DL_DEFIO_WRITE_DISABLE (HZ*60) /* "disable" with long delay */ /* remove these once align.h patch is taken into kernel */ -- cgit v1.2.3 From f7f501c753f36021ffea48bf8b5b50992cb2bdac Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 Jun 2018 13:43:48 +0200 Subject: getxattr: use correct xattr length commit 82c9a927bc5df6e06b72d206d24a9d10cced4eb5 upstream. When running in a container with a user namespace, if you call getxattr with name = "system.posix_acl_access" and size % 8 != 4, then getxattr silently skips the user namespace fixup that it normally does resulting in un-fixed-up data being returned. This is caused by posix_acl_fix_xattr_to_user() being passed the total buffer size and not the actual size of the xattr as returned by vfs_getxattr(). This commit passes the actual length of the xattr as returned by vfs_getxattr() down. A reproducer for the issue is: touch acl_posix setfacl -m user:0:rwx acl_posix and the compile: #define _GNU_SOURCE #include #include #include #include #include #include #include /* Run in user namespace with nsuid 0 mapped to uid != 0 on the host. */ int main(int argc, void **argv) { ssize_t ret1, ret2; char buf1[128], buf2[132]; int fret = EXIT_SUCCESS; char *file; if (argc < 2) { fprintf(stderr, "Please specify a file with " "\"system.posix_acl_access\" permissions set\n"); _exit(EXIT_FAILURE); } file = argv[1]; ret1 = getxattr(file, "system.posix_acl_access", buf1, sizeof(buf1)); if (ret1 < 0) { fprintf(stderr, "%s - Failed to retrieve " "\"system.posix_acl_access\" " "from \"%s\"\n", strerror(errno), file); _exit(EXIT_FAILURE); } ret2 = getxattr(file, "system.posix_acl_access", buf2, sizeof(buf2)); if (ret2 < 0) { fprintf(stderr, "%s - Failed to retrieve " "\"system.posix_acl_access\" " "from \"%s\"\n", strerror(errno), file); _exit(EXIT_FAILURE); } if (ret1 != ret2) { fprintf(stderr, "The value of \"system.posix_acl_" "access\" for file \"%s\" changed " "between two successive calls\n", file); _exit(EXIT_FAILURE); } for (ssize_t i = 0; i < ret2; i++) { if (buf1[i] == buf2[i]) continue; fprintf(stderr, "Unexpected different in byte %zd: " "%02x != %02x\n", i, buf1[i], buf2[i]); fret = EXIT_FAILURE; } if (fret == EXIT_SUCCESS) fprintf(stderr, "Test passed\n"); else fprintf(stderr, "Test failed\n"); _exit(fret); } and run: ./tester acl_posix On a non-fixed up kernel this should return something like: root@c1:/# ./t Unexpected different in byte 16: ffffffa0 != 00 Unexpected different in byte 17: ffffff86 != 00 Unexpected different in byte 18: 01 != 00 and on a fixed kernel: root@c1:~# ./t Test passed Cc: stable@vger.kernel.org Fixes: 2f6f0654ab61 ("userns: Convert vfs posix_acl support to use kuids and kgids") Link: https://bugzilla.kernel.org/show_bug.cgi?id=199945 Reported-by: Colin Watson Signed-off-by: Christian Brauner Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xattr.c b/fs/xattr.c index 76f01bf4b048..09441c396798 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -453,7 +453,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (error > 0) { if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) - posix_acl_fix_xattr_to_user(kvalue, size); + posix_acl_fix_xattr_to_user(kvalue, error); if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { -- cgit v1.2.3 From 95861df6188b3960e1d80292c905e3d040fd619c Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Thu, 23 Aug 2018 02:02:56 +0800 Subject: bcache: release dc->writeback_lock properly in bch_writeback_thread() commit 3943b040f11ed0cc6d4585fd286a623ca8634547 upstream. The writeback thread would exit with a lock held when the cache device is detached via sysfs interface, fix it by releasing the held lock before exiting the while-loop. Fixes: fadd94e05c02 (bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set) Signed-off-by: Shan Hai Signed-off-by: Coly Li Tested-by: Shenghui Wang Cc: stable@vger.kernel.org #4.17+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f2c0000de613..95a6ae053714 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -462,8 +462,10 @@ static int bch_writeback_thread(void *arg) * data on cache. BCACHE_DEV_DETACHING flag is set in * bch_cached_dev_detach(). */ - if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) + if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) { + up_write(&dc->writeback_lock); break; + } } up_write(&dc->writeback_lock); -- cgit v1.2.3 From ef699421145eb85874c0ad6ca82575062b5ead34 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 14 Aug 2018 11:46:08 +0300 Subject: perf auxtrace: Fix queue resize commit 99cbbe56eb8bede625f410ab62ba34673ffa7d21 upstream. When the number of queues grows beyond 32, the array of queues is resized but not all members were being copied. Fix by also copying 'tid', 'cpu' and 'set'. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: e502789302a6e ("perf auxtrace: Add helpers for queuing AUX area tracing data") Link: http://lkml.kernel.org/r/20180814084608.6563-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 7f10430af39c..58426e7d320d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -186,6 +186,9 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues, for (i = 0; i < queues->nr_queues; i++) { list_splice_tail(&queues->queue_array[i].head, &queue_array[i].head); + queue_array[i].tid = queues->queue_array[i].tid; + queue_array[i].cpu = queues->queue_array[i].cpu; + queue_array[i].set = queues->queue_array[i].set; queue_array[i].priv = queues->queue_array[i].priv; } -- cgit v1.2.3 From 59825a7ef329a4fb7ab24869a058af540f1840d5 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:31 +0000 Subject: fs/quota: Fix spectre gadget in do_quotactl commit 7b6924d94a60c6b8c1279ca003e8744e6cd9e8b1 upstream. 'type' is user-controlled, so sanitize it after the bounds check to avoid using it in speculative execution. This covers the following potential gadgets detected with the help of smatch: * fs/ext4/super.c:5741 ext4_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/ext4/super.c:5778 ext4_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1552 f2fs_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1608 f2fs_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/quota/dquot.c:412 mark_info_dirty() warn: potential spectre issue 'sb_dqopt(sb)->info' [w] * fs/quota/dquot.c:933 dqinit_needed() warn: potential spectre issue 'dquots' [r] * fs/quota/dquot.c:2112 dquot_commit_info() warn: potential spectre issue 'dqopt->ops' [r] * fs/quota/dquot.c:2362 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->files' [w] (local cap) * fs/quota/dquot.c:2369 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->ops' [w] (local cap) * fs/quota/dquot.c:2370 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->info' [w] (local cap) * fs/quota/quota.c:110 quota_getfmt() warn: potential spectre issue 'sb_dqopt(sb)->info' [r] * fs/quota/quota_v2.c:84 v2_check_quota_file() warn: potential spectre issue 'quota_magics' [w] * fs/quota/quota_v2.c:85 v2_check_quota_file() warn: potential spectre issue 'quota_versions' [w] * fs/quota/quota_v2.c:96 v2_read_file_info() warn: potential spectre issue 'dqopt->info' [r] * fs/quota/quota_v2.c:172 v2_write_file_info() warn: potential spectre issue 'dqopt->info' [r] Additionally, a quick inspection indicates there are array accesses with 'type' in quota_on() and quota_off() functions which are also addressed by this. Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/quota/quota.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 3746367098fd..bb0d643481c8 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -17,6 +17,7 @@ #include #include #include +#include static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -644,6 +645,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) return -EINVAL; + type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. -- cgit v1.2.3 From 1fc5fa527625d2cbddf9004b26e020ecc83d272d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:27:59 +1000 Subject: x86/io: add interface to reserve io memtype for a resource range. (v1.1) commit 8ef4227615e158faa4ee85a1d6466782f7e22f2f upstream. A recent change to the mm code in: 87744ab3832b mm: fix cache mode tracking in vm_insert_mixed() started enforcing checking the memory type against the registered list for amixed pfn insertion mappings. It happens that the drm drivers for a number of gpus relied on this being broken. Currently the driver only inserted VRAM mappings into the tracking table when they came from the kernel, and userspace mappings never landed in the table. This led to a regression where all the mapping end up as UC instead of WC now. I've considered a number of solutions but since this needs to be fixed in fixes and not next, and some of the solutions were going to introduce overhead that hadn't been there before I didn't consider them viable at this stage. These mainly concerned hooking into the TTM io reserve APIs, but these API have a bunch of fast paths I didn't want to unwind to add this to. The solution I've decided on is to add a new API like the arch_phys_wc APIs (these would have worked but wc_del didn't take a range), and use them from the drivers to add a WC compatible mapping to the table for all VRAM on those GPUs. This means we can then create userspace mapping that won't get degraded to UC. v1.1: use CONFIG_X86_PAT + add some comments in io.h Cc: Toshi Kani Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: x86@kernel.org Cc: mcgrof@suse.com Cc: Dan Williams Acked-by: Ingo Molnar Reviewed-by: Thomas Gleixner Signed-off-by: Dave Airlie Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/io.h | 6 ++++++ arch/x86/mm/pat.c | 14 ++++++++++++++ include/linux/io.h | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 9016b4b70375..6c5020163db0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3146b1da6d72..5ff0cb74de55 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -726,6 +726,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end) free_memtype(start, end); } +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { diff --git a/include/linux/io.h b/include/linux/io.h index de64c1e53612..8ab45611fc35 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -154,4 +154,26 @@ enum { void *memremap(resource_size_t offset, size_t size, unsigned long flags); void memunmap(void *addr); +/* + * On x86 PAT systems we have memory tracking that keeps track of + * the allowed mappings on memory ranges. This tracking works for + * all the in-kernel mapping APIs (ioremap*), but where the user + * wishes to map a range from a physical device into user memory + * the tracking won't be updated. This API is to be used by + * drivers which remap physical device pages into userspace, + * and wants to make sure they are mapped WC and not UC. + */ +#ifndef arch_io_reserve_memtype_wc +static inline int arch_io_reserve_memtype_wc(resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline void arch_io_free_memtype_wc(resource_size_t base, + resource_size_t size) +{ +} +#endif + #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From c59fdc4cfbda52ce081c59540762185d765c3369 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:37:48 +1000 Subject: drm/drivers: add support for using the arch wc mapping API. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7cf321d118a825c1541b43ca45294126fd474efa upstream. This fixes a regression in all these drivers since the cache mode tracking was fixed for mixed mappings. It uses the new arch API to add the VRAM range to the PAT mapping tracking tables. Fixes: 87744ab3832 (mm: fix cache mode tracking in vm_insert_mixed()) Reviewed-by: Christian König . Signed-off-by: Dave Airlie Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++++ drivers/gpu/drm/ast/ast_ttm.c | 6 ++++++ drivers/gpu/drm/cirrus/cirrus_ttm.c | 7 +++++++ drivers/gpu/drm/mgag200/mgag200_ttm.c | 7 +++++++ drivers/gpu/drm/nouveau/nouveau_ttm.c | 8 ++++++++ drivers/gpu/drm/radeon/radeon_object.c | 5 +++++ 6 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 73628c7599e7..3aca9a9011fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -492,6 +492,10 @@ void amdgpu_bo_force_delete(struct amdgpu_device *adev) int amdgpu_bo_init(struct amdgpu_device *adev) { + /* reserve PAT memory space to WC for VRAM */ + arch_io_reserve_memtype_wc(adev->mc.aper_base, + adev->mc.aper_size); + /* Add an MTRR for the VRAM */ adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, adev->mc.aper_size); @@ -507,6 +511,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); arch_phys_wc_del(adev->mc.vram_mtrr); + arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size); } int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 08f82eae6939..ac12f74e6b32 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -275,6 +275,8 @@ int ast_mm_init(struct ast_private *ast) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -283,11 +285,15 @@ int ast_mm_init(struct ast_private *ast) void ast_mm_fini(struct ast_private *ast) { + struct drm_device *dev = ast->dev; + ttm_bo_device_release(&ast->ttm.bdev); ast_ttm_global_release(ast); arch_phys_wc_del(ast->fb_mtrr); + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); } void ast_ttm_placement(struct ast_bo *bo, int domain) diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index dfffd528517a..393967025043 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -275,6 +275,9 @@ int cirrus_mm_init(struct cirrus_device *cirrus) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); + cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -284,6 +287,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus) void cirrus_mm_fini(struct cirrus_device *cirrus) { + struct drm_device *dev = cirrus->dev; + if (!cirrus->mm_inited) return; @@ -293,6 +298,8 @@ void cirrus_mm_fini(struct cirrus_device *cirrus) arch_phys_wc_del(cirrus->fb_mtrr); cirrus->fb_mtrr = 0; + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); } void cirrus_ttm_placement(struct cirrus_bo *bo, int domain) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 05108b505fbf..d9df8d32fc35 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -274,6 +274,9 @@ int mgag200_mm_init(struct mga_device *mdev) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); + mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -282,10 +285,14 @@ int mgag200_mm_init(struct mga_device *mdev) void mgag200_mm_fini(struct mga_device *mdev) { + struct drm_device *dev = mdev->dev; + ttm_bo_device_release(&mdev->ttm.bdev); mgag200_ttm_global_release(mdev); + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); arch_phys_wc_del(mdev->fb_mtrr); mdev->fb_mtrr = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index d2e7d209f651..9835327a3214 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -397,6 +397,9 @@ nouveau_ttm_init(struct nouveau_drm *drm) /* VRAM init */ drm->gem.vram_available = drm->device.info.ram_user; + arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1), + device->func->resource_size(device, 1)); + ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM, drm->gem.vram_available >> PAGE_SHIFT); if (ret) { @@ -429,6 +432,8 @@ nouveau_ttm_init(struct nouveau_drm *drm) void nouveau_ttm_fini(struct nouveau_drm *drm) { + struct nvkm_device *device = nvxx_device(&drm->device); + ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT); @@ -438,4 +443,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm) arch_phys_wc_del(drm->ttm.mtrr); drm->ttm.mtrr = 0; + arch_io_free_memtype_wc(device->func->resource_addr(device, 1), + device->func->resource_size(device, 1)); + } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 83aee9e814ba..18ec38d0d3f5 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -447,6 +447,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev) int radeon_bo_init(struct radeon_device *rdev) { + /* reserve PAT memory space to WC for VRAM */ + arch_io_reserve_memtype_wc(rdev->mc.aper_base, + rdev->mc.aper_size); + /* Add an MTRR for the VRAM */ if (!rdev->fastfb_working) { rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base, @@ -464,6 +468,7 @@ void radeon_bo_fini(struct radeon_device *rdev) { radeon_ttm_fini(rdev); arch_phys_wc_del(rdev->mc.vram_mtrr); + arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size); } /* Returns how many bytes TTM can move per IB. -- cgit v1.2.3 From fdf53713aebb1e8ccbfcadade2b8449e62394547 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Sep 2018 20:04:37 +0200 Subject: Linux 4.4.155 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b184286cf7e6..2d9f89ec8397 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 154 +SUBLEVEL = 155 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3