From e45a502bdeae5a075257c4f061d1ff4ff0821354 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Oct 2016 13:07:36 -0700 Subject: mm: remove gup_flags FOLL_WRITE games from __get_user_pages() [ Upstream commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 ] This is an ancient bug that was actually attempted to be fixed once (badly) by me eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race for write access") but that was then undone due to problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). In the meantime, the s390 situation has long been fixed, and we can now fix it by checking the pte_dirty() bit properly (and do it better). The s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement software dirty bits") which made it into v3.9. Earlier kernels will have to look at the page state itself. Also, the VM has become more scalable, and what used a purely theoretical race back then has become easier to trigger. To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, we already did a COW" rather than play racy games with FOLL_WRITE that is very fundamental, and then use the pte dirty flag to validate that the FOLL_COW flag is still valid. Reported-and-tested-by: Phil "not Paul" Oester Acked-by: Hugh Dickins Reviewed-by: Michal Hocko Cc: Andy Lutomirski Cc: Kees Cook Cc: Oleg Nesterov Cc: Willy Tarreau Cc: Nick Piggin Cc: Greg Thelen Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/mm.h | 1 + mm/gup.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9eef3a1f2291..db853dee1ec5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2029,6 +2029,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_COW 0x4000 /* internal GUP flag */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/gup.c b/mm/gup.c index 377a5a796242..3cec4df06e6b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -32,6 +32,16 @@ static struct page *no_page_table(struct vm_area_struct *vma, return NULL; } +/* + * FOLL_FORCE can write to even unwritable pte's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) +{ + return pte_write(pte) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); +} + static struct page *follow_page_pte(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags) { @@ -66,7 +76,7 @@ retry: } if ((flags & FOLL_NUMA) && pte_numa(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !pte_write(pte)) { + if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { pte_unmap_unlock(ptep, ptl); return NULL; } @@ -315,7 +325,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, * reCOWed by userspace write). */ if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags &= ~FOLL_WRITE; + *flags |= FOLL_COW; return 0; } -- cgit v1.2.3 From bb2f5ed8935a47f78a70ac48e58e993de46430e0 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 3 Jun 2015 10:31:14 +0100 Subject: x86/cpu: Fix SMAP check in PVOPS environments [ Upstream commit 581b7f158fe0383b492acd1ce3fb4e99d4e57808 ] There appears to be no formal statement of what pv_irq_ops.save_fl() is supposed to return precisely. Native returns the full flags, while lguest and Xen only return the Interrupt Flag, and both have comments by the implementations stating that only the Interrupt Flag is looked at. This may have been true when initially implemented, but no longer is. To make matters worse, the Xen PVOP leaves the upper bits undefined, making the BUG_ON() undefined behaviour. Experimentally, this now trips for 32bit PV guests on Broadwell hardware. The BUG_ON() is consistent for an individual build, but not consistent for all builds. It has also been a sitting timebomb since SMAP support was introduced. Use native_save_fl() instead, which will obtain an accurate view of the AC flag. Signed-off-by: Andrew Cooper Reviewed-by: David Vrabel Tested-by: Rusty Russell Cc: Rusty Russell Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Cc: Xen-devel CC: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1433323874-6927-1-git-send-email-andrew.cooper3@citrix.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 88635b301694..69608a4f554b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -291,10 +291,9 @@ __setup("nosmap", setup_disable_smap); static __always_inline void setup_smap(struct cpuinfo_x86 *c) { - unsigned long eflags; + unsigned long eflags = native_save_fl(); /* This should have been cleared long ago */ - raw_local_save_flags(eflags); BUG_ON(eflags & X86_EFLAGS_AC); if (cpu_has(c, X86_FEATURE_SMAP)) { -- cgit v1.2.3 From c003797af3bc3b267a262b4d28ca06efc80390d4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 11 Dec 2015 14:38:06 +0200 Subject: xhci: fix usb2 resume timing and races. [ Upstream commit f69115fdbc1ac0718e7d19ad3caa3da2ecfe1c96 ] According to USB 2 specs ports need to signal resume for at least 20ms, in practice even longer, before moving to U0 state. Both host and devices can initiate resume. On device initiated resume, a port status interrupt with the port in resume state in issued. The interrupt handler tags a resume_done[port] timestamp with current time + USB_RESUME_TIMEOUT, and kick roothub timer. Root hub timer requests for port status, finds the port in resume state, checks if resume_done[port] timestamp passed, and set port to U0 state. On host initiated resume, current code sets the port to resume state, sleep 20ms, and finally sets the port to U0 state. This should also be changed to work in a similar way as the device initiated resume, with timestamp tagging, but that is not yet tested and will be a separate fix later. There are a few issues with this approach 1. A host initiated resume will also generate a resume event. The event handler will find the port in resume state, believe it's a device initiated resume, and act accordingly. 2. A port status request might cut the resume signalling short if a get_port_status request is handled during the host resume signalling. The port will be found in resume state. The timestamp is not set leading to time_after_eq(jiffies, timestamp) returning true, as timestamp = 0. get_port_status will proceed with moving the port to U0. 3. If an error, or anything else happens to the port during device initiated resume signalling it will leave all the device resume parameters hanging uncleared, preventing further suspend, returning -EBUSY, and cause the pm thread to busyloop trying to enter suspend. Fix this by using the existing resuming_ports bitfield to indicate that resume signalling timing is taken care of. Check if the resume_done[port] is set before using it for timestamp comparison, and also clear out any resume signalling related variables if port is not in U0 or Resume state This issue was discovered when a PM thread busylooped, trying to runtime suspend the xhci USB 2 roothub on a Dell XPS Cc: stable Reported-by: Daniel J Blueman Tested-by: Daniel J Blueman Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-hub.c | 45 +++++++++++++++++++++++++++++++++++++++----- drivers/usb/host/xhci-ring.c | 3 ++- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index dfe835d21b15..0fa8fd9adeba 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -619,8 +619,30 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, if ((raw_port_status & PORT_RESET) || !(raw_port_status & PORT_PE)) return 0xffffffff; - if (time_after_eq(jiffies, - bus_state->resume_done[wIndex])) { + /* did port event handler already start resume timing? */ + if (!bus_state->resume_done[wIndex]) { + /* If not, maybe we are in a host initated resume? */ + if (test_bit(wIndex, &bus_state->resuming_ports)) { + /* Host initated resume doesn't time the resume + * signalling using resume_done[]. + * It manually sets RESUME state, sleeps 20ms + * and sets U0 state. This should probably be + * changed, but not right now. + */ + } else { + /* port resume was discovered now and here, + * start resume timing + */ + unsigned long timeout = jiffies + + msecs_to_jiffies(USB_RESUME_TIMEOUT); + + set_bit(wIndex, &bus_state->resuming_ports); + bus_state->resume_done[wIndex] = timeout; + mod_timer(&hcd->rh_timer, timeout); + } + /* Has resume been signalled for USB_RESUME_TIME yet? */ + } else if (time_after_eq(jiffies, + bus_state->resume_done[wIndex])) { int time_left; xhci_dbg(xhci, "Resume USB2 port %d\n", @@ -661,13 +683,24 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, } else { /* * The resume has been signaling for less than - * 20ms. Report the port status as SUSPEND, - * let the usbcore check port status again - * and clear resume signaling later. + * USB_RESUME_TIME. Report the port status as SUSPEND, + * let the usbcore check port status again and clear + * resume signaling later. */ status |= USB_PORT_STAT_SUSPEND; } } + /* + * Clear stale usb2 resume signalling variables in case port changed + * state during resume signalling. For example on error + */ + if ((bus_state->resume_done[wIndex] || + test_bit(wIndex, &bus_state->resuming_ports)) && + (raw_port_status & PORT_PLS_MASK) != XDEV_U3 && + (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) { + bus_state->resume_done[wIndex] = 0; + clear_bit(wIndex, &bus_state->resuming_ports); + } if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 && (raw_port_status & PORT_POWER) && (bus_state->suspended_ports & (1 << wIndex))) { @@ -998,6 +1031,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, if ((temp & PORT_PE) == 0) goto error; + set_bit(wIndex, &bus_state->resuming_ports); xhci_set_link_state(xhci, port_array, wIndex, XDEV_RESUME); spin_unlock_irqrestore(&xhci->lock, flags); @@ -1005,6 +1039,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, spin_lock_irqsave(&xhci->lock, flags); xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); + clear_bit(wIndex, &bus_state->resuming_ports); } bus_state->port_c_suspend |= 1 << wIndex; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c15dbffe660b..cd88034c7a83 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1595,7 +1595,8 @@ static void handle_port_status(struct xhci_hcd *xhci, */ bogus_port_status = true; goto cleanup; - } else { + } else if (!test_bit(faked_port_index, + &bus_state->resuming_ports)) { xhci_dbg(xhci, "resume HS port %d\n", port_id); bus_state->resume_done[faked_port_index] = jiffies + msecs_to_jiffies(USB_RESUME_TIMEOUT); -- cgit v1.2.3 From a6846cfd266b48af1ee7c3c19d5cb60477ca4469 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 22 Oct 2016 16:45:02 -0400 Subject: Linux 3.18.44 Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 28912c341bd8..05b64e6d1456 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 18 -SUBLEVEL = 43 +SUBLEVEL = 44 EXTRAVERSION = NAME = Diseased Newt -- cgit v1.2.3