From b48c29b1ddffa7f743a9da9404553deaf5882e14 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:13 -0800 Subject: mm/huge_memory: rename freeze_page() to unmap_page() commit 906f9cdfc2a0800f13683f9e4ebdfd08c12ee81b upstream. The term "freeze" is used in several ways in the kernel, and in mm it has the particular meaning of forcing page refcount temporarily to 0. freeze_page() is just too confusing a name for a function that unmaps a page: rename it unmap_page(), and rename unfreeze_page() remap_page(). Went to change the mention of freeze_page() added later in mm/rmap.c, but found it to be incorrect: ordinary page reclaim reaches there too; but the substance of the comment still seems correct, so edit it down. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9f7bba700e4e..583ad61cc2f1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1839,7 +1839,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, } } -static void freeze_page(struct page *page) +static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED; @@ -1862,7 +1862,7 @@ static void freeze_page(struct page *page) VM_BUG_ON_PAGE(ret, page + i - 1); } -static void unfreeze_page(struct page *page) +static void remap_page(struct page *page) { int i; @@ -1971,7 +1971,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); for (i = 0; i < 
HPAGE_PMD_NR; i++) { struct page *subpage = head + i; @@ -2138,7 +2138,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } /* - * Racy check if we can split the page, before freeze_page() will + * Racy check if we can split the page, before unmap_page() will * split PMDs */ if (total_mapcount(head) != page_count(head) - extra_pins - 1) { @@ -2147,7 +2147,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } mlocked = PageMlocked(page); - freeze_page(head); + unmap_page(head); VM_BUG_ON_PAGE(compound_mapcount(head), head); /* Make sure the page is not on per-CPU pagevec as it takes pin */ @@ -2199,7 +2199,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) fail: if (mapping) spin_unlock(&mapping->tree_lock); spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); ret = -EBUSY; } -- cgit v1.2.3 From fb732e62bf37d34160ff69d46e2ab89e2e98a70a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Apr 2018 16:23:28 -0700 Subject: mm/huge_memory.c: reorder operations in __split_huge_page_tail() commit 605ca5ede7643a01f4c4a15913f9714ac297f8a6 upstream. THP split makes non-atomic change of tail page flags. This is almost ok because tail pages are locked and isolated but this breaks recent changes in page locking: non-atomic operation could clear bit PG_waiters. As a result concurrent sequence get_page_unless_zero() -> lock_page() might block forever. Especially if this page was truncated later. Fix is trivial: clone flags before unfreezing page reference counter. This race exists since commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") while unsafe unfreeze itself was added in commit 8df651c7059e ("thp: cleanup split_huge_page()"). 
clear_compound_head() also must be called before unfreezing page reference because after successful get_page_unless_zero() might follow put_page() which needs correct compound_head(). And replace page_ref_inc()/page_ref_add() with page_ref_unfreeze() which is made especially for that and has semantic of smp_store_release(). Link: http://lkml.kernel.org/r/151844393341.210639.13162088407980624477.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 583ad61cc2f1..c14aec110e90 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1876,26 +1876,13 @@ static void __split_huge_page_tail(struct page *head, int tail, struct page *page_tail = head + tail; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); /* - * tail_page->_refcount is zero and not changing from under us. But - * get_page_unless_zero() may be running from under us on the - * tail_page. If we used atomic_set() below instead of atomic_inc() or - * atomic_add(), we would then run atomic_set() concurrently with - * get_page_unless_zero(), and atomic_set() is implemented in C not - * using locked ops. spin_unlock on x86 sometime uses locked ops - * because of PPro errata 66, 92, so unless somebody can guarantee - * atomic_set() here would be safe on all archs (and not only on x86), - * it's safer to use atomic_inc()/atomic_add(). + * Clone page flags before unfreezing refcount. + * + * After successful get_page_unless_zero() might follow flags change, + * for exmaple lock_page() which set PG_waiters. 
*/ - if (PageAnon(head)) { - page_ref_inc(page_tail); - } else { - /* Additional pin to radix tree */ - page_ref_add(page_tail, 2); - } - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (head->flags & ((1L << PG_referenced) | @@ -1907,14 +1894,21 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); - /* - * After clearing PageTail the gup refcount can be released. - * Page flags also must be visible before we make the page non-compound. - */ + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); + /* + * Clear PageTail before unfreezing page refcount. + * + * After successful get_page_unless_zero() might follow put_page() + * which needs correct compound_head(). + */ clear_compound_head(page_tail); + /* Finally unfreeze refcount. Additional reference from page cache. */ + page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || + PageSwapCache(head))); + if (page_is_young(head)) set_page_young(page_tail); if (page_is_idle(head)) -- cgit v1.2.3 From ffdad597ccfc6ccb8b6f1b6c2c77c3e79e90fb36 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:16 -0800 Subject: mm/huge_memory: splitting set mapping+index before unfreeze commit 173d9d9fd3ddae84c110fea8aedf1f26af6be9ec upstream. Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page). Move the setting of mapping and index up before the page_ref_unfreeze() in __split_huge_page_tail() to fix this: so that a page cache lookup cannot get a reference while the tail's mapping and index are unstable. In fact, might as well move them up before the smp_wmb(): I don't see an actual need for that, but if I'm missing something, this way round is safer than the other, and no less efficient. 
You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is misplaced, and should be left until after the trylock_page(); but left as is has not crashed since, and gives more stringent assurance. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Jerome Glisse Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c14aec110e90..5beb62fa3d30 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1894,6 +1894,12 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); + /* ->mapping in first tail page is compound_mapcount */ + VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, + page_tail); + page_tail->mapping = head->mapping; + page_tail->index = head->index + tail; + /* Page flags must be visible before we make the page non-compound. 
*/ smp_wmb(); @@ -1914,12 +1920,6 @@ static void __split_huge_page_tail(struct page *head, int tail, if (page_is_idle(head)) set_page_idle(page_tail); - /* ->mapping in first tail page is compound_mapcount */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, - page_tail); - page_tail->mapping = head->mapping; - - page_tail->index = head->index + tail; page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); lru_add_page_tail(head, page_tail, lruvec, list); } -- cgit v1.2.3 From b59b24fed59d322718a3f0fcaacf492335e9c9ca Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:21 -0800 Subject: mm/huge_memory: fix lockdep complaint on 32-bit i_size_read() commit 006d3ff27e884f80bd7d306b041afc415f63598f upstream. Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that __split_huge_page() was using i_size_read() while holding the irq-safe lru_lock and page tree lock, but the 32-bit i_size_read() uses an irq-unsafe seqlock which should not be nested inside them. Instead, read the i_size earlier in split_huge_page_to_list(), and pass the end offset down to __split_huge_page(): all while holding head page lock, which is enough to prevent truncation of that extent before the page tree lock has been taken. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5beb62fa3d30..7ea8da990b9d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1925,12 +1925,11 @@ static void __split_huge_page_tail(struct page *head, int tail, } static void __split_huge_page(struct page *page, struct list_head *list, - unsigned long flags) + pgoff_t end, unsigned long flags) { struct page *head = compound_head(page); struct zone *zone = page_zone(head); struct lruvec *lruvec; - pgoff_t end = -1; int i; lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); @@ -1938,9 +1937,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(head); - if (!PageAnon(page)) - end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); - for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ @@ -2093,6 +2089,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int count, mapcount, extra_pins, ret; bool mlocked; unsigned long flags; + pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -2114,6 +2111,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) goto out; } extra_pins = 0; + end = -1; mapping = NULL; anon_vma_lock_write(anon_vma); } else { @@ -2129,6 +2127,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) extra_pins = HPAGE_PMD_NR; anon_vma = NULL; i_mmap_lock_read(mapping); + + /* + *__split_huge_page() may need to trim off pages beyond EOF: + * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, + * which 
cannot be nested inside the page tree lock. So note + * end now: i_size itself may be changed at any moment, but + * head page lock is good enough to serialize the trimming. + */ + end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); } /* @@ -2178,7 +2185,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); - __split_huge_page(page, list, flags); + __split_huge_page(page, list, end, flags); ret = 0; } else { if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { -- cgit v1.2.3 From 10e458e6eb59e533e499269258b6069392bec627 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:25 -0800 Subject: mm/khugepaged: collapse_shmem() stop if punched or truncated commit 701270fa193aadf00bdcf607738f64997275d4c7 upstream. Huge tmpfs testing showed that although collapse_shmem() recognizes a concurrently truncated or hole-punched page correctly, its handling of holes was liable to refill an emptied extent. Add check to stop that. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Reviewed-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 1df37ee996d5..62de24194f24 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1351,6 +1351,16 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { int n = min(iter.index, end) - index; + /* + * Stop if extent has been hole-punched, and is now completely + * empty (the more obvious i_size_read() check would take an + * irq-unsafe seqlock on 32-bit). 
+ */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } + /* * Handle holes in the radix tree: charge it from shmem and * insert relevant subpage of new_page into the radix-tree. @@ -1462,6 +1472,11 @@ out_unlock: if (result == SCAN_SUCCEED && index < end) { int n = end - index; + /* Stop if extent has been truncated, and is now empty */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } if (!shmem_charge(mapping->host, n)) { result = SCAN_FAIL; goto tree_locked; -- cgit v1.2.3 From cae7ed256d772766dd2fe3ad9a08ec77f24b9503 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 6 Sep 2017 16:22:56 -0700 Subject: shmem: shmem_charge: verify max_block is not exceeded before inode update commit b1cc94ab2f2ba31fcb2c59df0b9cf03f6d720553 upstream. Patch series "userfaultfd: enable zeropage support for shmem". These patches enable support for UFFDIO_ZEROPAGE for shared memory. The first two patches are not strictly related to userfaultfd, they are just minor refactoring to reduce amount of code duplication. This patch (of 7): Currently we update inode and shmem_inode_info before verifying that used_blocks will not exceed max_blocks. In case it will, we undo the update. Let's switch the order and move the verification of the blocks count before the inode and shmem_inode_info update. Link: http://lkml.kernel.org/r/1497939652-16528-2-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: "Kirill A. 
Shutemov" Cc: Hillf Danton Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 358a92be43eb..b26f11221ea8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -254,6 +254,14 @@ bool shmem_charge(struct inode *inode, long pages) if (shmem_acct_block(info->flags, pages)) return false; + + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks - pages) > 0) + goto unacct; + percpu_counter_add(&sbinfo->used_blocks, pages); + } + spin_lock_irqsave(&info->lock, flags); info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; @@ -261,20 +269,11 @@ bool shmem_charge(struct inode *inode, long pages) spin_unlock_irqrestore(&info->lock, flags); inode->i_mapping->nrpages += pages; - if (!sbinfo->max_blocks) - return true; - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - pages) > 0) { - inode->i_mapping->nrpages -= pages; - spin_lock_irqsave(&info->lock, flags); - info->alloced -= pages; - shmem_recalc_inode(inode); - spin_unlock_irqrestore(&info->lock, flags); - shmem_unacct_blocks(info->flags, pages); - return false; - } - percpu_counter_add(&sbinfo->used_blocks, pages); return true; + +unacct: + shmem_unacct_blocks(info->flags, pages); + return false; } void shmem_uncharge(struct inode *inode, long pages) -- cgit v1.2.3 From 9815b0fcec677a484514b28124d31df64c71a901 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 6 Sep 2017 16:22:59 -0700 Subject: shmem: introduce shmem_inode_acct_block commit 0f0796945614b7523987f7eea32407421af4b1ee upstream. The shmem_acct_block and the update of used_blocks are following one another in all the places they are used. Combine these two into a helper function. 
Link: http://lkml.kernel.org/r/1497939652-16528-3-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Hugh Dickins Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 82 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b26f11221ea8..e30ffaa065a4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -181,6 +181,38 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); } +static inline bool shmem_inode_acct_block(struct inode *inode, long pages) +{ + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + + if (shmem_acct_block(info->flags, pages)) + return false; + + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks - pages) > 0) + goto unacct; + percpu_counter_add(&sbinfo->used_blocks, pages); + } + + return true; + +unacct: + shmem_unacct_blocks(info->flags, pages); + return false; +} + +static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages) +{ + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + + if (sbinfo->max_blocks) + percpu_counter_sub(&sbinfo->used_blocks, pages); + shmem_unacct_blocks(info->flags, pages); +} + static const struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; @@ -237,31 +269,20 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced 
-= freed; inode->i_blocks -= freed * BLOCKS_PER_PAGE; - shmem_unacct_blocks(info->flags, freed); + shmem_inode_unacct_blocks(inode, freed); } } bool shmem_charge(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); unsigned long flags; - if (shmem_acct_block(info->flags, pages)) + if (!shmem_inode_acct_block(inode, pages)) return false; - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - pages) > 0) - goto unacct; - percpu_counter_add(&sbinfo->used_blocks, pages); - } - spin_lock_irqsave(&info->lock, flags); info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; @@ -270,16 +291,11 @@ bool shmem_charge(struct inode *inode, long pages) inode->i_mapping->nrpages += pages; return true; - -unacct: - shmem_unacct_blocks(info->flags, pages); - return false; } void shmem_uncharge(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); unsigned long flags; spin_lock_irqsave(&info->lock, flags); @@ -288,9 +304,7 @@ void shmem_uncharge(struct inode *inode, long pages) shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - if (sbinfo->max_blocks) - percpu_counter_sub(&sbinfo->used_blocks, pages); - shmem_unacct_blocks(info->flags, pages); + shmem_inode_unacct_blocks(inode, pages); } /* @@ -1423,9 +1437,10 @@ static struct page *shmem_alloc_page(gfp_t gfp, } static struct page *shmem_alloc_and_acct_page(gfp_t gfp, - struct shmem_inode_info *info, struct shmem_sb_info *sbinfo, + struct inode *inode, pgoff_t index, bool huge) { + struct shmem_inode_info *info = SHMEM_I(inode); struct page *page; int nr; int err = -ENOSPC; @@ -1434,14 +1449,8 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, huge = false; nr = huge ? 
HPAGE_PMD_NR : 1; - if (shmem_acct_block(info->flags, nr)) + if (!shmem_inode_acct_block(inode, nr)) goto failed; - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - nr) > 0) - goto unacct; - percpu_counter_add(&sbinfo->used_blocks, nr); - } if (huge) page = shmem_alloc_hugepage(gfp, info, index); @@ -1454,10 +1463,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, } err = -ENOMEM; - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -nr); -unacct: - shmem_unacct_blocks(info->flags, nr); + shmem_inode_unacct_blocks(inode, nr); failed: return ERR_PTR(err); } @@ -1717,10 +1723,9 @@ repeat: } alloc_huge: - page = shmem_alloc_and_acct_page(gfp, info, sbinfo, - index, true); + page = shmem_alloc_and_acct_page(gfp, inode, index, true); if (IS_ERR(page)) { -alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo, +alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, index, false); } if (IS_ERR(page)) { @@ -1842,10 +1847,7 @@ clear: * Error recovery. */ unacct: - if (sbinfo->max_blocks) - percpu_counter_sub(&sbinfo->used_blocks, - 1 << compound_order(page)); - shmem_unacct_blocks(info->flags, 1 << compound_order(page)); + shmem_inode_unacct_blocks(inode, 1 << compound_order(page)); if (PageTransHuge(page)) { unlock_page(page); -- cgit v1.2.3 From 0dba3e54920bc876aaf88060358d96093b5e7c83 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:29 -0800 Subject: mm/khugepaged: fix crashes due to misaccounted holes commit aaa52e340073b7f4593b3c4ddafcafa70cf838b5 upstream. Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent hit shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by clear_inode()'s BUG_ON(inode->i_data.nrpages) when the file was later closed and unlinked. khugepaged's collapse_shmem() was forgetting to update mapping->nrpages on the rollback path, after it had added but then needs to undo some holes. 
There is indeed an irritating asymmetry between shmem_charge(), whose callers want it to increment nrpages after successfully accounting blocks, and shmem_uncharge(), when __delete_from_page_cache() already decremented nrpages itself: oh well, just add a comment on that to them both. And shmem_recalc_inode() is supposed to be called when the accounting is expected to be in balance (so it can deduce from imbalance that reclaim discarded some pages): so change shmem_charge() to update nrpages earlier (though it's rare for the difference to matter at all). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils Fixes: 800d8c63b2e98 ("shmem: add huge pages support") Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 4 +++- mm/shmem.c | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 62de24194f24..3f7bfd98b0e6 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1538,8 +1538,10 @@ tree_unlocked: *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ - shmem_uncharge(mapping->host, nr_none); spin_lock_irq(&mapping->tree_lock); + mapping->nrpages -= nr_none; + shmem_uncharge(mapping->host, nr_none); + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { if (iter.index >= end) diff --git a/mm/shmem.c b/mm/shmem.c index e30ffaa065a4..54911bbc74d6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -283,12 +283,14 @@ bool shmem_charge(struct inode *inode, long pages) if (!shmem_inode_acct_block(inode, pages)) return false; + /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ + inode->i_mapping->nrpages += pages; + spin_lock_irqsave(&info->lock, flags); 
info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - inode->i_mapping->nrpages += pages; return true; } @@ -298,6 +300,8 @@ void shmem_uncharge(struct inode *inode, long pages) struct shmem_inode_info *info = SHMEM_I(inode); unsigned long flags; + /* nrpages adjustment done by __delete_from_page_cache() or caller */ + spin_lock_irqsave(&info->lock, flags); info->alloced -= pages; inode->i_blocks -= pages * BLOCKS_PER_PAGE; -- cgit v1.2.3 From 5c0ecc2ba54201881c54a51ead083bba85176a76 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:35 -0800 Subject: mm/khugepaged: collapse_shmem() remember to clear holes commit 2af8ff291848cc4b1cce24b6c943394eb2c761e8 upstream. Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp flags khugepaged uses to allocate a huge page - in all common cases it would just be a waste of effort - so collapse_shmem() must remember to clear out any holes that it instantiates. The obvious place to do so, where they are put into the page cache tree, is not a good choice: because interrupts are disabled there. Leave it until further down, once success is assured, where the other pages are copied (before setting PageUptodate). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3f7bfd98b0e6..2d3ce49f6b45 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1501,7 +1501,12 @@ tree_unlocked: * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. */ + index = start; list_for_each_entry_safe(page, tmp, &pagelist, lru) { + while (index < page->index) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; + } copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); @@ -1511,6 +1516,11 @@ tree_unlocked: ClearPageActive(page); ClearPageUnevictable(page); put_page(page); + index++; + } + while (index < end) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; } local_irq_save(flags); -- cgit v1.2.3 From c2ca73b7ab3d5e0edc69e9a0a9e464407cabaec1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:39 -0800 Subject: mm/khugepaged: minor reorderings in collapse_shmem() commit 042a30824871fa3149b0127009074b75cc25863c upstream. Several cleanups in collapse_shmem(): most of which probably do not really matter, beyond doing things in a more familiar and reassuring order. Simplify the failure gotos in the main loop, and on success update stats while interrupts still disabled from the last iteration. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 73 ++++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 2d3ce49f6b45..47b83030fc53 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1332,13 +1332,12 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } + __SetPageLocked(new_page); + __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - __SetPageSwapBacked(new_page); - __SetPageLocked(new_page); BUG_ON(!page_ref_freeze(new_page, 1)); - /* * At this point the new_page is 'frozen' (page_count() is zero), locked * and not up-to-date. It's safe to insert it into radix tree, because @@ -1367,13 +1366,13 @@ static void collapse_shmem(struct mm_struct *mm, */ if (n && !shmem_charge(mapping->host, n)) { result = SCAN_FAIL; - break; + goto tree_locked; } - nr_none += n; for (; index < min(iter.index, end); index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); } + nr_none += n; /* We are done. 
*/ if (index >= end) @@ -1389,12 +1388,12 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_FAIL; goto tree_unlocked; } - spin_lock_irq(&mapping->tree_lock); } else if (trylock_page(page)) { get_page(page); + spin_unlock_irq(&mapping->tree_lock); } else { result = SCAN_PAGE_LOCK; - break; + goto tree_locked; } /* @@ -1409,11 +1408,10 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_TRUNCATED; goto out_unlock; } - spin_unlock_irq(&mapping->tree_lock); if (isolate_lru_page(page)) { result = SCAN_DEL_PAGE_LRU; - goto out_isolate_failed; + goto out_unlock; } if (page_mapped(page)) @@ -1435,7 +1433,9 @@ static void collapse_shmem(struct mm_struct *mm, */ if (!page_ref_freeze(page, 3)) { result = SCAN_PAGE_COUNT; - goto out_lru; + spin_unlock_irq(&mapping->tree_lock); + putback_lru_page(page); + goto out_unlock; } /* @@ -1451,17 +1451,10 @@ static void collapse_shmem(struct mm_struct *mm, slot = radix_tree_iter_next(&iter); index++; continue; -out_lru: - spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); -out_isolate_failed: - unlock_page(page); - put_page(page); - goto tree_unlocked; out_unlock: unlock_page(page); put_page(page); - break; + goto tree_unlocked; } /* @@ -1469,7 +1462,7 @@ out_unlock: * This code only triggers if there's nothing in radix tree * beyond 'end'. 
*/ - if (result == SCAN_SUCCEED && index < end) { + if (index < end) { int n = end - index; /* Stop if extent has been truncated, and is now empty */ @@ -1481,7 +1474,6 @@ out_unlock: result = SCAN_FAIL; goto tree_locked; } - for (; index < end; index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); @@ -1489,14 +1481,19 @@ out_unlock: nr_none += n; } + __inc_node_page_state(new_page, NR_SHMEM_THPS); + if (nr_none) { + struct zone *zone = page_zone(new_page); + + __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); + __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); + } + tree_locked: spin_unlock_irq(&mapping->tree_lock); tree_unlocked: if (result == SCAN_SUCCEED) { - unsigned long flags; - struct zone *zone = page_zone(new_page); - /* * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. @@ -1510,11 +1507,11 @@ tree_unlocked: copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); - unlock_page(page); - page_ref_unfreeze(page, 1); page->mapping = NULL; + page_ref_unfreeze(page, 1); ClearPageActive(page); ClearPageUnevictable(page); + unlock_page(page); put_page(page); index++; } @@ -1523,28 +1520,17 @@ tree_unlocked: index++; } - local_irq_save(flags); - __inc_node_page_state(new_page, NR_SHMEM_THPS); - if (nr_none) { - __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); - __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); - } - local_irq_restore(flags); - - /* - * Remove pte page tables, so we can re-faulti - * the page as huge. 
- */ - retract_page_tables(mapping, start); - /* Everything is ready, let's unfreeze the new_page */ - set_page_dirty(new_page); SetPageUptodate(new_page); page_ref_unfreeze(new_page, HPAGE_PMD_NR); + set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); - unlock_page(new_page); + /* + * Remove pte page tables, so we can re-fault the page as huge. + */ + retract_page_tables(mapping, start); *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ @@ -1576,8 +1562,8 @@ tree_unlocked: page_ref_unfreeze(page, 2); radix_tree_replace_slot(slot, page); spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); unlock_page(page); + putback_lru_page(page); spin_lock_irq(&mapping->tree_lock); slot = radix_tree_iter_next(&iter); } @@ -1587,9 +1573,10 @@ tree_unlocked: /* Unfreeze new_page, caller would take care about freeing it */ page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); - unlock_page(new_page); new_page->mapping = NULL; } + + unlock_page(new_page); out: VM_BUG_ON(!list_empty(&pagelist)); /* TODO: tracepoints */ -- cgit v1.2.3 From 8dcbb5f21567c3a85a7e4ec781d0e49ea174397d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:43 -0800 Subject: mm/khugepaged: collapse_shmem() without freezing new_page commit 87c460a0bded56195b5eb497d44709777ef7b415 upstream. khugepaged's collapse_shmem() does almost all of its work, to assemble the huge new_page from 512 scattered old pages, with the new_page's refcount frozen to 0 (and refcounts of all old pages so far also frozen to 0). Including shmem_getpage() to read in any which were out on swap, memory reclaim if necessary to allocate their intermediate pages, and copying over all the data from old to new. 
Imagine the frozen refcount as a spinlock held, but without any lock debugging to highlight the abuse: it's not good, and under serious load heads into lockups - speculative getters of the page are not expecting to spin while khugepaged is rescheduled. One can get a little further under load by hacking around elsewhere; but fortunately, freezing the new_page turns out to have been entirely unnecessary, with no hacks needed elsewhere. The huge new_page lock is already held throughout, and guards all its subpages as they are brought one by one into the page cache tree; and anything reading the data in that page, without the lock, before it has been marked PageUptodate, would already be in the wrong. So simply eliminate the freezing of the new_page. Each of the old pages remains frozen with refcount 0 after it has been replaced by a new_page subpage in the page cache tree, until they are all unfrozen on success or failure: just as before. They could be unfrozen sooner, but cause no problem once no longer visible to find_get_entry(), filemap_map_pages() and other speculative lookups. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 47b83030fc53..b87bd43993bd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1286,7 +1286,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * collapse_shmem - collapse small tmpfs/shmem pages into huge one. 
* * Basic scheme is simple, details are more complex: - * - allocate and freeze a new huge page; + * - allocate and lock a new huge page; * - scan over radix tree replacing old pages the new one * + swap in pages if necessary; * + fill in gaps; @@ -1294,11 +1294,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * - if replacing succeed: * + copy data over; * + free old pages; - * + unfreeze huge page; + * + unlock huge page; * - if replacing failed; * + put all pages back and unfreeze them; * + restore gaps in the radix-tree; - * + free huge page; + * + unlock and free huge page; */ static void collapse_shmem(struct mm_struct *mm, struct address_space *mapping, pgoff_t start, @@ -1336,13 +1336,11 @@ static void collapse_shmem(struct mm_struct *mm, __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - BUG_ON(!page_ref_freeze(new_page, 1)); /* - * At this point the new_page is 'frozen' (page_count() is zero), locked - * and not up-to-date. It's safe to insert it into radix tree, because - * nobody would be able to map it or use it in other way until we - * unfreeze it. + * At this point the new_page is locked and not up-to-date. + * It's safe to insert it into the page cache, because nobody would + * be able to map it or use it in another way until we unlock it. 
*/ index = start; @@ -1520,9 +1518,8 @@ tree_unlocked: index++; } - /* Everything is ready, let's unfreeze the new_page */ SetPageUptodate(new_page); - page_ref_unfreeze(new_page, HPAGE_PMD_NR); + page_ref_add(new_page, HPAGE_PMD_NR - 1); set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); @@ -1570,8 +1567,6 @@ tree_unlocked: VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); - /* Unfreeze new_page, caller would take care about freeing it */ - page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); new_page->mapping = NULL; } -- cgit v1.2.3 From dc62803e271decb287929d44789fe9170eec5ba7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:47 -0800 Subject: mm/khugepaged: collapse_shmem() do not crash on Compound commit 06a5e1268a5fb9c2b346a3da6b97e85f2eba0f07 upstream. collapse_shmem()'s VM_BUG_ON_PAGE(PageTransCompound) was unsafe: before it holds page lock of the first page, racing truncation then extension might conceivably have inserted a hugepage there already. Fail with the SCAN_PAGE_COMPOUND result, instead of crashing (CONFIG_DEBUG_VM=y) or otherwise mishandling the unexpected hugepage - though later we might code up a more constructive way of handling it, with SCAN_SUCCESS. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261529310.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Cc: Kirill A. 
Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b87bd43993bd..e0cfc3a54b6a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1400,7 +1400,15 @@ static void collapse_shmem(struct mm_struct *mm, */ VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageUptodate(page), page); - VM_BUG_ON_PAGE(PageTransCompound(page), page); + + /* + * If file was truncated then extended, or hole-punched, before + * we locked the first page, then a THP might be there already. + */ + if (PageTransCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out_unlock; + } if (page_mapping(page) != mapping) { result = SCAN_TRUNCATED; -- cgit v1.2.3 From 6fc74d9f9b412b295f9ad01af7a7e62a662aa5bd Mon Sep 17 00:00:00 2001 From: Matthias Schwarzott Date: Mon, 30 Oct 2017 06:07:29 -0400 Subject: media: em28xx: Fix use-after-free when disconnecting [ Upstream commit 910b0797fa9e8af09c44a3fa36cb310ba7a7218d ] Fix bug by moving the i2c_unregister_device calls after deregistration of dvb frontend. The new style i2c drivers already destroys the frontend object at i2c_unregister_device time. 
When the dvb frontend is unregistered afterwards it leads to this oops: [ 6058.866459] BUG: unable to handle kernel NULL pointer dereference at 00000000000001f8 [ 6058.866578] IP: dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.866644] PGD 0 [ 6058.866646] P4D 0 [ 6058.866726] Oops: 0000 [#1] SMP [ 6058.866768] Modules linked in: rc_pinnacle_pctv_hd(O) em28xx_rc(O) si2157(O) si2168(O) em28xx_dvb(O) em28xx(O) si2165(O) a8293(O) tda10071(O) tea5767(O) tuner(O) cx23885(O) tda18271(O) videobuf2_dvb(O) videobuf2_dma_sg(O) m88ds3103(O) tveeprom(O) cx2341x(O) v4l2_common(O) dvb_core(O) rc_core(O) videobuf2_memops(O) videobuf2_v4l2(O) videobuf2_core(O) videodev(O) media(O) bluetooth ecdh_generic ums_realtek uas rtl8192cu rtl_usb rtl8192c_common rtlwifi usb_storage snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic i2c_mux snd_hda_intel snd_hda_codec snd_hwdep x86_pkg_temp_thermal snd_hda_core kvm_intel kvm irqbypass [last unloaded: videobuf2_memops] [ 6058.867497] CPU: 2 PID: 7349 Comm: kworker/2:0 Tainted: G W O 4.13.9-gentoo #1 [ 6058.867595] Hardware name: MEDION E2050 2391/H81H3-EM2, BIOS H81EM2W08.308 08/25/2014 [ 6058.867692] Workqueue: usb_hub_wq hub_event [ 6058.867746] task: ffff88011a15e040 task.stack: ffffc90003074000 [ 6058.867825] RIP: 0010:dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.867896] RSP: 0018:ffffc90003077b58 EFLAGS: 00010293 [ 6058.867964] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000010040001f [ 6058.868056] RDX: ffff88011a15e040 RSI: ffffea000464e400 RDI: ffff88001cbe3028 [ 6058.868150] RBP: ffffc90003077b68 R08: ffff880119390380 R09: 000000010040001f [ 6058.868241] R10: ffffc90003077b18 R11: 000000000001e200 R12: ffff88001cbe3028 [ 6058.868330] R13: ffff88001cbe68d0 R14: ffff8800cf734000 R15: ffff8800cf734098 [ 6058.868419] FS: 0000000000000000(0000) GS:ffff88011fb00000(0000) knlGS:0000000000000000 [ 6058.868511] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6058.868578] CR2: 00000000000001f8 CR3: 00000001113c5000 CR4: 
00000000001406e0 [ 6058.868662] Call Trace: [ 6058.868705] dvb_unregister_frontend+0x2a/0x80 [dvb_core] [ 6058.868774] em28xx_dvb_fini+0x132/0x220 [em28xx_dvb] [ 6058.868840] em28xx_close_extension+0x34/0x90 [em28xx] [ 6058.868902] em28xx_usb_disconnect+0x4e/0x70 [em28xx] [ 6058.868968] usb_unbind_interface+0x6d/0x260 [ 6058.869025] device_release_driver_internal+0x150/0x210 [ 6058.869094] device_release_driver+0xd/0x10 [ 6058.869150] bus_remove_device+0xe4/0x160 [ 6058.869204] device_del+0x1ce/0x2f0 [ 6058.869253] usb_disable_device+0x99/0x270 [ 6058.869306] usb_disconnect+0x8d/0x260 [ 6058.869359] hub_event+0x93d/0x1520 [ 6058.869408] ? dequeue_task_fair+0xae5/0xd20 [ 6058.869467] process_one_work+0x1d9/0x3e0 [ 6058.869522] worker_thread+0x43/0x3e0 [ 6058.869576] kthread+0x104/0x140 [ 6058.869602] ? trace_event_raw_event_workqueue_work+0x80/0x80 [ 6058.869640] ? kthread_create_on_node+0x40/0x40 [ 6058.869673] ret_from_fork+0x22/0x30 [ 6058.869698] Code: 54 49 89 fc 53 48 8b 9f 18 03 00 00 0f 1f 44 00 00 41 83 bc 24 04 05 00 00 02 74 0c 41 c7 84 24 04 05 00 00 01 00 00 00 0f ae f0 <48> 8b bb f8 01 00 00 48 85 ff 74 5c e8 df 40 f0 e0 48 8b 93 f8 [ 6058.869850] RIP: dvb_frontend_stop+0x30/0xd0 [dvb_core] RSP: ffffc90003077b58 [ 6058.869894] CR2: 00000000000001f8 [ 6058.875880] ---[ end trace 717eecf7193b3fc6 ]--- Signed-off-by: Matthias Schwarzott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/em28xx/em28xx-dvb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c index 8cedef0daae4..b0aea48907b7 100644 --- a/drivers/media/usb/em28xx/em28xx-dvb.c +++ b/drivers/media/usb/em28xx/em28xx-dvb.c @@ -2016,6 +2016,8 @@ static int em28xx_dvb_fini(struct em28xx *dev) } } + em28xx_unregister_dvb(dvb); + /* remove I2C SEC */ client = dvb->i2c_client_sec; if (client) { @@ -2037,7 +2039,6 @@ static int em28xx_dvb_fini(struct em28xx *dev) 
i2c_unregister_device(client); } - em28xx_unregister_dvb(dvb); kfree(dvb); dev->dvb = NULL; kref_put(&dev->ref, em28xx_free_device); -- cgit v1.2.3 From ad0ee4f58533dec7c49269864055f30d4dd2bdd7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 2 Dec 2018 10:03:24 -0500 Subject: Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()" This reverts commit afeeecc764436f31d4447575bb9007732333818c which was upstream commit 4ec7cece87b3ed21ffcd407c62fb2f151a366bc1. From Dietmar May's report on the stable mailing list (https://www.spinics.net/lists/stable/msg272201.html): > I've run into some problems which appear due to (a) recent patch(es) on > the wlcore wifi driver. > > 4.4.160 - commit 3fdd34643ffc378b5924941fad40352c04610294 > 4.9.131 - commit afeeecc764436f31d4447575bb9007732333818c > > Earlier versions (4.9.130 and 4.4.159 - tested back to 4.4.49) do not > exhibit this problem. It is still present in 4.9.141. > > master as of 4.20.0-rc4 does not exhibit this problem. > > Basically, during client association when in AP mode (running hostapd), > handshake may or may not complete following a noticeable delay. If > successful, then the driver fails consistently in warn_slowpath_null > during disassociation. If unsuccessful, the wifi client attempts multiple > times, sometimes failing repeatedly. I've had clients unable to connect > for 3-5 minutes during testing, with the syslog filled with dozens of > backtraces. syslog details are below. > > I'm working on an embedded device with a TI 3352 ARM processor and a > murata wl1271 module in sdio mode. We're running a fully patched ubuntu > 18.04 ARM build, with a kernel built from kernel.org's stable/linux repo . > Relevant parts of the kernel config are included below. > > The commit message states: > > > /I've only seen this few times with the runtime PM patches enabled so > > this one is probably not needed before that. 
This seems to work > > currently based on the current PM implementation timer. Let's apply > > this separately though in case others are hitting this issue./ > We're not doing anything explicit with power management. The device is an > IoT edge gateway with battery backup, normally running on wall power. The > battery is currently used solely to shut down the system cleanly to avoid > filesystem corruption. > > The device tree is configured to keep power in suspend; but the device > should never suspend, so in our case, there is no need to call > wl1271_ps_elp_wakeup() or wl1271_ps_elp_sleep(), as occurs in the patch. Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/cmd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 96f83f09b8c5..7f4da727bb7b 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -35,7 +35,6 @@ #include "wl12xx_80211.h" #include "cmd.h" #include "event.h" -#include "ps.h" #include "tx.h" #include "hw_ops.h" @@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); - ret = wl1271_ps_elp_wakeup(wl); - if (ret < 0) - return ret; - do { if (time_after(jiffies, timeout_time)) { wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", @@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: - wl1271_ps_elp_sleep(wl); kfree(events_vector); return ret; } -- cgit v1.2.3 From d39ebd199a639df704f50db315e93aa45d9ca33c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 20 Nov 2018 11:39:56 +0000 Subject: net: skb_scrub_packet(): Scrub offload_fwd_mark [ Upstream commit b5dd186d10ba59e6b5ba60e42b3b083df56df6f3 ] When a packet is trapped and the corresponding SKB marked as already-forwarded, it retains this marking even after it is forwarded across veth links into another bridge. 
There, since it ingresses the bridge over veth, which doesn't have offload_fwd_mark, it triggers a warning in nbp_switchdev_frame_mark(). Then nbp_switchdev_allowed_egress() decides not to allow egress from this bridge through another veth, because the SKB is already marked, and the mark (of 0) of course matches. Thus the packet is incorrectly blocked. Solve by resetting offload_fwd_mark() in skb_scrub_packet(). That function is called from tunnels and also from veth, and thus catches the cases where traffic is forwarded between bridges and transformed in a way that invalidates the marking. Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices") Fixes: abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field") Signed-off-by: Petr Machata Suggested-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 68ecb7d71c2b..dca1fed0d7da 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4421,6 +4421,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset(skb); nf_reset_trace(skb); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; +#endif + if (!xnet) return; -- cgit v1.2.3 From 13a3d8908e54f99a98af3c41f733aab9e93a0c56 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 14:53:19 +0800 Subject: rapidio/rionet: do not free skb before reading its length [ Upstream commit cfc435198f53a6fa1f656d98466b24967ff457d0 ] skb is freed via dev_kfree_skb_any, however, skb->len is read then. This may result in a use-after-free bug. Fixes: e6161d64263 ("rapidio/rionet: rework driver initialization and removal") Signed-off-by: Pan Bian Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/rionet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index a31f4610b493..2c2604e3f633 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) * it just report sending a packet to the target * (without actual packet transfer). */ - dev_kfree_skb_any(skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; + dev_kfree_skb_any(skb); } } -- cgit v1.2.3 From 1d3891c724da45955fddc99652387a35926bb5fb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 28 Nov 2018 16:20:50 +0100 Subject: s390/qeth: fix length check in SNMP processing [ Upstream commit 9a764c1e59684c0358e16ccaafd870629f2cfe67 ] The response for a SNMP request can consist of multiple parts, which the cmd callback stages into a kernel buffer until all parts have been received. If the callback detects that the staging buffer provides insufficient space, it bails out with error. This processing is buggy for the first part of the response - while it initially checks for a length of 'data_len', it later copies an additional amount of 'offsetof(struct qeth_snmp_cmd, data)' bytes. Fix the calculation of 'data_len' for the first part of the response. This also nicely cleans up the memcpy code. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a5e603062ee0..8f77fc0630ce 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4540,8 +4540,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, { struct qeth_ipa_cmd *cmd; struct qeth_arp_query_info *qinfo; - struct qeth_snmp_cmd *snmp; unsigned char *data; + void *snmp_data; __u16 data_len; QETH_CARD_TEXT(card, 3, "snpcmdcb"); @@ -4549,7 +4549,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card, cmd = (struct qeth_ipa_cmd *) sdata; data = (unsigned char *)((char *)cmd - reply->offset); qinfo = (struct qeth_arp_query_info *) reply->param; - snmp = &cmd->data.setadapterparms.data.snmp; if (cmd->hdr.return_code) { QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); @@ -4562,10 +4561,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card, return 0; } data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); - if (cmd->data.setadapterparms.hdr.seq_no == 1) - data_len -= (__u16)((char *)&snmp->data - (char *)cmd); - else - data_len -= (__u16)((char *)&snmp->request - (char *)cmd); + if (cmd->data.setadapterparms.hdr.seq_no == 1) { + snmp_data = &cmd->data.setadapterparms.data.snmp; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp); + } else { + snmp_data = &cmd->data.setadapterparms.data.snmp.request; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp.request); + } /* check if there is enough room in userspace */ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { @@ -4578,16 +4582,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card, QETH_CARD_TEXT_(card, 4, "sseqn%i", cmd->data.setadapterparms.hdr.seq_no); /*copy entries to user buffer*/ - if (cmd->data.setadapterparms.hdr.seq_no == 1) { - 
memcpy(qinfo->udata + qinfo->udata_offset, - (char *)snmp, - data_len + offsetof(struct qeth_snmp_cmd, data)); - qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data); - } else { - memcpy(qinfo->udata + qinfo->udata_offset, - (char *)&snmp->request, data_len); - } + memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len); qinfo->udata_offset += data_len; + /* check if all replies received ... */ QETH_CARD_TEXT_(card, 4, "srtot%i", cmd->data.setadapterparms.hdr.used_total); -- cgit v1.2.3 From ff67a7d34bc5015b50856ae458d3e7c89d2acda0 Mon Sep 17 00:00:00 2001 From: Bernd Eckstein <3erndeckstein@gmail.com> Date: Fri, 23 Nov 2018 13:51:26 +0100 Subject: usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2 [ Upstream commit 45611c61dd503454b2edae00aabe1e429ec49ebe ] The bug is not easily reproducible, as it may occur very infrequently (we had machines with 20 minutes of heavy downloading before it occurred). However, on a virtual machine (VMWare on Windows 10 host) it occurred pretty frequently (1-2 seconds after a speedtest was started) dev->tx_skb may be freed via dev_kfree_skb_irq on a callback before it is set.
This causes the following problems: - double free of the skb or potential memory leak - in dmesg: 'recvmsg bug' and 'recvmsg bug 2' and eventually general protection fault Example dmesg output: [ 134.841986] ------------[ cut here ]------------ [ 134.841987] recvmsg bug: copied 9C24A555 seq 9C24B557 rcvnxt 9C25A6B3 fl 0 [ 134.841993] WARNING: CPU: 7 PID: 2629 at /build/linux-hwe-On9fm7/linux-hwe-4.15.0/net/ipv4/tcp.c:1865 tcp_recvmsg+0x44d/0xab0 [ 134.841994] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842046] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842046] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842048] RIP: 0010:tcp_recvmsg+0x44d/0xab0 [ 134.842048] RSP: 0018:ffffa6630422bcc8 EFLAGS: 00010286 [ 134.842049] RAX: 0000000000000000 RBX: ffff997616f4f200 RCX: 0000000000000006 [ 134.842049] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9976257d6490 [ 134.842050] RBP: ffffa6630422bd98 R08: 0000000000000001 R09: 000000000004bba4 [ 134.842050] R10: 0000000001e00c6f R11: 000000000004bba4 R12: ffff99760dee3000 [ 134.842051] R13: 0000000000000000 R14: ffff99760dee3514 R15: 0000000000000000 [ 134.842051] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842052] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842053] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842055] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842057] Call Trace: [ 134.842060] ? aa_sk_perm+0x53/0x1a0 [ 134.842064] inet_recvmsg+0x51/0xc0 [ 134.842066] sock_recvmsg+0x43/0x50 [ 134.842070] SYSC_recvfrom+0xe4/0x160 [ 134.842072] ? __schedule+0x3de/0x8b0 [ 134.842075] ? 
ktime_get_ts64+0x4c/0xf0 [ 134.842079] SyS_recvfrom+0xe/0x10 [ 134.842082] do_syscall_64+0x73/0x130 [ 134.842086] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842086] RIP: 0033:0x7fe331f5a81d [ 134.842088] RSP: 002b:00007ffe8da98398 EFLAGS: 00000246 ORIG_RAX: 000000000000002d [ 134.842090] RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 00007fe331f5a81d [ 134.842094] RDX: 00000000000003fb RSI: 0000000001e00874 RDI: 0000000000000003 [ 134.842095] RBP: 00007fe32f642c70 R08: 0000000000000000 R09: 0000000000000000 [ 134.842097] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe332347698 [ 134.842099] R13: 0000000001b7e0a0 R14: 0000000001e00874 R15: 0000000000000000 [ 134.842103] Code: 24 fd ff ff e9 cc fe ff ff 48 89 d8 41 8b 8c 24 10 05 00 00 44 8b 45 80 48 c7 c7 08 bd 59 8b 48 89 85 68 ff ff ff e8 b3 c4 7d ff <0f> 0b 48 8b 85 68 ff ff ff e9 e9 fe ff ff 41 8b 8c 24 10 05 00 [ 134.842126] ---[ end trace b7138fc08c83147f ]--- [ 134.842144] general protection fault: 0000 [#1] SMP PTI [ 134.842145] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842161] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842162] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842164] RIP: 0010:tcp_close+0x2c6/0x440 [ 134.842165] RSP: 0018:ffffa6630422bde8 EFLAGS: 00010202 [ 134.842167] RAX: 0000000000000000 RBX: ffff99760dee3000 RCX: 0000000180400034 [ 134.842168] RDX: 5c4afd407207a6c4 RSI: ffffe868495bd300 RDI: ffff997616f4f200 [ 134.842169] RBP: ffffa6630422be08 R08: 0000000016f4d401 R09: 0000000180400034 [ 134.842169] R10: ffffa6630422bd98 R11: 0000000000000000 R12: 000000000000600c [ 134.842170] R13: 0000000000000000 R14: ffff99760dee30c8 R15: ffff9975bd44fe00 [ 134.842171] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842173] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842174] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842177] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842178] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842179] Call Trace: [ 134.842181] inet_release+0x42/0x70 [ 134.842183] __sock_release+0x42/0xb0 [ 134.842184] sock_close+0x15/0x20 [ 134.842187] __fput+0xea/0x220 [ 134.842189] ____fput+0xe/0x10 [ 134.842191] task_work_run+0x8a/0xb0 [ 134.842193] exit_to_usermode_loop+0xc4/0xd0 [ 134.842195] do_syscall_64+0xf4/0x130 [ 134.842197] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842197] RIP: 0033:0x7fe331f5a560 [ 134.842198] RSP: 002b:00007ffe8da982e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 [ 134.842200] RAX: 0000000000000000 RBX: 00007fe32f642c70 RCX: 00007fe331f5a560 [ 134.842201] RDX: 00000000008f5320 RSI: 0000000001cd4b50 RDI: 0000000000000003 [ 134.842202] RBP: 00007fe32f6500f8 R08: 000000000000003c R09: 00000000009343c0 [ 134.842203] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe32f6500d0 [ 134.842204] R13: 00000000008f5320 R14: 00000000008f5320 R15: 0000000001cd4770 [ 134.842205] Code: c8 00 00 00 45 31 e4 49 39 fe 75 4d eb 50 83 ab d8 00 00 00 01 48 8b 17 48 8b 47 08 48 c7 07 00 00 
00 00 48 c7 47 08 00 00 00 00 <48> 89 42 08 48 89 10 0f b6 57 34 8b 47 2c 2b 47 28 83 e2 01 80 [ 134.842226] RIP: tcp_close+0x2c6/0x440 RSP: ffffa6630422bde8 [ 134.842227] ---[ end trace b7138fc08c831480 ]--- The proposed patch eliminates a potential race condition. Before, usb_submit_urb was called and _after_ that, the skb was attached (dev->tx_skb). So, on a callback it was possible, however unlikely, that the skb was freed before it was set. That way (because dev->tx_skb was not set to NULL after it was freed), it could happen that an skb from an earlier transmission was freed a second time (and the skb we should have freed did not get freed at all). Now we free the skb directly in ipheth_tx(). It is not passed to the callback anymore, eliminating the possibility of a double free of the same skb. Depending on the retval of usb_submit_urb() we use dev_kfree_skb_any() or dev_consume_skb_any(), respectively, to free the skb. Signed-off-by: Oliver Zweigle Signed-off-by: Bernd Eckstein <3ernd.Eckstein@gmail.com> Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ipheth.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 76465b117b72..f1f8227e7342 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -140,7 +140,6 @@ struct ipheth_device { struct usb_device *udev; struct usb_interface *intf; struct net_device *net; - struct sk_buff *tx_skb; struct urb *tx_urb; struct urb *rx_urb; unsigned char *tx_buf; @@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: + case -EPROTO: return; case 0: break; @@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb) dev_err(&dev->intf->dev, "%s: urb status: %d\n", __func__, status); - dev_kfree_skb_irq(dev->tx_skb); netif_wake_queue(dev->net); } @@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) if (skb->len > IPHETH_BUF_SIZE) { WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); dev->net->stats.tx_dropped++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); dev->net->stats.tx_errors++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); } else { - dev->tx_skb = skb; - dev->net->stats.tx_packets++; dev->net->stats.tx_bytes += skb->len; + dev_consume_skb_any(skb); netif_stop_queue(net); } -- cgit v1.2.3 From aa716434a05907a6302e008a24d8dcdd6e7d47bf Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 31 Oct 2018 14:53:57 -0700 Subject: kvm: mmu: Fix race in emulated page table writes commit 0e0fee5c539b61fdd098332e0e2cc375d9073706 upstream. When a guest page table is updated via an emulated write, kvm_mmu_pte_write() is called to update the shadow PTE using the just written guest PTE value. 
But if two emulated guest PTE writes happened concurrently, it is possible that the guest PTE and the shadow PTE end up being out of sync. Emulated writes do not mark the shadow page as unsync-ed, so this inconsistency will not be resolved even by a guest TLB flush (unless the page was marked as unsync-ed at some other point). This is fixed by re-reading the current value of the guest PTE after the MMU lock has been acquired instead of just using the value that was written prior to calling kvm_mmu_pte_write(). Signed-off-by: Junaid Shahid Reviewed-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8a4d6bc8fed0..676edfc19a95 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4297,9 +4297,9 @@ static bool need_remote_flush(u64 old, u64 new) } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - const u8 *new, int *bytes) + int *bytes) { - u64 gentry; + u64 gentry = 0; int r; /* @@ -4311,22 +4311,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); - if (r) - gentry = 0; - new = (const u8 *)&gentry; } - switch (*bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; } return gentry; @@ -4437,8 +4427,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); - /* * No need to care whether allocation memory is successful * or 
not since pte prefetch is skiped if it does not have @@ -4447,6 +4435,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); + + gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); + ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); -- cgit v1.2.3 From 43dd9f48871e6765972182a048ab4e0ecc24c712 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 22 May 2018 09:54:20 -0700 Subject: kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb commit fd65d3142f734bc4376053c8d75670041903134d upstream. Previously, we only called indirect_branch_prediction_barrier on the logical CPU that freed a vmcb. This function should be called on all logical CPUs that last loaded the vmcb in question. Fixes: 15d45071523d ("KVM/x86: Add IBPB support") Reported-by: Neel Natu Signed-off-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5f44d63a9d69..4bc35ac28d11 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1672,21 +1672,31 @@ out: return ERR_PTR(err); } +static void svm_clear_current_vmcb(struct vmcb *vmcb) +{ + int i; + + for_each_online_cpu(i) + cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); +} + static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So, ensure that no logical CPU has this + * vmcb page recorded as its current vmcb. 
+ */ + svm_clear_current_vmcb(svm->vmcb); + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); - /* - * The vmcb page can be recycled, causing a false negative in - * svm_vcpu_load(). So do a full IBPB now. - */ - indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -- cgit v1.2.3 From 3a468e8e5a6124523e2e94c33866c609cc914876 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 20 Nov 2018 16:34:18 +0800 Subject: KVM: X86: Fix scan ioapic use-before-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e97f852fd4561e77721bb9a4e0ea9d98305b1e93 upstream. Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001c8 PGD 80000003ec4da067 P4D 80000003ec4da067 PUD 3f7bfa067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 5059 Comm: debug Tainted: G OE 4.19.0-rc5 #16 RIP: 0010:__lock_acquire+0x1a6/0x1990 Call Trace: lock_acquire+0xdb/0x210 _raw_spin_lock+0x38/0x70 kvm_ioapic_scan_entry+0x3e/0x110 [kvm] vcpu_enter_guest+0x167e/0x1910 [kvm] kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm] kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x83/0x6e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The reason is that the testcase writes hyperv synic HV_X64_MSR_SINT6 msr and triggers scan ioapic logic to load synic vectors into EOI exit bitmap. However, irqchip is not initialized by this simple testcase, ioapic/apic objects should not be accessed. 
This can be triggered by the following program: #define _GNU_SOURCE #include #include #include #include #include #include #include #include uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; int main(void) { syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); long res = 0; memcpy((void*)0x20000040, "/dev/kvm", 9); res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000040, 0, 0); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, r[0], 0xae01, 0); if (res != -1) r[1] = res; res = syscall(__NR_ioctl, r[1], 0xae41, 0); if (res != -1) r[2] = res; memcpy( (void*)0x20000080, "\x01\x00\x00\x00\x00\x5b\x61\xbb\x96\x00\x00\x40\x00\x00\x00\x00\x01\x00" "\x08\x00\x00\x00\x00\x00\x0b\x77\xd1\x78\x4d\xd8\x3a\xed\xb1\x5c\x2e\x43" "\xaa\x43\x39\xd6\xff\xf5\xf0\xa8\x98\xf2\x3e\x37\x29\x89\xde\x88\xc6\x33" "\xfc\x2a\xdb\xb7\xe1\x4c\xac\x28\x61\x7b\x9c\xa9\xbc\x0d\xa0\x63\xfe\xfe" "\xe8\x75\xde\xdd\x19\x38\xdc\x34\xf5\xec\x05\xfd\xeb\x5d\xed\x2e\xaf\x22" "\xfa\xab\xb7\xe4\x42\x67\xd0\xaf\x06\x1c\x6a\x35\x67\x10\x55\xcb", 106); syscall(__NR_ioctl, r[2], 0x4008ae89, 0x20000080); syscall(__NR_ioctl, r[2], 0xae80, 0); return 0; } This patch fixes it by bailing out scan ioapic if ioapic is not initialized in kernel. 
Reported-by: Wei Wu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wei Wu Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5013ef165f44..27d13b870e07 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6661,7 +6661,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) else { if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); -- cgit v1.2.3 From c26e3c6c2dc45d4d580b33bb754e71d6deaa4e5a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 13:29:41 -0800 Subject: xtensa: enable coprocessors that are being flushed commit 2958b66694e018c552be0b60521fec27e8d12988 upstream. coprocessor_flush_all may be called from a context of a thread that is different from the thread being flushed. In that case contents of the cpenable special register may not match ti->cpenable of the target thread, resulting in unhandled coprocessor exception in the kernel context. Set cpenable special register to the ti->cpenable of the target register for the duration of the flush and restore it afterwards. 
This fixes the following crash caused by coprocessor register inspection in native gdb: (gdb) p/x $w0 Illegal instruction in kernel: sig: 9 [#1] PREEMPT Call Trace: ___might_sleep+0x184/0x1a4 __might_sleep+0x41/0xac exit_signals+0x14/0x218 do_exit+0xc9/0x8b8 die+0x99/0xa0 do_illegal_instruction+0x18/0x6c common_exception+0x77/0x77 coprocessor_flush+0x16/0x3c arch_ptrace+0x46c/0x674 sys_ptrace+0x2ce/0x3b4 system_call+0x54/0x80 common_exception+0x77/0x77 note: gdb[100] exited with preempt_count 1 Killed Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/process.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index e0ded48561db..570307c91846 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -85,18 +85,21 @@ void coprocessor_release_all(struct thread_info *ti) void coprocessor_flush_all(struct thread_info *ti) { - unsigned long cpenable; + unsigned long cpenable, old_cpenable; int i; preempt_disable(); + RSR_CPENABLE(old_cpenable); cpenable = ti->cpenable; + WSR_CPENABLE(cpenable); for (i = 0; i < XCHAL_CP_MAX; i++) { if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) coprocessor_flush(ti, i); cpenable >>= 1; } + WSR_CPENABLE(old_cpenable); preempt_enable(); } -- cgit v1.2.3 From f403887502a876ec7efaf636886e5fb8183928fe Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 15:18:26 -0800 Subject: xtensa: fix coprocessor context offset definitions commit 03bc996af0cc71c7f30c384d8ce7260172423b34 upstream. Coprocessor context offsets are used by the assembly code that moves coprocessor context between the individual fields of the thread_info::xtregs_cp structure and coprocessor registers. 
This fixes coprocessor context clobbering on flushing and reloading during normal user code execution and user process debugging in the presence of more than one coprocessor in the core configuration. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/asm-offsets.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 8e10e357ee32..f1af06b8f3cd 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -91,14 +91,14 @@ int main(void) DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); #if XTENSA_HAVE_COPROCESSORS - DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); + DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); + DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); + DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); + DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); + DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); + DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); + DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); + DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); #endif DEFINE(THREAD_XTREGS_USER, offsetof (struct 
thread_info, xtregs_user)); DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); -- cgit v1.2.3 From 6e1210e2850d94fa7c4b298b31fd40a913b6d17a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 14 Nov 2018 11:35:24 +0000 Subject: Btrfs: ensure path name is null terminated at btrfs_control_ioctl commit f505754fd6599230371cb01b9332754ddc104be1 upstream. We were using the path name received from user space without checking that it is null terminated. While btrfs-progs is well behaved and does proper validation and null termination, someone could call the ioctl and pass a non-null terminated path, leading to buffer overrun problems in the kernel. The ioctl is protected by CAP_SYS_ADMIN. So just set the last byte of the path to a null character, similar to what we do in other ioctls (add/remove/resize device, snapshot creation, etc). CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f6e111984ce2..a7b69deb6d70 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2226,6 +2226,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); + vol->name[BTRFS_PATH_NAME_MAX] = '\0'; switch (cmd) { case BTRFS_IOC_SCAN_DEV: -- cgit v1.2.3 From 08c133e86be21e0598e2203184915c15636d1103 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:10 +0100 Subject: perf/x86/intel: Move branch tracing setup to the Intel-specific source file commit ed6101bbf6266ee83e620b19faa7c6ad56bb41ab upstream. Moving branch tracing setup to Intel core object into separate intel_pmu_bts_config function, because it's Intel specific.
Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 20 -------------------- arch/x86/events/intel/core.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 655a65eaf105..cadf99923600 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -437,26 +437,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 815039327932..369006b9ef3c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2822,10 +2822,49 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + + if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !attr->freq && hwc->sample_period == 1) { + /* BTS is not supported by this architecture. 
*/ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); if (ret) return ret; @@ -3265,7 +3304,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, -- cgit v1.2.3 From 54f738293d30143da54c592d751a446d375f1c30 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:11 +0100 Subject: perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() commit 67266c1080ad56c31af72b9c18355fde8ccc124a upstream. Currently we check the branch tracing only by checking for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE type. But we can define the same event with the PERF_TYPE_RAW type. Changing the intel_pmu_has_bts() code to check on event's final hw config value, so both HW types are covered. Adding unlikely to intel_pmu_has_bts() condition calls, because it was used in the original code in intel_bts_constraints. 
Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 17 +++-------------- arch/x86/events/perf_event.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 369006b9ef3c..4f8560774082 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2198,16 +2198,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -2825,10 +2816,8 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) static int intel_pmu_bts_config(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { + if (unlikely(intel_pmu_has_bts(event))) { /* BTS is not supported by this architecture. 
*/ if (!x86_pmu.bts_active) return -EOPNOTSUPP; @@ -2887,7 +2876,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 1bfebbc4d156..7ace39c51ff7 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -835,11 +835,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return false; + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); -- cgit v1.2.3 From 55eb06b7728f28b0197fa8977094fcaa38d38ca4 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Fri, 30 Nov 2018 08:35:14 -0700 Subject: fs: fix lost error code in dio_complete commit 41e817bca3acd3980efe5dd7d28af0e6f4ab9247 upstream. commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the generic_write_sync prototype") reworked callers of generic_write_sync(), and ended up dropping the error return for the directio path. Prior to that commit, in dio_complete(), an error would be bubbled up the stack, but after that commit, errors passed on to dio_complete were eaten up. This was reported on the list earlier, and a fix was proposed in https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but never followed up with. 
We recently hit this bug in our testing where fencing io errors, which were previously erroring out with EIO, were being returned as success operations after this commit. The fix proposed on the list earlier was a little short -- it would have still called generic_write_sync() in case `ret` already contained an error. This fix ensures generic_write_sync() is only called when there's no pending error in the write. Additionally, transferred is replaced with ret to bring this code in line with other callers. Fixes: e259221763a4 ("fs: simplify the generic_write_sync prototype") Reported-by: Ravi Nankani Signed-off-by: Maximilian Heyne Reviewed-by: Christoph Hellwig CC: Torsten Mehlan CC: Uwe Dannowski CC: Amit Shah CC: David Woodhouse CC: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index c6220a2daefd..07cc38ec66ca 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -278,8 +278,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) */ dio->iocb->ki_pos += transferred; - if (dio->op == REQ_OP_WRITE) - ret = generic_write_sync(dio->iocb, transferred); + if (ret > 0 && dio->op == REQ_OP_WRITE) + ret = generic_write_sync(dio->iocb, ret); dio->iocb->ki_complete(dio->iocb, ret, 0); } -- cgit v1.2.3 From 12b2efff8874a5ff359b8bb087b7f2cd383fc76c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:16:33 +0100 Subject: ALSA: wss: Fix invalid snd_free_pages() at error path commit 7b69154171b407844c273ab4c10b5f0ddcd6aa29 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of wss driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. 
Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/wss/wss_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 913b731d2236..f40330ddb9b2 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma1); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->playback_substream = substream; @@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma2); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->capture_substream = substream; -- cgit v1.2.3 From 5d3201bbffe0c45a6de1a52732733403cc8ceca2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 15:44:00 +0100 Subject: ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write commit 7194eda1ba0872d917faf3b322540b4f57f11ba5 upstream. The function snd_ac97_put_spsa() gets the bit shift value from the associated private_value, but it extracts too much; the current code extracts 8 bit values in bits 8-15, but this is a combination of two nibbles (bits 8-11 and bits 12-15) for left and right shifts. Due to the incorrect bits extraction, the actual shift may go beyond the 32bit value, as spotted recently by UBSAN check: UBSAN: Undefined behaviour in sound/pci/ac97/ac97_codec.c:836:7 shift exponent 68 is too large for 32-bit type 'int' This patch fixes the shift value extraction by masking the properly with 0x0f instead of 0xff. 
Reported-and-tested-by: Meelis Roos Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ac97/ac97_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 82259ca61e64..c4840fda44b4 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ { struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol); int reg = kcontrol->private_value & 0xff; - int shift = (kcontrol->private_value >> 8) & 0xff; + int shift = (kcontrol->private_value >> 8) & 0x0f; int mask = (kcontrol->private_value >> 16) & 0xff; // int invert = (kcontrol->private_value >> 24) & 0xff; unsigned short value, old, new; -- cgit v1.2.3 From e3ff60d897dd82b4a6bcbd0c8d3ac708a841af84 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Nov 2018 14:36:17 +0100 Subject: ALSA: control: Fix race between adding and removing a user element commit e1a7bfe3807974e66f971f2589d4e0197ec0fced upstream. The procedure for adding a user control element has some window opened for race against the concurrent removal of a user element. This was caught by syzkaller, hitting a KASAN use-after-free error. This patch addresses the bug by wrapping the whole procedure to add a user control element with the card->controls_rwsem, instead of only around the increment of card->user_ctl_count. This required a slight code refactoring, too. The function snd_ctl_add() is split to two parts: a core function to add the control element and a part calling it. The former is called from the function for adding a user control element inside the controls_rwsem. One change to be noted is that snd_ctl_notify() for adding a control element gets called inside the controls_rwsem as well while it was called outside the rwsem. 
But this should be OK, as snd_ctl_notify() takes another (finer) rwlock instead of rwsem, and the call of snd_ctl_notify() inside rwsem is already done in another code path. Reported-by: syzbot+dc09047bce3820621ba2@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 80 +++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 995cde48c1be..511368fe974e 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -346,6 +346,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) return 0; } +/* add a new kcontrol object; call with card->controls_rwsem locked */ +static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) +{ + struct snd_ctl_elem_id id; + unsigned int idx; + unsigned int count; + + id = kcontrol->id; + if (id.index > UINT_MAX - kcontrol->count) + return -EINVAL; + + if (snd_ctl_find_id(card, &id)) { + dev_err(card->dev, + "control %i:%i:%i:%s:%i is already present\n", + id.iface, id.device, id.subdevice, id.name, id.index); + return -EBUSY; + } + + if (snd_ctl_find_hole(card, kcontrol->count) < 0) + return -ENOMEM; + + list_add_tail(&kcontrol->list, &card->controls); + card->controls_count += kcontrol->count; + kcontrol->id.numid = card->last_numid + 1; + card->last_numid += kcontrol->count; + + id = kcontrol->id; + count = kcontrol->count; + for (idx = 0; idx < count; idx++, id.index++, id.numid++) + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + + return 0; +} + /** * snd_ctl_add - add the control instance to the card * @card: the card instance @@ -362,45 +396,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) */ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { - struct snd_ctl_elem_id id; - unsigned int idx; - unsigned int count; int err = -EINVAL; if (! 
kcontrol) return err; if (snd_BUG_ON(!card || !kcontrol->info)) goto error; - id = kcontrol->id; - if (id.index > UINT_MAX - kcontrol->count) - goto error; down_write(&card->controls_rwsem); - if (snd_ctl_find_id(card, &id)) { - up_write(&card->controls_rwsem); - dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", - id.iface, - id.device, - id.subdevice, - id.name, - id.index); - err = -EBUSY; - goto error; - } - if (snd_ctl_find_hole(card, kcontrol->count) < 0) { - up_write(&card->controls_rwsem); - err = -ENOMEM; - goto error; - } - list_add_tail(&kcontrol->list, &card->controls); - card->controls_count += kcontrol->count; - kcontrol->id.numid = card->last_numid + 1; - card->last_numid += kcontrol->count; - id = kcontrol->id; - count = kcontrol->count; + err = __snd_ctl_add(card, kcontrol); up_write(&card->controls_rwsem); - for (idx = 0; idx < count; idx++, id.index++, id.numid++) - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + if (err < 0) + goto error; return 0; error: @@ -1354,9 +1361,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, kctl->tlv.c = snd_ctl_elem_user_tlv; /* This function manage to free the instance on failure. */ - err = snd_ctl_add(card, kctl); - if (err < 0) - return err; + down_write(&card->controls_rwsem); + err = __snd_ctl_add(card, kctl); + if (err < 0) { + snd_ctl_free_one(kctl); + goto unlock; + } offset = snd_ctl_get_ioff(kctl, &info->id); snd_ctl_build_ioff(&info->id, kctl, offset); /* @@ -1367,10 +1377,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, * which locks the element. 
*/ - down_write(&card->controls_rwsem); card->user_ctl_count++; - up_write(&card->controls_rwsem); + unlock: + up_write(&card->controls_rwsem); return 0; } -- cgit v1.2.3 From 9c4a8f6f627c2ade859ef71c179052890658c844 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:18:30 +0100 Subject: ALSA: sparc: Fix invalid snd_free_pages() at error path commit 9a20332ab373b1f8f947e0a9c923652b32dab031 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of sparc cs4231 driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sparc/cs4231.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index 30bdc971883b..017e241b0ec9 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_playback; err = snd_cs4231_open(chip, CS4231_MODE_PLAY); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->playback_substream = substream; chip->p_periods_sent = 0; snd_pcm_set_sync(substream); @@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_capture; err = snd_cs4231_open(chip, CS4231_MODE_RECORD); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->capture_substream = substream; chip->c_periods_sent = 0; snd_pcm_set_sync(substream); -- cgit v1.2.3 From 4e443d70a1b3aabb4c73425ea0c650c790513e58 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 25 Nov 2018 08:58:02 +0800 Subject: ext2: fix potential use after free commit ecebf55d27a11538ea84aee0be643dd953f830d5 upstream. 
The function ext2_xattr_set calls brelse(bh) to drop the reference count of bh. After that, bh may be freed. However, following brelse(bh), it reads bh->b_data via macro HDR(bh). This may result in a use-after-free bug. This patch moves brelse(bh) after reading the field. CC: stable@vger.kernel.org Signed-off-by: Pan Bian Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index fbdb8f171893..babef30d440b 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -609,9 +609,9 @@ skip_replace: } cleanup: - brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); + brelse(bh); up_write(&EXT2_I(inode)->xattr_sem); return error; -- cgit v1.2.3 From 0daa7fc2c50d4a0ca0f12ee3328d8866a37d01ef Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:34 +0100 Subject: dmaengine: at_hdmac: fix memory leak in at_dma_xlate() commit 98f5f932254b88ce828bc8e4d1642d14e5854caa upstream. The leak was found when opening/closing a serial port a great number of times, increasing kmalloc-32 in slabinfo. Each time the port was opened, dma_request_slave_channel() was called. Then, in at_dma_xlate(), atslave was allocated with devm_kzalloc() and never freed. (Well, it was freed at module unload, but that's not what we want). So, here, kzalloc is more suited for the job since it has to be freed in atc_free_chan_resources().
Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Reported-by: Mario Forner Suggested-by: Alexandre Belloni Acked-by: Alexandre Belloni Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e2cec5b357fd..d3277e13a09e 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1774,6 +1774,12 @@ static void atc_free_chan_resources(struct dma_chan *chan) atchan->descs_allocated = 0; atchan->status = 0; + /* + * Free atslave allocated in at_dma_xlate() + */ + kfree(chan->private); + chan->private = NULL; + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1808,7 +1814,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); + atslave = kzalloc(sizeof(*atslave), GFP_KERNEL); if (!atslave) return NULL; -- cgit v1.2.3 From 07d8abace810e8e0af93638b06bcf1e3f6a1845a Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:35 +0100 Subject: dmaengine: at_hdmac: fix module unloading commit 77e75fda94d2ebb86aa9d35fb1860f6395bf95de upstream. of_dma_controller_free() was not called on module unloading. This led to a soft lockup: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! Modules linked in: at_hdmac [last unloaded: at_hdmac] when of_dma_request_slave_channel() tried to call ofdma->of_dma_xlate().
Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d3277e13a09e..a32cd71f94bb 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -2145,6 +2145,8 @@ static int at_dma_remove(struct platform_device *pdev) struct resource *io; at_dma_off(atdma); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&atdma->dma_common); dma_pool_destroy(atdma->memset_pool); -- cgit v1.2.3 From 36d8dbf23fc7458fdb6ca683ffa1e565fd7fec23 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 20 Nov 2018 10:11:21 +0200 Subject: btrfs: release metadata before running delayed refs We want to release the unused reservation we have since it refills the delayed refs reserve, which will make everything go smoother when running the delayed refs if we're short on our reservation. 
CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/transaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9517de0e668c..fd6c74662e9a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1924,6 +1924,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ @@ -1933,9 +1936,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /* -- cgit v1.2.3 From 72c6bc47e0b54db16f2b1a04656bb590d7d7117e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 23 Nov 2018 08:42:19 +0000 Subject: USB: usb-storage: Add new IDs to ums-realtek commit a84a1bcc992f0545a51d2e120b8ca2ef20e2ea97 upstream. There are two new Realtek card readers that require ums-realtek to work correctly. Add the new IDs to support them.
Signed-off-by: Kai-Heng Feng Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_realtek.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h index 8fe624ad302a..7ca779493671 100644 --- a/drivers/usb/storage/unusual_realtek.h +++ b/drivers/usb/storage/unusual_realtek.h @@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999, "USB Card Reader", USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), +UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + +UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */ -- cgit v1.2.3 From 0f887c6686769b24b23005f41177ed5324a88419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Sun, 25 Nov 2018 17:57:33 +0100 Subject: usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit effd14f66cc1ef6701a19c5a56e39c35f4d395a5 upstream. Cherry G230 Stream 2.0 (G85-231) and 3.0 (G85-232) need this quirk to function correctly. This fixes a bug where double pressing numlock locks up the device completely with the need to replug the keyboard.
Signed-off-by: Michael Niewöhner Tested-by: Michael Niewöhner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1e8f68960014..808437c5ec49 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ + { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, -- cgit v1.2.3 From 4a978cfe599fbe87128a951db062ebe0a5339741 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 19 Nov 2018 08:34:04 +0200 Subject: Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid" commit 38317f5c0f2faae5110854f36edad810f841d62f upstream. This reverts commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3. Turns out that commit is wrong. Host controllers are allowed to use Clear Feature HALT as a means to sync the data toggle between host and peripheral.
Cc: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 26efe8c7535f..ed6b9bfe3759 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1280,9 +1280,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) unsigned transfer_in_flight; unsigned started; - if (dep->flags & DWC3_EP_STALL) - return 0; - if (dep->number > 1) trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); else @@ -1307,8 +1304,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { - if (!(dep->flags & DWC3_EP_STALL)) - return 0; ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) -- cgit v1.2.3 From 06c2233ac246507e98022809ea4752dbfead9337 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Sun, 28 Oct 2018 20:18:53 -0700 Subject: iio:st_magn: Fix enable device after trigger commit fe5192ac81ad0d4dfe1395d11f393f0513c15f7f upstream. Currently, we enable the device before we enable the device trigger. At high frequencies, this can cause interrupts that don't yet have a poll function associated with them and are thus treated as spurious. At high frequencies with level interrupts, this can even cause an interrupt storm of repeated spurious interrupts (~100,000 on my Beagleboard with the LSM9DS1 magnetometer). If these repeat too much, the interrupt will get disabled and the device will stop functioning. To prevent these problems, enable the device trigger prior to enabling the device, and disable the device prior to disabling the trigger. This means there's no window of time during which the device creates interrupts but we have no trigger to answer them.
Fixes: 90efe055629 ("iio: st_sensors: harden interrupt handling") Signed-off-by: Martin Kelly Tested-by: Denis Ciocca Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/st_magn_buffer.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index 0a9e8fadfa9d..37ab30566464 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state) return st_sensors_set_dataready_irq(indio_dev, state); } -static int st_magn_buffer_preenable(struct iio_dev *indio_dev) -{ - return st_sensors_set_enable(indio_dev, true); -} - static int st_magn_buffer_postenable(struct iio_dev *indio_dev) { int err; @@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev) if (err < 0) goto st_magn_buffer_postenable_error; - return err; + return st_sensors_set_enable(indio_dev, true); st_magn_buffer_postenable_error: kfree(mdata->buffer_data); @@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev) int err; struct st_sensor_data *mdata = iio_priv(indio_dev); - err = iio_triggered_buffer_predisable(indio_dev); + err = st_sensors_set_enable(indio_dev, false); if (err < 0) goto st_magn_buffer_predisable_error; - err = st_sensors_set_enable(indio_dev, false); + err = iio_triggered_buffer_predisable(indio_dev); st_magn_buffer_predisable_error: kfree(mdata->buffer_data); @@ -75,7 +70,6 @@ st_magn_buffer_predisable_error: } static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { - .preenable = &st_magn_buffer_preenable, .postenable = &st_magn_buffer_postenable, .predisable = &st_magn_buffer_predisable, }; -- cgit v1.2.3 From 3c470638b6a61e30b753715d2e546456dccd9437 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Nov 2018 14:09:03 -0800 Subject: mm: use 
swp_offset as key in shmem_replace_page() commit c1cb20d43728aa9b5393bd8d489bc85c142949b2 upstream. We changed the key of swap cache tree from swp_entry_t.val to swp_offset. We need to do so in shmem_replace_page() as well. Hugh said: "shmem_replace_page() has been wrong since the day I wrote it: good enough to work on swap "type" 0, which is all most people ever use (especially those few who need shmem_replace_page() at all), but broken once there are any non-0 swp_type bits set in the higher order bits" Link: http://lkml.kernel.org/r/20181121215442.138545-1-yuzhao@google.com Fixes: f6ab1f7f6b2d ("mm, swap: use offset of swap entry as key of swap cache") Signed-off-by: Yu Zhao Reviewed-by: Matthew Wilcox Acked-by: Hugh Dickins Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 54911bbc74d6..9b17bd4cbc5e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1494,11 +1494,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, { struct page *oldpage, *newpage; struct address_space *swap_mapping; + swp_entry_t entry; pgoff_t swap_index; int error; oldpage = *pagep; - swap_index = page_private(oldpage); + entry.val = page_private(oldpage); + swap_index = swp_offset(entry); swap_mapping = page_mapping(oldpage); /* @@ -1517,7 +1519,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, __SetPageLocked(newpage); __SetPageSwapBacked(newpage); SetPageUptodate(newpage); - set_page_private(newpage, swap_index); + set_page_private(newpage, entry.val); SetPageSwapCache(newpage); /* -- cgit v1.2.3 From 140ee9b7aec9f11635ff2de2410158af33ce2525 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 26 Nov 2018 02:29:56 +0000 Subject: Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl() commit eceb05965489784f24bbf4d61ba60e475a983016 upstream. 
This is a longstanding issue: if the vmbus upper-layer drivers try to consume too many GPADLs, the host may return with an error 0xC0000044 (STATUS_QUOTA_EXCEEDED), but currently we forget to check the creation_status, and hence we can pass an invalid GPADL handle into the OPEN_CHANNEL message, and get an error code 0xc0000225 in open_info->response.open_result.status, and finally we hang in vmbus_open() -> "goto error_free_info" -> vmbus_teardown_gpadl(). With this patch, we can exit gracefully on STATUS_QUOTA_EXCEEDED. Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 1606e7f08f4b..784c45484825 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -448,6 +448,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } wait_for_completion(&msginfo->waitevent); + if (msginfo->response.gpadl_created.creation_status != 0) { + pr_err("Failed to establish GPADL: err = 0x%x\n", + msginfo->response.gpadl_created.creation_status); + + ret = -EDQUOT; + goto cleanup; + } + if (channel->rescind) { ret = -ENODEV; goto cleanup; -- cgit v1.2.3 From c4c29e1b347aeea2a8bfd03b6927806d83d8a485 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 14 Nov 2018 01:57:03 +0000 Subject: misc: mic/scif: fix copy-paste error in scif_create_remote_lookup commit 6484a677294aa5d08c0210f2f387ebb9be646115 upstream. gcc '-Wunused-but-set-variable' warning: drivers/misc/mic/scif/scif_rma.c: In function 'scif_create_remote_lookup': drivers/misc/mic/scif/scif_rma.c:373:25: warning: variable 'vmalloc_num_pages' set but not used [-Wunused-but-set-variable] 'vmalloc_num_pages' should be used to determine if the address is within the vmalloc range. 
Fixes: ba612aa8b487 ("misc: mic: SCIF memory registration and unregistration") Signed-off-by: YueHaibing Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_rma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index f806a4471eb9..32ab0f43f506 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -414,7 +414,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev, if (err) goto error_window; err = scif_map_page(&window->num_pages_lookup.lookup[j], - vmalloc_dma_phys ? + vmalloc_num_pages ? vmalloc_to_page(&window->num_pages[i]) : virt_to_page(&window->num_pages[i]), remote_dev); -- cgit v1.2.3 From fb660794cd6179fa1b2d6890afb99c93d99ee2c8 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 3 Dec 2018 11:40:57 -0800 Subject: efi/libstub: arm: support building with clang (commit 41f1c48420709470c51ee0e54b6fb28b956bb4e0 upstream) When building with CONFIG_EFI and CONFIG_EFI_STUB on ARM, the libstub Makefile would use -mno-single-pic-base without checking it was supported by the compiler. As the ARM (32-bit) clang backend does not support this flag, the build would fail. This changes the Makefile to check the compiler's support for -mno-single-pic-base before using it, similar to c1c386681bd7 ("ARM: 8767/1: add support for building ARM kernel with clang"). 
Signed-off-by: Alistair Strachan Reviewed-by: Stefan Agner Signed-off-by: Ard Biesheuvel [ND: adjusted due to missing commit ce279d374ff3 ("efi/libstub: Only disable stackleak plugin for arm64")] Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 2cd9496eb696..310f8feb5174 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -12,7 +12,8 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ - -fno-builtin -fpic -mno-single-pic-base + -fno-builtin -fpic \ + $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt -- cgit v1.2.3 From 61cc8587f8e1d6dc4bd69532ebf717a87f374274 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:49:49 +0100 Subject: ARM: 8766/1: drop no-thumb-interwork in EABI mode (commit 22905a24306c8c312c2d66da9f90d09af0414f81 upstream) According to GCC documentation -m(no-)thumb-interwork is meaningless in AAPCS configurations. Also clang does not support the flag: clang-5.0: error: unknown argument: '-mno-thumb-interwork' Just drop -mno-thumb-interwork in AEABI configuration. 
Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6be9ee148b78..e14ddca59d02 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -104,7 +104,7 @@ tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) tune-y := $(tune-y) ifeq ($(CONFIG_AEABI),y) -CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp +CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif -- cgit v1.2.3 From 10479075c094a7f77abdfdf7a9d41477538a148c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:50:38 +0100 Subject: ARM: 8767/1: add support for building ARM kernel with clang (commit c1c386681bd73c4fc28eb5cc91cf8b7be9b409ba upstream) Use cc-option calls for compiler options which are not available in clang. With this patch an ARMv7 multi-platform kernel can be successfully built using clang (tested with version 5.0.1). Based-on-patches-by: Behan Webster Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index d50430c40045..552c7d7f84ce 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -112,7 +112,7 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol.
-- cgit v1.2.3 From b874a8751f9154ea88ad88bc6b044846ef3c8bab Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 16:27:26 +0200 Subject: bus: arm-cci: remove unnecessary unreachable() (commit 10d8713429d345867fc8998d6193b233c0cab28c upstream) Mixing asm and C code is not recommended in a naked function by gcc and leads to an error when using clang: drivers/bus/arm-cci.c:2107:2: error: non-ASM statement in naked function is not supported unreachable(); ^ While the function is marked __naked, it actually returns properly in asm. There is no need for the unreachable() call. GCC 7.2 generates identical object files before and after, other than (for obvious reasons) the line numbers generated by WANT_WARN_ON_SLOWPATH for all the WARN()s appearing later in the file. Suggested-by: Russell King Signed-off-by: Stefan Agner Acked-by: Nicolas Pitre Reviewed-by: Robin Murphy Signed-off-by: Olof Johansson Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-cci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 10f56133b281..8e08cb4fd7df 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -2103,8 +2103,6 @@ asmlinkage void __naked cci_enable_port_for_self(void) [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); - - unreachable(); } /** -- cgit v1.2.3 From 1e5b5cb7bf1ea17640929c940ce39c8b5c59b264 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 25 Mar 2018 20:09:56 +0200 Subject: ARM: trusted_foundations: do not use naked function (commit 4ea7bdc6b5b33427bbd3f41c333e21c1825462a3 upstream) As documented in GCC, naked functions should only use basic ASM syntax. The extended ASM or mixture of basic ASM and "C" code is not guaranteed to work.
Currently this works because it was hard coded to follow and check GCC behavior for arguments and register placement. Furthermore with clang using parameters in Extended asm in a naked function is not supported: arch/arm/firmware/trusted_foundations.c:47:10: error: parameter references not allowed in naked functions : "r" (type), "r" (arg1), "r" (arg2) ^ Use a regular function to be more portable. This aligns also with the other SMC call implementations e.g. in qcom_scm-32.c and bcm_kona_smc.c. Cc: Dmitry Osipenko Cc: Stephen Warren Cc: Thierry Reding Signed-off-by: Stefan Agner Signed-off-by: Thierry Reding Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/firmware/trusted_foundations.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/firmware/trusted_foundations.c b/arch/arm/firmware/trusted_foundations.c index 3fb1b5a1dce9..689e6565abfc 100644 --- a/arch/arm/firmware/trusted_foundations.c +++ b/arch/arm/firmware/trusted_foundations.c @@ -31,21 +31,25 @@ static unsigned long cpu_boot_addr; -static void __naked tf_generic_smc(u32 type, u32 arg1, u32 arg2) +static void tf_generic_smc(u32 type, u32 arg1, u32 arg2) { + register u32 r0 asm("r0") = type; + register u32 r1 asm("r1") = arg1; + register u32 r2 asm("r2") = arg2; + asm volatile( ".arch_extension sec\n\t" - "stmfd sp!, {r4 - r11, lr}\n\t" + "stmfd sp!, {r4 - r11}\n\t" __asmeq("%0", "r0") __asmeq("%1", "r1") __asmeq("%2", "r2") "mov r3, #0\n\t" "mov r4, #0\n\t" "smc #0\n\t" - "ldmfd sp!, {r4 - r11, pc}" + "ldmfd sp!, {r4 - r11}\n\t" : - : "r" (type), "r" (arg1), "r" (arg2) - : "memory"); + : "r" (r0), "r" (r1), "r" (r2) + : "memory", "r3", "r12", "lr"); } static int tf_set_cpu_boot_addr(int cpu, unsigned long boot_addr) -- cgit v1.2.3 From 6f35b5bd0c0ddda68b999084c5bb699e23c59efc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Feb 2017 18:01:17 +0100 Subject: workqueue: avoid clang warning (commit 
a45463cbf3f9dcdae683033c256f50bded513d6a upstream) Building with clang shows lots of warning like: drivers/amba/bus.c:447:8: warning: implicit conversion from 'long long' to 'int' changes value from 4294967248 to -48 [-Wconstant-conversion] static DECLARE_DELAYED_WORK(deferred_retry_work, amba_deferred_retry_func); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:187:26: note: expanded from macro 'DECLARE_DELAYED_WORK' struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:177:10: note: expanded from macro '__DELAYED_WORK_INITIALIZER' .work = __WORK_INITIALIZER((n).work, (f)), \ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:170:10: note: expanded from macro '__WORK_INITIALIZER' .data = WORK_DATA_STATIC_INIT(), \ ^~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:111:39: note: expanded from macro 'WORK_DATA_STATIC_INIT' ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ include/asm-generic/atomic-long.h:32:41: note: expanded from macro 'ATOMIC_LONG_INIT' #define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) ~~~~~~~~~~~~^~ arch/arm/include/asm/atomic.h:21:27: note: expanded from macro 'ATOMIC_INIT' #define ATOMIC_INIT(i) { (i) } ~ ^ This makes the type cast explicit, which shuts up the warning. 
Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1def337b16d4..8e880f7f67b2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -106,9 +106,9 @@ struct work_struct { #endif }; -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ - ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) + ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; -- cgit v1.2.3 From 6d075d215b5b8875fba153670e19560a2ffcdd45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 6 Feb 2017 11:22:46 +0000 Subject: efi/libstub: Make file I/O chunking x86-specific (commit b3879a4d3a31ef14265a52e8d941cf4b0f6627ae upstream) The ARM decompressor is finicky when it comes to uninitialized variables with local linkage, the reason being that it may relocate .text and .bss independently when executing from ROM. This is only possible if all references into .bss from .text are absolute, and this happens to be the case for references emitted under -fpic to symbols with external linkage, and so all .bss references must involve symbols with external linkage. When building the ARM stub using clang, the initialized local variable __chunk_size is optimized into a zero-initialized flag that indicates whether chunking is in effect or not. This flag is therefore emitted into .bss, which triggers the ARM decompressor's diagnostics, resulting in a failed build. Under UEFI, we never execute the decompressor from ROM, so the diagnostic makes little sense here. But we can easily work around the issue by making __chunk_size global instead. 
However, given that the file I/O chunking that is controlled by the __chunk_size variable is intended to work around known bugs on various x86 implementations of UEFI, we can simply make the chunking an x86 specific feature. This is an improvement by itself, and also removes the need to parse the efi= options in the stub entirely. Tested-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-8-git-send-email-ard.biesheuvel@linaro.org [ Small readability edits. ] Signed-off-by: Ingo Molnar Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/efi-stub-helper.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aded10662020..09d10dcf1fc6 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -355,6 +355,14 @@ efi_status_t efi_parse_options(char *cmdline) { char *str; + /* + * Currently, the only efi= option we look for is 'nochunk', which + * is intended to work around known issues on certain x86 UEFI + * versions. So ignore for now on other architectures. + */ + if (!IS_ENABLED(CONFIG_X86)) + return EFI_SUCCESS; + /* * If no EFI parameters were specified on the cmdline we've got * nothing to do. @@ -528,7 +536,8 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, size = files[j].size; while (size) { unsigned long chunksize; - if (size > __chunk_size) + + if (IS_ENABLED(CONFIG_X86) && size > __chunk_size) chunksize = __chunk_size; else chunksize = size; -- cgit v1.2.3 From 740f140b5dc4f7553a268d153fc816948cbc8cbd Mon Sep 17 00:00:00 2001 From: Chris Fries Date: Mon, 3 Dec 2018 11:56:19 -0800 Subject: kbuild: Set KBUILD_CFLAGS before incl. 
arch Makefile (commit ae6b289a37890909fea0e4a1666e19377fa0ed2c upstream) Set the clang KBUILD_CFLAGS up before including arch/ Makefiles, so that ld-options (etc.) can work correctly. This fixes errors with clang such as ld-options trying to CC against your host architecture, but LD trying to link against your target architecture. Signed-off-by: Chris Fries Signed-off-by: Nick Desaulniers Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada [ND: adjusted context due to upstream having removed code above where I placed this block in this backport] Signed-off-by: Greg Kroah-Hartman --- Makefile | 65 ++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 72ed8ff90329..b161e06b7260 100644 --- a/Makefile +++ b/Makefile @@ -509,6 +509,39 @@ ifneq ($(filter install,$(MAKECMDGOALS)),) endif endif +ifeq ($(cc-name),clang) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +endif +ifneq ($(GCC_TOOLCHAIN),) +CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +endif +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Quiet clang warning: comparison of unsigned expression < 0 is always false +KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) +# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the +# source of a reference will be _MergedGlobals and not on of the whitelisted names. 
+# See modpost pattern 2 +KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) +KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +else + +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.build) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) +endif + + ifeq ($(mixed-targets),1) # =========================================================================== # We're called with mixed targets (*config and build targets). @@ -704,38 +737,6 @@ ifdef CONFIG_CC_STACKPROTECTOR endif KBUILD_CFLAGS += $(stackp-flag) -ifeq ($(cc-name),clang) -ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) -endif -ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) -endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -# Quiet clang warning: comparison of unsigned expression < 0 is always false -KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) -# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -# source of a reference will be _MergedGlobals and not on of the whitelisted names. 
-# See modpost pattern 2 -KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) -KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) -else - -# These warnings generated too much noise in a regular build. -# Use make W=1 to enable them (see scripts/Makefile.build) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) -endif - ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else -- cgit v1.2.3 From b136f0e9e9d79b8449d99ea701ade1e17a971826 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Dec 2018 19:42:42 +0100 Subject: Linux 4.9.143 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b161e06b7260..8ec52cd19526 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 142 +SUBLEVEL = 143 EXTRAVERSION = NAME = Roaring Lionus -- cgit v1.2.3