Blame - mm/truncate.c - kernel/linux-linaro-stable.git

blob: 2d6151fc8f083629a8c4e95ddd157afbb76b8772 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* mm/truncate.c - code for taking down pages from address_spaces
				3	*
				4	* Copyright (C) 2002, Linus Torvalds
				5	*
Francois Cami	e1f8e87	2008-10-15 22:01:59 -0700	[diff] [blame]	6	* 10Sep2002 Andrew Morton
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* Initial version.
				8	*/
				9
				10	#include <linux/kernel.h>
Alexey Dobriyan	4af3c9c	2007-10-16 23:29:23 -0700	[diff] [blame]	11	#include <linux/backing-dev.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	12	#include <linux/gfp.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	#include <linux/mm.h>
Nick Piggin	0fd0e6b	2006-09-27 01:50:02 -0700	[diff] [blame]	14	#include <linux/swap.h>
Paul Gortmaker	b95f1b31	2011-10-16 02:01:52 -0400	[diff] [blame]	15	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	16	#include <linux/pagemap.h>
Nate Diller	01f2705	2007-05-09 02:35:07 -0700	[diff] [blame]	17	#include <linux/highmem.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include <linux/pagevec.h>
Andrew Morton	e08748ce	2006-12-10 02:19:31 -0800	[diff] [blame]	19	#include <linux/task_io_accounting_ops.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include <linux/buffer_head.h> /* grr. try_to_release_page,
Jan Kara	aaa4059	2005-10-30 15:00:16 -0800	[diff] [blame]	21	do_invalidatepage */
Dan Magenheimer	c515e1f	2011-05-26 10:01:43 -0600	[diff] [blame]	22	#include <linux/cleancache.h>
Jan Kara	6cbdf11	2014-10-01 21:49:18 -0400	[diff] [blame]	23	#include <linux/rmap.h>
Rik van Riel	ba470de	2008-10-18 20:26:50 -0700	[diff] [blame]	24	#include "internal.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	25
				26
David Howells	cf9a2ae	2006-08-29 19:05:54 +0100	[diff] [blame]	27	/**
Fengguang Wu	28bc44d	2008-02-03 18:04:10 +0200	[diff] [blame]	28	* do_invalidatepage - invalidate part or all of a page
David Howells	cf9a2ae	2006-08-29 19:05:54 +0100	[diff] [blame]	29	* @page: the page which is affected
				30	* @offset: the index of the truncation point
				31	*
				32	* do_invalidatepage() is called when all or part of the page has become
				33	* invalidated by a truncate operation.
				34	*
				35	* do_invalidatepage() does not have to release all buffers, but it must
				36	* ensure that no dirty buffer is left outside @offset and that no I/O
				37	* is underway against any of the blocks which are outside the truncation
				38	* point. Because the caller is about to free (and possibly reuse) those
				39	* blocks on-disk.
				40	*/
				41	void do_invalidatepage(struct page *page, unsigned long offset)
				42	{
				43	void (invalidatepage)(struct page , unsigned long);
				44	invalidatepage = page->mapping->a_ops->invalidatepage;
David Howells	9361401	2006-09-30 20:45:40 +0200	[diff] [blame]	45	#ifdef CONFIG_BLOCK
David Howells	cf9a2ae	2006-08-29 19:05:54 +0100	[diff] [blame]	46	if (!invalidatepage)
				47	invalidatepage = block_invalidatepage;
David Howells	9361401	2006-09-30 20:45:40 +0200	[diff] [blame]	48	#endif
David Howells	cf9a2ae	2006-08-29 19:05:54 +0100	[diff] [blame]	49	if (invalidatepage)
				50	(*invalidatepage)(page, offset);
				51	}
				52
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	static inline void truncate_partial_page(struct page *page, unsigned partial)
				54	{
Christoph Lameter	eebd2aa	2008-02-04 22:28:29 -0800	[diff] [blame]	55	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
Dan Magenheimer	3167760	2011-09-21 11:56:28 -0400	[diff] [blame]	56	cleancache_invalidate_page(page->mapping, page);
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	57	if (page_has_private(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	58	do_invalidatepage(page, partial);
				59	}
				60
Linus Torvalds	ecdfc97	2007-01-26 12:47:06 -0800	[diff] [blame]	61	/*
				62	* This cancels just the dirty bit on the kernel page itself, it
				63	* does NOT actually remove dirty bits on any mmap's that may be
				64	* around. It also leaves the page tagged dirty, so any sync
				65	* activity will still find it on the dirty lists, and in particular,
				66	* clear_page_dirty_for_io() will still look at the dirty bits in
				67	* the VM.
				68	*
				69	* Doing this should normally only ever be done when a page
				70	* is truncated, and is not actually mapped anywhere at all. However,
				71	* fs/buffer.c does this when it notices that somebody has cleaned
				72	* out all the buffers on a page without actually doing it through
				73	* the VM. Can you say "ext3 is horribly ugly"? Tought you could.
				74	*/
Linus Torvalds	fba2591	2006-12-20 13:46:42 -0800	[diff] [blame]	75	void cancel_dirty_page(struct page *page, unsigned int account_size)
				76	{
Linus Torvalds	8368e32	2006-12-23 09:25:04 -0800	[diff] [blame]	77	if (TestClearPageDirty(page)) {
				78	struct address_space *mapping = page->mapping;
				79	if (mapping && mapping_cap_account_dirty(mapping)) {
				80	dec_zone_page_state(page, NR_FILE_DIRTY);
Peter Zijlstra	c9e51e4	2007-10-16 23:25:47 -0700	[diff] [blame]	81	dec_bdi_stat(mapping->backing_dev_info,
				82	BDI_RECLAIMABLE);
Linus Torvalds	8368e32	2006-12-23 09:25:04 -0800	[diff] [blame]	83	if (account_size)
				84	task_io_account_cancelled_write(account_size);
				85	}
Andrew Morton	3e67c09	2006-12-21 11:00:33 -0800	[diff] [blame]	86	}
Linus Torvalds	fba2591	2006-12-20 13:46:42 -0800	[diff] [blame]	87	}
Linus Torvalds	8368e32	2006-12-23 09:25:04 -0800	[diff] [blame]	88	EXPORT_SYMBOL(cancel_dirty_page);
Linus Torvalds	fba2591	2006-12-20 13:46:42 -0800	[diff] [blame]	89
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	90	/*
				91	* If truncate cannot remove the fs-private metadata from the page, the page
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	92	* becomes orphaned. It will be left on the LRU and may even be mapped into
Nick Piggin	54cb882	2007-07-19 01:46:59 -0700	[diff] [blame]	93	* user pagetables if we're racing with filemap_fault().
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	94	*
				95	* We need to bale out if page->mapping is no longer equal to the original
				96	* mapping. This happens a) when the VM reclaimed the page while we waited on
Andrew Morton	fc0ecff	2007-02-10 01:45:39 -0800	[diff] [blame]	97	* its lock, b) when a concurrent invalidate_mapping_pages got there first and
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	98	* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
				99	*/
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	100	static int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	101	truncate_complete_page(struct address_space mapping, struct page page)
				102	{
				103	if (page->mapping != mapping)
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	104	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	105
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	106	if (page_has_private(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	107	do_invalidatepage(page, 0);
				108
Bjorn Steinbrink	a2b3456	2008-02-04 22:29:28 -0800	[diff] [blame]	109	cancel_dirty_page(page, PAGE_CACHE_SIZE);
				110
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	111	ClearPageMappedToDisk(page);
Minchan Kim	5adc7b5	2011-03-22 16:32:41 -0700	[diff] [blame]	112	delete_from_page_cache(page);
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	113	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	114	}
				115
				116	/*
Andrew Morton	fc0ecff	2007-02-10 01:45:39 -0800	[diff] [blame]	117	* This is for invalidate_mapping_pages(). That function can be called at
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	118	* any time, and is not supposed to throw away dirty pages. But pages can
Nick Piggin	0fd0e6b	2006-09-27 01:50:02 -0700	[diff] [blame]	119	* be marked dirty at any time too, so use remove_mapping which safely
				120	* discards clean, unused pages.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	121	*
				122	* Returns non-zero if the page was successfully invalidated.
				123	*/
				124	static int
				125	invalidate_complete_page(struct address_space mapping, struct page page)
				126	{
Nick Piggin	0fd0e6b	2006-09-27 01:50:02 -0700	[diff] [blame]	127	int ret;
				128
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	129	if (page->mapping != mapping)
				130	return 0;
				131
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	132	if (page_has_private(page) && !try_to_release_page(page, 0))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	133	return 0;
				134
Nick Piggin	0fd0e6b	2006-09-27 01:50:02 -0700	[diff] [blame]	135	ret = remove_mapping(mapping, page);
Nick Piggin	0fd0e6b	2006-09-27 01:50:02 -0700	[diff] [blame]	136
				137	return ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	138	}
				139
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	140	int truncate_inode_page(struct address_space mapping, struct page page)
				141	{
				142	if (page_mapped(page)) {
				143	unmap_mapping_range(mapping,
				144	(loff_t)page->index << PAGE_CACHE_SHIFT,
				145	PAGE_CACHE_SIZE, 0);
				146	}
				147	return truncate_complete_page(mapping, page);
				148	}
				149
Wu Fengguang	83f7866	2009-09-16 11:50:13 +0200	[diff] [blame]	150	/*
Andi Kleen	2571873	2009-09-16 11:50:13 +0200	[diff] [blame]	151	* Used to get rid of pages on hardware memory corruption.
				152	*/
				153	int generic_error_remove_page(struct address_space mapping, struct page page)
				154	{
				155	if (!mapping)
				156	return -EINVAL;
				157	/*
				158	* Only punch for normal data pages for now.
				159	* Handling other types like directories would need more auditing.
				160	*/
				161	if (!S_ISREG(mapping->host->i_mode))
				162	return -EIO;
				163	return truncate_inode_page(mapping, page);
				164	}
				165	EXPORT_SYMBOL(generic_error_remove_page);
				166
				167	/*
Wu Fengguang	83f7866	2009-09-16 11:50:13 +0200	[diff] [blame]	168	* Safely invalidate one page from its pagecache mapping.
				169	* It only drops clean, unused pages. The page must be locked.
				170	*
				171	* Returns 1 if the page is successfully invalidated, otherwise 0.
				172	*/
				173	int invalidate_inode_page(struct page *page)
				174	{
				175	struct address_space *mapping = page_mapping(page);
				176	if (!mapping)
				177	return 0;
				178	if (PageDirty(page) \|\| PageWriteback(page))
				179	return 0;
				180	if (page_mapped(page))
				181	return 0;
				182	return invalidate_complete_page(mapping, page);
				183	}
				184
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	185	/**
Liu Bo	73c1e20	2012-02-21 10:57:20 +0800	[diff] [blame]	186	* truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	187	* @mapping: mapping to truncate
				188	* @lstart: offset from which to truncate
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	189	* @lend: offset to which to truncate
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	190	*
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	191	* Truncate the page cache, removing the pages that are between
				192	* specified offsets (and zeroing out partial page
				193	* (if lstart is not page aligned)).
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	194	*
				195	* Truncate takes two passes - the first pass is nonblocking. It will not
				196	* block on page locks and it will not block on writeback. The second pass
				197	* will wait. This is to prevent as much IO as possible in the affected region.
				198	* The first pass will remove most pages, so the search cost of the second pass
				199	* is low.
				200	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201	* We pass down the cache-hot hint to the page freeing code. Even if the
				202	* mapping is large, it is probably the case that the final pages are the most
				203	* recently touched, and freeing happens in ascending file offset order.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	204	*/
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	205	void truncate_inode_pages_range(struct address_space *mapping,
				206	loff_t lstart, loff_t lend)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	207	{
				208	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
				209	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
				210	struct pagevec pvec;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	211	pgoff_t index;
				212	pgoff_t end;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	213	int i;
				214
Dan Magenheimer	3167760	2011-09-21 11:56:28 -0400	[diff] [blame]	215	cleancache_invalidate_inode(mapping);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	216	if (mapping->nrpages == 0)
				217	return;
				218
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	219	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
				220	end = (lend >> PAGE_CACHE_SHIFT);
				221
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	222	pagevec_init(&pvec, 0);
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	223	index = start;
				224	while (index <= end && pagevec_lookup(&pvec, mapping, index,
				225	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
Hugh Dickins	e5598f8	2011-02-25 14:44:29 -0800	[diff] [blame]	226	mem_cgroup_uncharge_start();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	227	for (i = 0; i < pagevec_count(&pvec); i++) {
				228	struct page *page = pvec.pages[i];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	229
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	230	/* We rely upon deletion not changing page->index */
				231	index = page->index;
				232	if (index > end)
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	233	break;
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	234
Nick Piggin	529ae9a	2008-08-02 12:01:03 +0200	[diff] [blame]	235	if (!trylock_page(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	236	continue;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	237	WARN_ON(page->index != index);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	238	if (PageWriteback(page)) {
				239	unlock_page(page);
				240	continue;
				241	}
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	242	truncate_inode_page(mapping, page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	243	unlock_page(page);
				244	}
				245	pagevec_release(&pvec);
Hugh Dickins	e5598f8	2011-02-25 14:44:29 -0800	[diff] [blame]	246	mem_cgroup_uncharge_end();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	247	cond_resched();
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	248	index++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	249	}
				250
				251	if (partial) {
				252	struct page *page = find_lock_page(mapping, start - 1);
				253	if (page) {
				254	wait_on_page_writeback(page);
				255	truncate_partial_page(page, partial);
				256	unlock_page(page);
				257	page_cache_release(page);
				258	}
				259	}
				260
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	261	index = start;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	262	for ( ; ; ) {
				263	cond_resched();
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	264	if (!pagevec_lookup(&pvec, mapping, index,
				265	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
				266	if (index == start)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	267	break;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	268	index = start;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	continue;
				270	}
Hugh Dickins	d082357	2011-07-25 17:12:25 -0700	[diff] [blame]	271	if (index == start && pvec.pages[0]->index > end) {
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	272	pagevec_release(&pvec);
				273	break;
				274	}
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	275	mem_cgroup_uncharge_start();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	276	for (i = 0; i < pagevec_count(&pvec); i++) {
				277	struct page *page = pvec.pages[i];
				278
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	279	/* We rely upon deletion not changing page->index */
				280	index = page->index;
				281	if (index > end)
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	282	break;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	283
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	284	lock_page(page);
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	285	WARN_ON(page->index != index);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	286	wait_on_page_writeback(page);
Nick Piggin	750b498	2009-09-16 11:50:12 +0200	[diff] [blame]	287	truncate_inode_page(mapping, page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	288	unlock_page(page);
				289	}
				290	pagevec_release(&pvec);
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	291	mem_cgroup_uncharge_end();
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	292	index++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	293	}
Dan Magenheimer	3167760	2011-09-21 11:56:28 -0400	[diff] [blame]	294	cleancache_invalidate_inode(mapping);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	295	}
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	296	EXPORT_SYMBOL(truncate_inode_pages_range);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	297
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	298	/**
				299	* truncate_inode_pages - truncate all the pages from an offset
				300	* @mapping: mapping to truncate
				301	* @lstart: offset from which to truncate
				302	*
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	303	* Called under (and serialised by) inode->i_mutex.
Jan Kara	0814257	2011-06-27 16:18:10 -0700	[diff] [blame]	304	*
				305	* Note: When this function returns, there can be a page in the process of
				306	* deletion (inside __delete_from_page_cache()) in the specified range. Thus
				307	* mapping->nrpages can be non-zero when this function returns even after
				308	* truncation of the whole mapping.
Hans Reiser	d733907	2006-01-06 00:10:36 -0800	[diff] [blame]	309	*/
				310	void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
				311	{
				312	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
				313	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	314	EXPORT_SYMBOL(truncate_inode_pages);
				315
Mike Waychison	2869735	2009-06-16 15:32:59 -0700	[diff] [blame]	316	/**
				317	* invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
				318	* @mapping: the address_space which holds the pages to invalidate
				319	* @start: the offset 'from' which to invalidate
				320	* @end: the offset 'to' which to invalidate (inclusive)
				321	*
				322	* This function only removes the unlocked pages, if you want to
				323	* remove all the pages of one inode, you must call truncate_inode_pages.
				324	*
				325	* invalidate_mapping_pages() will not block on IO activity. It will not
				326	* invalidate pages which are dirty, locked, under writeback or mapped into
				327	* pagetables.
				328	*/
				329	unsigned long invalidate_mapping_pages(struct address_space *mapping,
Minchan Kim	3156018	2011-03-22 16:32:52 -0700	[diff] [blame]	330	pgoff_t start, pgoff_t end)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	331	{
				332	struct pagevec pvec;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	333	pgoff_t index = start;
Minchan Kim	3156018	2011-03-22 16:32:52 -0700	[diff] [blame]	334	unsigned long ret;
				335	unsigned long count = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	336	int i;
				337
Hugh Dickins	31475dd	2011-08-03 16:21:27 -0700	[diff] [blame]	338	/*
				339	* Note: this function may get called on a shmem/tmpfs mapping:
				340	* pagevec_lookup() might then return 0 prematurely (because it
				341	* got a gangful of swap entries); but it's hardly worth worrying
				342	* about - it can rarely have anything to free from such a mapping
				343	* (most pages are dirty), and already skips over any difficulties.
				344	*/
				345
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	346	pagevec_init(&pvec, 0);
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	347	while (index <= end && pagevec_lookup(&pvec, mapping, index,
				348	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	349	mem_cgroup_uncharge_start();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	for (i = 0; i < pagevec_count(&pvec); i++) {
				351	struct page *page = pvec.pages[i];
				352
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	353	/* We rely upon deletion not changing page->index */
NeilBrown	e0f2360	2006-06-23 02:05:48 -0700	[diff] [blame]	354	index = page->index;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	355	if (index > end)
				356	break;
NeilBrown	e0f2360	2006-06-23 02:05:48 -0700	[diff] [blame]	357
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	358	if (!trylock_page(page))
				359	continue;
				360	WARN_ON(page->index != index);
Minchan Kim	3156018	2011-03-22 16:32:52 -0700	[diff] [blame]	361	ret = invalidate_inode_page(page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	362	unlock_page(page);
Minchan Kim	3156018	2011-03-22 16:32:52 -0700	[diff] [blame]	363	/*
				364	* Invalidation is a hint that the page is no longer
				365	* of interest and try to speed up its reclaim.
				366	*/
				367	if (!ret)
				368	deactivate_page(page);
				369	count += ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	370	}
				371	pagevec_release(&pvec);
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	372	mem_cgroup_uncharge_end();
Mike Waychison	2869735	2009-06-16 15:32:59 -0700	[diff] [blame]	373	cond_resched();
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	374	index++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	375	}
Minchan Kim	3156018	2011-03-22 16:32:52 -0700	[diff] [blame]	376	return count;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	377	}
Anton Altaparmakov	54bc485	2007-02-10 01:45:38 -0800	[diff] [blame]	378	EXPORT_SYMBOL(invalidate_mapping_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	379
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	380	/*
				381	* This is like invalidate_complete_page(), except it ignores the page's
				382	* refcount. We do this because invalidate_inode_pages2() needs stronger
				383	* invalidation guarantees, and cannot afford to leave pages behind because
Anderson Briglia	2706a1b	2007-07-15 23:38:09 -0700	[diff] [blame]	384	* shrink_page_list() has a temp ref on them, or because they're transiently
				385	* sitting in the lru_cache_add() pagevecs.
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	386	*/
				387	static int
				388	invalidate_complete_page2(struct address_space mapping, struct page page)
				389	{
				390	if (page->mapping != mapping)
				391	return 0;
				392
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	393	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	394	return 0;
				395
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	396	spin_lock_irq(&mapping->tree_lock);
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	397	if (PageDirty(page))
				398	goto failed;
				399
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	400	BUG_ON(page_has_private(page));
Minchan Kim	e64a782	2011-03-22 16:32:44 -0700	[diff] [blame]	401	__delete_from_page_cache(page);
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	402	spin_unlock_irq(&mapping->tree_lock);
Daisuke Nishimura	e767e05	2009-05-28 14:34:28 -0700	[diff] [blame]	403	mem_cgroup_uncharge_cache_page(page);
Linus Torvalds	6072d13	2010-12-01 13:35:19 -0500	[diff] [blame]	404
				405	if (mapping->a_ops->freepage)
				406	mapping->a_ops->freepage(page);
				407
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	408	page_cache_release(page); /* pagecache ref */
				409	return 1;
				410	failed:
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	411	spin_unlock_irq(&mapping->tree_lock);
Andrew Morton	bd4c8ce	2006-09-30 23:29:29 -0700	[diff] [blame]	412	return 0;
				413	}
				414
Trond Myklebust	e3db769	2007-01-10 23:15:39 -0800	[diff] [blame]	415	static int do_launder_page(struct address_space mapping, struct page page)
				416	{
				417	if (!PageDirty(page))
				418	return 0;
				419	if (page->mapping != mapping \|\| mapping->a_ops->launder_page == NULL)
				420	return 0;
				421	return mapping->a_ops->launder_page(page);
				422	}
				423
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	424	/**
				425	* invalidate_inode_pages2_range - remove range of pages from an address_space
Martin Waitz	67be2dd	2005-05-01 08:59:26 -0700	[diff] [blame]	426	* @mapping: the address_space
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	427	* @start: the page offset 'from' which to invalidate
				428	* @end: the page offset 'to' which to invalidate (inclusive)
				429	*
				430	* Any pages which are found to be mapped into pagetables are unmapped prior to
				431	* invalidation.
				432	*
Hisashi Hifumi	6ccfa80	2008-09-02 14:35:40 -0700	[diff] [blame]	433	* Returns -EBUSY if any pages could not be invalidated.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	434	*/
				435	int invalidate_inode_pages2_range(struct address_space *mapping,
				436	pgoff_t start, pgoff_t end)
				437	{
				438	struct pagevec pvec;
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	439	pgoff_t index;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	440	int i;
				441	int ret = 0;
Hisashi Hifumi	0dd1334	2008-04-28 02:12:08 -0700	[diff] [blame]	442	int ret2 = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	443	int did_range_unmap = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	444
Dan Magenheimer	3167760	2011-09-21 11:56:28 -0400	[diff] [blame]	445	cleancache_invalidate_inode(mapping);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	446	pagevec_init(&pvec, 0);
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	447	index = start;
				448	while (index <= end && pagevec_lookup(&pvec, mapping, index,
				449	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	450	mem_cgroup_uncharge_start();
Trond Myklebust	7b965e0	2007-02-28 20:13:55 -0800	[diff] [blame]	451	for (i = 0; i < pagevec_count(&pvec); i++) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	452	struct page *page = pvec.pages[i];
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	453
				454	/* We rely upon deletion not changing page->index */
				455	index = page->index;
				456	if (index > end)
				457	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	458
				459	lock_page(page);
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	460	WARN_ON(page->index != index);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	461	if (page->mapping != mapping) {
				462	unlock_page(page);
				463	continue;
				464	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	465	wait_on_page_writeback(page);
Nick Piggin	d00806b	2007-07-19 01:46:57 -0700	[diff] [blame]	466	if (page_mapped(page)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	467	if (!did_range_unmap) {
				468	/*
				469	* Zap the rest of the file in one hit.
				470	*/
				471	unmap_mapping_range(mapping,
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	472	(loff_t)index << PAGE_CACHE_SHIFT,
				473	(loff_t)(1 + end - index)
				474	<< PAGE_CACHE_SHIFT,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	475	0);
				476	did_range_unmap = 1;
				477	} else {
				478	/*
				479	* Just zap this page
				480	*/
				481	unmap_mapping_range(mapping,
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	482	(loff_t)index << PAGE_CACHE_SHIFT,
				483	PAGE_CACHE_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	484	}
				485	}
Nick Piggin	d00806b	2007-07-19 01:46:57 -0700	[diff] [blame]	486	BUG_ON(page_mapped(page));
Hisashi Hifumi	0dd1334	2008-04-28 02:12:08 -0700	[diff] [blame]	487	ret2 = do_launder_page(mapping, page);
				488	if (ret2 == 0) {
				489	if (!invalidate_complete_page2(mapping, page))
Hisashi Hifumi	6ccfa80	2008-09-02 14:35:40 -0700	[diff] [blame]	490	ret2 = -EBUSY;
Hisashi Hifumi	0dd1334	2008-04-28 02:12:08 -0700	[diff] [blame]	491	}
				492	if (ret2 < 0)
				493	ret = ret2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	494	unlock_page(page);
				495	}
				496	pagevec_release(&pvec);
KAMEZAWA Hiroyuki	569b846	2009-12-15 16:47:03 -0800	[diff] [blame]	497	mem_cgroup_uncharge_end();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	498	cond_resched();
Hugh Dickins	b85e0ef	2011-07-25 17:12:25 -0700	[diff] [blame]	499	index++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	500	}
Dan Magenheimer	3167760	2011-09-21 11:56:28 -0400	[diff] [blame]	501	cleancache_invalidate_inode(mapping);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	502	return ret;
				503	}
				504	EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
				505
				506	/**
				507	* invalidate_inode_pages2 - remove all pages from an address_space
Martin Waitz	67be2dd	2005-05-01 08:59:26 -0700	[diff] [blame]	508	* @mapping: the address_space
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	509	*
				510	* Any pages which are found to be mapped into pagetables are unmapped prior to
				511	* invalidation.
				512	*
Peng Tao	e9de25d	2009-10-19 14:48:13 +0800	[diff] [blame]	513	* Returns -EBUSY if any pages could not be invalidated.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	514	*/
				515	int invalidate_inode_pages2(struct address_space *mapping)
				516	{
				517	return invalidate_inode_pages2_range(mapping, 0, -1);
				518	}
				519	EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
npiggin@suse.de	25d9e2d	2009-08-21 02:35:05 +1000	[diff] [blame]	520
				521	/**
				522	* truncate_pagecache - unmap and remove pagecache that has been truncated
				523	* @inode: inode
Hugh Dickins	8a549be	2011-07-25 17:12:24 -0700	[diff] [blame]	524	* @oldsize: old file size
				525	* @newsize: new file size
npiggin@suse.de	25d9e2d	2009-08-21 02:35:05 +1000	[diff] [blame]	526	*
				527	* inode's new i_size must already be written before truncate_pagecache
				528	* is called.
				529	*
				530	* This function should typically be called before the filesystem
				531	* releases resources associated with the freed range (eg. deallocates
				532	* blocks). This way, pagecache will always stay logically coherent
				533	* with on-disk format, and the filesystem would not have to deal with
				534	* situations such as writepage being called for a page that has already
				535	* had its underlying blocks deallocated.
				536	*/
Hugh Dickins	8a549be	2011-07-25 17:12:24 -0700	[diff] [blame]	537	void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
npiggin@suse.de	25d9e2d	2009-08-21 02:35:05 +1000	[diff] [blame]	538	{
OGAWA Hirofumi	cedabed	2010-01-13 21:14:09 +0900	[diff] [blame]	539	struct address_space *mapping = inode->i_mapping;
Hugh Dickins	8a549be	2011-07-25 17:12:24 -0700	[diff] [blame]	540	loff_t holebegin = round_up(newsize, PAGE_SIZE);
npiggin@suse.de	25d9e2d	2009-08-21 02:35:05 +1000	[diff] [blame]	541
OGAWA Hirofumi	cedabed	2010-01-13 21:14:09 +0900	[diff] [blame]	542	/*
				543	* unmap_mapping_range is called twice, first simply for
				544	* efficiency so that truncate_inode_pages does fewer
				545	* single-page unmaps. However after this first call, and
				546	* before truncate_inode_pages finishes, it is possible for
				547	* private pages to be COWed, which remain after
				548	* truncate_inode_pages finishes, hence the second
				549	* unmap_mapping_range call must be made for correctness.
				550	*/
Hugh Dickins	8a549be	2011-07-25 17:12:24 -0700	[diff] [blame]	551	unmap_mapping_range(mapping, holebegin, 0, 1);
				552	truncate_inode_pages(mapping, newsize);
				553	unmap_mapping_range(mapping, holebegin, 0, 1);
npiggin@suse.de	25d9e2d	2009-08-21 02:35:05 +1000	[diff] [blame]	554	}
				555	EXPORT_SYMBOL(truncate_pagecache);
				556
				557	/**
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	558	* truncate_setsize - update inode and pagecache for a new file size
				559	* @inode: inode
				560	* @newsize: new file size
				561	*
Jan Kara	382e27d	2011-01-20 14:44:26 -0800	[diff] [blame]	562	* truncate_setsize updates i_size and performs pagecache truncation (if
				563	* necessary) to @newsize. It will be typically be called from the filesystem's
				564	* setattr function when ATTR_SIZE is passed in.
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	565	*
Jan Kara	382e27d	2011-01-20 14:44:26 -0800	[diff] [blame]	566	* Must be called with inode_mutex held and before all filesystem specific
				567	* block truncation has been performed.
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	568	*/
				569	void truncate_setsize(struct inode *inode, loff_t newsize)
				570	{
Jan Kara	6cbdf11	2014-10-01 21:49:18 -0400	[diff] [blame]	571	loff_t oldsize = inode->i_size;
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	572
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	573	i_size_write(inode, newsize);
Jan Kara	6cbdf11	2014-10-01 21:49:18 -0400	[diff] [blame]	574	if (newsize > oldsize)
				575	pagecache_isize_extended(inode, oldsize, newsize);
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	576	truncate_pagecache(inode, oldsize, newsize);
				577	}
				578	EXPORT_SYMBOL(truncate_setsize);
				579
				580	/**
Jan Kara	6cbdf11	2014-10-01 21:49:18 -0400	[diff] [blame]	581	* pagecache_isize_extended - update pagecache after extension of i_size
				582	* @inode: inode for which i_size was extended
				583	* @from: original inode size
				584	* @to: new inode size
				585	*
				586	* Handle extension of inode size either caused by extending truncate or by
				587	* write starting after current i_size. We mark the page straddling current
				588	* i_size RO so that page_mkwrite() is called on the nearest write access to
				589	* the page. This way filesystem can be sure that page_mkwrite() is called on
				590	* the page before user writes to the page via mmap after the i_size has been
				591	* changed.
				592	*
				593	* The function must be called after i_size is updated so that page fault
				594	* coming after we unlock the page will already see the new i_size.
				595	* The function must be called while we still hold i_mutex - this not only
				596	* makes sure i_size is stable but also that userspace cannot observe new
				597	* i_size value before we are prepared to store mmap writes at new inode size.
				598	*/
				599	void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
				600	{
				601	int bsize = 1 << inode->i_blkbits;
				602	loff_t rounded_from;
				603	struct page *page;
				604	pgoff_t index;
				605
Jan Kara	6cbdf11	2014-10-01 21:49:18 -0400	[diff] [blame]	606	WARN_ON(to > inode->i_size);
				607
				608	if (from >= to \|\| bsize == PAGE_CACHE_SIZE)
				609	return;
				610	/* Page straddling @from will not have any hole block created? */
				611	rounded_from = round_up(from, bsize);
				612	if (to <= rounded_from \|\| !(rounded_from & (PAGE_CACHE_SIZE - 1)))
				613	return;
				614
				615	index = from >> PAGE_CACHE_SHIFT;
				616	page = find_lock_page(inode->i_mapping, index);
				617	/* Page not cached? Nothing to do */
				618	if (!page)
				619	return;
				620	/*
				621	* See clear_page_dirty_for_io() for details why set_page_dirty()
				622	* is needed.
				623	*/
				624	if (page_mkclean(page))
				625	set_page_dirty(page);
				626	unlock_page(page);
				627	page_cache_release(page);
				628	}
				629	EXPORT_SYMBOL(pagecache_isize_extended);
				630
				631	/**
Hugh Dickins	623e3db	2012-03-28 14:42:40 -0700	[diff] [blame]	632	* truncate_pagecache_range - unmap and remove pagecache that is hole-punched
				633	* @inode: inode
				634	* @lstart: offset of beginning of hole
				635	* @lend: offset of last byte of hole
				636	*
				637	* This function should typically be called before the filesystem
				638	* releases resources associated with the freed range (eg. deallocates
				639	* blocks). This way, pagecache will always stay logically coherent
				640	* with on-disk format, and the filesystem would not have to deal with
				641	* situations such as writepage being called for a page that has already
				642	* had its underlying blocks deallocated.
				643	*/
				644	void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
				645	{
				646	struct address_space *mapping = inode->i_mapping;
				647	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
				648	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
				649	/*
				650	* This rounding is currently just for example: unmap_mapping_range
				651	* expands its hole outwards, whereas we want it to contract the hole
				652	* inwards. However, existing callers of truncate_pagecache_range are
				653	* doing their own page rounding first; and truncate_inode_pages_range
				654	* currently BUGs if lend is not pagealigned-1 (it handles partial
				655	* page at start of hole, but not partial page at end of hole). Note
				656	* unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
				657	*/
				658
				659	/*
				660	* Unlike in truncate_pagecache, unmap_mapping_range is called only
				661	* once (before truncating pagecache), and without "even_cows" flag:
				662	* hole-punching should not remove private COWed pages from the hole.
				663	*/
				664	if ((u64)unmap_end > (u64)unmap_start)
				665	unmap_mapping_range(mapping, unmap_start,
				666	1 + unmap_end - unmap_start, 0);
				667	truncate_inode_pages_range(mapping, lstart, lend);
				668	}
				669	EXPORT_SYMBOL(truncate_pagecache_range);