/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "vmx.h"
#include "kvm.h"

#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/cmpxchg.h>

#undef MMU_DEBUG

#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

#ifdef MMU_DEBUG

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#else

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif

#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)							\
	if (!(x)) {							\
		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
		       __FILE__, __LINE__, #x);				\
	}
#endif

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level) \
	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level) \
	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))


#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

struct kvm_rmap_desc {
	u64 *shadow_ptes[RMAP_EXT];
	struct kvm_rmap_desc *more;
};

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;

static int is_write_protection(struct kvm_vcpu *vcpu)
{
	return vcpu->cr0 & X86_CR0_WP;
}

static int is_cpuid_PSE36(void)
{
	return 1;
}

static int is_nx(struct kvm_vcpu *vcpu)
{
	return vcpu->shadow_efer & EFER_NX;
}

static int is_present_pte(unsigned long pte)
{
	return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
	return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
	return pte & PT_SHADOW_IO_MARK;
}

static int is_rmap_pte(u64 pte)
{
	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
}

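/*
 * Write a shadow pte as a single 64-bit store (via set_64bit, even on
 * 32-bit hosts), so a concurrent hardware page table walk never sees a
 * torn entry.
 */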
static void set_shadow_pte(u64 *sptep, u64 spte)
{
#ifdef CONFIG_X86_64
	set_64bit((unsigned long *)sptep, spte);
#else
	set_64bit((unsigned long long *)sptep, spte);
#endif
}

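/*
 * Pre-allocate a batch of objects into a per-vcpu cache.  MMU paths that
 * run with the kvm lock held draw from these caches instead of calling
 * the allocator directly, so the fault path never has to sleep; see
 * mmu_topup_memory_caches(), which retries with GFP_KERNEL only after
 * dropping the lock.
 */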
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  struct kmem_cache *base_cache, int min,
				  gfp_t gfp_flags)
{
	void *obj;

	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
		obj = kmem_cache_zalloc(base_cache, gfp_flags);
		if (!obj)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = obj;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		kfree(mc->objects[--mc->nobjs]);
}

static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
				       int min, gfp_t gfp_flags)
{
	struct page *page;

	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
		page = alloc_page(gfp_flags);
		if (!page)
			return -ENOMEM;
		set_page_private(page, 0);
		cache->objects[cache->nobjs++] = page_address(page);
	}
	return 0;
}

static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{
	int r;

	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
				   pte_chain_cache, 4, gfp_flags);
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
				   rmap_desc_cache, 1, gfp_flags);
	if (r)
		goto out;
	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags);
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
				   mmu_page_header_cache, 4, gfp_flags);
out:
	return r;
}

static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
	int r;

	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
	kvm_mmu_free_some_pages(vcpu);
	if (r < 0) {
		mutex_unlock(&vcpu->kvm->lock);
		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
		mutex_lock(&vcpu->kvm->lock);
	}
	return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
	mmu_free_memory_cache_page(&vcpu->mmu_page_cache);
	mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
				    size_t size)
{
	void *p;

	BUG_ON(!mc->nobjs);
	p = mc->objects[--mc->nobjs];
	memset(p, 0, size);
	return p;
}

static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
				      sizeof(struct kvm_pte_chain));
}

static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
	kfree(pc);
}

static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
				      sizeof(struct kvm_rmap_desc));
}

static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
	kfree(rd);
}

/*
 * Reverse mapping data structures:
 *
 * If page->private bit zero is zero, then page->private points to the
 * shadow page table entry that points to page_address(page).
 *
 * If page->private bit zero is one, (then page->private & ~1) points
 * to a struct kvm_rmap_desc containing more mappings.
 */
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
{
	struct page *page;
	struct kvm_rmap_desc *desc;
	int i;

	if (!is_rmap_pte(*spte))
		return;
	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
	if (!page_private(page)) {
		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
		set_page_private(page, (unsigned long)spte);
	} else if (!(page_private(page) & 1)) {
		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
		desc = mmu_alloc_rmap_desc(vcpu);
		desc->shadow_ptes[0] = (u64 *)page_private(page);
		desc->shadow_ptes[1] = spte;
		set_page_private(page, (unsigned long)desc | 1);
	} else {
		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
			desc = desc->more;
		if (desc->shadow_ptes[RMAP_EXT-1]) {
			desc->more = mmu_alloc_rmap_desc(vcpu);
			desc = desc->more;
		}
		for (i = 0; desc->shadow_ptes[i]; ++i)
			;
		desc->shadow_ptes[i] = spte;
	}
}

static void rmap_desc_remove_entry(struct page *page,
				   struct kvm_rmap_desc *desc,
				   int i,
				   struct kvm_rmap_desc *prev_desc)
{
	int j;

	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
		;
	desc->shadow_ptes[i] = desc->shadow_ptes[j];
	desc->shadow_ptes[j] = NULL;
	if (j != 0)
		return;
	if (!prev_desc && !desc->more)
		set_page_private(page, (unsigned long)desc->shadow_ptes[0]);
	else
		if (prev_desc)
			prev_desc->more = desc->more;
		else
			set_page_private(page, (unsigned long)desc->more | 1);
	mmu_free_rmap_desc(desc);
}

static void rmap_remove(u64 *spte)
{
	struct page *page;
	struct kvm_rmap_desc *desc;
	struct kvm_rmap_desc *prev_desc;
	int i;

	if (!is_rmap_pte(*spte))
		return;
	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
	if (!page_private(page)) {
		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
		BUG();
	} else if (!(page_private(page) & 1)) {
		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
		if ((u64 *)page_private(page) != spte) {
			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
			       spte, *spte);
			BUG();
		}
		set_page_private(page, 0);
	} else {
		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
		prev_desc = NULL;
		while (desc) {
			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
				if (desc->shadow_ptes[i] == spte) {
					rmap_desc_remove_entry(page,
							       desc, i,
							       prev_desc);
					return;
				}
			prev_desc = desc;
			desc = desc->more;
		}
		BUG();
	}
}

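/*
 * Walk the reverse mapping for gfn and strip the writable bit from every
 * shadow pte that maps it, so that further guest writes to the page trap
 * into the MMU.  This is how shadowed guest page tables are kept
 * write-protected.
 */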
static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
	struct kvm *kvm = vcpu->kvm;
	struct page *page;
	struct kvm_rmap_desc *desc;
	u64 *spte;

	page = gfn_to_page(kvm, gfn);
	BUG_ON(!page);

	while (page_private(page)) {
		if (!(page_private(page) & 1))
			spte = (u64 *)page_private(page);
		else {
			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
			spte = desc->shadow_ptes[0];
		}
		BUG_ON(!spte);
		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
		       != page_to_pfn(page));
		BUG_ON(!(*spte & PT_PRESENT_MASK));
		BUG_ON(!(*spte & PT_WRITABLE_MASK));
		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
		rmap_remove(spte);
		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
		kvm_flush_remote_tlbs(vcpu->kvm);
	}
}

#ifdef MMU_DEBUG
static int is_empty_shadow_page(u64 *spt)
{
	u64 *pos;
	u64 *end;

	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
		if (*pos != 0) {
			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
			       pos, *pos);
			return 0;
		}
	return 1;
}
#endif

static void kvm_mmu_free_page(struct kvm *kvm,
			      struct kvm_mmu_page *page_head)
{
	ASSERT(is_empty_shadow_page(page_head->spt));
	list_del(&page_head->link);
	__free_page(virt_to_page(page_head->spt));
	kfree(page_head);
	++kvm->n_free_mmu_pages;
}

static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
	return gfn;
}

static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
					       u64 *parent_pte)
{
	struct kvm_mmu_page *page;

	if (!vcpu->kvm->n_free_mmu_pages)
		return NULL;

	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
				      sizeof *page);
	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
	set_page_private(virt_to_page(page->spt), (unsigned long)page);
	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
	ASSERT(is_empty_shadow_page(page->spt));
	page->slot_bitmap = 0;
	page->multimapped = 0;
	page->parent_pte = parent_pte;
	--vcpu->kvm->n_free_mmu_pages;
	return page;
}

static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
				    struct kvm_mmu_page *page, u64 *parent_pte)
{
	struct kvm_pte_chain *pte_chain;
	struct hlist_node *node;
	int i;

	if (!parent_pte)
		return;
	if (!page->multimapped) {
		u64 *old = page->parent_pte;

		if (!old) {
			page->parent_pte = parent_pte;
			return;
		}
		page->multimapped = 1;
		pte_chain = mmu_alloc_pte_chain(vcpu);
		INIT_HLIST_HEAD(&page->parent_ptes);
		hlist_add_head(&pte_chain->link, &page->parent_ptes);
		pte_chain->parent_ptes[0] = old;
	}
	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
		if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
			continue;
		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
			if (!pte_chain->parent_ptes[i]) {
				pte_chain->parent_ptes[i] = parent_pte;
				return;
			}
	}
	pte_chain = mmu_alloc_pte_chain(vcpu);
	BUG_ON(!pte_chain);
	hlist_add_head(&pte_chain->link, &page->parent_ptes);
	pte_chain->parent_ptes[0] = parent_pte;
}

static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
				       u64 *parent_pte)
{
	struct kvm_pte_chain *pte_chain;
	struct hlist_node *node;
	int i;

	if (!page->multimapped) {
		BUG_ON(page->parent_pte != parent_pte);
		page->parent_pte = NULL;
		return;
	}
	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
			if (!pte_chain->parent_ptes[i])
				break;
			if (pte_chain->parent_ptes[i] != parent_pte)
				continue;
			while (i + 1 < NR_PTE_CHAIN_ENTRIES
			       && pte_chain->parent_ptes[i + 1]) {
				pte_chain->parent_ptes[i]
					= pte_chain->parent_ptes[i + 1];
				++i;
			}
			pte_chain->parent_ptes[i] = NULL;
			if (i == 0) {
				hlist_del(&pte_chain->link);
				mmu_free_pte_chain(pte_chain);
				if (hlist_empty(&page->parent_ptes)) {
					page->multimapped = 0;
					page->parent_pte = NULL;
				}
			}
			return;
		}
	BUG();
}

static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
						gfn_t gfn)
{
	unsigned index;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node;

	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry(page, node, bucket, hash_link)
		if (page->gfn == gfn && !page->role.metaphysical) {
			pgprintk("%s: found role %x\n",
				 __FUNCTION__, page->role.word);
			return page;
		}
	return NULL;
}

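/*
 * Look up the shadow page for (gfn, role) in the hash table, creating it
 * if necessary.  Because 64-bit shadow entries are twice the size of
 * 32-bit guest entries, one 32-bit guest page table is shadowed by more
 * than one shadow page; role.quadrant distinguishes them.  A freshly
 * shadowed guest page table is write-protected so that guest writes to
 * it can be intercepted.
 */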
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
					     gfn_t gfn,
					     gva_t gaddr,
					     unsigned level,
					     int metaphysical,
					     unsigned hugepage_access,
					     u64 *parent_pte)
{
	union kvm_mmu_page_role role;
	unsigned index;
	unsigned quadrant;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node;

	role.word = 0;
	role.glevels = vcpu->mmu.root_level;
	role.level = level;
	role.metaphysical = metaphysical;
	role.hugepage_access = hugepage_access;
	if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
		role.quadrant = quadrant;
	}
	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
		 gfn, role.word);
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry(page, node, bucket, hash_link)
		if (page->gfn == gfn && page->role.word == role.word) {
			mmu_page_add_parent_pte(vcpu, page, parent_pte);
			pgprintk("%s: found\n", __FUNCTION__);
			return page;
		}
	page = kvm_mmu_alloc_page(vcpu, parent_pte);
	if (!page)
		return page;
	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
	page->gfn = gfn;
	page->role = role;
	hlist_add_head(&page->hash_link, bucket);
	if (!metaphysical)
		rmap_write_protect(vcpu, gfn);
	return page;
}

static void kvm_mmu_page_unlink_children(struct kvm *kvm,
					 struct kvm_mmu_page *page)
{
	unsigned i;
	u64 *pt;
	u64 ent;

	pt = page->spt;

	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
			if (pt[i] & PT_PRESENT_MASK)
				rmap_remove(&pt[i]);
			pt[i] = 0;
		}
		kvm_flush_remote_tlbs(kvm);
		return;
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		ent = pt[i];

		pt[i] = 0;
		if (!(ent & PT_PRESENT_MASK))
			continue;
		ent &= PT64_BASE_ADDR_MASK;
		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
	}
	kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_put_page(struct kvm_mmu_page *page,
			     u64 *parent_pte)
{
	mmu_page_remove_parent_pte(page, parent_pte);
}

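/*
 * Tear down a shadow page: detach it from all parent ptes, unlink its
 * children, and either free it or, if it is still in use as a root,
 * keep it on the active list until the last root reference is dropped.
 */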
static void kvm_mmu_zap_page(struct kvm *kvm,
			     struct kvm_mmu_page *page)
{
	u64 *parent_pte;

	while (page->multimapped || page->parent_pte) {
		if (!page->multimapped)
			parent_pte = page->parent_pte;
		else {
			struct kvm_pte_chain *chain;

			chain = container_of(page->parent_ptes.first,
					     struct kvm_pte_chain, link);
			parent_pte = chain->parent_ptes[0];
		}
		BUG_ON(!parent_pte);
		kvm_mmu_put_page(page, parent_pte);
		set_shadow_pte(parent_pte, 0);
	}
	kvm_mmu_page_unlink_children(kvm, page);
	if (!page->root_count) {
		hlist_del(&page->hash_link);
		kvm_mmu_free_page(kvm, page);
	} else
		list_move(&page->link, &kvm->active_mmu_pages);
}

static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned index;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node, *n;
	int r;

	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
	r = 0;
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
		if (page->gfn == gfn && !page->role.metaphysical) {
			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
				 page->role.word);
			kvm_mmu_zap_page(vcpu->kvm, page);
			r = 1;
		}
	return r;
}

static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mmu_page *page;

	while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
		pgprintk("%s: zap %lx %x\n",
			 __FUNCTION__, gfn, page->role.word);
		kvm_mmu_zap_page(vcpu->kvm, page);
	}
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
	struct kvm_mmu_page *page_head = page_header(__pa(pte));

	__set_bit(slot, &page_head->slot_bitmap);
}

hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	hpa_t hpa = gpa_to_hpa(vcpu, gpa);

	return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct page *page;

	ASSERT((gpa & HPA_ERR_MASK) == 0);
	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
	if (!page)
		return gpa | HPA_ERR_MASK;
	return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
		| (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return UNMAPPED_GVA;
	return gpa_to_hpa(vcpu, gpa);
}

struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return NULL;
	return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
}

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

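/*
 * Map guest address v to host physical address p in the shadow page
 * table, allocating intermediate shadow pages on demand.  The
 * intermediate pages are "metaphysical": they shadow no guest page
 * table, since the guest is running unpaged and gva == gpa.
 */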
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
	int level = PT32E_ROOT_LEVEL;
	hpa_t table_addr = vcpu->mmu.root_hpa;

	for (; ; level--) {
		u32 index = PT64_INDEX(v, level);
		u64 *table;
		u64 pte;

		ASSERT(VALID_PAGE(table_addr));
		table = __va(table_addr);

		if (level == 1) {
			pte = table[index];
			if (is_present_pte(pte) && is_writeble_pte(pte))
				return 0;
			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
			page_header_update_slot(vcpu->kvm, table, v);
			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
				       PT_USER_MASK;
			rmap_add(vcpu, &table[index]);
			return 0;
		}

		if (table[index] == 0) {
			struct kvm_mmu_page *new_table;
			gfn_t pseudo_gfn;

			pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
				>> PAGE_SHIFT;
			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
						     v, level - 1,
						     1, 0, &table[index]);
			if (!new_table) {
				pgprintk("nonpaging_map: ENOMEM\n");
				return -ENOMEM;
			}

			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
				| PT_WRITABLE_MASK | PT_USER_MASK;
		}
		table_addr = table[index] & PT64_BASE_ADDR_MASK;
	}
}

static void mmu_free_roots(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_mmu_page *page;

	if (!VALID_PAGE(vcpu->mmu.root_hpa))
		return;
#ifdef CONFIG_X86_64
	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->mmu.root_hpa;

		page = page_header(root);
		--page->root_count;
		vcpu->mmu.root_hpa = INVALID_PAGE;
		return;
	}
#endif
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->mmu.pae_root[i];

		if (root) {
			root &= PT64_BASE_ADDR_MASK;
			page = page_header(root);
			--page->root_count;
		}
		vcpu->mmu.pae_root[i] = INVALID_PAGE;
	}
	vcpu->mmu.root_hpa = INVALID_PAGE;
}

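/*
 * Install shadow root tables for the current guest paging mode: a single
 * 4-level root for long mode, otherwise four PAE directory roots (which
 * also cover 32-bit and unpaged guests).
 */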
static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
{
	int i;
	gfn_t root_gfn;
	struct kvm_mmu_page *page;

	root_gfn = vcpu->cr3 >> PAGE_SHIFT;

#ifdef CONFIG_X86_64
	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->mmu.root_hpa;

		ASSERT(!VALID_PAGE(root));
		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
					PT64_ROOT_LEVEL, 0, 0, NULL);
		root = __pa(page->spt);
		++page->root_count;
		vcpu->mmu.root_hpa = root;
		return;
	}
#endif
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->mmu.pae_root[i];

		ASSERT(!VALID_PAGE(root));
		if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
			if (!is_present_pte(vcpu->pdptrs[i])) {
				vcpu->mmu.pae_root[i] = 0;
				continue;
			}
			root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
		} else if (vcpu->mmu.root_level == 0)
			root_gfn = 0;
		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
					PT32_ROOT_LEVEL, !is_paging(vcpu),
					0, NULL);
		root = __pa(page->spt);
		++page->root_count;
		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
	}
	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	return vaddr;
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
				u32 error_code)
{
	gpa_t addr = gva;
	hpa_t paddr;
	int r;

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	ASSERT(vcpu);
	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

	paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

	if (is_error_hpa(paddr))
		return 1;

	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = nonpaging_new_cr3;
	context->page_fault = nonpaging_page_fault;
	context->gva_to_gpa = nonpaging_gva_to_gpa;
	context->free = nonpaging_free;
	context->root_level = 0;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	context->root_hpa = INVALID_PAGE;
	return 0;
}

static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.tlb_flush;
	kvm_x86_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
	mmu_free_roots(vcpu);
}

static void inject_page_fault(struct kvm_vcpu *vcpu,
			      u64 addr,
			      u32 err_code)
{
	kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
}

static void paging_free(struct kvm_vcpu *vcpu)
{
	nonpaging_free(vcpu);
}

#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE

static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
{
	struct kvm_mmu *context = &vcpu->mmu;

	ASSERT(is_pae(vcpu));
	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging64_page_fault;
	context->gva_to_gpa = paging64_gva_to_gpa;
	context->free = paging_free;
	context->root_level = level;
	context->shadow_root_level = level;
	context->root_hpa = INVALID_PAGE;
	return 0;
}

static int paging64_init_context(struct kvm_vcpu *vcpu)
{
	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging32_page_fault;
	context->gva_to_gpa = paging32_gva_to_gpa;
	context->free = paging_free;
	context->root_level = PT32_ROOT_LEVEL;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	context->root_hpa = INVALID_PAGE;
	return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	if (!is_paging(vcpu))
		return nonpaging_init_context(vcpu);
	else if (is_long_mode(vcpu))
		return paging64_init_context(vcpu);
	else if (is_pae(vcpu))
		return paging32E_init_context(vcpu);
	else
		return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	if (VALID_PAGE(vcpu->mmu.root_hpa)) {
		vcpu->mmu.free(vcpu);
		vcpu->mmu.root_hpa = INVALID_PAGE;
	}
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
	destroy_kvm_mmu(vcpu);
	return init_kvm_mmu(vcpu);
}

int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
	int r;

	mutex_lock(&vcpu->kvm->lock);
	r = mmu_topup_memory_caches(vcpu);
	if (r)
		goto out;
	mmu_alloc_roots(vcpu);
	kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
	kvm_mmu_flush_tlb(vcpu);
out:
	mutex_unlock(&vcpu->kvm->lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_load);

void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}

static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *page,
				  u64 *spte)
{
	u64 pte;
	struct kvm_mmu_page *child;

	pte = *spte;
	if (is_present_pte(pte)) {
		if (page->role.level == PT_PAGE_TABLE_LEVEL)
			rmap_remove(spte);
		else {
			child = page_header(pte & PT64_BASE_ADDR_MASK);
			mmu_page_remove_parent_pte(child, spte);
		}
	}
	*spte = 0;
	kvm_flush_remote_tlbs(vcpu->kvm);
}

static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *page,
				  u64 *spte,
				  const void *new, int bytes)
{
	if (page->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	if (page->role.glevels == PT32_ROOT_LEVEL)
		paging32_update_pte(vcpu, page, spte, new, bytes);
	else
		paging64_update_pte(vcpu, page, spte, new, bytes);
}

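/*
 * Guest page table pages are write-protected, so every guest write to
 * one of them lands in this handler.  It updates the corresponding
 * shadow pte(s) in place, and zaps shadow pages that take repeated or
 * misaligned, non-pte-sized writes: such a page is probably no longer
 * being used as a page table.
 */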
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		       const u8 *new, int bytes)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	struct kvm_mmu_page *page;
	struct hlist_node *node, *n;
	struct hlist_head *bucket;
	unsigned index;
	u64 *spte;
	unsigned offset = offset_in_page(gpa);
	unsigned pte_size;
	unsigned page_offset;
	unsigned misaligned;
	unsigned quadrant;
	int level;
	int flooded = 0;
	int npte;

	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
	if (gfn == vcpu->last_pt_write_gfn) {
		++vcpu->last_pt_write_count;
		if (vcpu->last_pt_write_count >= 3)
			flooded = 1;
	} else {
		vcpu->last_pt_write_gfn = gfn;
		vcpu->last_pt_write_count = 1;
	}
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
		if (page->gfn != gfn || page->role.metaphysical)
			continue;
		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
		misaligned |= bytes < 4;
		if (misaligned || flooded) {
			/*
			 * Misaligned accesses are too much trouble to fix
			 * up; also, they usually indicate a page is not used
			 * as a page table.
			 *
			 * If we're seeing too many writes to a page,
			 * it may no longer be a page table, or we may be
			 * forking, in which case it is better to unmap the
			 * page.
			 */
			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
				 gpa, bytes, page->role.word);
			kvm_mmu_zap_page(vcpu->kvm, page);
			continue;
		}
		page_offset = offset;
		level = page->role.level;
		npte = 1;
		if (page->role.glevels == PT32_ROOT_LEVEL) {
			page_offset <<= 1;	/* 32->64 */
			/*
			 * A 32-bit pde maps 4MB while the shadow pdes map
			 * only 2MB.  So we need to double the offset again
			 * and zap two pdes instead of one.
			 */
			if (level == PT32_ROOT_LEVEL) {
				page_offset &= ~7; /* kill rounding error */
				page_offset <<= 1;
				npte = 2;
			}
			quadrant = page_offset >> PAGE_SHIFT;
			page_offset &= ~PAGE_MASK;
			if (quadrant != page->role.quadrant)
				continue;
		}
		spte = &page->spt[page_offset / sizeof(*spte)];
		while (npte--) {
			mmu_pte_write_zap_pte(vcpu, page, spte);
			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
			++spte;
		}
	}
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
}

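/*
 * Recycle shadow pages until a minimum reserve is free again, zapping
 * pages from the tail of the active list (the oldest ones) first.
 */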
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
		struct kvm_mmu_page *page;

		page = container_of(vcpu->kvm->active_mmu_pages.prev,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu->kvm, page);
	}
}

static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
		page = container_of(vcpu->kvm->active_mmu_pages.next,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu->kvm, page);
	}
	free_page((unsigned long)vcpu->mmu.pae_root);
}

static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct page *page;
	int i;

	ASSERT(vcpu);

	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;

	/*
	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
	 * Therefore we need to allocate shadow page tables in the first
	 * 4GB of memory, which happens to fit the DMA32 zone.
	 */
	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
	if (!page)
		goto error_1;
	vcpu->mmu.pae_root = page_address(page);
	for (i = 0; i < 4; ++i)
		vcpu->mmu.pae_root[i] = INVALID_PAGE;

	return 0;

error_1:
	free_mmu_pages(vcpu);
	return -ENOMEM;
}

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	return alloc_mmu_pages(vcpu);
}

int kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);

	destroy_kvm_mmu(vcpu);
	free_mmu_pages(vcpu);
	mmu_free_memory_caches(vcpu);
}

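/*
 * Remove write access from every shadow pte belonging to a memory slot,
 * so that subsequent guest writes fault and can be tracked (e.g. for
 * dirty page logging).
 */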
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
		int i;
		u64 *pt;

		if (!test_bit(slot, &page->slot_bitmap))
			continue;

		pt = page->spt;
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
			/* avoid RMW */
			if (pt[i] & PT_WRITABLE_MASK) {
				rmap_remove(&pt[i]);
				pt[i] &= ~PT_WRITABLE_MASK;
			}
	}
}

void kvm_mmu_zap_all(struct kvm *kvm)
{
	struct kvm_mmu_page *page, *node;

	list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link)
		kvm_mmu_zap_page(kvm, page);

	kvm_flush_remote_tlbs(kvm);
}

void kvm_mmu_module_exit(void)
{
	if (pte_chain_cache)
		kmem_cache_destroy(pte_chain_cache);
	if (rmap_desc_cache)
		kmem_cache_destroy(rmap_desc_cache);
	if (mmu_page_header_cache)
		kmem_cache_destroy(mmu_page_header_cache);
}

int kvm_mmu_module_init(void)
{
	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
					    sizeof(struct kvm_pte_chain),
					    0, 0, NULL);
	if (!pte_chain_cache)
		goto nomem;
	rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
					    sizeof(struct kvm_rmap_desc),
					    0, 0, NULL);
	if (!rmap_desc_cache)
		goto nomem;

	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
						  sizeof(struct kvm_mmu_page),
						  0, 0, NULL);
	if (!mmu_page_header_cache)
		goto nomem;

	return 0;

nomem:
	kvm_mmu_module_exit();
	return -ENOMEM;
}

#ifdef AUDIT

static const char *audit_msg;

static gva_t canonicalize(gva_t gva)
{
#ifdef CONFIG_X86_64
	gva = (long long)(gva << 16) >> 16;
#endif
	return gva;
}

static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
				gva_t va, int level)
{
	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
	int i;
	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
		u64 ent = pt[i];

		if (!(ent & PT_PRESENT_MASK))
			continue;

		va = canonicalize(va);
		if (level > 1)
			audit_mappings_page(vcpu, ent, va, level - 1);
		else {
			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
			hpa_t hpa = gpa_to_hpa(vcpu, gpa);

			if ((ent & PT_PRESENT_MASK)
			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
				printk(KERN_ERR "audit error: (%s) levels %d"
				       " gva %lx gpa %llx hpa %llx ent %llx\n",
				       audit_msg, vcpu->mmu.root_level,
				       va, gpa, hpa, ent);
		}
	}
}

static void audit_mappings(struct kvm_vcpu *vcpu)
{
	unsigned i;

	if (vcpu->mmu.root_level == 4)
		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
	else
		for (i = 0; i < 4; ++i)
			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
				audit_mappings_page(vcpu,
						    vcpu->mmu.pae_root[i],
						    i << 30,
						    2);
}

static int count_rmaps(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	int i, j, k;

	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
		struct kvm_rmap_desc *d;

		for (j = 0; j < m->npages; ++j) {
			struct page *page = m->phys_mem[j];

			if (!page->private)
				continue;
			if (!(page->private & 1)) {
				++nmaps;
				continue;
			}
			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
			while (d) {
				for (k = 0; k < RMAP_EXT; ++k)
					if (d->shadow_ptes[k])
						++nmaps;
					else
						break;
				d = d->more;
			}
		}
	}
	return nmaps;
}

static int count_writable_mappings(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	struct kvm_mmu_page *page;
	int i;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		u64 *pt = page->spt;

		if (page->role.level != PT_PAGE_TABLE_LEVEL)
			continue;

		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
			u64 ent = pt[i];

			if (!(ent & PT_PRESENT_MASK))
				continue;
			if (!(ent & PT_WRITABLE_MASK))
				continue;
			++nmaps;
		}
	}
	return nmaps;
}

static void audit_rmap(struct kvm_vcpu *vcpu)
{
	int n_rmap = count_rmaps(vcpu);
	int n_actual = count_writable_mappings(vcpu);

	if (n_rmap != n_actual)
		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
		       __FUNCTION__, audit_msg, n_rmap, n_actual);
}

static void audit_write_protection(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		hfn_t hfn;
		struct page *pg;

		if (page->role.metaphysical)
			continue;

		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
			>> PAGE_SHIFT;
		pg = pfn_to_page(hfn);
		if (pg->private)
			printk(KERN_ERR "%s: (%s) shadow page has writable"
			       " mappings: gfn %lx role %x\n",
			       __FUNCTION__, audit_msg, page->gfn,
			       page->role.word);
	}
}

static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
{
	int olddbg = dbg;

	dbg = 0;
	audit_msg = msg;
	audit_rmap(vcpu);
	audit_write_protection(vcpu);
	audit_mappings(vcpu);
	dbg = olddbg;
}

#endif