-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c |  5 +++++
-rw-r--r--  arch/x86/include/asm/vsyscall.h       |  2 ++
-rw-r--r--  arch/x86/mm/kaiser.c                  | 34 ++++++++++++++++++++++++++++++----
3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 636c4b341f36..6bb7e92c6d50 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *str)
}
early_param("vsyscall", vsyscall_setup);
+bool vsyscall_enabled(void)
+{
+ return vsyscall_mode != NONE;
+}
+
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
const char *message)
{
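For context, the new helper simply exposes the boot-time vsyscall mode. In kernels of this vintage, vsyscall_mode is a file-local enum ({ EMULATE, NATIVE, NONE }) set by the vsyscall= command-line parameter, so vsyscall_enabled() is true for "emulate" and "native" and false only for vsyscall=none. A minimal user-space model of those semantics (the argument parsing below is a sketch, not the kernel's early_param handler):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* The enum mirrors vsyscall_64.c of this era; the rest is a model. */
    static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;

    static bool vsyscall_enabled(void)
    {
        return vsyscall_mode != NONE;
    }

    int main(int argc, char **argv)
    {
        const char *str = argc > 1 ? argv[1] : "emulate"; /* vsyscall=<str> */

        if (!strcmp(str, "native"))
            vsyscall_mode = NATIVE;
        else if (!strcmp(str, "none"))
            vsyscall_mode = NONE;

        printf("vsyscall=%s -> enabled: %d\n", str, vsyscall_enabled());
        return 0;
    }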
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 6ba66ee79710..4865e10dbb55 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -12,12 +12,14 @@ extern void map_vsyscall(void);
* Returns true if handled.
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool vsyscall_enabled(void);
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
return false;
}
+static inline bool vsyscall_enabled(void) { return false; }
#endif
#endif /* _ASM_X86_VSYSCALL_H */
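The header uses the usual extern-or-stub pattern: when the vsyscall emulation config option is disabled, the static inline returning false lets callers test vsyscall_enabled() without any #ifdef, and the compiler discards the dead branch. A stand-alone sketch of that pattern, compiled here in the stub configuration (the CONFIG_X86_VSYSCALL_EMULATION macro stands in for the real Kconfig symbol, and the caller is hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    #ifdef CONFIG_X86_VSYSCALL_EMULATION
    extern bool vsyscall_enabled(void);    /* real definition lives elsewhere */
    #else
    static inline bool vsyscall_enabled(void) { return false; }
    #endif

    int main(void)
    {
        /* No #ifdef at the call site: with the stub, this condition is a
         * compile-time constant and the branch can be optimized away. */
        if (vsyscall_enabled())
            printf("would map the vsyscall page into the shadow tables\n");
        else
            printf("vsyscall emulation compiled out\n");
        return 0;
    }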
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index d8376b4ad9f0..8f8e5e03d083 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/cmdline.h>
+#include <asm/vsyscall.h>
int kaiser_enabled __read_mostly = 1;
EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
@@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
-static pte_t *kaiser_pagetable_walk(unsigned long address)
+static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
{
pmd_t *pmd;
pud_t *pud;
pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ unsigned long prot = _KERNPG_TABLE;
if (pgd_none(*pgd)) {
WARN_ONCE(1, "All shadow pgds should have been populated");
@@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsigned long address)
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);
+ if (user) {
+ /*
+ * The vsyscall page is the only page that will have
+ * _PAGE_USER set. Catch everything else.
+ */
+ BUG_ON(address != VSYSCALL_ADDR);
+
+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+ prot = _PAGE_TABLE;
+ }
+
pud = pud_offset(pgd, address);
/* The shadow page tables do not use large mappings: */
if (pud_large(*pud)) {
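The reason for flipping the pgd bit and switching prot is the x86 permission model: a user-mode access is allowed only if the U/S bit is set at every level of the paging hierarchy. _KERNPG_TABLE lacks _PAGE_USER while _PAGE_TABLE includes it, which is why the set_pud()/set_pmd() hunks below must also use prot for the vsyscall page's intermediate levels. A runnable model of that all-levels rule (the flag values match arch/x86; the walk itself is heavily simplified):

    #include <stdbool.h>
    #include <stdio.h>

    #define _PAGE_PRESENT 0x001UL
    #define _PAGE_RW      0x002UL
    #define _PAGE_USER    0x004UL    /* the U/S bit, bit 2 */

    /* One entry per level: pgd, pud, pmd, pte. */
    static bool user_access_ok(const unsigned long ent[4])
    {
        for (int lvl = 0; lvl < 4; lvl++)
            if (!(ent[lvl] & _PAGE_USER))
                return false;    /* a single kernel-only level vetoes it */
        return true;
    }

    int main(void)
    {
        unsigned long kernpg = _PAGE_PRESENT | _PAGE_RW;              /* ~_KERNPG_TABLE */
        unsigned long userpg = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER; /* ~_PAGE_TABLE   */

        unsigned long vsyscall[4] = { userpg, userpg, userpg, userpg };
        unsigned long mixed[4]    = { userpg, kernpg, userpg, userpg };

        printf("U/S at all levels -> user access ok: %d\n", user_access_ok(vsyscall));
        printf("kernel-only pud   -> user access ok: %d\n", user_access_ok(mixed));
        return 0;
    }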
@@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address)
return NULL;
spin_lock(&shadow_table_allocation_lock);
if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+ set_pud(pud, __pud(prot | __pa(new_pmd_page)));
__inc_zone_page_state(virt_to_page((void *)
new_pmd_page), NR_KAISERTABLE);
} else
@@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address)
return NULL;
spin_lock(&shadow_table_allocation_lock);
if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+ set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
__inc_zone_page_state(virt_to_page((void *)
new_pte_page), NR_KAISERTABLE);
} else
@@ -191,7 +204,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
ret = -EIO;
break;
}
- pte = kaiser_pagetable_walk(address);
+ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
if (!pte) {
ret = -ENOMEM;
break;
@@ -318,6 +331,19 @@ void __init kaiser_init(void)
kaiser_init_all_pgds();
+ /*
+ * Note that this sets _PAGE_USER and it needs to happen when the
+ * pagetable hierarchy gets created, i.e., early. Otherwise,
+ * kaiser_pagetable_walk() will encounter already-initialized PTEs in
+ * the hierarchy and not set the proper permissions on them, leading
+ * to page faults with page-protection violations when, for example,
+ * userspace tries to read the vsyscall page.
+ */
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
+ PAGE_SIZE,
+ __PAGE_KERNEL_VSYSCALL);
+
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
per_cpu_offset(cpu);
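To see why the mapping has to happen early, note that kaiser_pagetable_walk() applies prot only on its pud_none()/pmd_none() allocation paths: if an earlier mapping has already populated those levels with _KERNPG_TABLE, a later user walk changes nothing and the U/S bit never reaches the intermediate entries. A small model of that failure mode (one pud slot, simplified flags; the helper names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define _PAGE_PRESENT 0x001UL
    #define _PAGE_USER    0x004UL

    static unsigned long pud;    /* 0 plays the role of pud_none() */

    static void walk(bool user)
    {
        unsigned long prot = _PAGE_PRESENT;    /* ~_KERNPG_TABLE */

        if (user)
            prot |= _PAGE_USER;                /* ~_PAGE_TABLE   */

        if (!pud)          /* prot is applied only when allocating */
            pud = prot;
    }

    int main(void)
    {
        /* Early: the user walk comes first, so the level gets _PAGE_USER. */
        walk(true);
        printf("early mapping: U/S %s\n", pud & _PAGE_USER ? "set" : "missing");

        /* Late: a kernel mapping populated the level first, so the
         * later user walk hits the already-initialized entry and the
         * U/S bit is silently missing. */
        pud = 0;
        walk(false);
        walk(true);
        printf("late mapping:  U/S %s\n", pud & _PAGE_USER ? "set" : "missing");
        return 0;
    }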