From: Peter Zijlstra <pzijlstr@redhat.com> Date: Wed, 10 Sep 2008 15:40:52 +0200 Subject: [x86] vDSO: use install_special_mapping Message-id: 1221054052.4917.28.camel@lappy.programming.kicks-ass.net O-Subject: [kernel team] [RHEL-5 PATCH] BZ460251 CVE-2008-3527 i386 vDSO: use install_special_mapping Bugzilla: 460276 RH-Acked-by: David Howells <dhowells@redhat.com> RH-Acked-by: Eugene Teo <eteo@redhat.com> CVE: CVE-2008-3527 https://bugzilla.redhat.com/show_bug.cgi?id=460276 Tested the i386 and x86_64/ia32 bits with the reproducers from the BZ. I don't have a clue wrt Xen, but the code looked similar enough that it _ought_ to work. diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0a10cae..2d77fa0 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -72,11 +72,12 @@ void enable_sep_cpu(void) */ extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static void *syscall_page; +static struct page *syscall_pages[1]; int __cpuinit sysenter_setup(void) { - syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + syscall_pages[0] = virt_to_page(syscall_page); #ifdef CONFIG_XEN if (boot_cpu_has(X86_FEATURE_SEP)) { @@ -109,24 +110,6 @@ int __cpuinit sysenter_setup(void) return 0; } -static struct page *syscall_nopage(struct vm_area_struct *vma, - unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall_vm_ops = { - .close = syscall_vma_close, - .nopage = syscall_nopage, -}; - /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; @@ -134,7 +117,6 @@ extern void SYSENTER_RETURN; int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack, unsigned long start_code, 
unsigned long interp_map_address) { - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long addr; int ret; @@ -146,38 +128,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack, goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) { - ret = -ENOMEM; - goto up_fail; - } - - vma->vm_start = addr; - vma->vm_end = addr + PAGE_SIZE; - /* MAYWRITE to allow gdb to COW and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; /* + * MAYWRITE to allow gdb to COW and set breakpoints + * * Make sure the vDSO gets into every core dump. * Dumping its contents makes post-mortem fully interpretable later * without matching up the same kernel and hardware config to see * what PC values meant. */ - vma->vm_flags |= VM_ALWAYSDUMP; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall_vm_ops; - vma->vm_mm = mm; - - ret = insert_vm_struct(mm, vma); - if (unlikely(ret)) { - kmem_cache_free(vm_area_cachep, vma); + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall_pages); + if (ret) goto up_fail; - } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); - mm->total_vm++; up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/x86_64/ia32/syscall32-xen.c b/arch/x86_64/ia32/syscall32-xen.c index 6fefd63..d51665e 100644 --- a/arch/x86_64/ia32/syscall32-xen.c +++ b/arch/x86_64/ia32/syscall32-xen.c @@ -21,29 +21,11 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; -char *syscall32_page; +static struct page *syscall32_pages[1]; #ifndef USE_INT80 static int use_sysenter = -1; #endif -static struct page * -syscall32_nopage(struct vm_area_struct *vma, 
unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall32_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall32_vm_ops = { - .close = syscall32_vma_close, - .nopage = syscall32_nopage, -}; - struct linux_binprm; /* Setup a VMA at program startup for the vsyscall page */ @@ -51,40 +33,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack, unsigned long start_code, unsigned long interp_map_address) { - int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) - return -ENOMEM; - - memset(vma, 0, sizeof(struct vm_area_struct)); - /* Could randomize here */ - vma->vm_start = VSYSCALL32_BASE; - vma->vm_end = VSYSCALL32_END; - /* MAYWRITE to allow gdb to COW and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall32_vm_ops; - vma->vm_mm = mm; - down_write(&mm->mmap_sem); - if ((ret = insert_vm_struct(mm, vma))) { - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); - return ret; - } - mm->total_vm += npages; + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. 
+ */ + ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall32_pages); up_write(&mm->mmap_sem); - return 0; + return ret; } static int __init init_syscall32(void) { - syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + syscall32_pages[0] = virt_to_page(syscall32_page); if (!syscall32_page) panic("Cannot allocate syscall32 page"); diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 3b13188..78372c4 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -18,27 +18,9 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; -char *syscall32_page; +static struct page *syscall32_pages[1]; static int use_sysenter = -1; -static struct page * -syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall32_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall32_vm_ops = { - .close = syscall32_vma_close, - .nopage = syscall32_nopage, -}; - struct linux_binprm; /* Setup a VMA at program startup for the vsyscall page */ @@ -46,47 +28,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack, unsigned long start_code, unsigned long interp_map_address) { - int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) - return -ENOMEM; - - memset(vma, 0, sizeof(struct vm_area_struct)); - /* Could randomize here */ - vma->vm_start = VSYSCALL32_BASE; - vma->vm_end = VSYSCALL32_END; - /* MAYWRITE to allow gdb to COW 
and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + down_write(&mm->mmap_sem); /* + * MAYWRITE to allow gdb to COW and set breakpoints + * * Make sure the vDSO gets into every core dump. * Dumping its contents makes post-mortem fully interpretable later * without matching up the same kernel and hardware config to see * what PC values meant. */ - vma->vm_flags |= VM_ALWAYSDUMP; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall32_vm_ops; - vma->vm_mm = mm; - - down_write(&mm->mmap_sem); - if ((ret = insert_vm_struct(mm, vma))) { - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); - return ret; - } - mm->total_vm += npages; + ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall32_pages); up_write(&mm->mmap_sem); - return 0; + return ret; } static int __init init_syscall32(void) { - syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + syscall32_pages[0] = virt_to_page(syscall32_page); if (!syscall32_page) panic("Cannot allocate syscall32 page"); if (use_sysenter > 0) { diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 30c0a88..c448d24 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -85,7 +85,6 @@ extern void swap_low_mappings(void); extern void __show_regs(struct pt_regs * regs); extern void show_regs(struct pt_regs * regs); -extern char *syscall32_page; extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); diff --git a/include/asm-x86_64/vsyscall32.h b/include/asm-x86_64/vsyscall32.h index c631c08..246b461 100644 --- a/include/asm-x86_64/vsyscall32.h +++ b/include/asm-x86_64/vsyscall32.h @@ -8,7 +8,6 @@ #define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410) #else #define 
VSYSCALL32_BASE 0xffffe000UL -#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE) #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE) #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6a9ed9b..bff0f3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1052,8 +1052,7 @@ static inline unsigned long get_unmapped_area(struct file * file, unsigned long extern int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, - unsigned long vm_flags, pgprot_t pgprot, - struct page **pages); + unsigned long vm_flags, struct page **pages); extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/mm/mmap.c b/mm/mmap.c index 59d2a5b..4ba4440 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2233,11 +2233,16 @@ special_mapping_nopage(struct vm_area_struct *vma, return NOPAGE_SIGBUS; } +static void special_mapping_close(struct vm_area_struct *vma) +{ +} + static struct vm_operations_struct special_mapping_vmops = { + .close = special_mapping_close, .nopage = special_mapping_nopage, }; -unsigned int vdso_populate = 1; +unsigned int vdso_populate = 0; /* * Insert a new vma covering the given region, with the given flags and @@ -2248,8 +2253,7 @@ unsigned int vdso_populate = 1; */ int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, - unsigned long vm_flags, pgprot_t pgprot, - struct page **pages) + unsigned long vm_flags, struct page **pages) { struct vm_area_struct *vma; int err; @@ -2263,13 +2267,16 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags | VM_DONTEXPAND; - vma->vm_page_prot = pgprot; + vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - 
insert_vm_struct(mm, vma); + if (unlikely(insert_vm_struct(mm, vma))) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } mm->total_vm += len >> PAGE_SHIFT; if (!vdso_populate)