exec-shield patch from kernel-2.6.18-238.el5.src.rpm

diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/kernel/cpu/common.c linux-810/arch/i386/kernel/cpu/common.c
--- linux-802/arch/i386/kernel/cpu/common.c
+++ linux-810/arch/i386/kernel/cpu/common.c
@@ -428,6 +428,13 @@ void __cpuinit identify_cpu(struct cpuin
 	if (disable_pse)
 		clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
+	if (exec_shield != 0) {
+#ifdef CONFIG_HIGHMEM64G   /* NX implies PAE */
+		if (!test_bit(X86_FEATURE_NX, c->x86_capability))
+#endif
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+	}
+
 	/* If the model name is still unset, do table lookup. */
 	if ( !c->x86_model_id[0] ) {
 		char *p;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/kernel/process.c linux-810/arch/i386/kernel/process.c
--- linux-802/arch/i386/kernel/process.c
+++ linux-810/arch/i386/kernel/process.c
@@ -646,6 +646,8 @@ struct task_struct fastcall * __switch_t
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
 	__unlazy_fpu(prev_p);
+	if (next_p->mm)
+		load_user_cs_desc(cpu, next_p->mm);
 
 	/*
 	 * Reload esp0.
@@ -906,3 +908,60 @@ unsigned long arch_align_stack(unsigned 
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+	if (limit > mm->context.exec_limit) {
+		mm->context.exec_limit = limit;
+		set_user_cs(&mm->context.user_cs, limit);
+		if (mm == current->mm) {
+			preempt_disable();
+			load_user_cs_desc(smp_processor_id(), mm);
+			preempt_enable();
+		}
+	}
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+	struct vm_area_struct *vma;
+	unsigned long limit = PAGE_SIZE;
+
+	if (old_end == mm->context.exec_limit) {
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+
+		mm->context.exec_limit = limit;
+		set_user_cs(&mm->context.user_cs, limit);
+		if (mm == current->mm) {
+			preempt_disable();
+			load_user_cs_desc(smp_processor_id(), mm);
+			preempt_enable();
+		}
+	}
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+	mm->context.exec_limit = 0;
+	set_user_cs(&mm->context.user_cs, 0);
+}
+
+/*
+ * Generate a random brk address between 128MB and 160MB
+ * (if the layout allows it).
+ */
+void randomize_brk(unsigned long old_brk)
+{
+	unsigned long new_brk, range_start, range_end;
+
+	range_start = 0x08000000;
+	if (current->mm->brk >= range_start)
+		range_start = current->mm->brk;
+	range_end = range_start + 0x02000000;
+	new_brk = randomize_range(range_start, range_end, 0);
+	if (new_brk)
+		current->mm->brk = new_brk;
+}
+
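
A note on randomize_brk() above: range_start is 0x08000000 (128MB), or the current brk if that is already higher, and the window is 0x02000000 (32MB) wide, so the new brk lands between 128MB and 160MB. The following user-space sketch models only the window arithmetic; randomize_range() here is a stand-in for the kernel helper, not its implementation.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_MASK (~0xfffUL)

/* stand-in: pick a page-aligned address in [start, end - len) */
static unsigned long randomize_range(unsigned long start,
				     unsigned long end, unsigned long len)
{
	if (end <= start + len)
		return 0;
	return PAGE_MASK & (start + (random() % (end - start - len)));
}

int main(void)
{
	unsigned long range_start = 0x08000000;	/* 128MB */
	unsigned long range_end = range_start + 0x02000000;	/* 160MB */
	unsigned long new_brk = randomize_range(range_start, range_end, 0);

	printf("brk randomized into [%#lx, %#lx): %#lx\n",
	       range_start, range_end, new_brk);
	return 0;
}
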
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/kernel/smp.c linux-810/arch/i386/kernel/smp.c
--- linux-802/arch/i386/kernel/smp.c
+++ linux-810/arch/i386/kernel/smp.c
@@ -23,6 +23,7 @@
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
+#include <asm/desc.h>
 #include <mach_apic.h>
 
 /*
@@ -324,6 +325,8 @@ fastcall void smp_invalidate_interrupt(s
 	unsigned long cpu;
 
 	cpu = get_cpu();
+	if (current->active_mm)
+		load_user_cs_desc(cpu, current->active_mm);
 
 	if (!cpu_isset(cpu, flush_cpumask))
 		goto out;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/kernel/sysenter.c linux-810/arch/i386/kernel/sysenter.c
--- linux-802/arch/i386/kernel/sysenter.c
+++ linux-810/arch/i386/kernel/sysenter.c
@@ -118,7 +118,8 @@ static struct vm_operations_struct sysca
 extern void SYSENTER_RETURN;
 
 /* Setup a VMA at program startup for the vsyscall page */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
+				unsigned long start_code, unsigned long interp_map_address)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/kernel/traps.c linux-810/arch/i386/kernel/traps.c
--- linux-802/arch/i386/kernel/traps.c
+++ linux-810/arch/i386/kernel/traps.c
@@ -565,7 +565,82 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", inv
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
+
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+	struct desc_struct *desc1, *desc2;
+	struct vm_area_struct *vma;
+	unsigned long limit;
+
+	if (current->mm == NULL)
+		return 0;
+
+	limit = -1UL;
+	if (current->mm->context.exec_limit != -1UL) {
+		limit = PAGE_SIZE;
+		spin_lock(&current->mm->page_table_lock);
+		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+		spin_unlock(&current->mm->page_table_lock);
+		if (limit >= TASK_SIZE)
+			limit = -1UL;
+		current->mm->context.exec_limit = limit;
+	}
+	set_user_cs(&current->mm->context.user_cs, limit);
+
+	desc1 = &current->mm->context.user_cs;
+	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+	if (desc1->a != desc2->a || desc1->b != desc2->b) {
+		/*
+		 * The CS was not in sync - reload it and retry the
+		 * instruction. If the instruction still faults then
+		 * we won't hit this branch next time around.
+		 */
+		if (print_fatal_signals >= 2) {
+			printk("#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id());
+			printk(" exec_limit: %08lx, user_cs: %08lx/%08lx, CPU_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, desc1->a, desc1->b, desc2->a, desc2->b);
+		}
+		load_user_cs_desc(cpu, current->mm);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc).  It loses
+ * the original trap number and error code.  The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
+fastcall void do_iret_error(struct pt_regs *regs, long error_code)
+{
+	int ok = check_lazy_exec_limit(get_cpu(), regs, error_code);
+	put_cpu();
+	if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+			      error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+		siginfo_t info;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = ILL_BADSTK;
+		info.si_addr = 0;
+		do_trap(32, SIGSEGV, "iret exception", 0, regs, error_code,
+			&info);
+	}
+}
 
 fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 					      long error_code)
@@ -573,6 +648,7 @@ fastcall void __kprobes do_general_prote
 	int cpu = get_cpu();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
+	int ok;
 
 	/*
 	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
@@ -599,7 +675,6 @@ fastcall void __kprobes do_general_prote
 		put_cpu();
 		return;
 	}
-	put_cpu();
 
 	current->thread.error_code = error_code;
 	current->thread.trap_no = 13;
@@ -610,17 +685,31 @@ fastcall void __kprobes do_general_prote
 	if (!user_mode(regs))
 		goto gp_in_kernel;
 
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+
+	put_cpu();
+
+	if (ok)
+		return;
+
+	if (print_fatal_signals) {
+		printk("#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->eip, smp_processor_id());
+		printk(" exec_limit: %08lx, user_cs: %08lx/%08lx.\n", current->mm->context.exec_limit, current->mm->context.user_cs.a, current->mm->context.user_cs.b);
+	}
+
 	current->thread.error_code = error_code;
 	current->thread.trap_no = 13;
 	force_sig(SIGSEGV, current);
 	return;
 
 gp_in_vm86:
+	put_cpu();
 	local_irq_enable();
 	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 	return;
 
 gp_in_kernel:
+	put_cpu();
 	if (!fixup_exception(regs)) {
 		if (notify_die(DIE_GPF, "general protection fault", regs,
 				error_code, 13, SIGSEGV) == NOTIFY_STOP)
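
check_lazy_exec_limit() above is a compare-and-reload idiom: the per-mm user_cs descriptor is recomputed, compared against the live per-CPU GDT entry, and reloaded on mismatch so the faulting instruction can be retried; only a fault with matching descriptors is a genuine SIGSEGV. A user-space model of that idiom (struct and names are illustrative, not the kernel's):

#include <stdio.h>

struct desc { unsigned long a, b; };

static int lazy_cs_fixup(struct desc *mm_cs, struct desc *cpu_cs)
{
	if (mm_cs->a != cpu_cs->a || mm_cs->b != cpu_cs->b) {
		*cpu_cs = *mm_cs;	/* models load_user_cs_desc() */
		return 1;		/* retry the faulting instruction */
	}
	return 0;			/* genuine #GP -> SIGSEGV */
}

int main(void)
{
	struct desc mm_cs = { 0x0fff, 0x00c0fb00 };	/* cached per-mm copy */
	struct desc cpu_cs = { 0xffff, 0x00cffb00 };	/* stale per-CPU copy */

	printf("first fault: fixup=%d\n", lazy_cs_fixup(&mm_cs, &cpu_cs));
	printf("second fault: fixup=%d\n", lazy_cs_fixup(&mm_cs, &cpu_cs));
	return 0;
}
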
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/mm/init.c linux-810/arch/i386/mm/init.c
--- linux-802/arch/i386/mm/init.c
+++ linux-810/arch/i386/mm/init.c
@@ -433,7 +433,7 @@ u64 __supported_pte_mask __read_mostly =
  * Control non executable mappings.
  *
  * on      Enable
- * off     Disable
+ * off     Disable (disables exec-shield too)
  */
 void __init noexec_setup(const char *str)
 {
@@ -443,6 +443,7 @@ void __init noexec_setup(const char *str
 	} else if (!strncmp(str,"off",3)) {
 		disable_nx = 1;
 		__supported_pte_mask &= ~_PAGE_NX;
+		exec_shield = 0;
 	}
 }
 
@@ -507,7 +508,10 @@ void __init paging_init(void)
 	set_nx();
 	if (nx_enabled)
 		printk("NX (Execute Disable) protection: active\n");
+	else
 #endif
+	if (exec_shield)
+		printk("Using x86 segment limits to approximate NX protection\n");
 
 	pagetable_init();
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/i386/mm/mmap.c linux-810/arch/i386/mm/mmap.c
--- linux-802/arch/i386/mm/mmap.c
+++ linux-810/arch/i386/mm/mmap.c
@@ -62,15 +62,17 @@ void arch_pick_mmap_layout(struct mm_str
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (sysctl_legacy_va_layout ||
+	if (!(2 & exec_shield) && (sysctl_legacy_va_layout ||
 			(current->personality & ADDR_COMPAT_LAYOUT) ||
-			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base(mm);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		if (!(current->personality & READ_IMPLIES_EXEC))
+			mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
 		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
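
The "2 & exec_shield" test above means exec-shield bit 1 forces the flexible layout even where the legacy conditions (personality bit, unlimited stack, sysctl) would normally apply, and the exec-area allocator is installed only when READ_IMPLIES_EXEC is clear. A small model of that decision (hypothetical helper, not kernel code):

#include <stdio.h>

static const char *pick_layout(int exec_shield, int legacy_wanted,
			       int read_implies_exec)
{
	if (!(exec_shield & 2) && legacy_wanted)
		return "legacy (bottom-up)";
	if (!read_implies_exec)
		return "flexible (top-down) + exec-area allocator";
	return "flexible (top-down)";
}

int main(void)
{
	printf("%s\n", pick_layout(1, 1, 0));	/* bit 1 clear, legacy wins */
	printf("%s\n", pick_layout(3, 1, 0));	/* bit 1 set overrides legacy */
	printf("%s\n", pick_layout(3, 1, 1));	/* READ_IMPLIES_EXEC set */
	return 0;
}
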
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/ia64/ia32/binfmt_elf32.c linux-810/arch/ia64/ia32/binfmt_elf32.c
--- linux-802/arch/ia64/ia32/binfmt_elf32.c
+++ linux-810/arch/ia64/ia32/binfmt_elf32.c
@@ -266,7 +266,7 @@ elf32_set_personality (void)
 }
 
 static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
 {
 	unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/powerpc/kernel/vdso.c linux-810/arch/powerpc/kernel/vdso.c
--- linux-802/arch/powerpc/kernel/vdso.c
+++ linux-810/arch/powerpc/kernel/vdso.c
@@ -216,7 +216,8 @@ static struct vm_operations_struct vdso_
  * vDSO and insert it into the mm struct tree
  */
 int arch_setup_additional_pages(struct linux_binprm *bprm,
-				int executable_stack)
+				int executable_stack, unsigned long start_code,
+				unsigned long interp_map_address)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/ia32/ia32_binfmt.c linux-810/arch/x86_64/ia32/ia32_binfmt.c
--- linux-802/arch/x86_64/ia32/ia32_binfmt.c
+++ linux-810/arch/x86_64/ia32/ia32_binfmt.c
@@ -251,8 +251,6 @@ elf_core_copy_task_xfpregs(struct task_s
 #define elf_check_arch(x) \
 	((x)->e_machine == EM_386)
 
-extern int force_personality32;
-
 #define ELF_EXEC_PAGESIZE PAGE_SIZE
 #define ELF_HWCAP (boot_cpu_data.x86_capability[0])
 #define ELF_PLATFORM  ("i686")
@@ -266,8 +264,6 @@ do {							\
 		set_thread_flag(TIF_ABI_PENDING);		\
 	else							\
 		clear_thread_flag(TIF_ABI_PENDING);		\
-	/* XXX This overwrites the user set personality */	\
-	current->personality |= force_personality32;		\
 } while (0)
 
 /* Override some function names */
@@ -311,7 +307,7 @@ static void elf32_init(struct pt_regs *)
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 #define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
+extern int syscall32_setup_pages(struct linux_binprm *, int exstack, unsigned long start_code, unsigned long interp_map_address);
 
 #include "../../../fs/binfmt_elf.c" 
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/ia32/syscall32.c linux-810/arch/x86_64/ia32/syscall32.c
--- linux-802/arch/x86_64/ia32/syscall32.c
+++ linux-810/arch/x86_64/ia32/syscall32.c
@@ -42,7 +42,9 @@ static struct vm_operations_struct sysca
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
+int syscall32_setup_pages(struct linux_binprm *bprm, int exstack,
+			  unsigned long start_code,
+			  unsigned long interp_map_address)
 {
 	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/kernel/process.c linux-810/arch/x86_64/kernel/process.c
--- linux-802/arch/x86_64/kernel/process.c
+++ linux-810/arch/x86_64/kernel/process.c
@@ -646,12 +646,6 @@ void set_personality_64bit(void)
 
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32); 
-
-	/* TBD: overwrites user setup. Should have two bits.
-	   But 64bit processes have always behaved this way,
-	   so it's not too bad. The main problem is just that
-   	   32bit childs are affected again. */
-	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
 asmlinkage long sys_fork(struct pt_regs *regs)
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/kernel/setup64.c linux-810/arch/x86_64/kernel/setup64.c
--- linux-802/arch/x86_64/kernel/setup64.c
+++ linux-810/arch/x86_64/kernel/setup64.c
@@ -46,7 +46,7 @@ Control non executable mappings for 64bi
 on	Enable(default)
 off	Disable
 */ 
-int __init nonx_setup(char *str)
+void __init nonx_setup(const char *str)
 {
 	if (!strncmp(str, "on", 2)) {
                 __supported_pte_mask |= _PAGE_NX; 
@@ -55,28 +55,7 @@ int __init nonx_setup(char *str)
 		do_not_nx = 1;
 		__supported_pte_mask &= ~_PAGE_NX;
         }
-	return 1;
-} 
-__setup("noexec=", nonx_setup);	/* parsed early actually */
-
-int force_personality32 = 0; 
-
-/* noexec32=on|off
-Control non executable heap for 32bit processes.
-To control the stack too use noexec=off
-
-on	PROT_READ does not imply PROT_EXEC for 32bit processes
-off	PROT_READ implies PROT_EXEC (default)
-*/
-static int __init nonx32_setup(char *str)
-{
-	if (!strcmp(str, "on"))
-		force_personality32 &= ~READ_IMPLIES_EXEC;
-	else if (!strcmp(str, "off"))
-		force_personality32 |= READ_IMPLIES_EXEC;
-	return 1;
 }
-__setup("noexec32=", nonx32_setup);
 
 /*
  * Great future plan:
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/mm/fault.c linux-810/arch/x86_64/mm/fault.c
--- linux-802/arch/x86_64/mm/fault.c
+++ linux-810/arch/x86_64/mm/fault.c
@@ -114,7 +114,7 @@ static noinline int is_prefetch(struct p
 	instr = (unsigned char *)convert_rip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
 		return 0;
 
 	while (scan_more && instr < max_instr) { 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/arch/x86_64/mm/mmap.c linux-810/arch/x86_64/mm/mmap.c
--- linux-802/arch/x86_64/mm/mmap.c
+++ linux-810/arch/x86_64/mm/mmap.c
@@ -1,29 +1,87 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
+/*
+ *  linux/arch/x86-64/mm/mmap.c
+ *
+ *  flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
  */
+
+#include <linux/personality.h>
 #include <linux/mm.h>
-#include <linux/sched.h>
 #include <linux/random.h>
-#include <asm/ia32.h>
 
-/* Notebook: move the mmap code from sys_x86_64.c over here. */
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave at least a ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+static inline unsigned long mmap_base(void)
+{
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return TASK_SIZE - (gap & PAGE_MASK);
+}
 
+static inline int mmap_is_legacy(void)
+{
+	/*
+	 * Force standard allocation for 64 bit programs.
+	 */
+	if (!test_thread_flag(TIF_IA32))
+		return 1;
+
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+
+	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+		return 1;
+
+	return sysctl_legacy_va_layout;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-#ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->flags & _TIF_IA32)
-		return ia32_pick_mmap_layout(mm);
-#endif
-	mm->mmap_base = TASK_UNMAPPED_BASE;
-	if (current->flags & PF_RANDOMIZE) {
-		/* Add 28bit randomness which is about 40bits of address space
-		   because mmap base has to be page aligned.
- 		   or ~1/128 of the total user VM
-	   	   (total user address space is 47bits) */
-		unsigned rnd = get_random_int() & 0xfffffff;
-		mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	if (mmap_is_legacy()) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+		mm->unmap_area = arch_unmap_area;
+	} else {
+		mm->mmap_base = mmap_base();
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
 	}
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
 }
-
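
This replaces the old fixed-base-plus-randomness scheme with the flexible layout already used on i386: the mmap base sits one stack-gap below the top of the address space, with the gap clamped between MIN_GAP and MAX_GAP. A user-space model of the arithmetic (the TASK_SIZE value here is illustrative only):

#include <stdio.h>

#define TASK_SIZE 0xc0000000UL		/* illustrative */
#define PAGE_MASK (~0xfffUL)
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP (TASK_SIZE/6*5)

static unsigned long mmap_base(unsigned long stack_rlim)
{
	unsigned long gap = stack_rlim;

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;
	return TASK_SIZE - (gap & PAGE_MASK);
}

int main(void)
{
	printf("8MB stack rlimit -> base %#lx\n", mmap_base(8UL << 20));
	printf("1GB stack rlimit -> base %#lx\n", mmap_base(1UL << 30));
	return 0;
}
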
diff -urNp --exclude-from=/home/davej/.exclude linux-802/drivers/char/random.c linux-810/drivers/char/random.c
--- linux-802/drivers/char/random.c
+++ linux-810/drivers/char/random.c
@@ -1630,13 +1630,18 @@ EXPORT_SYMBOL(secure_dccp_sequence_numbe
  */
 unsigned int get_random_int(void)
 {
+	unsigned int val = 0;
+
+#ifdef CONFIG_X86_HAS_TSC
+	rdtscl(val);
+#endif
 	/*
 	 * Use IP's RNG. It suits our purpose perfectly: it re-keys itself
 	 * every second, from the entropy pool (and thus creates a limited
 	 * drain on it), and uses halfMD4Transform within the second. We
 	 * also mix it with jiffies and the PID:
 	 */
-	return secure_ip_id(current->pid + jiffies);
+	return secure_ip_id(current->pid + jiffies + (int)val);
 }
 
 /*
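
The get_random_int() change folds the CPU timestamp counter into the input of the keyed hash, so two calls within the same jiffy from the same pid no longer return the same value. A sketch of the idea (mix32() is a toy stand-in for secure_ip_id(); __rdtsc() requires x86):

#include <stdio.h>
#include <unistd.h>
#include <x86intrin.h>

static unsigned int mix32(unsigned int v)
{
	v ^= v >> 16; v *= 0x45d9f3b; v ^= v >> 16;	/* toy hash */
	return v;
}

int main(void)
{
	unsigned int val = (unsigned int)__rdtsc();
	unsigned int jiffies = 0;	/* illustrative fixed tick */

	printf("%u\n", mix32(getpid() + jiffies + val));
	return 0;
}
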
diff -urNp --exclude-from=/home/davej/.exclude linux-802/fs/binfmt_elf.c linux-810/fs/binfmt_elf.c
--- linux-802/fs/binfmt_elf.c
+++ linux-810/fs/binfmt_elf.c
@@ -45,7 +45,7 @@
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
 extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 
 #ifndef elf_addr_t
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = 
 		.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+#define BAD_ADDR(x) ((unsigned long)(x) >= PAGE_MASK)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -286,33 +286,71 @@ create_elf_tables(struct linux_binprm *b
 #ifndef elf_map
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
-		struct elf_phdr *eppnt, int prot, int type)
+		struct elf_phdr *eppnt, int prot, int type,
+		unsigned long total_size)
 {
 	unsigned long map_addr;
-	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(size);
 
-	down_write(&current->mm->mmap_sem);
 	/* mmap() will return -EINVAL if given a zero size, but a
 	 * segment with zero filesize is perfectly valid */
-	if (eppnt->p_filesz + pageoffset)
-		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-				   eppnt->p_filesz + pageoffset, prot, type,
-				   eppnt->p_offset - pageoffset);
-	else
-		map_addr = ELF_PAGESTART(addr);
+	if (!size)
+		return addr;
+
+	down_write(&current->mm->mmap_sem);
+	/*
+	 * total_size is the size of the ELF (interpreter) image.
+	 * The _first_ mmap needs to know the full size, otherwise
+	 * randomization might put this image into an overlapping
+	 * position with the ELF binary image (since size < total_size).
+	 * So we first map the 'big' image and then unmap the remainder
+	 * at the end (that unmap is needed for ELF images with holes).
+	 */
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			do_munmap(current->mm, map_addr+size, total_size-size);
+	} else
+		map_addr = do_mmap(filep, addr, size, prot, type, off);
+
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
 
 #endif /* !elf_map */
 
+static inline unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+	int i, first_idx = -1, last_idx = -1;
+
+	for (i = 0; i < nr; i++)
+		if (cmds[i].p_type == PT_LOAD) {
+			last_idx = i;
+			if (first_idx == -1)
+				first_idx = i;
+		}
+
+	if (first_idx == -1)
+		return 0;
+
+	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+				ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
    is only provided so that we can read a.out libraries that have
    an ELF header */
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
-		struct file *interpreter, unsigned long *interp_load_addr)
+		struct file *interpreter, unsigned long *interp_map_addr,
+		unsigned long no_base)
 {
 	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
@@ -320,6 +358,7 @@ static unsigned long load_elf_interp(str
 	int load_addr_set = 0;
 	unsigned long last_bss = 0, elf_bss = 0;
 	unsigned long error = ~0UL;
+	unsigned long total_size;
 	int retval, i, size;
 
 	/* First of all, some simple consistency checks */
@@ -358,6 +397,10 @@ static unsigned long load_elf_interp(str
 		goto out_close;
 	}
 
+	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+	if (!total_size)
+		goto out_close;
+
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
@@ -375,9 +418,14 @@ static unsigned long load_elf_interp(str
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 				elf_type |= MAP_FIXED;
+			else if (no_base && interp_elf_ex->e_type == ET_DYN)
+				load_addr = -vaddr;
 
 			map_addr = elf_map(interpreter, load_addr + vaddr,
-					   eppnt, elf_prot, elf_type);
+					   eppnt, elf_prot, elf_type, total_size);
+			total_size = 0;
+			if (!*interp_map_addr)
+				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
 				goto out_close;
@@ -443,8 +491,7 @@ static unsigned long load_elf_interp(str
 			goto out_close;
 	}
 
-	*interp_load_addr = load_addr;
-	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+	error = load_addr;
 
 out_close:
 	kfree(elf_phdata);
@@ -540,12 +587,12 @@ static int load_elf_binary(struct linux_
 	int elf_exec_fileno;
 	int retval, i;
 	unsigned int size;
-	unsigned long elf_entry, interp_load_addr = 0;
+	unsigned long elf_entry, interp_load_addr = 0, interp_map_addr = 0;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
 	struct files_struct *files;
-	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
+	int have_pt_gnu_stack, executable_stack;
 	unsigned long def_flags = 0;
 	struct {
 		struct elfhdr elf_ex;
@@ -700,6 +747,8 @@ static int load_elf_binary(struct linux_
 	}
 
 	elf_ppnt = elf_phdata;
+	executable_stack = EXSTACK_DEFAULT;
+
 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 		if (elf_ppnt->p_type == PT_GNU_STACK) {
 			if (elf_ppnt->p_flags & PF_X)
@@ -710,6 +759,11 @@ static int load_elf_binary(struct linux_
 		}
 	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 
+	if (current->personality == PER_LINUX && (exec_shield & 2)) {
+		executable_stack = EXSTACK_DISABLE_X;
+		current->flags |= PF_RANDOMIZE;
+	}
+
 	/* Some simple consistency checks for the interpreter */
 	if (elf_interpreter) {
 		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
@@ -762,6 +816,15 @@ static int load_elf_binary(struct linux_
 	if (retval)
 		goto out_free_dentry;
 
+#ifdef __i386__
+	/*
+	 * Turn off the CS limit completely if exec-shield disabled or
+	 * NX active:
+	 */
+	if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
+		arch_add_exec_range(current->mm, -1);
+#endif
+
 	/* Discard our unneeded old files struct */
 	if (files) {
 		put_files_struct(files);
@@ -779,7 +842,8 @@ static int load_elf_binary(struct linux_
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality.  */
 	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
-	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
+	if (!(exec_shield & 2) &&
+			elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
@@ -800,9 +864,8 @@ static int load_elf_binary(struct linux_
 	current->mm->start_stack = bprm->p;
 
 	/* Now we do a little grungy work by mmaping the ELF image into
-	   the correct location in memory.  At this point, we assume that
-	   the image should be loaded at fixed address, not at a variable
-	   address. */
+	   the correct location in memory.
+	 */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		int elf_prot = 0, elf_flags;
@@ -849,18 +912,17 @@ static int load_elf_binary(struct linux_
 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 
 		vaddr = elf_ppnt->p_vaddr;
-		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
+		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set)
 			elf_flags |= MAP_FIXED;
-		} else if (loc->elf_ex.e_type == ET_DYN) {
-			/* Try and get dynamic programs out of the way of the
-			 * default mmap base, as well as whatever program they
-			 * might try to exec.  This is because the brk will
-			 * follow the loader, and is not movable.  */
+		else if (loc->elf_ex.e_type == ET_DYN)
+#ifdef __i386__
+			load_bias = 0;
+#else
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-		}
+#endif
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags);
+				elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			goto out_free_dentry;
@@ -936,10 +998,17 @@ static int load_elf_binary(struct linux_
 		if (interpreter_type == INTERPRETER_AOUT)
 			elf_entry = load_aout_interp(&loc->interp_ex,
 						     interpreter);
-		else
+		else {
 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
 						    interpreter,
-						    &interp_load_addr);
+						    &interp_map_addr,
+						    load_bias);
+			if (!BAD_ADDR(elf_entry)) {
+				/* load_elf_interp() returns relocation adjustment */
+				interp_load_addr = elf_entry;
+				elf_entry += loc->interp_elf_ex.e_entry;
+			}
+		}
 		if (BAD_ADDR(elf_entry)) {
 			force_sig(SIGSEGV, current);
 			retval = IS_ERR((void *)elf_entry) ?
@@ -960,21 +1029,22 @@ static int load_elf_binary(struct linux_
 		}
 	}
 
-	kfree(elf_phdata);
-
 	if (interpreter_type != INTERPRETER_AOUT)
 		sys_close(elf_exec_fileno);
 
 	set_binfmt(&elf_format);
 
 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
-	retval = arch_setup_additional_pages(bprm, executable_stack);
+	retval = arch_setup_additional_pages(bprm, executable_stack,
+			start_code, interp_map_addr);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
-		goto out;
+		goto out_free_fh;
 	}
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
+	kfree(elf_phdata);
+
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 	create_elf_tables(bprm, &loc->elf_ex,
@@ -989,6 +1059,10 @@ static int load_elf_binary(struct linux_
 	current->mm->end_data = end_data;
 	current->mm->start_stack = bprm->p;
 
+#ifdef __HAVE_ARCH_RANDOMIZE_BRK
+	if (current->flags & PF_RANDOMIZE)
+		randomize_brk(elf_brk);
+#endif
 	if (current->personality & MMAP_PAGE_ZERO) {
 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 		   and some applications "depend" upon this behavior.
@@ -1168,6 +1242,9 @@ static int maydump(struct vm_area_struct
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
+	if (vma->vm_flags & VM_DONTEXPAND) /* Kludge for vDSO.  */
+		return 1;
+
 	/* Dump shared memory only if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED)
 		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
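
total_mapping_size() above computes how much address space the first interpreter mmap must reserve: from the page start of the first PT_LOAD segment to the end of the last one, so later randomization cannot drop another mapping into the gap. A user-space model of the same computation:

#include <stdio.h>
#include <elf.h>

#define ELF_PAGESTART(v) ((v) & ~0xfffUL)

static unsigned long total_mapping_size(const Elf32_Phdr *cmds, int nr)
{
	int i, first = -1, last = -1;

	for (i = 0; i < nr; i++)
		if (cmds[i].p_type == PT_LOAD) {
			last = i;
			if (first == -1)
				first = i;
		}
	if (first == -1)
		return 0;
	return cmds[last].p_vaddr + cmds[last].p_memsz -
			ELF_PAGESTART(cmds[first].p_vaddr);
}

int main(void)
{
	Elf32_Phdr p[2] = {
		{ .p_type = PT_LOAD, .p_vaddr = 0x0000, .p_memsz = 0x9000 },
		{ .p_type = PT_LOAD, .p_vaddr = 0xa000, .p_memsz = 0x2000 },
	};

	/* spans the hole between the segments: prints 0xc000 */
	printf("total mapping size: %#lx\n", total_mapping_size(p, 2));
	return 0;
}
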
diff -urNp --exclude-from=/home/davej/.exclude linux-802/fs/proc/array.c linux-810/fs/proc/array.c
--- linux-802/fs/proc/array.c
+++ linux-810/fs/proc/array.c
@@ -397,8 +397,12 @@ static int do_task_stat(struct task_stru
 	ppid = pid_alive(task) ? task->group_leader->parent->tgid : 0;
 	read_unlock(&tasklist_lock);
 
-	if (!whole || num_threads<2)
-		wchan = get_wchan(task);
+	if (!whole || num_threads<2) {
+		wchan = 0;
+		if (current->uid == task->uid || current->euid == task->uid ||
+				capable(CAP_SYS_NICE))
+			wchan = get_wchan(task);
+	}
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
diff -urNp --exclude-from=/home/davej/.exclude linux-802/fs/proc/base.c linux-810/fs/proc/base.c
--- linux-802/fs/proc/base.c
+++ linux-810/fs/proc/base.c
@@ -208,7 +208,7 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
 	E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
 	E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
-	E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
+	E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUSR),
 #ifdef CONFIG_NUMA
 	E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
 #endif
@@ -222,7 +222,7 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
 	E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
 #ifdef CONFIG_MMU
-	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
+	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUSR),
 #endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
@@ -251,7 +251,7 @@ static struct pid_entry tid_base_stuff[]
 	E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
 	E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
 	E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
-	E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
+	E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUSR),
 #ifdef CONFIG_NUMA
 	E(PROC_TID_NUMA_MAPS,  "numa_maps",    S_IFREG|S_IRUGO),
 #endif
@@ -264,7 +264,7 @@ static struct pid_entry tid_base_stuff[]
 	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
 #ifdef CONFIG_MMU
-	E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
+	E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUSR),
 #endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
diff -urNp --exclude-from=/home/davej/.exclude linux-802/fs/proc/task_mmu.c linux-810/fs/proc/task_mmu.c
--- linux-802/fs/proc/task_mmu.c
+++ linux-810/fs/proc/task_mmu.c
@@ -43,7 +43,11 @@ char *task_mem(struct mm_struct *mm, cha
 		"VmStk:\t%8lu kB\n"
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n",
+		"VmPTE:\t%8lu kB\n"
+		"StaBrk:\t%08lx\n"
+		"Brk:\t%08lx\n"
+		"StaStk:\t%08lx\n"
+		,
 		hiwater_vm << (PAGE_SHIFT-10),
 		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
@@ -51,7 +55,13 @@ char *task_mem(struct mm_struct *mm, cha
 		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
+		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+		mm->start_brk, mm->brk, mm->start_stack);
+#ifdef __i386__
+	if (!nx_enabled)
+		buffer += sprintf(buffer,
+			"ExecLim:\t%08lx\n", mm->context.exec_limit);
+#endif
 	return buffer;
 }
 
@@ -130,6 +140,9 @@ __attribute__((weak)) const char *arch_v
 static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
+#ifdef __i386__
+	struct mm_struct *tmm = get_task_mm(task);
+#endif
 	struct vm_area_struct *vma = v;
 	struct mm_struct *mm = vma->vm_mm;
@@ -150,10 +163,20 @@ static int show_map_internal(struct seq_
 			vma->vm_end,
 			flags & VM_READ ? 'r' : '-',
 			flags & VM_WRITE ? 'w' : '-',
-			flags & VM_EXEC ? 'x' : '-',
+			(flags & VM_EXEC
+#ifdef __i386__
+				|| (!nx_enabled && tmm &&
+				(vma->vm_start < tmm->context.exec_limit))
+#endif
+			)
+				? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
 			vma->vm_pgoff << PAGE_SHIFT,
 			MAJOR(dev), MINOR(dev), ino, &len);
+#ifdef __i386__
+	if (tmm)
+		mmput(tmm);
+#endif
 
 	/*
 	 * Print the dentry name for named mappings, and a
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/a.out.h linux-810/include/asm-i386/a.out.h
--- linux-802/include/asm-i386/a.out.h
+++ linux-810/include/asm-i386/a.out.h
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE)  /* 1 page for vdso */
 
 #endif
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/desc.h linux-810/include/asm-i386/desc.h
--- linux-802/include/asm-i386/desc.h
+++ linux-810/include/asm-i386/desc.h
@@ -162,6 +162,20 @@ static inline unsigned long get_desc_bas
 	return base;
 }
 
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+	limit = (limit - 1) / PAGE_SIZE;
+	desc->a = limit & 0xffff;
+	desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+#define load_user_cs_desc(cpu, mm) \
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
+
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
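
set_user_cs() packs a byte-granular exec limit into the two words of a user code-segment descriptor. 0x00c0fb00 supplies the fixed bits (access byte 0xfb: present, DPL 3, readable code; flags nibble 0xc: 4K granularity, 32-bit), so only the page-granular limit varies. A worked example:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct desc_struct { unsigned long a, b; };

static void set_user_cs(struct desc_struct *desc, unsigned long limit)
{
	limit = (limit - 1) / PAGE_SIZE;	/* byte limit -> page limit */
	desc->a = limit & 0xffff;		/* limit bits 15..0 */
	desc->b = (limit & 0xf0000) | 0x00c0fb00; /* limit 19..16 + fixed bits */
}

int main(void)
{
	struct desc_struct cs;

	set_user_cs(&cs, 0x08048000);	/* exec limit just past 128MB */
	printf("a=%08lx b=%08lx\n", cs.a, cs.b);	/* a=00008047 b=00c0fb00 */
	return 0;
}
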
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/elf.h linux-810/include/asm-i386/elf.h
--- linux-802/include/asm-i386/elf.h
+++ linux-810/include/asm-i386/elf.h
@@ -159,7 +159,8 @@ extern void __kernel_vsyscall;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-                                       int executable_stack);
+                                       int executable_stack, unsigned long start_code,
+                                       unsigned long interp_map_address);
 
 extern unsigned int vdso_enabled;
 
@@ -215,4 +216,7 @@ do {									      \
 
 #endif
 
+#define __HAVE_ARCH_RANDOMIZE_BRK
+extern void randomize_brk(unsigned long old_brk);
+
 #endif
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/mmu.h linux-810/include/asm-i386/mmu.h
--- linux-802/include/asm-i386/mmu.h
+++ linux-810/include/asm-i386/mmu.h
@@ -7,11 +7,15 @@
  * we put the segment information here.
  *
  * cpu_vm_mask is used to optimize ldt flushing.
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
  */
 typedef struct { 
 	int size;
 	struct semaphore sem;
 	void *ldt;
+	struct desc_struct user_cs;
+	unsigned long exec_limit;
 	void *vdso;
 } mm_context_t;
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/page.h linux-810/include/asm-i386/page.h
--- linux-802/include/asm-i386/page.h
+++ linux-810/include/asm-i386/page.h
@@ -119,6 +119,11 @@ extern int page_is_ram(unsigned long pag
 #endif
 #define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
+/*
+ * Under exec-shield we don't use the generic fixmap gate area.
+ * The vDSO ("gate area") is a normal vma, found in the normal way.
+ */
+#define __HAVE_ARCH_GATE_AREA	1
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/pgalloc.h linux-810/include/asm-i386/pgalloc.h
--- linux-802/include/asm-i386/pgalloc.h
+++ linux-810/include/asm-i386/pgalloc.h
@@ -2,6 +2,7 @@
 #define _I386_PGALLOC_H
 
 #include <asm/fixmap.h>
+#include <asm/desc.h>
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-i386/processor.h linux-810/include/asm-i386/processor.h
--- linux-802/include/asm-i386/processor.h
+++ linux-810/include/asm-i386/processor.h
@@ -330,7 +330,10 @@ extern int bootloader_type;
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE/3)
+
+#define __HAVE_ARCH_ALIGN_STACK
+extern unsigned long arch_align_stack(unsigned long sp);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
@@ -512,6 +515,9 @@ static inline void load_esp0(struct tss_
 	regs->xcs = __USER_CS;					\
 	regs->eip = new_eip;					\
 	regs->esp = new_esp;					\
+	preempt_disable();					\
+	load_user_cs_desc(smp_processor_id(), current->mm);	\
+	preempt_enable();					\
 } while (0)
 
 /*
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-ia64/pgalloc.h linux-810/include/asm-ia64/pgalloc.h
--- linux-802/include/asm-ia64/pgalloc.h
+++ linux-810/include/asm-ia64/pgalloc.h
@@ -1,6 +1,10 @@
 #ifndef _ASM_IA64_PGALLOC_H
 #define _ASM_IA64_PGALLOC_H
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 /*
  * This file contains the functions and defines necessary to allocate
  * page tables.
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-powerpc/elf.h linux-810/include/asm-powerpc/elf.h
--- linux-802/include/asm-powerpc/elf.h
+++ linux-810/include/asm-powerpc/elf.h
@@ -272,7 +272,8 @@ extern int ucache_bsize;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-				       int executable_stack);
+				       int executable_stack, unsigned long start_code,
+					   unsigned long interp_map_address);
 #define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b);
 
 /*
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-powerpc/pgalloc.h linux-810/include/asm-powerpc/pgalloc.h
--- linux-802/include/asm-powerpc/pgalloc.h
+++ linux-810/include/asm-powerpc/pgalloc.h
@@ -26,6 +26,11 @@ extern kmem_cache_t *pgtable_cache[];
 #define HUGEPTE_CACHE_NUM 2
 #endif
 
+/* Dummy functions since we don't support execshield on ppc */
+#define arch_add_exec_range(mm, limit) do { ; } while (0)
+#define arch_flush_exec_range(mm)      do { ; } while (0)
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-ppc/pgalloc.h linux-810/include/asm-ppc/pgalloc.h
--- linux-802/include/asm-ppc/pgalloc.h
+++ linux-810/include/asm-ppc/pgalloc.h
@@ -39,5 +39,10 @@ extern void pte_free(struct page *pte);
 
 #define check_pgt_cache()	do { } while (0)
 
+#define arch_add_exec_range(mm, limit)         do { ; } while (0)
+#define arch_flush_exec_range(mm)              do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
+
+
 #endif /* _PPC_PGALLOC_H */
 #endif /* __KERNEL__ */
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-s390/pgalloc.h linux-810/include/asm-s390/pgalloc.h
--- linux-802/include/asm-s390/pgalloc.h
+++ linux-810/include/asm-s390/pgalloc.h
@@ -17,6 +17,10 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
+#define arch_add_exec_range(mm, limit) do { ; } while (0)
+#define arch_flush_exec_range(mm)      do { ; } while (0)
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
+
 #define check_pgt_cache()	do {} while (0)
 
 extern void diag10(unsigned long addr);
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-sparc/pgalloc.h linux-810/include/asm-sparc/pgalloc.h
--- linux-802/include/asm-sparc/pgalloc.h
+++ linux-810/include/asm-sparc/pgalloc.h
@@ -65,4 +65,8 @@ BTFIXUPDEF_CALL(void, pte_free, struct p
 #define pte_free(pte)		BTFIXUP_CALL(pte_free)(pte)
 #define __pte_free_tlb(tlb, pte)	pte_free(pte)
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #endif /* _SPARC_PGALLOC_H */
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-sparc64/pgalloc.h linux-810/include/asm-sparc64/pgalloc.h
--- linux-802/include/asm-sparc64/pgalloc.h
+++ linux-810/include/asm-sparc64/pgalloc.h
@@ -68,4 +68,8 @@ static inline void pte_free(struct page 
 
 #define check_pgt_cache()	do { } while (0)
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #endif /* _SPARC64_PGALLOC_H */
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-x86_64/pgalloc.h linux-810/include/asm-x86_64/pgalloc.h
--- linux-802/include/asm-x86_64/pgalloc.h
+++ linux-810/include/asm-x86_64/pgalloc.h
@@ -6,6 +6,13 @@
 #include <linux/threads.h>
 #include <linux/mm.h>
 
+#define arch_add_exec_range(mm, limit) \
+		do { (void)(mm), (void)(limit); } while (0)
+#define arch_flush_exec_range(mm) \
+		do { (void)(mm); } while (0)
+#define arch_remove_exec_range(mm, limit) \
+		do { (void)(mm), (void)(limit); } while (0)
+
 #define pmd_populate_kernel(mm, pmd, pte) \
 		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
 #define pud_populate(mm, pud, pmd) \
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/asm-x86_64/pgtable.h linux-810/include/asm-x86_64/pgtable.h
--- linux-802/include/asm-x86_64/pgtable.h
+++ linux-810/include/asm-x86_64/pgtable.h
@@ -21,7 +21,7 @@ extern unsigned long __supported_pte_mas
 
 #define swapper_pg_dir init_level4_pgt
 
-extern int nonx_setup(char *str);
+extern void nonx_setup(const char *str);
 extern void paging_init(void);
 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/linux/mm.h linux-810/include/linux/mm.h
--- linux-802/include/linux/mm.h
+++ linux-810/include/linux/mm.h
@@ -930,7 +930,19 @@ extern struct vm_area_struct *copy_vma(s
 extern void exit_mmap(struct mm_struct *);
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
 
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
+
+
+static inline unsigned long get_unmapped_area(struct file * file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
+}
+
+extern int install_special_mapping(struct mm_struct *mm,
+				   unsigned long addr, unsigned long len,
+				   unsigned long vm_flags, pgprot_t pgprot,
+				   struct page **pages);
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
@@ -982,7 +994,7 @@ unsigned long page_cache_readahead(struc
 			  struct file *filp,
 			  pgoff_t offset,
 			  unsigned long size);
-void handle_ra_miss(struct address_space *mapping, 
+void handle_ra_miss(struct address_space *mapping,
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
 
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/linux/resource.h linux-810/include/linux/resource.h
--- linux-802/include/linux/resource.h
+++ linux-810/include/linux/resource.h
@@ -54,8 +54,11 @@ struct rlimit {
 /*
  * Limit the stack by to some sane default: root can always
  * increase this limit if needed..  8MB seems reasonable.
+ *
+ * (2MB more to cover randomization effects.)
  */
-#define _STK_LIM	(8*1024*1024)
+#define _STK_LIM	(10*1024*1024)
+#define EXEC_STACK_BIAS	(2*1024*1024)
 
 /*
  * GPG wants 32kB of mlocked memory, to make sure pass phrases
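
_STK_LIM grows from 8MB to 10MB, and EXEC_STACK_BIAS adds 2MB of slack so stack randomization does not eat into the user-visible rlimit; over_stack_limit() in mm/mmap.c (further below) consumes it. A model of that check:

#include <stdio.h>

#define EXEC_STACK_BIAS	(2*1024*1024UL)

static int over_stack_limit(unsigned long sz, unsigned long rlim_cur)
{
	if (sz < EXEC_STACK_BIAS)
		return 0;
	return (sz - EXEC_STACK_BIAS) > rlim_cur;
}

int main(void)
{
	unsigned long rlim = 8*1024*1024UL;	/* the classic 8MB limit */

	printf("9MB stack:  over=%d\n", over_stack_limit(9UL << 20, rlim));
	printf("11MB stack: over=%d\n", over_stack_limit(11UL << 20, rlim));
	return 0;
}
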
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/linux/sched.h linux-810/include/linux/sched.h
--- linux-802/include/linux/sched.h
+++ linux-810/include/linux/sched.h
@@ -85,6 +85,8 @@ struct sched_param {
 
 struct exec_domain;
 struct futex_pi_state;
+extern int exec_shield;
+extern int print_fatal_signals;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -250,6 +252,10 @@ extern int sysctl_max_map_count;
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
+
+extern unsigned long
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
+		       unsigned long, unsigned long);
 extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
@@ -302,6 +308,9 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
+	unsigned long (*get_unmapped_exec_area) (struct file *filp,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
diff -urNp --exclude-from=/home/davej/.exclude linux-802/include/linux/sysctl.h linux-810/include/linux/sysctl.h
--- linux-802/include/linux/sysctl.h
+++ linux-810/include/linux/sysctl.h
@@ -92,6 +92,9 @@ enum
 
 	KERN_CAP_BSET=14,	/* int: capability bounding set */
 	KERN_PANIC=15,		/* int: panic timeout */
+	KERN_EXEC_SHIELD=1000,	/* int: exec-shield enabled (0/1/2) */
+	KERN_PRINT_FATAL=1001,	/* int: print fatal signals (0/1/2) */
+	KERN_VDSO=1002,		/* int: VDSO enabled (0/1) */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
 
 	KERN_SPARC_REBOOT=21,	/* reboot command on Sparc */
diff -urNp --exclude-from=/home/davej/.exclude linux-802/kernel/signal.c linux-810/kernel/signal.c
--- linux-802/kernel/signal.c
+++ linux-810/kernel/signal.c
@@ -759,6 +759,37 @@ out_set:
 #define LEGACY_QUEUE(sigptr, sig) \
 	(((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
 
+int print_fatal_signals = 0;
+
+static void print_fatal_signal(struct pt_regs *regs, int signr)
+{
+	printk("%s/%d: potentially unexpected fatal signal %d.\n",
+		current->comm, current->pid, signr);
+
+#ifdef __i386__
+	printk("code at %08lx: ", regs->eip);
+	{
+		int i;
+		for (i = 0; i < 16; i++) {
+			unsigned char insn;
+
+			__get_user(insn, (unsigned char *)(regs->eip + i));
+			printk("%02x ", insn);
+		}
+	}
+#endif
+	printk("\n");
+	show_regs(regs);
+}
+
+static int __init setup_print_fatal_signals(char *str)
+{
+	get_option (&str, &print_fatal_signals);
+
+	return 1;
+}
+
+__setup("print-fatal-signals=", setup_print_fatal_signals);
 
 static int
 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1753,6 +1784,8 @@ relock:
 		 * Anything else is fatal, maybe with a core dump.
 		 */
 		current->flags |= PF_SIGNALED;
+		if (print_fatal_signals)
+			print_fatal_signal(regs, signr);
 		if (sig_kernel_coredump(signr)) {
 			/*
 			 * If it was able to dump core, this kills all
diff -urNp --exclude-from=/home/davej/.exclude linux-802/kernel/sysctl.c linux-810/kernel/sysctl.c
--- linux-802/kernel/sysctl.c
+++ linux-810/kernel/sysctl.c
@@ -80,6 +80,29 @@ extern int proc_unknown_nmi_panic(ctl_ta
 				  void __user *, size_t *, loff_t *);
 #endif
 
+extern unsigned int vdso_enabled, vdso_populate;
+
+int exec_shield = (1<<3) | (1<<0);
+/* exec_shield is a bitmask:
+          0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
+   (1<<0) 1: on [also on if !=0]
+   (1<<1) 2: noexecstack by default
+   (1<<2) 4: vdso just below .text of main (unless too low)
+   (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
+Yes, vdso placement is overloaded here; but exec_shield off
+is a strong incentive to place vdso at STACK_TOP, so the bit
+for vdso just below .text comes along for the ride.
+*/
+
+static int __init setup_exec_shield(char *str)
+{
+        get_option (&str, &exec_shield);
+
+        return 1;
+}
+
+__setup("exec-shield=", setup_exec_shield);
+
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
@@ -282,6 +305,40 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= KERN_EXEC_SHIELD,
+		.procname	= "exec-shield",
+		.data		= &exec_shield,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_PRINT_FATAL,
+		.procname	= "print-fatal-signals",
+		.data		= &print_fatal_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#ifdef __i386__
+	{
+		.ctl_name	= KERN_VDSO,
+		.procname	= "vdso",
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VDSO,
+		.procname	= "vdso_populate",
+		.data		= &vdso_populate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
 		.ctl_name	= KERN_CORE_USES_PID,
 		.procname	= "core_uses_pid",
 		.data		= &core_uses_pid,
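
With the default of (1<<3)|(1<<0) == 9, exec-shield is enabled and the vDSO goes just below the PT_INTERP text. The value can be set at boot with exec-shield= or at runtime through /proc/sys/kernel/exec-shield; this small decoder shows how the bits documented above read out:

#include <stdio.h>

int main(void)
{
	int exec_shield = (1 << 3) | (1 << 0);	/* the patch default */

	printf("exec-shield value: %d\n", exec_shield);
	printf("  enabled:              %d\n", !!(exec_shield & 1));
	printf("  noexec stack default: %d\n", !!(exec_shield & 2));
	printf("  vdso below main text: %d\n", !!(exec_shield & 4));
	printf("  vdso below interp:    %d\n", !!(exec_shield & 8));
	return 0;
}
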
diff -urNp --exclude-from=/home/davej/.exclude linux-802/mm/fremap.c linux-810/mm/fremap.c
--- linux-802/mm/fremap.c
+++ linux-810/mm/fremap.c
@@ -67,13 +67,15 @@ int install_page(struct mm_struct *mm, s
 	 * caller about it.
 	 */
 	err = -EINVAL;
-	inode = vma->vm_file->f_mapping->host;
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (!page->mapping || page->index >= size)
-		goto unlock;
-	err = -ENOMEM;
-	if (page_mapcount(page) > INT_MAX/2)
-		goto unlock;
+	if (vma->vm_file) {
+		inode = vma->vm_file->f_mapping->host;
+		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		if (!page->mapping || page->index >= size)
+			goto unlock;
+		err = -ENOMEM;
+		if (page_mapcount(page) > INT_MAX/2)
+			goto unlock;
+	}
 
 	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
 		inc_mm_counter(mm, file_rss);
diff -urNp --exclude-from=/home/davej/.exclude linux-802/mm/mmap.c linux-810/mm/mmap.c
--- linux-802/mm/mmap.c
+++ linux-810/mm/mmap.c
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -370,6 +371,8 @@ static inline void
 __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent)
 {
+	if (vma->vm_flags & VM_EXEC)
+		arch_add_exec_range(mm, vma->vm_end);
 	if (prev) {
 		vma->vm_next = prev->vm_next;
 		prev->vm_next = vma;
@@ -473,6 +476,8 @@ __vma_unlink(struct mm_struct *mm, struc
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
+	if (vma->vm_flags & VM_EXEC)
+		arch_remove_exec_range(mm, vma->vm_end);
 }
 
 /*
@@ -778,6 +783,8 @@ struct vm_area_struct *vma_merge(struct 
 		} else					/* cases 2, 5, 7 */
 			vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (prev->vm_flags & VM_EXEC)
+			arch_add_exec_range(mm, prev->vm_end);
 		return prev;
 	}
 
@@ -944,7 +951,7 @@ unsigned long do_mmap_pgoff(struct file 
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
-	addr = get_unmapped_area(file, addr, len, pgoff, flags);
+	addr = get_unmapped_area_prot(file, addr, len, pgoff, flags, prot & PROT_EXEC);
 	if (addr & ~PAGE_MASK)
 		return addr;
 
@@ -1356,16 +1363,21 @@ void arch_unmap_area_topdown(struct mm_s
 		mm->free_area_cache = mm->mmap_base;
 }
 
+
 unsigned long
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
-		unsigned long pgoff, unsigned long flags)
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags, int exec)
 {
 	unsigned long ret;
 
 	if (!(flags & MAP_FIXED)) {
 		unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
-		get_area = current->mm->get_unmapped_area;
+		if (exec && current->mm->get_unmapped_exec_area)
+			get_area = current->mm->get_unmapped_exec_area;
+		else
+			get_area = current->mm->get_unmapped_area;
+
 		if (file && file->f_op && file->f_op->get_unmapped_area)
 			get_area = file->f_op->get_unmapped_area;
 		addr = get_area(file, addr, len, pgoff, flags);
@@ -1396,7 +1408,71 @@ get_unmapped_area(struct file *file, uns
 	return addr;
 }
 
-EXPORT_SYMBOL(get_unmapped_area);
+EXPORT_SYMBOL(get_unmapped_area_prot);
+
+#define SHLIB_BASE             0x00111000
+
+unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
+		unsigned long len0, unsigned long pgoff, unsigned long flags)
+{
+	unsigned long addr = addr0, len = len0;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long tmp;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (!addr && !(flags & MAP_FIXED))
+		addr = randomize_range(SHLIB_BASE, 0x01000000, len);
+
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start)) {
+			return addr;
+		}
+	}
+
+	addr = SHLIB_BASE;
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Must not let a PROT_EXEC mapping get into the
+			 * brk area:
+			 */
+			if (addr + len > mm->brk)
+				goto failed;
+
+			/*
+			 * Up until the brk area we randomize addresses
+			 * as much as possible:
+			 */
+			if (addr >= 0x01000000) {
+				tmp = randomize_range(0x01000000, PAGE_ALIGN(max(mm->start_brk, (unsigned long)0x08000000)), len);
+				vma = find_vma(mm, tmp);
+				if (TASK_SIZE - len >= tmp &&
+				    (!vma || tmp + len <= vma->vm_start))
+					return tmp;
+			}
+			/*
+			 * Ok, randomization didn't work out - return
+			 * the result of the linear search:
+			 */
+			return addr;
+		}
+		addr = vma->vm_end;
+	}
+
+failed:
+	return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
+}
+
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
@@ -1471,6 +1547,14 @@ out:
 	return prev ? prev->vm_next : vma;
 }
 
+static int over_stack_limit(unsigned long sz)
+{
+	if (sz < EXEC_STACK_BIAS)
+		return 0;
+	return (sz - EXEC_STACK_BIAS) >
+			current->signal->rlim[RLIMIT_STACK].rlim_cur;
+}
+
 /*
  * Verify that the stack growth is acceptable and
  * update accounting. This is shared with both the
@@ -1486,7 +1570,7 @@ static int acct_stack_growth(struct vm_a
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (over_stack_limit(size))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1766,10 +1850,14 @@ int split_vma(struct mm_struct * mm, str
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	if (new_below)
+	if (new_below) {
+		unsigned long old_end = vma->vm_end;
+
 		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
-	else
+		if (vma->vm_flags & VM_EXEC)
+			arch_remove_exec_range(mm, old_end);
+	} else
 		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
 	return 0;
@@ -1979,6 +2067,7 @@ void exit_mmap(struct mm_struct *mm)
 	vm_unacct_memory(nr_accounted);
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
+	arch_flush_exec_range(mm);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
@@ -2094,3 +2183,81 @@ int may_expand_vm(struct mm_struct *mm, 
 		return 0;
 	return 1;
 }
+
+
+static struct page *
+special_mapping_nopage(struct vm_area_struct *vma,
+		       unsigned long address, int *type)
+{
+	struct page **pages;
+
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+
+	address -= vma->vm_start;
+	for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
+		address -= PAGE_SIZE;
+
+	if (*pages) {
+		get_page(*pages);
+		return *pages;
+	}
+
+	return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct special_mapping_vmops = {
+	.nopage	= special_mapping_nopage,
+};
+
+unsigned int vdso_populate = 1;
+
+/*
+ * Insert a new vma covering the given region, with the given flags and
+ * protections.  Its pages are supplied by the given null-terminated array.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+int install_special_mapping(struct mm_struct *mm,
+			    unsigned long addr, unsigned long len,
+			    unsigned long vm_flags, pgprot_t pgprot,
+			    struct page **pages)
+{
+	struct vm_area_struct *vma;
+	int err;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (unlikely(vma == NULL))
+		return -ENOMEM;
+	memset(vma, 0, sizeof(*vma));
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+
+	vma->vm_flags = vm_flags;
+	vma->vm_page_prot = pgprot;
+
+	vma->vm_ops = &special_mapping_vmops;
+	vma->vm_private_data = pages;
+
+	insert_vm_struct(mm, vma);
+	mm->total_vm += len >> PAGE_SHIFT;
+
+	if (!vdso_populate)
+		return 0;
+
+	err = 0;
+	while (*pages) {
+		struct page *page = *pages++;
+		get_page(page);
+		err = install_page(mm, vma, addr, page, vma->vm_page_prot);
+		if (err) {
+			put_page(page);
+			break;
+		}
+		addr += PAGE_SIZE;
+	}
+
+	return err;
+}
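
For reference, a minimal sketch of how a caller could use the
install_special_mapping() helper added above. This is illustrative only
(the example_* names and the GFP/protection choices are assumptions, not
part of the patch); the page array must be NULL-terminated and outlive
the mapping, and the caller is expected to hold mm->mmap_sem for
writing, as the sysenter.c code does.

/* hypothetical example: map one zeroed kernel page at 'addr' */
static struct page *example_pages[2];	/* [1] stays NULL */

static int example_map_one_page(struct mm_struct *mm, unsigned long addr)
{
	if (!example_pages[0]) {
		example_pages[0] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!example_pages[0])
			return -ENOMEM;
	}

	/* caller holds down_write(&mm->mmap_sem) */
	return install_special_mapping(mm, addr, PAGE_SIZE,
			VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
			PAGE_READONLY_EXEC, example_pages);
}
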
diff -urNp --exclude-from=/home/davej/.exclude linux-802/mm/mprotect.c linux-810/mm/mprotect.c
--- linux-802/mm/mprotect.c
+++ linux-810/mm/mprotect.c
@@ -23,6 +23,7 @@
 #include <linux/swapops.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
@@ -122,7 +123,7 @@ mprotect_fixup(struct vm_area_struct *vm
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
-	unsigned long charged = 0;
+	unsigned long charged = 0, old_end = vma->vm_end;
 	unsigned int mask;
 	pgprot_t newprot;
 	pgoff_t pgoff;
@@ -190,6 +191,8 @@ success:
 	 */
 	vma->vm_flags = newflags;
 	vma->vm_page_prot = newprot;
+	if (oldflags & VM_EXEC)
+		arch_remove_exec_range(current->mm, old_end);
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, newprot);
 	else
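
Since mprotect_fixup() now calls arch_remove_exec_range() whenever the
old flags included VM_EXEC, the easiest way to exercise this path is a
userspace round trip through mprotect(). A minimal sketch, assuming a
32-bit x86 target and an exec-shield kernel that extends the exec-limit
over the page when it becomes executable:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	/* 32-bit x86: movl $42, %eax; ret */
	static const unsigned char stub[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };
	void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int (*fn)(void);

	if (page == MAP_FAILED)
		return 1;
	memcpy(page, stub, sizeof(stub));

	/* adding PROT_EXEC should extend the exec-limit over this page */
	if (mprotect(page, 4096, PROT_READ | PROT_EXEC))
		return 1;
	fn = (int (*)(void))page;
	printf("stub returned %d\n", fn());

	/* dropping PROT_EXEC triggers arch_remove_exec_range() above */
	mprotect(page, 4096, PROT_READ);
	return 0;
}
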
diff -urNp --exclude-from=/home/davej/.exclude linux-802/mm/mremap.c linux-810/mm/mremap.c
--- linux-802/mm/mremap.c
+++ linux-810/mm/mremap.c
@@ -387,8 +387,8 @@ unsigned long do_mremap(unsigned long ad
 			if (vma->vm_flags & VM_MAYSHARE)
 				map_flags |= MAP_SHARED;
 
-			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
-						vma->vm_pgoff, map_flags);
+			new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
+				vma->vm_pgoff, map_flags, vma->vm_flags & VM_EXEC);
 			ret = new_addr;
 			if (new_addr & ~PAGE_MASK)
 				goto out;
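
The over_stack_limit()/EXEC_STACK_BIAS change earlier in this patch only
affects stack-growth accounting; the user-visible point of exec-shield
is that code on the stack or heap does not run. A hypothetical smoke
test, again for 32-bit x86 (whether the call actually faults also
depends on the binary's PT_GNU_STACK marking and on NX hardware):

#include <stdio.h>

int main(void)
{
	/* 32-bit x86: movl $7, %eax; ret; placed in a stack buffer */
	unsigned char stack_stub[] = { 0xb8, 0x07, 0x00, 0x00, 0x00, 0xc3 };
	int (*fn)(void) = (int (*)(void))stack_stub;

	printf("jumping to code on the stack...\n");
	/* on an exec-shield kernel this call is expected to SIGSEGV */
	printf("returned %d: the stack was executable\n", fn());
	return 0;
}
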


Date: Fri, 29 Sep 2006 02:14:32 +0100
From: "Bart Oldeman" <bartoldeman@users.sourceforge.net>
To: mingo@elte.hu
Subject: [patch] exec-shield: move vdso back to the ascii-armor.

Hi Ingo,

While testing exec-shield I observed that, unlike in earlier versions,
the vdso page is now placed among the normal mappings around
0xbf000000. With the CS-selector protection, that makes the heap
executable.

I wonder if this was done on purpose; in any case, the attached patch
rectifies it.

Also, two of the four bits in the sysctl are no longer used, and I
observed that random_int(), when called within very short time
intervals, returns the same value and therefore the same random
address, so the address is reset to 0x110000.

As the vdso page is no longer placed just below any other region, SHLIB_BASE
could be decreased by 0x1000.

Bart
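
Bart's observation about repeated random_int() values can be checked
without instrumenting the kernel: map two PROT_EXEC regions
back-to-back and compare the addresses. A hypothetical test; on an
affected kernel the second request draws the same random candidate,
finds it occupied, and falls through to the linear search from
SHLIB_BASE:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *a = mmap(NULL, 4096, PROT_READ | PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void *b = mmap(NULL, 4096, PROT_READ | PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* with working randomization these should rarely be adjacent */
	printf("first  exec mapping at %p\n", a);
	printf("second exec mapping at %p\n", b);
	return 0;
}
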

--- linux-2.6.18.noarch/kernel/sysctl.c~	2006-09-29 16:59:02.000000000 -0400
+++ linux-2.6.18.noarch/kernel/sysctl.c	2006-09-29 17:00:06.000000000 -0400
@@ -82,16 +82,15 @@ extern int proc_unknown_nmi_panic(ctl_ta
 
 extern unsigned int vdso_enabled, vdso_populate;
 
-int exec_shield = (1<<3) | (1<<0);
+int exec_shield = (1<<0);
 /* exec_shield is a bitmask:
           0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
    (1<<0) 1: on [also on if !=0]
-   (1<<1) 2: noexecstack by default
+   (1<<1) 2: force noexecstack regardless of PT_GNU_STACK
+   The old settings
    (1<<2) 4: vdso just below .text of main (unless too low)
    (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
-Yes, vdso placement is overloaded here; but exec_shield off
-is a strong incentive to place vdso at STACK_TOP, so the bit
-for vdso just below .text comes along for the ride.
+   are ignored because the vdso is placed completely randomly
 */
 
 static int __init setup_exec_shield(char *str)
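
With the bitmask reduced as above, toggling the feature at runtime is a
plain procfs write. A hypothetical sketch, assuming the sysctl is
exposed as /proc/sys/kernel/exec-shield (the equivalent of running
sysctl -w kernel.exec-shield=1):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/exec-shield", "w");

	if (!f) {
		perror("/proc/sys/kernel/exec-shield");
		return 1;
	}
	/* (1<<0) turns exec-shield on; add (1<<1) to force noexecstack */
	fputs("1\n", f);
	fclose(f);
	return 0;
}
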
--- 1/mm/mmap.c.orig	2006-09-28 22:16:49.000000000 +0100
+++ 2/mm/mmap.c	2006-09-28 23:58:14.000000000 +0100
@@ -1407,7 +1407,7 @@
 
 EXPORT_SYMBOL(get_unmapped_area_prot);
 
-#define SHLIB_BASE             0x00111000
+#define SHLIB_BASE             0x00110000
 
 unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
 		unsigned long len0, unsigned long pgoff, unsigned long flags)
--- 1/arch/i386/kernel/sysenter.c.orig	2006-09-28 22:09:57.000000000 +0100
+++ 2/arch/i386/kernel/sysenter.c	2006-09-28 23:02:38.000000000 +0100
@@ -127,7 +127,7 @@
 	int ret;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;