diff options
author | Anthony G. Basile <basile@opensource.dyc.edu> | 2010-05-10 23:40:14 -0400 |
---|---|---|
committer | Anthony G. Basile <basile@opensource.dyc.edu> | 2010-05-10 23:40:14 -0400 |
commit | ef4abb88bdd9a76d9b87618532f5bdffc53ea0bb (patch) | |
tree | 8f5f23bafa4879265e59525b48db07c36d7ae4b1 | |
parent | update pax-without-grsecurity, grsecurity update 2.6.33.2 (diff) | |
download | hardened-patchset-ef4abb88bdd9a76d9b87618532f5bdffc53ea0bb.tar.gz hardened-patchset-ef4abb88bdd9a76d9b87618532f5bdffc53ea0bb.tar.bz2 hardened-patchset-ef4abb88bdd9a76d9b87618532f5bdffc53ea0bb.zip |
Added last stable patchset 2.6.28-10 from Gengor's devspace
-rw-r--r-- | kernel/2.6.28/0000_README | 44 | ||||
-rw-r--r-- | kernel/2.6.28/4420_grsec-2.1.13-2.6.28.10-200905241817.patch | 38913 | ||||
-rw-r--r-- | kernel/2.6.28/4421_grsec-remove-localversion-grsec.patch | 9 | ||||
-rw-r--r-- | kernel/2.6.28/4422_grsec-mute-warnings.patch | 28 | ||||
-rw-r--r-- | kernel/2.6.28/4425_grsec-pax-without-grsec.patch | 60 | ||||
-rw-r--r-- | kernel/2.6.28/4430_grsec-kconfig-default-gids.patch | 76 | ||||
-rw-r--r-- | kernel/2.6.28/4435_grsec-kconfig-gentoo.patch | 243 | ||||
-rw-r--r-- | kernel/2.6.28/4440_selinux-avc_audit-log-curr_ip.patch | 65 | ||||
-rw-r--r-- | kernel/2.6.28/4445_disable-compat_vdso.patch | 74 |
9 files changed, 39512 insertions, 0 deletions
diff --git a/kernel/2.6.28/0000_README b/kernel/2.6.28/0000_README new file mode 100644 index 0000000..e4977a7 --- /dev/null +++ b/kernel/2.6.28/0000_README @@ -0,0 +1,44 @@ +README +----------------------------------------------------------------------------- + +Individual Patch Descriptions: +----------------------------------------------------------------------------- +Patch: 4420_grsec-2.1.13-2.6.28.10-200905241817.patch +From: http://www.grsecurity.net +Desc: hardened-sources base patch from upstream grsecurity +Note: Ported to 2.6.28.10, added PaX -test24 through -test25 and + additional small fixes. + +Patch: 4421_grsec-remove-localversion-grsec.patch +From: Kerin Millar <kerframil@gmail.com> +Desc: Removes grsecurity's localversion-grsec file + +Patch: 4422_grsec-mute-warnings.patch +From: Alexander Gabert <gaberta@fh-trier.de> + Gordon Malm <gengor@gentoo.org> +Desc: Removes verbose compile warning settings from grsecurity, restores + mainline Linux kernel behavior + +Patch: 4425_grsec-pax-without-grsec.patch +From: Gordon Malm <gengor@gentoo.org> +Desc: Allows PaX features to be selected without enabling GRKERNSEC + +Patch: 4430_grsec-kconfig-default-gids.patch +From: Kerin Millar <kerframil@gmail.com> +Desc: Sets sane(r) default GIDs on various grsecurity group-dependent + features + +Patch: 4435_grsec-kconfig-gentoo.patch +From: Gordon Malm <gengor@gentoo.org> + Kerin Millar <kerframil@gmail.com> +Desc: Adds Hardened Gentoo [server/workstation] security levels, sets + Hardened Gentoo [workstation] as default + +Patch: 4440_selinux-avc_audit-log-curr_ip.patch +From: Gordon Malm <gengor@gentoo.org> +Desc: Configurable option to add src IP address to SELinux log messages + +Patch: 4445_disable-compat_vdso.patch +From: Gordon Malm <gengor@gentoo.org> + Kerin Millar <kerframil@gmail.com> +Desc: Disables VDSO_COMPAT operation completely diff --git a/kernel/2.6.28/4420_grsec-2.1.13-2.6.28.10-200905241817.patch 
b/kernel/2.6.28/4420_grsec-2.1.13-2.6.28.10-200905241817.patch new file mode 100644 index 0000000..d356758 --- /dev/null +++ b/kernel/2.6.28/4420_grsec-2.1.13-2.6.28.10-200905241817.patch @@ -0,0 +1,38913 @@ +diff -urNp a/Documentation/dontdiff b/Documentation/dontdiff +--- a/Documentation/dontdiff 2009-05-02 11:54:43.000000000 -0700 ++++ b/Documentation/dontdiff 2009-05-24 18:10:24.873959463 -0700 +@@ -3,6 +3,7 @@ + *.bin + *.cpio + *.csp ++*.dbg + *.dsp + *.dvi + *.elf +@@ -49,6 +50,10 @@ + 53c700_d.h + CVS + ChangeSet ++GPATH ++GRTAGS ++GSYMS ++GTAGS + Image + Kerntypes + Module.markers +@@ -62,7 +67,6 @@ aic7*reg_print.c* + aic7*seq.h* + aicasm + aicdb.h* +-asm + asm-offsets.h + asm_offsets.h + autoconf.h* +@@ -77,6 +81,7 @@ btfixupprep + build + bvmlinux + bzImage* ++capflags.c + classlist.h* + comp*.log + compile.h* +@@ -188,12 +193,15 @@ version.h* + vmlinux + vmlinux-* + vmlinux.aout ++vmlinux.bin.all + vmlinux.lds ++vmlinux.relocs + vsyscall.lds + vsyscall_32.lds + wanxlfw.inc + uImage + unifdef ++utsrelease.h + wakeup.bin + wakeup.elf + wakeup.lds +diff -urNp a/Makefile b/Makefile +--- a/Makefile 2009-05-02 11:54:43.000000000 -0700 ++++ b/Makefile 2009-05-24 18:10:24.895119423 -0700 +@@ -221,7 +221,7 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" + + HOSTCC = gcc + HOSTCXX = g++ +-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer ++HOSTCFLAGS = -Wall -W -Wno-unused -Wno-sign-compare -Wstrict-prototypes -O2 -fomit-frame-pointer + HOSTCXXFLAGS = -O2 + + # Decide whether to build built-in, modular, or both. 
+@@ -622,7 +622,7 @@ export mod_strip_cmd + + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +diff -urNp a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h +--- a/arch/alpha/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/include/asm/elf.h 2009-05-24 18:10:24.895965073 -0700 +@@ -91,6 +91,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 28) ++#define PAX_DELTA_STACK_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 19) ++#endif ++ + /* $0 is set by ld.so to a pointer to a function which might be + registered using atexit. 
This provides a mean for the dynamic + linker to call DT_FINI functions for shared libraries that have +diff -urNp a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h +--- a/arch/alpha/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/include/asm/kmap_types.h 2009-05-24 18:10:24.925978136 -0700 +@@ -24,7 +24,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h +--- a/arch/alpha/include/asm/pgtable.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/include/asm/pgtable.h 2009-05-24 18:10:24.925978136 -0700 +@@ -101,6 +101,17 @@ struct vm_area_struct; + #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) + #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) + #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE) ++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) ++# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) + + #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) +diff -urNp a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c +--- a/arch/alpha/kernel/module.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/kernel/module.c 2009-05-24 18:10:24.927040628 -0700 +@@ -182,7 +182,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, + + /* The small sections were sorted to the end of the segment. + The following should definitely cover them. 
*/ +- gp = (u64)me->module_core + me->core_size - 0x8000; ++ gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; + got = sechdrs[me->arch.gotsecindex].sh_addr; + + for (i = 0; i < n; i++) { +diff -urNp a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c +--- a/arch/alpha/kernel/osf_sys.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/kernel/osf_sys.c 2009-05-24 18:10:24.927040628 -0700 +@@ -1230,6 +1230,10 @@ arch_get_unmapped_area(struct file *filp + merely specific addresses, but regions of memory -- perhaps + this feature should be incorporated into all ports? */ + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + if (addr != (unsigned long) -ENOMEM) +@@ -1237,8 +1241,8 @@ arch_get_unmapped_area(struct file *filp + } + + /* Next, try allocating at TASK_UNMAPPED_BASE. */ +- addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), +- len, limit); ++ addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); ++ + if (addr != (unsigned long) -ENOMEM) + return addr; + +diff -urNp a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c +--- a/arch/alpha/kernel/ptrace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/kernel/ptrace.c 2009-05-24 18:10:24.928041035 -0700 +@@ -266,6 +266,9 @@ long arch_ptrace(struct task_struct *chi + size_t copied; + long ret; + ++ if (gr_handle_ptrace(child, request)) ++ return -EPERM; ++ + switch (request) { + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ +diff -urNp a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c +--- a/arch/alpha/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/alpha/mm/fault.c 2009-05-24 18:10:24.928041035 -0700 +@@ -54,6 +54,124 @@ __load_new_mm_context(struct mm_struct * + __reload_thread(pcb); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (regs->pc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int ldah, ldq, jmp; ++ ++ err = get_user(ldah, (unsigned int *)regs->pc); ++ err |= get_user(ldq, (unsigned int *)(regs->pc+4)); ++ err |= get_user(jmp, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((ldah & 0xFFFF0000U) == 0x277B0000U && ++ (ldq & 0xFFFF0000U) == 0xA77B0000U && ++ jmp == 0x6BFB0000U) ++ { ++ unsigned long r27, addr; ++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; ++ unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; ++ ++ addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); ++ err = get_user(r27, (unsigned long *)addr); ++ if (err) ++ break; ++ ++ regs->r27 = r27; ++ regs->pc = r27; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #2 */ ++ unsigned int ldah, lda, br; ++ ++ err = get_user(ldah, (unsigned int *)regs->pc); ++ err |= get_user(lda, (unsigned int *)(regs->pc+4)); ++ err |= get_user(br, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((ldah & 0xFFFF0000U) == 0x277B0000U && ++ (lda & 0xFFFF0000U) == 0xA77B0000U && ++ (br & 0xFFE00000U) == 0xC3E00000U) ++ { ++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL; ++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; ++ unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; ++ ++ regs->r27 
+= ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); ++ regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation */ ++ unsigned int br; ++ ++ err = get_user(br, (unsigned int *)regs->pc); ++ ++ if (!err && (br & 0xFFE00000U) == 0xC3800000U) { ++ unsigned int br2, ldq, nop, jmp; ++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; ++ ++ addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); ++ err = get_user(br2, (unsigned int *)addr); ++ err |= get_user(ldq, (unsigned int *)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ err |= get_user(jmp, (unsigned int *)(addr+12)); ++ err |= get_user(resolver, (unsigned long *)(addr+16)); ++ ++ if (err) ++ break; ++ ++ if (br2 == 0xC3600000U && ++ ldq == 0xA77B000CU && ++ nop == 0x47FF041FU && ++ jmp == 0x6B7B0000U) ++ { ++ regs->r28 = regs->pc+4; ++ regs->r27 = addr+16; ++ regs->pc = resolver; ++ return 3; ++ } ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif + + /* + * This routine handles page faults. 
It determines the address, +@@ -131,8 +249,29 @@ do_page_fault(unsigned long address, uns + good_area: + si_code = SEGV_ACCERR; + if (cause < 0) { +- if (!(vma->vm_flags & VM_EXEC)) ++ if (!(vma->vm_flags & VM_EXEC)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) ++ goto bad_area; ++ ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->pc, (void *)rdusp()); ++ do_group_exit(SIGKILL); ++#else + goto bad_area; ++#endif ++ ++ } + } else if (!cause) { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) +diff -urNp a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h +--- a/arch/arm/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/arm/include/asm/elf.h 2009-05-24 18:10:24.929209539 -0700 +@@ -99,7 +99,14 @@ extern int arm_elf_read_implies_exec(con + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) ++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++ ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x00008000UL ++ ++#define PAX_DELTA_MMAP_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) ++#define PAX_DELTA_STACK_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) ++#endif + + /* When the program starts, a1 contains a pointer to a function to be + registered with atexit, as per the SVR4 ABI. 
A value of 0 means we +diff -urNp a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h +--- a/arch/arm/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/arm/include/asm/kmap_types.h 2009-05-24 18:10:24.929209539 -0700 +@@ -18,6 +18,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c +--- a/arch/arm/mm/mmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/arm/mm/mmap.c 2009-05-24 18:10:24.929209539 -0700 +@@ -62,6 +62,10 @@ arch_get_unmapped_area(struct file *filp + if (len > TASK_SIZE) + return -ENOMEM; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -74,10 +78,10 @@ arch_get_unmapped_area(struct file *filp + return addr; + } + if (len > mm->cached_hole_size) { +- start_addr = addr = mm->free_area_cache; ++ start_addr = addr = mm->free_area_cache; + } else { +- start_addr = addr = TASK_UNMAPPED_BASE; +- mm->cached_hole_size = 0; ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; + } + + full_search: +@@ -93,8 +97,8 @@ full_search: + * Start a new search - just in case we missed + * some holes. + */ +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +diff -urNp a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h +--- a/arch/avr32/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/avr32/include/asm/elf.h 2009-05-24 18:10:24.930209458 -0700 +@@ -85,8 +85,14 @@ typedef struct user_fpu_struct elf_fpreg + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 
*/ + +-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) ++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x00001000UL ++ ++#define PAX_DELTA_MMAP_LEN 15 ++#define PAX_DELTA_STACK_LEN 15 ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +diff -urNp a/arch/avr32/include/asm/kmap_types.h b/arch/avr32/include/asm/kmap_types.h +--- a/arch/avr32/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/avr32/include/asm/kmap_types.h 2009-05-24 18:10:24.930209458 -0700 +@@ -22,7 +22,8 @@ D(10) KM_IRQ0, + D(11) KM_IRQ1, + D(12) KM_SOFTIRQ0, + D(13) KM_SOFTIRQ1, +-D(14) KM_TYPE_NR ++D(14) KM_CLEARPAGE, ++D(15) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c +--- a/arch/avr32/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/avr32/mm/fault.c 2009-05-24 18:10:24.931102387 -0700 +@@ -41,6 +41,23 @@ static inline int notify_page_fault(stru + + int exception_trace = 1; + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 20; i++) { ++ unsigned char c; ++ if (get_user(c, (unsigned char *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%02x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. 
+@@ -157,6 +174,16 @@ bad_area: + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (mm->pax_flags & MF_PAX_PAGEEXEC) { ++ if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) { ++ pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++ } ++#endif ++ + if (exception_trace && printk_ratelimit()) + printk("%s%s[%d]: segfault at %08lx pc %08lx " + "sp %08lx ecr %lu\n", +diff -urNp a/arch/blackfin/include/asm/kmap_types.h b/arch/blackfin/include/asm/kmap_types.h +--- a/arch/blackfin/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/blackfin/include/asm/kmap_types.h 2009-05-24 18:10:24.931102387 -0700 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h +--- a/arch/cris/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/cris/include/asm/kmap_types.h 2009-05-24 18:10:24.931102387 -0700 +@@ -19,6 +19,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/h8300/include/asm/kmap_types.h b/arch/h8300/include/asm/kmap_types.h +--- a/arch/h8300/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/h8300/include/asm/kmap_types.h 2009-05-24 18:10:24.932187227 -0700 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c +--- a/arch/ia64/ia32/binfmt_elf32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/ia32/binfmt_elf32.c 2009-05-24 18:10:24.932187227 -0700 +@@ -45,6 +45,13 @@ randomize_stack_top(unsigned long stack_ + + #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 
0x08048000UL : 0x4000000000000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#endif ++ + /* Ugly but avoids duplication */ + #include "../../../fs/binfmt_elf.c" + +diff -urNp a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h +--- a/arch/ia64/ia32/ia32priv.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/ia32/ia32priv.h 2009-05-24 18:10:24.932187227 -0700 +@@ -296,7 +296,14 @@ typedef struct compat_siginfo { + #define ELF_DATA ELFDATA2LSB + #define ELF_ARCH EM_386 + +-#define IA32_STACK_TOP IA32_PAGE_OFFSET ++#ifdef CONFIG_PAX_RANDUSTACK ++#define __IA32_DELTA_STACK (current->mm->delta_stack) ++#else ++#define __IA32_DELTA_STACK 0UL ++#endif ++ ++#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) ++ + #define IA32_GATE_OFFSET IA32_PAGE_OFFSET + #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE + +diff -urNp a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h +--- a/arch/ia64/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/include/asm/elf.h 2009-05-24 18:10:24.933209493 -0700 +@@ -43,6 +43,13 @@ + */ + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) ++ ++#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) ++#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 
16 : 3*PAGE_SHIFT - 13) ++#endif ++ + #define PT_IA_64_UNWIND 0x70000001 + + /* IA-64 relocations: */ +diff -urNp a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h +--- a/arch/ia64/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/include/asm/kmap_types.h 2009-05-24 18:10:24.933209493 -0700 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h +--- a/arch/ia64/include/asm/pgtable.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/include/asm/pgtable.h 2009-05-24 18:10:24.934211507 -0700 +@@ -143,6 +143,17 @@ + #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) + #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) + #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) ++# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) ++# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++# define PAGE_COPY_NOEXEC PAGE_COPY ++#endif ++ + #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) + #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) + #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) +diff -urNp a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c +--- a/arch/ia64/kernel/module.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/kernel/module.c 2009-05-24 18:10:24.935209819 -0700 +@@ -312,8 +312,7 @@ module_alloc (unsigned long size) + void + module_free (struct module *mod, void *module_region) + { +- if (mod && mod->arch.init_unw_table && +- 
module_region == mod->module_init) { ++ if (mod && mod->arch.init_unw_table && module_region == mod->module_init_rx) { + unw_remove_unwind_table(mod->arch.init_unw_table); + mod->arch.init_unw_table = NULL; + } +@@ -491,15 +490,39 @@ module_frob_arch_sections (Elf_Ehdr *ehd + } + + static inline int ++in_init_rx (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; ++} ++ ++static inline int ++in_init_rw (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; ++} ++ ++static inline int + in_init (const struct module *mod, uint64_t addr) + { +- return addr - (uint64_t) mod->module_init < mod->init_size; ++ return in_init_rx(mod, addr) || in_init_rw(mod, addr); ++} ++ ++static inline int ++in_core_rx (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; ++} ++ ++static inline int ++in_core_rw (const struct module *mod, uint64_t addr) ++{ ++ return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; + } + + static inline int + in_core (const struct module *mod, uint64_t addr) + { +- return addr - (uint64_t) mod->module_core < mod->core_size; ++ return in_core_rx(mod, addr) || in_core_rw(mod, addr); + } + + static inline int +@@ -683,7 +706,14 @@ do_reloc (struct module *mod, uint8_t r_ + break; + + case RV_BDREL: +- val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); ++ if (in_init_rx(mod, val)) ++ val -= (uint64_t) mod->module_init_rx; ++ else if (in_init_rw(mod, val)) ++ val -= (uint64_t) mod->module_init_rw; ++ else if (in_core_rx(mod, val)) ++ val -= (uint64_t) mod->module_core_rx; ++ else if (in_core_rw(mod, val)) ++ val -= (uint64_t) mod->module_core_rw; + break; + + case RV_LTV: +@@ -817,15 +847,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, + * addresses have been selected... 
+ */ + uint64_t gp; +- if (mod->core_size > MAX_LTOFF) ++ if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) + /* + * This takes advantage of fact that SHF_ARCH_SMALL gets allocated + * at the end of the module. + */ +- gp = mod->core_size - MAX_LTOFF / 2; ++ gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; + else +- gp = mod->core_size / 2; +- gp = (uint64_t) mod->module_core + ((gp + 7) & -8); ++ gp = (mod->core_size_rx + mod->core_size_rw) / 2; ++ gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); + mod->arch.gp = gp; + DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); + } +diff -urNp a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c +--- a/arch/ia64/kernel/sys_ia64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/kernel/sys_ia64.c 2009-05-24 18:10:24.936209598 -0700 +@@ -43,6 +43,13 @@ arch_get_unmapped_area (struct file *fil + if (REGION_NUMBER(addr) == RGN_HPAGE) + addr = 0; + #endif ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ addr = mm->free_area_cache; ++ else ++#endif ++ + if (!addr) + addr = mm->free_area_cache; + +@@ -61,9 +68,9 @@ arch_get_unmapped_area (struct file *fil + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { +- if (start_addr != TASK_UNMAPPED_BASE) { ++ if (start_addr != mm->mmap_base) { + /* Start a new search --- just in case we missed some holes. 
*/ +- addr = TASK_UNMAPPED_BASE; ++ addr = mm->mmap_base; + goto full_search; + } + return -ENOMEM; +diff -urNp a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c +--- a/arch/ia64/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/mm/fault.c 2009-05-24 18:10:24.936986250 -0700 +@@ -72,6 +72,23 @@ mapped_kernel_page_is_present (unsigned + return pte_present(pte); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 8; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + void __kprobes + ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) + { +@@ -145,9 +162,23 @@ ia64_do_page_fault (unsigned long addres + mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); + +- if ((vma->vm_flags & mask) != mask) ++ if ((vma->vm_flags & mask) != mask) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { ++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) ++ goto bad_area; ++ ++ up_read(&mm->mmap_sem); ++ pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + goto bad_area; + ++ } ++ + survive: + /* + * If for any reason at all we couldn't handle the fault, make +diff -urNp a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c +--- a/arch/ia64/mm/init.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/ia64/mm/init.c 2009-05-24 18:10:24.936986250 -0700 +@@ -121,6 +121,19 @@ ia64_init_addr_space (void) + vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (current->mm->pax_flags & MF_PAX_PAGEEXEC) { ++ 
vma->vm_flags &= ~VM_EXEC; ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (current->mm->pax_flags & MF_PAX_MPROTECT) ++ vma->vm_flags &= ~VM_MAYEXEC; ++#endif ++ ++ } ++#endif ++ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + down_write(&current->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { +diff -urNp a/arch/m68knommu/include/asm/kmap_types.h b/arch/m68knommu/include/asm/kmap_types.h +--- a/arch/m68knommu/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/m68knommu/include/asm/kmap_types.h 2009-05-24 18:10:24.938046367 -0700 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h +--- a/arch/mips/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/include/asm/elf.h 2009-05-24 18:10:24.938046367 -0700 +@@ -364,4 +364,11 @@ extern int dump_task_fpu(struct task_str + #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + #endif + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #endif /* _ASM_ELF_H */ +diff -urNp a/arch/mips/include/asm/kmap_types.h b/arch/mips/include/asm/kmap_types.h +--- a/arch/mips/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/include/asm/kmap_types.h 2009-05-24 18:10:24.938046367 -0700 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h +--- a/arch/mips/include/asm/page.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/include/asm/page.h 2009-05-24 18:10:24.939072335 -0700 +@@ -82,7 +82,7 @@ extern void copy_user_highpage(struct pa + #ifdef CONFIG_CPU_MIPS32 + typedef struct { unsigned long pte_low, pte_high; } pte_t; + #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +- #define __pte(x) ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; }) ++ #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) + #else + typedef struct { unsigned long long pte; } pte_t; + #define pte_val(x) ((x).pte) +diff -urNp a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h +--- a/arch/mips/include/asm/system.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/include/asm/system.h 2009-05-24 18:10:24.939072335 -0700 +@@ -217,6 +217,6 @@ extern void per_cpu_trap_init(void); + */ + #define __ARCH_WANT_UNLOCKED_CTXSW + +-extern unsigned long arch_align_stack(unsigned long sp); ++#define arch_align_stack(x) ((x) & ALMASK) + + #endif /* _ASM_SYSTEM_H */ +diff -urNp a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c +--- a/arch/mips/kernel/binfmt_elfn32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/kernel/binfmt_elfn32.c 2009-05-24 18:10:24.940054585 -0700 +@@ -50,6 +50,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + #undef ELF_ET_DYN_BASE + #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) 
+ ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #include <asm/processor.h> + #include <linux/module.h> + #include <linux/elfcore.h> +diff -urNp a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c +--- a/arch/mips/kernel/binfmt_elfo32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/kernel/binfmt_elfo32.c 2009-05-24 18:10:24.940054585 -0700 +@@ -52,6 +52,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N + #undef ELF_ET_DYN_BASE + #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) ++#endif ++ + #include <asm/processor.h> + #include <linux/module.h> + #include <linux/elfcore.h> +diff -urNp a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c +--- a/arch/mips/kernel/process.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/kernel/process.c 2009-05-24 18:10:24.940054585 -0700 +@@ -457,15 +457,3 @@ unsigned long get_wchan(struct task_stru + out: + return pc; + } +- +-/* +- * Don't forget that the stack pointer must be aligned on a 8 bytes +- * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. 
+- */ +-unsigned long arch_align_stack(unsigned long sp) +-{ +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() & ~PAGE_MASK; +- +- return sp & ALMASK; +-} +diff -urNp a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c +--- a/arch/mips/kernel/syscall.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/kernel/syscall.c 2009-05-24 18:10:24.941110163 -0700 +@@ -99,6 +99,11 @@ unsigned long arch_get_unmapped_area(str + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -109,7 +114,7 @@ unsigned long arch_get_unmapped_area(str + (!vmm || addr + len <= vmm->vm_start)) + return addr; + } +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else +diff -urNp a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c +--- a/arch/mips/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/mips/mm/fault.c 2009-05-24 18:10:24.941110163 -0700 +@@ -26,6 +26,23 @@ + #include <asm/ptrace.h> + #include <asm/highmem.h> /* For VMALLOC_END */ + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_report_insns(void *pc) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * This routine handles page faults. 
It determines the address, + * and the problem, and then passes it off to one of the appropriate +diff -urNp a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h +--- a/arch/parisc/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/include/asm/elf.h 2009-05-24 18:10:24.942032843 -0700 +@@ -333,6 +333,13 @@ struct pt_regs; /* forward declaration.. + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x10000UL ++ ++#define PAX_DELTA_MMAP_LEN 16 ++#define PAX_DELTA_STACK_LEN 16 ++#endif ++ + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ +diff -urNp a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h +--- a/arch/parisc/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/include/asm/kmap_types.h 2009-05-24 18:10:24.942032843 -0700 +@@ -22,7 +22,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h +--- a/arch/parisc/include/asm/pgtable.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/include/asm/pgtable.h 2009-05-24 18:10:24.943026476 -0700 +@@ -202,6 +202,17 @@ + #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) + #define PAGE_COPY PAGE_EXECREAD + #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) ++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) ++# define PAGE_READONLY_NOEXEC 
__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) + #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) + #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) +diff -urNp a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c +--- a/arch/parisc/kernel/module.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/kernel/module.c 2009-05-24 18:10:24.943026476 -0700 +@@ -75,16 +75,38 @@ + + /* three functions to determine where in the module core + * or init pieces the location is */ ++static inline int in_init_rx(struct module *me, void *loc) ++{ ++ return (loc >= me->module_init_rx && ++ loc < (me->module_init_rx + me->init_size_rx)); ++} ++ ++static inline int in_init_rw(struct module *me, void *loc) ++{ ++ return (loc >= me->module_init_rw && ++ loc < (me->module_init_rw + me->init_size_rw)); ++} ++ + static inline int in_init(struct module *me, void *loc) + { +- return (loc >= me->module_init && +- loc <= (me->module_init + me->init_size)); ++ return in_init_rx(me, loc) || in_init_rw(me, loc); ++} ++ ++static inline int in_core_rx(struct module *me, void *loc) ++{ ++ return (loc >= me->module_core_rx && ++ loc < (me->module_core_rx + me->core_size_rx)); ++} ++ ++static inline int in_core_rw(struct module *me, void *loc) ++{ ++ return (loc >= me->module_core_rw && ++ loc < (me->module_core_rw + me->core_size_rw)); + } + + static inline int in_core(struct module *me, void *loc) + { +- return (loc >= me->module_core && +- loc <= (me->module_core + me->core_size)); ++ return in_core_rx(me, loc) || in_core_rw(me, loc); + } + + static inline int in_local(struct module *me, void *loc) +@@ -298,21 +320,21 @@ int module_frob_arch_sections(CONST Elf_ + } + + /* align things a bit */ +- me->core_size = ALIGN(me->core_size, 16); +- 
me->arch.got_offset = me->core_size; +- me->core_size += gots * sizeof(struct got_entry); +- +- me->core_size = ALIGN(me->core_size, 16); +- me->arch.fdesc_offset = me->core_size; +- me->core_size += fdescs * sizeof(Elf_Fdesc); +- +- me->core_size = ALIGN(me->core_size, 16); +- me->arch.stub_offset = me->core_size; +- me->core_size += stubs * sizeof(struct stub_entry); +- +- me->init_size = ALIGN(me->init_size, 16); +- me->arch.init_stub_offset = me->init_size; +- me->init_size += init_stubs * sizeof(struct stub_entry); ++ me->core_size_rw = ALIGN(me->core_size_rw, 16); ++ me->arch.got_offset = me->core_size_rw; ++ me->core_size_rw += gots * sizeof(struct got_entry); ++ ++ me->core_size_rw = ALIGN(me->core_size_rw, 16); ++ me->arch.fdesc_offset = me->core_size_rw; ++ me->core_size_rw += fdescs * sizeof(Elf_Fdesc); ++ ++ me->core_size_rx = ALIGN(me->core_size_rx, 16); ++ me->arch.stub_offset = me->core_size_rx; ++ me->core_size_rx += stubs * sizeof(struct stub_entry); ++ ++ me->init_size_rx = ALIGN(me->init_size_rx, 16); ++ me->arch.init_stub_offset = me->init_size_rx; ++ me->init_size_rx += init_stubs * sizeof(struct stub_entry); + + me->arch.got_max = gots; + me->arch.fdesc_max = fdescs; +@@ -332,7 +354,7 @@ static Elf64_Word get_got(struct module + + BUG_ON(value == 0); + +- got = me->module_core + me->arch.got_offset; ++ got = me->module_core_rw + me->arch.got_offset; + for (i = 0; got[i].addr; i++) + if (got[i].addr == value) + goto out; +@@ -350,7 +372,7 @@ static Elf64_Word get_got(struct module + #ifdef CONFIG_64BIT + static Elf_Addr get_fdesc(struct module *me, unsigned long value) + { +- Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; ++ Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; + + if (!value) { + printk(KERN_ERR "%s: zero OPD requested!\n", me->name); +@@ -368,7 +390,7 @@ static Elf_Addr get_fdesc(struct module + + /* Create new one */ + fdesc->addr = value; +- fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; 
++ fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; + return (Elf_Addr)fdesc; + } + #endif /* CONFIG_64BIT */ +@@ -388,12 +410,12 @@ static Elf_Addr get_stub(struct module * + if(init_section) { + i = me->arch.init_stub_count++; + BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max); +- stub = me->module_init + me->arch.init_stub_offset + ++ stub = me->module_init_rx + me->arch.init_stub_offset + + i * sizeof(struct stub_entry); + } else { + i = me->arch.stub_count++; + BUG_ON(me->arch.stub_count > me->arch.stub_max); +- stub = me->module_core + me->arch.stub_offset + ++ stub = me->module_core_rx + me->arch.stub_offset + + i * sizeof(struct stub_entry); + } + +@@ -761,7 +783,7 @@ register_unwind_table(struct module *me, + + table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; + end = table + sechdrs[me->arch.unwind_section].sh_size; +- gp = (Elf_Addr)me->module_core + me->arch.got_offset; ++ gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; + + DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", + me->arch.unwind_section, table, end, gp); +diff -urNp a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c +--- a/arch/parisc/kernel/sys_parisc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/kernel/sys_parisc.c 2009-05-24 18:10:24.944131079 -0700 +@@ -98,7 +98,7 @@ unsigned long arch_get_unmapped_area(str + if (flags & MAP_FIXED) + return addr; + if (!addr) +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + + if (filp) { + addr = get_shared_area(filp->f_mapping, addr, len, pgoff); +diff -urNp a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c +--- a/arch/parisc/kernel/traps.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/kernel/traps.c 2009-05-24 18:10:24.944131079 -0700 +@@ -731,9 +731,7 @@ void handle_interruption(int code, struc + + down_read(&current->mm->mmap_sem); + vma = find_vma(current->mm,regs->iaoq[0]); +- if (vma && (regs->iaoq[0] >= vma->vm_start) 
+- && (vma->vm_flags & VM_EXEC)) { +- ++ if (vma && (regs->iaoq[0] >= vma->vm_start)) { + fault_address = regs->iaoq[0]; + fault_space = regs->iasq[0]; + +diff -urNp a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c +--- a/arch/parisc/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/parisc/mm/fault.c 2009-05-24 18:10:24.945179884 -0700 +@@ -16,6 +16,7 @@ + #include <linux/sched.h> + #include <linux/interrupt.h> + #include <linux/module.h> ++#include <linux/unistd.h> + + #include <asm/uaccess.h> + #include <asm/traps.h> +@@ -53,7 +54,7 @@ DEFINE_PER_CPU(struct exception_data, ex + static unsigned long + parisc_acctyp(unsigned long code, unsigned int inst) + { +- if (code == 6 || code == 16) ++ if (code == 6 || code == 7 || code == 16) + return VM_EXEC; + + switch (inst & 0xf0000000) { +@@ -139,6 +140,116 @@ parisc_acctyp(unsigned long code, unsign + } + #endif + ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when rt_sigreturn trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: unpatched PLT emulation */ ++ unsigned int bl, depwi; ++ ++ err = get_user(bl, (unsigned int *)instruction_pointer(regs)); ++ err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4)); ++ ++ if (err) ++ break; ++ ++ if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { ++ unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; ++ ++ err = get_user(ldw, (unsigned int *)addr); ++ err |= get_user(bv, (unsigned int *)(addr+4)); ++ err |= get_user(ldw2, (unsigned int *)(addr+8)); ++ ++ if (err) ++ break; ++ ++ if (ldw == 0x0E801096U && ++ bv == 0xEAC0C000U && ++ ldw2 == 0x0E881095U) ++ { ++ unsigned int resolver, map; ++ ++ err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8)); ++ 
err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12)); ++ if (err) ++ break; ++ ++ regs->gr[20] = instruction_pointer(regs)+8; ++ regs->gr[21] = map; ++ regs->gr[22] = resolver; ++ regs->iaoq[0] = resolver | 3UL; ++ regs->iaoq[1] = regs->iaoq[0] + 4; ++ return 3; ++ } ++ } ++ } while (0); ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ ++#ifndef CONFIG_PAX_EMUSIGRT ++ if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) ++ return 1; ++#endif ++ ++ do { /* PaX: rt_sigreturn emulation */ ++ unsigned int ldi1, ldi2, bel, nop; ++ ++ err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); ++ err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); ++ err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); ++ err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); ++ ++ if (err) ++ break; ++ ++ if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && ++ ldi2 == 0x3414015AU && ++ bel == 0xE4008200U && ++ nop == 0x08000240U) ++ { ++ regs->gr[25] = (ldi1 & 2) >> 1; ++ regs->gr[20] = __NR_rt_sigreturn; ++ regs->gr[31] = regs->iaoq[1] + 16; ++ regs->sr[0] = regs->iasq[1]; ++ regs->iaoq[0] = 0x100UL; ++ regs->iaoq[1] = regs->iaoq[0] + 4; ++ regs->iasq[0] = regs->sr[2]; ++ regs->iasq[1] = regs->sr[2]; ++ return 2; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? 
"); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + void do_page_fault(struct pt_regs *regs, unsigned long code, + unsigned long address) + { +@@ -165,8 +276,33 @@ good_area: + + acc_type = parisc_acctyp(code,regs->iir); + +- if ((vma->vm_flags & acc_type) != acc_type) ++ if ((vma->vm_flags & acc_type) != acc_type) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && ++ (address & ~3UL) == instruction_pointer(regs)) ++ { ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 3: ++ return; ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ case 2: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + goto bad_area; ++ } + + /* + * If for any reason at all we couldn't handle the fault, make +diff -urNp a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h +--- a/arch/powerpc/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/include/asm/elf.h 2009-05-24 18:10:24.946058008 -0700 +@@ -180,6 +180,18 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[E + + #define ELF_ET_DYN_BASE (0x20000000) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (0x10000000UL) ++ ++#ifdef __powerpc64__ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 16 : 28) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 16 : 28) ++#else ++#define PAX_DELTA_MMAP_LEN 15 ++#define PAX_DELTA_STACK_LEN 15 ++#endif ++#endif ++ + /* + * Our registers are always unsigned longs, whether we're a 32 bit + * process or 64 bit, on either a 64 bit or 32 bit kernel. 
+diff -urNp a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h +--- a/arch/powerpc/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/include/asm/kmap_types.h 2009-05-24 18:10:24.946058008 -0700 +@@ -26,6 +26,7 @@ enum km_type { + KM_SOFTIRQ1, + KM_PPC_SYNC_PAGE, + KM_PPC_SYNC_ICACHE, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h +--- a/arch/powerpc/include/asm/page.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/include/asm/page.h 2009-05-24 18:10:24.947061558 -0700 +@@ -111,8 +111,9 @@ extern phys_addr_t kernstart_addr; + * and needs to be executable. This means the whole heap ends + * up being executable. + */ +-#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ +- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) ++#define VM_DATA_DEFAULT_FLAGS32 \ ++ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ ++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +diff -urNp a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h +--- a/arch/powerpc/include/asm/page_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/include/asm/page_64.h 2009-05-24 18:10:24.947061558 -0700 +@@ -170,15 +170,18 @@ do { \ + * stack by default, so in the absense of a PT_GNU_STACK program header + * we turn execute permission off. + */ +-#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ +- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) ++#define VM_STACK_DEFAULT_FLAGS32 \ ++ (((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) | \ ++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + #define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + ++#ifndef CONFIG_PAX_PAGEEXEC + #define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) ++#endif + + #include <asm-generic/page.h> + +diff -urNp a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c +--- a/arch/powerpc/kernel/module_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/kernel/module_32.c 2009-05-24 18:10:24.947061558 -0700 +@@ -158,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { +- printk("Module doesn't contain .plt or .init.plt sections.\n"); ++ printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); + return -ENOEXEC; + } + +@@ -199,11 +199,16 @@ static uint32_t do_plt_call(void *locati + + DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + /* Init, or core PLT? */ +- if (location >= mod->module_core +- && location < mod->module_core + mod->core_size) ++ if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || ++ (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; +- else ++ else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || ++ (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; ++ else { ++ printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); ++ return ~0UL; ++ } + + /* Find this entry, or if that fails, the next avail. 
entry */ + while (entry->jump[0]) { +diff -urNp a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c +--- a/arch/powerpc/kernel/signal_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/kernel/signal_32.c 2009-05-24 18:10:24.948171119 -0700 +@@ -857,7 +857,7 @@ int handle_rt_signal32(unsigned long sig + /* Save user registers on the stack */ + frame = &rt_sf->uc.uc_mcontext; + addr = frame; +- if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { ++ if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { + if (save_user_regs(regs, frame, 0, 1)) + goto badframe; + regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; +diff -urNp a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c +--- a/arch/powerpc/kernel/signal_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/kernel/signal_64.c 2009-05-24 18:10:24.949149529 -0700 +@@ -429,7 +429,7 @@ int handle_rt_signal64(int signr, struct + current->thread.fpscr.val = 0; + + /* Set up to return from userspace. 
*/ +- if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { ++ if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { + regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); +diff -urNp a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c +--- a/arch/powerpc/kernel/vdso.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/kernel/vdso.c 2009-05-24 18:10:24.949149529 -0700 +@@ -212,7 +212,7 @@ int arch_setup_additional_pages(struct l + vdso_base = VDSO32_MBASE; + #endif + +- current->mm->context.vdso_base = 0; ++ current->mm->context.vdso_base = ~0UL; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process +@@ -229,7 +229,7 @@ int arch_setup_additional_pages(struct l + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, vdso_base, +- vdso_pages << PAGE_SHIFT, 0, 0); ++ vdso_pages << PAGE_SHIFT, 0, MAP_PRIVATE | MAP_EXECUTABLE); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto fail_mmapsem; +diff -urNp a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +--- a/arch/powerpc/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/mm/fault.c 2009-05-24 18:10:24.951010321 -0700 +@@ -29,6 +29,10 @@ + #include <linux/module.h> + #include <linux/kprobes.h> + #include <linux/kdebug.h> ++#include <linux/slab.h> ++#include <linux/pagemap.h> ++#include <linux/compiler.h> ++#include <linux/unistd.h> + + #include <asm/page.h> + #include <asm/pgtable.h> +@@ -62,6 +66,363 @@ static inline int notify_page_fault(stru + } + #endif + ++#ifdef CONFIG_PAX_EMUSIGRT ++void pax_syscall_close(struct vm_area_struct *vma) ++{ ++ vma->vm_mm->call_syscall = 0UL; ++} ++ ++static int pax_syscall_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, 
PAGE_SIZE); ++ kaddr[0] = 0x44000002U; /* sc */ ++ __flush_dcache_icache(kaddr); ++ kunmap(vmf->page); ++ return VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_syscall_close, ++ .fault = pax_syscall_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++/* ++ * PaX: decide what to do with offenders (regs->nip = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched GOT trampoline was detected ++ * 3 when patched PLT trampoline was detected ++ * 4 when unpatched PLT trampoline was detected ++ * 5 when sigreturn trampoline was detected ++ * 6 when rt_sigreturn trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) ++ int err; ++#endif ++ ++#ifdef CONFIG_PAX_EMUPLT ++ do { /* PaX: patched GOT emulation */ ++ unsigned int blrl; ++ ++ err = get_user(blrl, (unsigned int *)regs->nip); ++ ++ if (!err && blrl == 0x4E800021U) { ++ unsigned long temp = regs->nip; ++ ++ regs->nip = regs->link & 0xFFFFFFFCUL; ++ regs->link = temp + 4UL; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int b; ++ ++ err = get_user(b, (unsigned int *)regs->nip); ++ ++ if (!err && (b & 0xFC000003U) == 0x48000000U) { ++ regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 0x02000000UL); ++ return 3; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation #1 */ ++ unsigned int li, b; ++ ++ err = get_user(li, (unsigned int 
*)regs->nip); ++ err |= get_user(b, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { ++ unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; ++ unsigned long addr = b | 0xFC000000UL; ++ ++ addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(rlwinm, (unsigned int *)addr); ++ err |= get_user(add, (unsigned int *)(addr+4)); ++ err |= get_user(li2, (unsigned int *)(addr+8)); ++ err |= get_user(addis2, (unsigned int *)(addr+12)); ++ err |= get_user(mtctr, (unsigned int *)(addr+16)); ++ err |= get_user(li3, (unsigned int *)(addr+20)); ++ err |= get_user(addis3, (unsigned int *)(addr+24)); ++ err |= get_user(bctr, (unsigned int *)(addr+28)); ++ ++ if (err) ++ break; ++ ++ if (rlwinm == 0x556C083CU && ++ add == 0x7D6C5A14U && ++ (li2 & 0xFFFF0000U) == 0x39800000U && ++ (addis2 & 0xFFFF0000U) == 0x3D8C0000U && ++ mtctr == 0x7D8903A6U && ++ (li3 & 0xFFFF0000U) == 0x39800000U && ++ (addis3 & 0xFFFF0000U) == 0x3D8C0000U && ++ bctr == 0x4E800420U) ++ { ++ regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; ++ regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->ctr += (addis2 & 0xFFFFU) << 16; ++ regs->nip = regs->ctr; ++ return 4; ++ } ++ } ++ } while (0); ++ ++#if 0 ++ do { /* PaX: unpatched PLT emulation #2 */ ++ unsigned int lis, lwzu, b, bctr; ++ ++ err = get_user(lis, (unsigned int *)regs->nip); ++ err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); ++ err |= get_user(b, (unsigned int *)(regs->nip+8)); ++ err |= get_user(bctr, (unsigned int *)(regs->nip+12)); ++ ++ if (err) ++ break; ++ ++ if ((lis & 0xFFFF0000U) == 0x39600000U && ++ (lwzu & 0xU) == 0xU && ++ (b & 0xFC000003U) == 0x48000000U && ++ bctr == 0x4E800420U) ++ { ++ unsigned int addis, addi, rlwinm, add, li2, addis2, 
mtctr, li3, addis3, bctr; ++ unsigned long addr = b | 0xFC000000UL; ++ ++ addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(addis, (unsigned int *)addr); ++ err |= get_user(addi, (unsigned int *)(addr+4)); ++ err |= get_user(rlwinm, (unsigned int *)(addr+8)); ++ err |= get_user(add, (unsigned int *)(addr+12)); ++ err |= get_user(li2, (unsigned int *)(addr+16)); ++ err |= get_user(addis2, (unsigned int *)(addr+20)); ++ err |= get_user(mtctr, (unsigned int *)(addr+24)); ++ err |= get_user(li3, (unsigned int *)(addr+28)); ++ err |= get_user(addis3, (unsigned int *)(addr+32)); ++ err |= get_user(bctr, (unsigned int *)(addr+36)); ++ ++ if (err) ++ break; ++ ++ if ((addis & 0xFFFF0000U) == 0x3D6B0000U && ++ (addi & 0xFFFF0000U) == 0x396B0000U && ++ rlwinm == 0x556C083CU && ++ add == 0x7D6C5A14U && ++ (li2 & 0xFFFF0000U) == 0x39800000U && ++ (addis2 & 0xFFFF0000U) == 0x3D8C0000U && ++ mtctr == 0x7D8903A6U && ++ (li3 & 0xFFFF0000U) == 0x39800000U && ++ (addis3 & 0xFFFF0000U) == 0x3D8C0000U && ++ bctr == 0x4E800420U) ++ { ++ regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; ++ regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ regs->ctr += (addis2 & 0xFFFFU) << 16; ++ regs->nip = regs->ctr; ++ return 4; ++ } ++ } ++ } while (0); ++#endif ++ ++ do { /* PaX: unpatched PLT emulation #3 */ ++ unsigned int li, b; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(b, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { ++ unsigned int addis, lwz, mtctr, bctr; ++ unsigned long addr = b | 0xFC000000UL; ++ ++ addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); ++ err = get_user(addis, (unsigned int *)addr); ++ err |= get_user(lwz, (unsigned int *)(addr+4)); ++ err |= 
get_user(mtctr, (unsigned int *)(addr+8)); ++ err |= get_user(bctr, (unsigned int *)(addr+12)); ++ ++ if (err) ++ break; ++ ++ if ((addis & 0xFFFF0000U) == 0x3D6B0000U && ++ (lwz & 0xFFFF0000U) == 0x816B0000U && ++ mtctr == 0x7D6903A6U && ++ bctr == 0x4E800420U) ++ { ++ unsigned int r11; ++ ++ addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); ++ ++ err = get_user(r11, (unsigned int *)addr); ++ if (err) ++ break; ++ ++ regs->gpr[PT_R11] = r11; ++ regs->ctr = r11; ++ regs->nip = r11; ++ return 4; ++ } ++ } ++ } while (0); ++#endif ++ ++#ifdef CONFIG_PAX_EMUSIGRT ++ do { /* PaX: sigreturn emulation */ ++ unsigned int li, sc; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(sc, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { ++ struct vm_area_struct *vma; ++ unsigned long call_syscall; ++ ++ down_read(&current->mm->mmap_sem); ++ call_syscall = current->mm->call_syscall; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_syscall)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_syscall) { ++ call_syscall = current->mm->call_syscall; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_syscall & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_syscall)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_syscall = call_syscall; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->gpr[PT_R0] = __NR_sigreturn; ++ regs->nip = call_syscall; ++ return 5; ++ } ++ } while (0); ++ ++ do { /* PaX: 
rt_sigreturn emulation */ ++ unsigned int li, sc; ++ ++ err = get_user(li, (unsigned int *)regs->nip); ++ err |= get_user(sc, (unsigned int *)(regs->nip+4)); ++ ++ if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { ++ struct vm_area_struct *vma; ++ unsigned int call_syscall; ++ ++ down_read(&current->mm->mmap_sem); ++ call_syscall = current->mm->call_syscall; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_syscall)) ++ goto rt_emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_syscall) { ++ call_syscall = current->mm->call_syscall; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto rt_emulate; ++ } ++ ++ call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_syscall & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_syscall)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_syscall = call_syscall; ++ up_write(&current->mm->mmap_sem); ++ ++rt_emulate: ++ regs->gpr[PT_R0] = __NR_rt_sigreturn; ++ regs->nip = call_syscall; ++ return 6; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + /* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. +@@ -132,7 +493,7 @@ int __kprobes do_page_fault(struct pt_re + * indicate errors in DSISR but can validly be set in SRR1. 
+ */ + if (trap == 0x400) +- error_code &= 0x48200000; ++ error_code &= 0x58200000; + else + is_write = error_code & DSISR_ISSTORE; + #else +@@ -331,6 +692,37 @@ bad_area: + bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (mm->pax_flags & MF_PAX_PAGEEXEC) { ++#ifdef CONFIG_PPC64 ++ if (is_exec && (error_code & DSISR_PROTFAULT)) { ++#else ++ if (is_exec && regs->nip == address) { ++#endif ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ case 4: ++ return 0; ++#endif ++ ++#ifdef CONFIG_PAX_EMUSIGRT ++ case 5: ++ case 6: ++ return 0; ++#endif ++ ++ } ++ ++ pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]); ++ do_group_exit(SIGKILL); ++ } ++ } ++#endif ++ + _exception(SIGSEGV, regs, code, address); + return 0; + } +diff -urNp a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c +--- a/arch/powerpc/mm/mmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/powerpc/mm/mmap.c 2009-05-24 18:10:24.952057659 -0700 +@@ -75,10 +75,22 @@ void arch_pick_mmap_layout(struct mm_str + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h +--- a/arch/s390/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/s390/include/asm/kmap_types.h 2009-05-24 18:10:24.952057659 -0700 +@@ -16,6 +16,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + 
KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c +--- a/arch/s390/kernel/module.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/s390/kernel/module.c 2009-05-24 18:10:24.952057659 -0700 +@@ -166,11 +166,11 @@ module_frob_arch_sections(Elf_Ehdr *hdr, + + /* Increase core size by size of got & plt and set start + offsets for got and plt. */ +- me->core_size = ALIGN(me->core_size, 4); +- me->arch.got_offset = me->core_size; +- me->core_size += me->arch.got_size; +- me->arch.plt_offset = me->core_size; +- me->core_size += me->arch.plt_size; ++ me->core_size_rw = ALIGN(me->core_size_rw, 4); ++ me->arch.got_offset = me->core_size_rw; ++ me->core_size_rw += me->arch.got_size; ++ me->arch.plt_offset = me->core_size_rx; ++ me->core_size_rx += me->arch.plt_size; + return 0; + } + +@@ -256,7 +256,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + if (info->got_initialized == 0) { + Elf_Addr *gotent; + +- gotent = me->module_core + me->arch.got_offset + ++ gotent = me->module_core_rw + me->arch.got_offset + + info->got_offset; + *gotent = val; + info->got_initialized = 1; +@@ -280,7 +280,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) + *(unsigned int *) loc = +- (val + (Elf_Addr) me->module_core - loc) >> 1; ++ (val + (Elf_Addr) me->module_core_rw - loc) >> 1; + else if (r_type == R_390_GOT64 || + r_type == R_390_GOTPLT64) + *(unsigned long *) loc = val; +@@ -294,7 +294,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. 
*/ + if (info->plt_initialized == 0) { + unsigned int *ip; +- ip = me->module_core + me->arch.plt_offset + ++ ip = me->module_core_rx + me->arch.plt_offset + + info->plt_offset; + #ifndef CONFIG_64BIT + ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ +@@ -316,7 +316,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + val = me->arch.plt_offset - me->arch.got_offset + + info->plt_offset + rela->r_addend; + else +- val = (Elf_Addr) me->module_core + ++ val = (Elf_Addr) me->module_core_rx + + me->arch.plt_offset + info->plt_offset + + rela->r_addend - loc; + if (r_type == R_390_PLT16DBL) +@@ -336,7 +336,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + case R_390_GOTOFF32: /* 32 bit offset to GOT. */ + case R_390_GOTOFF64: /* 64 bit offset to GOT. */ + val = val + rela->r_addend - +- ((Elf_Addr) me->module_core + me->arch.got_offset); ++ ((Elf_Addr) me->module_core_rw + me->arch.got_offset); + if (r_type == R_390_GOTOFF16) + *(unsigned short *) loc = val; + else if (r_type == R_390_GOTOFF32) +@@ -346,7 +346,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base + break; + case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ + case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. 
*/ +- val = (Elf_Addr) me->module_core + me->arch.got_offset + ++ val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + + rela->r_addend - loc; + if (r_type == R_390_GOTPC) + *(unsigned int *) loc = val; +diff -urNp a/arch/sh/include/asm/kmap_types.h b/arch/sh/include/asm/kmap_types.h +--- a/arch/sh/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sh/include/asm/kmap_types.h 2009-05-24 18:10:24.953159050 -0700 +@@ -24,7 +24,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/sparc/Makefile b/arch/sparc/Makefile +--- a/arch/sparc/Makefile 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/Makefile 2009-05-24 18:10:24.954209670 -0700 +@@ -37,7 +37,7 @@ drivers-$(CONFIG_OPROFILE) += arch/sparc + # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-) + INIT_Y := $(patsubst %/, %/built-in.o, $(init-y)) + CORE_Y := $(core-y) +-CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ + CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y)) + DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y)) + NET_Y := $(patsubst %/, %/built-in.o, $(net-y)) +diff -urNp a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h +--- a/arch/sparc/include/asm/elf_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/include/asm/elf_32.h 2009-05-24 18:10:24.954209670 -0700 +@@ -116,6 +116,13 @@ typedef struct { + + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE) + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE 0x10000UL ++ ++#define PAX_DELTA_MMAP_LEN 16 ++#define PAX_DELTA_STACK_LEN 16 ++#endif ++ + /* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. This can NOT be done in userspace + on Sparc. 
*/ +diff -urNp a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h +--- a/arch/sparc/include/asm/elf_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/include/asm/elf_64.h 2009-05-24 18:10:24.955033740 -0700 +@@ -163,6 +163,12 @@ typedef struct { + #define ELF_ET_DYN_BASE 0x0000010000000000UL + #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL + ++#ifdef CONFIG_PAX_ASLR ++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL) ++ ++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 14 : 28 ) ++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 15 : 29 ) ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. */ +diff -urNp a/arch/sparc/include/asm/kmap_types.h b/arch/sparc/include/asm/kmap_types.h +--- a/arch/sparc/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/include/asm/kmap_types.h 2009-05-24 18:10:24.955033740 -0700 +@@ -19,6 +19,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h +--- a/arch/sparc/include/asm/pgtable_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/include/asm/pgtable_32.h 2009-05-24 18:10:24.955033740 -0700 +@@ -43,6 +43,13 @@ BTFIXUPDEF_SIMM13(user_ptrs_per_pgd) + BTFIXUPDEF_INT(page_none) + BTFIXUPDEF_INT(page_copy) + BTFIXUPDEF_INT(page_readonly) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++BTFIXUPDEF_INT(page_shared_noexec) ++BTFIXUPDEF_INT(page_copy_noexec) ++BTFIXUPDEF_INT(page_readonly_noexec) ++#endif ++ + BTFIXUPDEF_INT(page_kernel) + + #define PMD_SHIFT SUN4C_PMD_SHIFT +@@ -64,6 +71,16 @@ extern pgprot_t PAGE_SHARED; + #define PAGE_COPY __pgprot(BTFIXUP_INT(page_copy)) + #define PAGE_READONLY __pgprot(BTFIXUP_INT(page_readonly)) + ++#ifdef CONFIG_PAX_PAGEEXEC ++extern pgprot_t PAGE_SHARED_NOEXEC; ++# define PAGE_COPY_NOEXEC 
__pgprot(BTFIXUP_INT(page_copy_noexec)) ++# define PAGE_READONLY_NOEXEC __pgprot(BTFIXUP_INT(page_readonly_noexec)) ++#else ++# define PAGE_SHARED_NOEXEC PAGE_SHARED ++# define PAGE_COPY_NOEXEC PAGE_COPY ++# define PAGE_READONLY_NOEXEC PAGE_READONLY ++#endif ++ + extern unsigned long page_kernel; + + #ifdef MODULE +diff -urNp a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h +--- a/arch/sparc/include/asm/pgtsrmmu.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/include/asm/pgtsrmmu.h 2009-05-24 18:10:24.956178430 -0700 +@@ -115,6 +115,13 @@ + SRMMU_EXEC | SRMMU_REF) + #define SRMMU_PAGE_RDONLY __pgprot(SRMMU_VALID | SRMMU_CACHE | \ + SRMMU_EXEC | SRMMU_REF) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++#define SRMMU_PAGE_SHARED_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_WRITE | SRMMU_REF) ++#define SRMMU_PAGE_COPY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) ++#define SRMMU_PAGE_RDONLY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) ++#endif ++ + #define SRMMU_PAGE_KERNEL __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \ + SRMMU_DIRTY | SRMMU_REF) + +diff -urNp a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c +--- a/arch/sparc/kernel/sys_sparc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/kernel/sys_sparc.c 2009-05-24 18:10:24.956178430 -0700 +@@ -56,7 +56,7 @@ unsigned long arch_get_unmapped_area(str + if (ARCH_SUN4C && len > 0x20000000) + return -ENOMEM; + if (!addr) +- addr = TASK_UNMAPPED_BASE; ++ addr = current->mm->mmap_base; + + if (flags & MAP_SHARED) + addr = COLOUR_ALIGN(addr); +diff -urNp a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c +--- a/arch/sparc/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/mm/fault.c 2009-05-24 18:10:24.957143151 -0700 +@@ -21,6 +21,9 @@ + #include <linux/interrupt.h> + #include <linux/module.h> + #include <linux/kdebug.h> ++#include <linux/slab.h> ++#include <linux/pagemap.h> ++#include <linux/compiler.h> + + #include <asm/system.h> + 
#include <asm/page.h> +@@ -167,6 +170,249 @@ static unsigned long compute_si_addr(str + return safe_compute_effective_address(regs, insn); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++void pax_emuplt_close(struct vm_area_struct *vma) ++{ ++ vma->vm_mm->call_dl_resolve = 0UL; ++} ++ ++static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, PAGE_SIZE); ++ kaddr[0] = 0x9DE3BFA8U; /* save */ ++ flush_dcache_page(vmf->page); ++ kunmap(vmf->page); ++ return VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_emuplt_close, ++ .fault = pax_emuplt_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++ ++/* ++ * PaX: decide what to do with offenders (regs->pc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int sethi1, sethi2, jmpl; ++ ++ err = get_user(sethi1, (unsigned int *)regs->pc); ++ err |= get_user(sethi2, (unsigned int *)(regs->pc+4)); ++ err |= get_user(jmpl, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U) ++ { ++ unsigned 
int addr; ++ ++ regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; ++ addr = regs->u_regs[UREG_G1]; ++ addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ { /* PaX: patched PLT emulation #2 */ ++ unsigned int ba; ++ ++ err = get_user(ba, (unsigned int *)regs->pc); ++ ++ if (!err && (ba & 0xFFC00000U) == 0x30800000U) { ++ unsigned int addr; ++ ++ addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } ++ ++ do { /* PaX: patched PLT emulation #3 */ ++ unsigned int sethi, jmpl, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->pc); ++ err |= get_user(jmpl, (unsigned int *)(regs->pc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U && ++ nop == 0x01000000U) ++ { ++ unsigned int addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); ++ regs->pc = addr; ++ regs->npc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 1 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->pc); ++ err |= get_user(ba, (unsigned int *)(regs->pc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->pc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && ++ nop == 0x01000000U) ++ { ++ unsigned int addr, save, call; ++ ++ if ((ba & 0xFFC00000U) == 0x30800000U) ++ addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); ++ else ++ addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); ++ ++ err = get_user(save, (unsigned int *)addr); ++ err |= get_user(call, (unsigned int 
*)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ struct vm_area_struct *vma; ++ unsigned long call_dl_resolve; ++ ++ down_read(&current->mm->mmap_sem); ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_dl_resolve)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_dl_resolve) { ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_dl_resolve & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_dl_resolve)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_dl_resolve = call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; ++ regs->pc = call_dl_resolve; ++ regs->npc = addr+4; ++ return 3; ++ } ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 2 */ ++ unsigned int save, call, nop; ++ ++ err = get_user(save, (unsigned int *)(regs->pc-4)); ++ err |= get_user(call, (unsigned int *)regs->pc); ++ err |= get_user(nop, (unsigned int *)(regs->pc+4)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); ++ ++ regs->u_regs[UREG_RETPC] = regs->pc; ++ regs->pc = dl_resolve; ++ regs->npc = dl_resolve+4; ++ return 3; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void 
pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? "); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long address) + { +@@ -231,6 +477,24 @@ good_area: + if(!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->pc, (void *)regs->u_regs[UREG_FP]); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + /* Allow reads even for write-only mappings */ + if(!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; +diff -urNp a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c +--- a/arch/sparc/mm/init.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/mm/init.c 2009-05-24 18:10:24.958032169 -0700 +@@ -313,6 +313,9 @@ extern void device_scan(void); + pgprot_t PAGE_SHARED __read_mostly; + EXPORT_SYMBOL(PAGE_SHARED); + ++pgprot_t PAGE_SHARED_NOEXEC __read_mostly; ++EXPORT_SYMBOL(PAGE_SHARED_NOEXEC); ++ + void __init paging_init(void) + { + switch(sparc_cpu_model) { +@@ -338,17 +341,17 @@ void __init paging_init(void) + + /* Initialize the protection map with non-constant, MMU dependent values. 
*/ + protection_map[0] = PAGE_NONE; +- protection_map[1] = PAGE_READONLY; +- protection_map[2] = PAGE_COPY; +- protection_map[3] = PAGE_COPY; ++ protection_map[1] = PAGE_READONLY_NOEXEC; ++ protection_map[2] = PAGE_COPY_NOEXEC; ++ protection_map[3] = PAGE_COPY_NOEXEC; + protection_map[4] = PAGE_READONLY; + protection_map[5] = PAGE_READONLY; + protection_map[6] = PAGE_COPY; + protection_map[7] = PAGE_COPY; + protection_map[8] = PAGE_NONE; +- protection_map[9] = PAGE_READONLY; +- protection_map[10] = PAGE_SHARED; +- protection_map[11] = PAGE_SHARED; ++ protection_map[9] = PAGE_READONLY_NOEXEC; ++ protection_map[10] = PAGE_SHARED_NOEXEC; ++ protection_map[11] = PAGE_SHARED_NOEXEC; + protection_map[12] = PAGE_READONLY; + protection_map[13] = PAGE_READONLY; + protection_map[14] = PAGE_SHARED; +diff -urNp a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c +--- a/arch/sparc/mm/srmmu.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc/mm/srmmu.c 2009-05-24 18:10:24.959209682 -0700 +@@ -2162,6 +2162,13 @@ void __init ld_mmu_srmmu(void) + PAGE_SHARED = pgprot_val(SRMMU_PAGE_SHARED); + BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); + BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ PAGE_SHARED_NOEXEC = pgprot_val(SRMMU_PAGE_SHARED_NOEXEC); ++ BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); ++ BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); ++#endif ++ + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); + page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); + +diff -urNp a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile +--- a/arch/sparc64/kernel/Makefile 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc64/kernel/Makefile 2009-05-24 18:10:24.959985915 -0700 +@@ -3,7 +3,7 @@ + # + + EXTRA_AFLAGS := -ansi +-EXTRA_CFLAGS := -Werror ++#EXTRA_CFLAGS := -Werror + + CFLAGS_REMOVE_ftrace.o = -pg + +diff -urNp a/arch/sparc64/kernel/sys_sparc.c 
b/arch/sparc64/kernel/sys_sparc.c +--- a/arch/sparc64/kernel/sys_sparc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc64/kernel/sys_sparc.c 2009-05-24 18:10:24.959985915 -0700 +@@ -124,7 +124,7 @@ unsigned long arch_get_unmapped_area(str + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ +- if ((flags & MAP_SHARED) && ++ if ((filp || (flags & MAP_SHARED)) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; +@@ -139,6 +139,10 @@ unsigned long arch_get_unmapped_area(str + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); +@@ -152,9 +156,9 @@ unsigned long arch_get_unmapped_area(str + } + + if (len > mm->cached_hole_size) { +- start_addr = addr = mm->free_area_cache; ++ start_addr = addr = mm->free_area_cache; + } else { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +@@ -174,8 +178,8 @@ full_search: + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +@@ -215,7 +219,7 @@ arch_get_unmapped_area_topdown(struct fi + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. 
+ */ +- if ((flags & MAP_SHARED) && ++ if ((filp || (flags & MAP_SHARED)) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; +@@ -378,6 +382,12 @@ void arch_pick_mmap_layout(struct mm_str + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + sysctl_legacy_va_layout) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { +@@ -392,6 +402,12 @@ void arch_pick_mmap_layout(struct mm_str + gap = (task_size / 6 * 5); + + mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile +--- a/arch/sparc64/mm/Makefile 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc64/mm/Makefile 2009-05-24 18:10:24.961022428 -0700 +@@ -2,7 +2,7 @@ + # + + EXTRA_AFLAGS := -ansi +-EXTRA_CFLAGS := -Werror ++#EXTRA_CFLAGS := -Werror + + obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o + +diff -urNp a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c +--- a/arch/sparc64/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/sparc64/mm/fault.c 2009-05-24 18:10:24.962063759 -0700 +@@ -19,6 +19,9 @@ + #include <linux/interrupt.h> + #include <linux/kprobes.h> + #include <linux/kdebug.h> ++#include <linux/slab.h> ++#include <linux/pagemap.h> ++#include <linux/compiler.h> + + #include <asm/page.h> + #include <asm/pgtable.h> +@@ -224,6 +227,367 @@ cannot_handle: + unhandled_fault (address, current, regs); + } + ++#ifdef CONFIG_PAX_PAGEEXEC ++#ifdef CONFIG_PAX_EMUPLT ++static void pax_emuplt_close(struct vm_area_struct *vma) 
++{ ++ vma->vm_mm->call_dl_resolve = 0UL; ++} ++ ++static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ unsigned int *kaddr; ++ ++ vmf->page = alloc_page(GFP_HIGHUSER); ++ if (!vmf->page) ++ return VM_FAULT_OOM; ++ ++ kaddr = kmap(vmf->page); ++ memset(kaddr, 0, PAGE_SIZE); ++ kaddr[0] = 0x9DE3BFA8U; /* save */ ++ flush_dcache_page(vmf->page); ++ kunmap(vmf->page); ++ return VM_FAULT_MAJOR; ++} ++ ++static struct vm_operations_struct pax_vm_ops = { ++ .close = pax_emuplt_close, ++ .fault = pax_emuplt_fault ++}; ++ ++static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) ++{ ++ int ret; ++ ++ vma->vm_mm = current->mm; ++ vma->vm_start = addr; ++ vma->vm_end = addr + PAGE_SIZE; ++ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ vma->vm_ops = &pax_vm_ops; ++ ++ ret = insert_vm_struct(current->mm, vma); ++ if (ret) ++ return ret; ++ ++ ++current->mm->total_vm; ++ return 0; ++} ++#endif ++ ++/* ++ * PaX: decide what to do with offenders (regs->tpc = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when patched PLT trampoline was detected ++ * 3 when unpatched PLT trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ ++#ifdef CONFIG_PAX_EMUPLT ++ int err; ++ ++ do { /* PaX: patched PLT emulation #1 */ ++ unsigned int sethi1, sethi2, jmpl; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; ++ addr = regs->u_regs[UREG_G1]; ++ addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); ++ regs->tpc = addr; ++ 
regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ { /* PaX: patched PLT emulation #2 */ ++ unsigned int ba; ++ ++ err = get_user(ba, (unsigned int *)regs->tpc); ++ ++ if (!err && (ba & 0xFFC00000U) == 0x30800000U) { ++ unsigned long addr; ++ ++ addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } ++ ++ do { /* PaX: patched PLT emulation #3 */ ++ unsigned int sethi, jmpl, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (jmpl & 0xFFFFE000U) == 0x81C06000U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #4 */ ++ unsigned int mov1, call, mov2; ++ ++ err = get_user(mov1, (unsigned int *)regs->tpc); ++ err |= get_user(call, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(mov2, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0x8210000FU && ++ (call & 0xC0000000U) == 0x40000000U && ++ mov2 == 0x9E100001U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; ++ addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #5 */ ++ unsigned int sethi1, sethi2, or1, or2, sllx, jmpl, nop; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(or1, (unsigned int *)(regs->tpc+8)); ++ err |= get_user(or2, 
(unsigned int *)(regs->tpc+12)); ++ err |= get_user(sllx, (unsigned int *)(regs->tpc+16)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+20)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+24)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x0B000000U && ++ (or1 & 0xFFFFE000U) == 0x82106000U && ++ (or2 & 0xFFFFE000U) == 0x8A116000U && ++ sllx == 0x83287020 && ++ jmpl == 0x81C04005U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); ++ regs->u_regs[UREG_G1] <<= 32; ++ regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); ++ addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #6 */ ++ unsigned int sethi1, sethi2, sllx, or, jmpl, nop; ++ ++ err = get_user(sethi1, (unsigned int *)regs->tpc); ++ err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(sllx, (unsigned int *)(regs->tpc+8)); ++ err |= get_user(or, (unsigned int *)(regs->tpc+12)); ++ err |= get_user(jmpl, (unsigned int *)(regs->tpc+16)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+20)); ++ ++ if (err) ++ break; ++ ++ if ((sethi1 & 0xFFC00000U) == 0x03000000U && ++ (sethi2 & 0xFFC00000U) == 0x0B000000U && ++ sllx == 0x83287020 && ++ (or & 0xFFFFE000U) == 0x8A116000U && ++ jmpl == 0x81C04005U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] <<= 32; ++ regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); ++ addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: patched PLT emulation #7 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= 
get_user(ba, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ (ba & 0xFFF00000U) == 0x30600000U && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ ++ addr = (sethi & 0x003FFFFFU) << 10; ++ regs->u_regs[UREG_G1] = addr; ++ addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); ++ regs->tpc = addr; ++ regs->tnpc = addr+4; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 1 */ ++ unsigned int sethi, ba, nop; ++ ++ err = get_user(sethi, (unsigned int *)regs->tpc); ++ err |= get_user(ba, (unsigned int *)(regs->tpc+4)); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+8)); ++ ++ if (err) ++ break; ++ ++ if ((sethi & 0xFFC00000U) == 0x03000000U && ++ ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && ++ nop == 0x01000000U) ++ { ++ unsigned long addr; ++ unsigned int save, call; ++ ++ if ((ba & 0xFFC00000U) == 0x30800000U) ++ addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); ++ else ++ addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); ++ ++ err = get_user(save, (unsigned int *)addr); ++ err |= get_user(call, (unsigned int *)(addr+4)); ++ err |= get_user(nop, (unsigned int *)(addr+8)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ struct vm_area_struct *vma; ++ unsigned long call_dl_resolve; ++ ++ down_read(&current->mm->mmap_sem); ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_read(&current->mm->mmap_sem); ++ if (likely(call_dl_resolve)) ++ goto emulate; ++ ++ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ ++ down_write(&current->mm->mmap_sem); ++ if (current->mm->call_dl_resolve) { ++ call_dl_resolve = current->mm->call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ 
kmem_cache_free(vm_area_cachep, vma); ++ goto emulate; ++ } ++ ++ call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); ++ if (!vma || (call_dl_resolve & ~PAGE_MASK)) { ++ up_write(&current->mm->mmap_sem); ++ if (vma) ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ if (pax_insert_vma(vma, call_dl_resolve)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return 1; ++ } ++ ++ current->mm->call_dl_resolve = call_dl_resolve; ++ up_write(&current->mm->mmap_sem); ++ ++emulate: ++ regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; ++ regs->tpc = call_dl_resolve; ++ regs->tnpc = addr+4; ++ return 3; ++ } ++ } ++ } while (0); ++ ++ do { /* PaX: unpatched PLT emulation step 2 */ ++ unsigned int save, call, nop; ++ ++ err = get_user(save, (unsigned int *)(regs->tpc-4)); ++ err |= get_user(call, (unsigned int *)regs->tpc); ++ err |= get_user(nop, (unsigned int *)(regs->tpc+4)); ++ if (err) ++ break; ++ ++ if (save == 0x9DE3BFA8U && ++ (call & 0xC0000000U) == 0x40000000U && ++ nop == 0x01000000U) ++ { ++ unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); ++ ++ regs->u_regs[UREG_RETPC] = regs->tpc; ++ regs->tpc = dl_resolve; ++ regs->tnpc = dl_resolve+4; ++ return 3; ++ } ++ } while (0); ++#endif ++ ++ return 1; ++} ++ ++void pax_report_insns(void *pc, void *sp) ++{ ++ unsigned long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 5; i++) { ++ unsigned int c; ++ if (get_user(c, (unsigned int *)pc+i)) ++ printk(KERN_CONT "???????? 
"); ++ else ++ printk(KERN_CONT "%08x ", c); ++ } ++ printk("\n"); ++} ++#endif ++ + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) + { + struct mm_struct *mm = current->mm; +@@ -265,8 +629,10 @@ asmlinkage void __kprobes do_sparc64_fau + goto intr_or_no_mm; + + if (test_thread_flag(TIF_32BIT)) { +- if (!(regs->tstate & TSTATE_PRIV)) ++ if (!(regs->tstate & TSTATE_PRIV)) { + regs->tpc &= 0xffffffff; ++ regs->tnpc &= 0xffffffff; ++ } + address &= 0xffffffff; + } + +@@ -283,6 +649,29 @@ asmlinkage void __kprobes do_sparc64_fau + if (!vma) + goto bad_area; + ++#ifdef CONFIG_PAX_PAGEEXEC ++ /* PaX: detect ITLB misses on non-exec pages */ ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && ++ !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) ++ { ++ if (address != regs->tpc) ++ goto good_area; ++ ++ up_read(&mm->mmap_sem); ++ switch (pax_handle_fetch_fault(regs)) { ++ ++#ifdef CONFIG_PAX_EMUPLT ++ case 2: ++ case 3: ++ return; ++#endif ++ ++ } ++ pax_report_fault(regs, (void *)regs->tpc, (void *)(regs->u_regs[UREG_FP] + STACK_BIAS)); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ + /* Pure DTLB misses do not tell us whether the fault causing + * load/store/atomic was a write or not, it only says that there + * was no match. 
So in such a case we (carefully) read the +diff -urNp a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h +--- a/arch/um/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/um/include/asm/kmap_types.h 2009-05-24 18:10:24.962063759 -0700 +@@ -23,6 +23,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h +--- a/arch/um/include/asm/page.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/um/include/asm/page.h 2009-05-24 18:10:24.963090076 -0700 +@@ -14,6 +14,9 @@ + #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) + #define PAGE_MASK (~(PAGE_SIZE-1)) + ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++ + #ifndef __ASSEMBLY__ + + struct page; +diff -urNp a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c +--- a/arch/um/sys-i386/syscalls.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/um/sys-i386/syscalls.c 2009-05-24 18:10:24.964209555 -0700 +@@ -11,6 +11,21 @@ + #include "asm/uaccess.h" + #include "asm/unistd.h" + ++int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) ++{ ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size || addr > pax_task_size - len) ++ return -EINVAL; ++ ++ return 0; ++} ++ + /* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. 
Linux/i386 didn't use to be able to handle more than +diff -urNp a/arch/x86/Kconfig b/arch/x86/Kconfig +--- a/arch/x86/Kconfig 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/Kconfig 2009-05-24 18:10:24.965209543 -0700 +@@ -935,7 +935,7 @@ config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G +- default 0x78000000 if VMSPLIT_2G_OPT ++ default 0x70000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 +@@ -1337,8 +1337,7 @@ config KEXEC_JUMP + config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x1000000" if X86_NUMAQ +- default "0x200000" if X86_64 +- default "0x100000" ++ default "0x200000" + help + This gives the physical address where the kernel is loaded. + +@@ -1430,9 +1429,9 @@ config HOTPLUG_CPU + Say N if you want to disable CPU hotplug. + + config COMPAT_VDSO +- def_bool y ++ def_bool n + prompt "Compat VDSO support" +- depends on X86_32 || IA32_EMULATION ++ depends on (X86_32 || IA32_EMULATION) && !PAX_NOEXEC + help + Map the 32-bit VDSO to the predictable old-style address too. 
+ ---help--- +diff -urNp a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +--- a/arch/x86/Kconfig.cpu 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/Kconfig.cpu 2009-05-24 18:10:24.965209543 -0700 +@@ -331,7 +331,7 @@ config X86_PPRO_FENCE + + config X86_F00F_BUG + def_bool y +- depends on M586MMX || M586TSC || M586 || M486 || M386 ++ depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC + + config X86_WP_WORKS_OK + def_bool y +@@ -351,7 +351,7 @@ config X86_POPAD_OK + + config X86_ALIGNMENT_16 + def_bool y +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MCORE2 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + + config X86_INTEL_USERCOPY + def_bool y +@@ -397,7 +397,7 @@ config X86_CMPXCHG64 + # generates cmov. + config X86_CMOV + def_bool y +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) ++ depends on (MK8 || MK7 || MCORE2 || MPSC || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) + + config X86_MINIMUM_CPU_FAMILY + int +diff -urNp a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug +--- a/arch/x86/Kconfig.debug 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/Kconfig.debug 2009-05-24 18:10:24.966209741 -0700 +@@ -107,7 +107,7 @@ config X86_PTDUMP + config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + default y +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && BROKEN + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const +diff -urNp a/arch/x86/Makefile b/arch/x86/Makefile +--- a/arch/x86/Makefile 2009-05-02 
11:54:43.000000000 -0700 ++++ b/arch/x86/Makefile 2009-05-24 18:10:24.966209741 -0700 +@@ -232,3 +232,12 @@ endef + CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf ++ ++define OLD_LD ++ ++*** ${VERSION}.${PATCHLEVEL} PaX kernels no longer build correctly with old versions of binutils. ++*** Please upgrade your binutils to 2.18 or newer ++endef ++ ++archprepare: ++ $(if $(LDFLAGS_BUILD_ID),,$(error $(OLD_LD))) +diff -urNp a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h +--- a/arch/x86/boot/bitops.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/bitops.h 2009-05-24 18:10:24.967128091 -0700 +@@ -26,7 +26,7 @@ static inline int variable_test_bit(int + u8 v; + const u32 *p = (const u32 *)addr; + +- asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); ++ asm volatile("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); + return v; + } + +@@ -37,7 +37,7 @@ static inline int variable_test_bit(int + + static inline void set_bit(int nr, void *addr) + { +- asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); ++ asm volatile("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); + } + + #endif /* BOOT_BITOPS_H */ +diff -urNp a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h +--- a/arch/x86/boot/boot.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/boot.h 2009-05-24 18:10:24.967128091 -0700 +@@ -80,7 +80,7 @@ static inline void io_delay(void) + static inline u16 ds(void) + { + u16 seg; +- asm("movw %%ds,%0" : "=rm" (seg)); ++ asm volatile("movw %%ds,%0" : "=rm" (seg)); + return seg; + } + +@@ -176,7 +176,7 @@ static inline void wrgs32(u32 v, addr_t + static inline int memcmp(const void *s1, const void *s2, size_t len) + { + u8 diff; +- asm("repe; cmpsb; setnz %0" ++ asm volatile("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; + } +diff -urNp a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S +--- a/arch/x86/boot/compressed/head_32.S 
2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/compressed/head_32.S 2009-05-24 18:10:24.967978629 -0700 +@@ -70,7 +70,7 @@ startup_32: + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx + #else +- movl $LOAD_PHYSICAL_ADDR, %ebx ++ movl $____LOAD_PHYSICAL_ADDR, %ebx + #endif + + /* Replace the compressed data size with the uncompressed size */ +@@ -80,8 +80,8 @@ startup_32: + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx +- /* Add 32K + 18 bytes of extra slack */ +- addl $(32768 + 18), %ebx ++ /* Add 64K of extra slack */ ++ addl $65536, %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx +@@ -105,7 +105,7 @@ startup_32: + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp + #else +- movl $LOAD_PHYSICAL_ADDR, %ebp ++ movl $____LOAD_PHYSICAL_ADDR, %ebp + #endif + + /* +@@ -160,16 +160,15 @@ relocated: + * and where it was actually loaded. + */ + movl %ebp, %ebx +- subl $LOAD_PHYSICAL_ADDR, %ebx ++ subl $____LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ + /* + * Process relocations. 
+ */ + + 1: subl $4, %edi +- movl 0(%edi), %ecx +- testl %ecx, %ecx +- jz 2f ++ movl (%edi), %ecx ++ jecxz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b + 2: +diff -urNp a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +--- a/arch/x86/boot/compressed/misc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/compressed/misc.c 2009-05-24 18:10:24.967978629 -0700 +@@ -373,7 +373,7 @@ static void parse_elf(void *output) + case PT_LOAD: + #ifdef CONFIG_RELOCATABLE + dest = output; +- dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); ++ dest += (phdr->p_paddr - ____LOAD_PHYSICAL_ADDR); + #else + dest = (void *)(phdr->p_paddr); + #endif +@@ -425,7 +425,7 @@ asmlinkage void decompress_kernel(void * + if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) + error("Destination address too large"); + #ifndef CONFIG_RELOCATABLE +- if ((u32)output != LOAD_PHYSICAL_ADDR) ++ if ((u32)output != ____LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); + #endif + #endif +diff -urNp a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c +--- a/arch/x86/boot/compressed/relocs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/compressed/relocs.c 2009-05-24 18:10:24.969055857 -0700 +@@ -10,8 +10,11 @@ + #define USE_BSD + #include <endian.h> + ++#include "../../../../include/linux/autoconf.h" ++ + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + static Elf32_Ehdr ehdr; ++static Elf32_Phdr *phdr; + static unsigned long reloc_count, reloc_idx; + static unsigned long *relocs; + +@@ -245,6 +248,36 @@ static void read_ehdr(FILE *fp) + } + } + ++static void read_phdrs(FILE *fp) ++{ ++ int i; ++ ++ phdr = calloc(ehdr.e_phnum, sizeof(Elf32_Phdr)); ++ if (!phdr) { ++ die("Unable to allocate %d program headers\n", ++ ehdr.e_phnum); ++ } ++ if (fseek(fp, ehdr.e_phoff, SEEK_SET) < 0) { ++ die("Seek to %d failed: %s\n", ++ ehdr.e_phoff, strerror(errno)); ++ } ++ if (fread(phdr, sizeof(*phdr), ehdr.e_phnum, fp) != ehdr.e_phnum) { ++ 
die("Cannot read ELF program headers: %s\n", ++ strerror(errno)); ++ } ++ for(i = 0; i < ehdr.e_phnum; i++) { ++ phdr[i].p_type = elf32_to_cpu(phdr[i].p_type); ++ phdr[i].p_offset = elf32_to_cpu(phdr[i].p_offset); ++ phdr[i].p_vaddr = elf32_to_cpu(phdr[i].p_vaddr); ++ phdr[i].p_paddr = elf32_to_cpu(phdr[i].p_paddr); ++ phdr[i].p_filesz = elf32_to_cpu(phdr[i].p_filesz); ++ phdr[i].p_memsz = elf32_to_cpu(phdr[i].p_memsz); ++ phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags); ++ phdr[i].p_align = elf32_to_cpu(phdr[i].p_align); ++ } ++ ++} ++ + static void read_shdrs(FILE *fp) + { + int i; +@@ -341,6 +374,8 @@ static void read_symtabs(FILE *fp) + static void read_relocs(FILE *fp) + { + int i,j; ++ uint32_t base; ++ + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { +@@ -360,9 +395,18 @@ static void read_relocs(FILE *fp) + die("Cannot read symbol table: %s\n", + strerror(errno)); + } ++ base = 0; ++ for (j = 0; j < ehdr.e_phnum; j++) { ++ if (phdr[j].p_type != PT_LOAD ) ++ continue; ++ if (secs[sec->shdr.sh_info].shdr.sh_offset < phdr[j].p_offset || secs[sec->shdr.sh_info].shdr.sh_offset >= phdr[j].p_offset + phdr[j].p_filesz) ++ continue; ++ base = CONFIG_PAGE_OFFSET + phdr[j].p_paddr - phdr[j].p_vaddr; ++ break; ++ } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; +- rel->r_offset = elf32_to_cpu(rel->r_offset); ++ rel->r_offset = elf32_to_cpu(rel->r_offset) + base; + rel->r_info = elf32_to_cpu(rel->r_info); + } + } +@@ -504,6 +548,23 @@ static void walk_relocs(void (*visit)(El + if (sym->st_shndx == SHN_ABS) { + continue; + } ++ /* Don't relocate actual per-cpu variables, they are absolute indices, not addresses */ ++ if (!strcmp(sec_name(sym->st_shndx), ".data.percpu") && strncmp(sym_name(sym_strtab, sym), "__per_cpu_", 10)) ++ continue; ++#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_X86_32) ++ /* Don't relocate actual code, they are relocated implicitly 
by the base address of KERNEL_CS */ ++ if (!strcmp(sec_name(sym->st_shndx), ".init.text")) ++ continue; ++ if (!strcmp(sec_name(sym->st_shndx), ".exit.text")) ++ continue; ++ if (!strcmp(sec_name(sym->st_shndx), ".text.head")) { ++ if (strcmp(sym_name(sym_strtab, sym), "__init_end") && ++ strcmp(sym_name(sym_strtab, sym), "KERNEL_TEXT_OFFSET")) ++ continue; ++ } ++ if (!strcmp(sec_name(sym->st_shndx), ".text")) ++ continue; ++#endif + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } +@@ -631,6 +692,7 @@ int main(int argc, char **argv) + fname, strerror(errno)); + } + read_ehdr(fp); ++ read_phdrs(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); +diff -urNp a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c +--- a/arch/x86/boot/cpucheck.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/cpucheck.c 2009-05-24 18:10:24.969055857 -0700 +@@ -74,7 +74,7 @@ static int has_fpu(void) + u16 fcw = -1, fsw = -1; + u32 cr0; + +- asm("movl %%cr0,%0" : "=r" (cr0)); ++ asm volatile("movl %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("movl %0,%%cr0" : : "r" (cr0)); +@@ -90,7 +90,7 @@ static int has_eflag(u32 mask) + { + u32 f0, f1; + +- asm("pushfl ; " ++ asm volatile("pushfl ; " + "pushfl ; " + "popl %0 ; " + "movl %0,%1 ; " +@@ -115,7 +115,7 @@ static void get_flags(void) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (max_intel_level), + "=b" (cpu_vendor[0]), + "=d" (cpu_vendor[1]), +@@ -124,7 +124,7 @@ static void get_flags(void) + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (tfms), + "=c" (cpu.flags[4]), + "=d" (cpu.flags[0]) +@@ -136,7 +136,7 @@ static void get_flags(void) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + +- asm("cpuid" ++ asm volatile("cpuid" + : "=a" (max_amd_level) + : "a" 
(0x80000000) + : "ebx", "ecx", "edx"); +@@ -144,7 +144,7 @@ static void get_flags(void) + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + u32 eax = 0x80000001; +- asm("cpuid" ++ asm volatile("cpuid" + : "+a" (eax), + "=c" (cpu.flags[6]), + "=d" (cpu.flags[1]) +@@ -203,9 +203,9 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 ecx = MSR_K7_HWCR; + u32 eax, edx; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax &= ~(1 << 15); +- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + get_flags(); /* Make sure it really did something */ + err = check_flags(); +@@ -218,9 +218,9 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 ecx = MSR_VIA_FCR; + u32 eax, edx; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax |= (1<<1)|(1<<7); +- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + set_bit(X86_FEATURE_CX8, cpu.flags); + err = check_flags(); +@@ -231,12 +231,12 @@ int check_cpu(int *cpu_level_ptr, int *r + u32 eax, edx; + u32 level = 1; + +- asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); +- asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); +- asm("cpuid" ++ asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); ++ asm volatile("cpuid" + : "+a" (level), "=d" (cpu.flags[0]) + : : "ecx", "ebx"); +- asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); ++ asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + err = check_flags(); + } +diff -urNp a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c +--- a/arch/x86/boot/edd.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/edd.c 2009-05-24 18:10:24.970079729 -0700 +@@ -81,7 +81,7 @@ static int get_edd_info(u8 devno, struct + ax = 0x4100; + bx = EDDMAGIC1; + dx = 
devno; +- asm("pushfl; stc; int $0x13; setc %%al; popfl" ++ asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" + : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) + : : "esi", "edi"); + +@@ -100,7 +100,7 @@ static int get_edd_info(u8 devno, struct + ei->params.length = sizeof(ei->params); + ax = 0x4800; + dx = devno; +- asm("pushfl; int $0x13; popfl" ++ asm volatile("pushfl; int $0x13; popfl" + : "+a" (ax), "+d" (dx), "=m" (ei->params) + : "S" (&ei->params) + : "ebx", "ecx", "edi"); +@@ -111,7 +111,7 @@ static int get_edd_info(u8 devno, struct + ax = 0x0800; + dx = devno; + di = 0; +- asm("pushw %%es; " ++ asm volatile("pushw %%es; " + "movw %%di,%%es; " + "pushfl; stc; int $0x13; setc %%al; popfl; " + "popw %%es" +diff -urNp a/arch/x86/boot/main.c b/arch/x86/boot/main.c +--- a/arch/x86/boot/main.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/main.c 2009-05-24 18:10:24.970079729 -0700 +@@ -78,7 +78,7 @@ static void query_ist(void) + if (cpu.level < 6) + return; + +- asm("int $0x15" ++ asm volatile("int $0x15" + : "=a" (boot_params.ist_info.signature), + "=b" (boot_params.ist_info.command), + "=c" (boot_params.ist_info.event), +diff -urNp a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c +--- a/arch/x86/boot/mca.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/mca.c 2009-05-24 18:10:24.971032718 -0700 +@@ -19,7 +19,7 @@ int query_mca(void) + u8 err; + u16 es, bx, len; + +- asm("pushw %%es ; " ++ asm volatile("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " +diff -urNp a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c +--- a/arch/x86/boot/memory.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/memory.c 2009-05-24 18:10:24.971032718 -0700 +@@ -30,7 +30,7 @@ static int detect_memory_e820(void) + /* Important: %edx and %esi are clobbered by some BIOSes, + so they must be either used for the error output + or explicitly marked clobbered. 
*/ +- asm("int $0x15; setc %0" ++ asm volatile("int $0x15; setc %0" + : "=d" (err), "+b" (next), "=a" (id), "+c" (size), + "=m" (*desc) + : "D" (desc), "d" (SMAP), "a" (0xe820) +@@ -66,7 +66,7 @@ static int detect_memory_e801(void) + + bx = cx = dx = 0; + ax = 0xe801; +- asm("stc; int $0x15; setc %0" ++ asm volatile("stc; int $0x15; setc %0" + : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); + + if (err) +@@ -96,7 +96,7 @@ static int detect_memory_88(void) + u8 err; + + ax = 0x8800; +- asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); ++ asm volatile("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); + + boot_params.screen_info.ext_mem_k = ax; + +diff -urNp a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c +--- a/arch/x86/boot/video-vesa.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/video-vesa.c 2009-05-24 18:10:24.971032718 -0700 +@@ -41,7 +41,7 @@ static int vesa_probe(void) + + ax = 0x4f00; + di = (size_t)&vginfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+D" (di), "=m" (vginfo) + : : "ebx", "ecx", "edx", "esi"); + +@@ -68,7 +68,7 @@ static int vesa_probe(void) + ax = 0x4f01; + cx = mode; + di = (size_t)&vminfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + +@@ -120,7 +120,7 @@ static int vesa_set_mode(struct mode_inf + ax = 0x4f01; + cx = vesa_mode; + di = (size_t)&vminfo; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + +@@ -202,19 +202,20 @@ static void vesa_dac_set_8bits(void) + /* Save the VESA protected mode info */ + static void vesa_store_pm_info(void) + { +- u16 ax, bx, di, es; ++ u16 ax, bx, cx, di, es; + + ax = 0x4f0a; +- bx = di = 0; +- asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" +- : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) +- : : "ecx", "esi"); ++ bx = cx = di = 0; ++ asm volatile("pushw %%es; "INT10"; movw %%es,%0; popw %%es" ++ : "=d" (es), "+a" (ax), 
"+b" (bx), "+c" (cx), "+D" (di) ++ : : "esi"); + + if (ax != 0x004f) + return; + + boot_params.screen_info.vesapm_seg = es; + boot_params.screen_info.vesapm_off = di; ++ boot_params.screen_info.vesapm_size = cx; + } + + /* +@@ -268,7 +269,7 @@ void vesa_store_edid(void) + /* Note: The VBE DDC spec is different from the main VESA spec; + we genuinely have to assume all registers are destroyed here. */ + +- asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" ++ asm volatile("pushw %%es; movw %2,%%es; "INT10"; popw %%es" + : "+a" (ax), "+b" (bx) + : "c" (cx), "D" (di) + : "esi"); +@@ -284,7 +285,7 @@ void vesa_store_edid(void) + cx = 0; /* Controller 0 */ + dx = 0; /* EDID block number */ + di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) + : "c" (cx), "D" (di) + : "esi"); +diff -urNp a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c +--- a/arch/x86/boot/video-vga.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/video-vga.c 2009-05-24 18:10:24.972037735 -0700 +@@ -225,7 +225,7 @@ static int vga_probe(void) + }; + u8 vga_flag; + +- asm(INT10 ++ asm volatile(INT10 + : "=b" (ega_bx) + : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ + : "ecx", "edx", "esi", "edi"); +@@ -237,7 +237,7 @@ static int vga_probe(void) + /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ + if ((u8)ega_bx != 0x10) { + /* EGA/VGA */ +- asm(INT10 ++ asm volatile(INT10 + : "=a" (vga_flag) + : "a" (0x1a00) + : "ebx", "ecx", "edx", "esi", "edi"); +diff -urNp a/arch/x86/boot/video.c b/arch/x86/boot/video.c +--- a/arch/x86/boot/video.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/video.c 2009-05-24 18:10:24.973209661 -0700 +@@ -23,7 +23,7 @@ static void store_cursor_position(void) + + ax = 0x0300; + bx = 0; +- asm(INT10 ++ asm volatile(INT10 + : "=d" (curpos), "+a" (ax), "+b" (bx) + : : "ecx", "esi", "edi"); + +@@ -38,7 +38,7 @@ static void 
store_video_mode(void) + /* N.B.: the saving of the video page here is a bit silly, + since we pretty much assume page 0 everywhere. */ + ax = 0x0f00; +- asm(INT10 ++ asm volatile(INT10 + : "+a" (ax), "=b" (page) + : : "ecx", "edx", "esi", "edi"); + +diff -urNp a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c +--- a/arch/x86/boot/voyager.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/boot/voyager.c 2009-05-24 18:10:24.973209661 -0700 +@@ -23,7 +23,7 @@ int query_voyager(void) + + data_ptr[0] = 0xff; /* Flag on config not found(?) */ + +- asm("pushw %%es ; " ++ asm volatile("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " +diff -urNp a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c +--- a/arch/x86/ia32/ia32_signal.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/ia32/ia32_signal.c 2009-05-24 18:10:24.974013967 -0700 +@@ -413,7 +413,7 @@ static void __user *get_sigframe(struct + sp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. 
*/ +- sp = ((sp + 4) & -16ul) - 4; ++ sp = ((sp - 12) & -16ul) - 4; + return (void __user *) sp; + } + +@@ -492,7 +492,7 @@ int ia32_setup_frame(int sig, struct k_s + + #if DEBUG_SIG + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", +- current->comm, current->pid, frame, regs->ip, frame->pretcode); ++ current->comm, task_pid_nr(current), frame, regs->ip, frame->pretcode); + #endif + + return 0; +@@ -518,6 +518,7 @@ int ia32_setup_rt_frame(int sig, struct + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, ++ 0 + }; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); +@@ -585,7 +586,7 @@ int ia32_setup_rt_frame(int sig, struct + + #if DEBUG_SIG + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", +- current->comm, current->pid, frame, regs->ip, frame->pretcode); ++ current->comm, task_pid_nr(current), frame, regs->ip, frame->pretcode); + #endif + + return 0; +diff -urNp a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +--- a/arch/x86/include/asm/alternative.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/alternative.h 2009-05-24 18:10:24.974013967 -0700 +@@ -96,7 +96,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature) : "memory") + +@@ -120,7 +120,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature), ##input) + +@@ -135,7 +135,7 @@ const unsigned char *const *find_nop_tab + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ 
+ ".previous\n" \ +- ".section .altinstr_replacement,\"ax\"\n" \ ++ ".section .altinstr_replacement,\"a\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + +diff -urNp a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h +--- a/arch/x86/include/asm/atomic_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/atomic_32.h 2009-05-24 18:10:24.975019263 -0700 +@@ -47,7 +47,29 @@ typedef struct { + */ + static inline void atomic_add(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "+m" (v->counter) ++ : "ir" (i)); ++} ++ ++/** ++ * atomic_add_unchecked - add integer to atomic variable ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v. ++ */ ++static inline void atomic_add_unchecked(int i, atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" + : "+m" (v->counter) + : "ir" (i)); + } +@@ -61,7 +83,15 @@ static inline void atomic_add(int i, ato + */ + static inline void atomic_sub(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "subl %1,%0" ++ asm volatile(LOCK_PREFIX "subl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %1,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+m" (v->counter) + : "ir" (i)); + } +@@ -79,7 +109,16 @@ static inline int atomic_sub_and_test(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %2,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -93,7 +132,30 @@ static inline int atomic_sub_and_test(in + */ + static inline void 
atomic_inc(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "incl %0" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ : "+m" (v->counter)); ++} ++ ++/** ++ * atomic_inc_unchecked - increment atomic variable ++ * @v: pointer of type atomic_t ++ * ++ * Atomically increments @v by 1. ++ */ ++static inline void atomic_inc_unchecked(atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "incl %0\n" + : "+m" (v->counter)); + } + +@@ -105,7 +167,18 @@ static inline void atomic_inc(atomic_t * + */ + static inline void atomic_dec(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "decl %0" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (v->counter)); + } + +@@ -121,7 +194,19 @@ static inline int atomic_dec_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decl %0; sete %1" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -139,7 +224,19 @@ static inline int atomic_inc_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incl %0; sete %1" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -158,7 +255,16 @@ static inline int atomic_add_negative(in + { + unsigned char c; + +- asm 
volatile(LOCK_PREFIX "addl %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %2,%0\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -181,7 +287,15 @@ static inline int atomic_add_return(int + #endif + /* Modern 486+ processor */ + __i = i; +- asm volatile(LOCK_PREFIX "xaddl %0, %1" ++ asm volatile(LOCK_PREFIX "xaddl %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "movl %0, %1\n" ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -222,17 +336,28 @@ static inline int atomic_sub_return(int + */ + static inline int atomic_add_unless(atomic_t *v, int a, int u) + { +- int c, old; ++ int c, old, new; + c = atomic_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "into\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "ir" (a)); ++ ++ old = atomic_cmpxchg(v, c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +diff -urNp a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h +--- a/arch/x86/include/asm/atomic_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/atomic_64.h 2009-05-24 18:10:24.976209416 -0700 +@@ -48,7 +48,29 @@ typedef struct { + */ + static inline void atomic_add(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=m" (v->counter) ++ : "ir" (i), "m" (v->counter)); ++} ++ ++/** ++ * atomic_add_unchecked - add 
integer to atomic variable ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v. ++ */ ++static inline void atomic_add_unchecked(int i, atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); + } +@@ -62,7 +84,15 @@ static inline void atomic_add(int i, ato + */ + static inline void atomic_sub(int i, atomic_t *v) + { +- asm volatile(LOCK_PREFIX "subl %1,%0" ++ asm volatile(LOCK_PREFIX "subl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); + } +@@ -80,7 +110,16 @@ static inline int atomic_sub_and_test(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addl %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +@@ -94,7 +133,32 @@ static inline int atomic_sub_and_test(in + */ + static inline void atomic_inc(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "incl %0" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ : "=m" (v->counter) ++ : "m" (v->counter)); ++} ++ ++/** ++ * atomic_inc_unchecked - increment atomic variable ++ * @v: pointer of type atomic_t ++ * ++ * Atomically increments @v by 1. 
++ */ ++static inline void atomic_inc_unchecked(atomic_t *v) ++{ ++ asm volatile(LOCK_PREFIX "incl %0\n" + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -107,7 +171,19 @@ static inline void atomic_inc(atomic_t * + */ + static inline void atomic_dec(atomic_t *v) + { +- asm volatile(LOCK_PREFIX "decl %0" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -124,7 +200,20 @@ static inline int atomic_dec_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decl %0; sete %1" ++ asm volatile(LOCK_PREFIX "decl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -142,7 +231,20 @@ static inline int atomic_inc_and_test(at + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incl %0; sete %1" ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -161,7 +263,16 @@ static inline int atomic_add_negative(in + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subl %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "=m" (v->counter), "=qm" (c) + : "ir" (i), "m" (v->counter) : "memory"); + return c; +@@ -177,7 +288,15 @@ 
static inline int atomic_add_negative(in + static inline int atomic_add_return(int i, atomic_t *v) + { + int __i = i; +- asm volatile(LOCK_PREFIX "xaddl %0, %1" ++ asm volatile(LOCK_PREFIX "xaddl %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "movl %0, %1\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -226,7 +345,15 @@ typedef struct { + */ + static inline void atomic64_add(long i, atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "addq %1,%0" ++ asm volatile(LOCK_PREFIX "addq %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subq %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); + } +@@ -240,7 +367,15 @@ static inline void atomic64_add(long i, + */ + static inline void atomic64_sub(long i, atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "subq %1,%0" ++ asm volatile(LOCK_PREFIX "subq %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addq %1,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); + } +@@ -258,7 +393,16 @@ static inline int atomic64_sub_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" ++ asm volatile(LOCK_PREFIX "subq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "addq %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +@@ -272,7 +416,19 @@ static inline int atomic64_sub_and_test( + */ + static inline void atomic64_inc(atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "incq %0" ++ asm volatile(LOCK_PREFIX "incq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" 
(v->counter) + : "m" (v->counter)); + } +@@ -285,7 +441,19 @@ static inline void atomic64_inc(atomic64 + */ + static inline void atomic64_dec(atomic64_t *v) + { +- asm volatile(LOCK_PREFIX "decq %0" ++ asm volatile(LOCK_PREFIX "decq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "=m" (v->counter) + : "m" (v->counter)); + } +@@ -302,7 +470,20 @@ static inline int atomic64_dec_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "decq %0; sete %1" ++ asm volatile(LOCK_PREFIX "decq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "incq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -320,7 +501,20 @@ static inline int atomic64_inc_and_test( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "incq %0; sete %1" ++ asm volatile(LOCK_PREFIX "incq %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ ".pushsection .fixup,\"ax\"\n" ++ "1: \n" ++ LOCK_PREFIX "decq %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +@@ -339,7 +533,16 @@ static inline int atomic64_add_negative( + { + unsigned char c; + +- asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" ++ asm volatile(LOCK_PREFIX "addq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ LOCK_PREFIX "subq %2,%0\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ "sets %1\n" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +@@ -355,7 +558,15 @@ static inline int atomic64_add_negative( + static inline long atomic64_add_return(long i, atomic64_t *v) + { + 
long __i = i; +- asm volatile(LOCK_PREFIX "xaddq %0, %1;" ++ asm volatile(LOCK_PREFIX "xaddq %0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "movq %0, %1\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +@@ -386,17 +597,29 @@ static inline long atomic64_sub_return(l + */ + static inline int atomic_add_unless(atomic_t *v, int a, int u) + { +- int c, old; ++ int c, old, new; + c = atomic_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addl %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "ir" (a)); ++ ++ old = atomic_cmpxchg((v), c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +@@ -412,17 +635,29 @@ static inline int atomic_add_unless(atom + */ + static inline int atomic64_add_unless(atomic64_t *v, long a, long u) + { +- long c, old; ++ long c, old, new; + c = atomic64_read(v); + for (;;) { +- if (unlikely(c == (u))) ++ if (unlikely(c == u)) + break; +- old = atomic64_cmpxchg((v), c, c + (a)); ++ ++ asm volatile("addq %2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ "jno 0f\n" ++ "int $4\n0:\n" ++ _ASM_EXTABLE(0b, 0b) ++#endif ++ ++ : "=r" (new) ++ : "0" (c), "er" (a)); ++ ++ old = atomic64_cmpxchg((v), c, new); + if (likely(old == c)) + break; + c = old; + } +- return c != (u); ++ return c != u; + } + + /** +diff -urNp a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h +--- a/arch/x86/include/asm/boot.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/boot.h 2009-05-24 18:10:24.976209416 -0700 +@@ -11,10 +11,15 @@ + #define ASK_VGA 0xfffd /* ask for it at bootup */ + + /* Physical address where kernel should be loaded. 
*/ +-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ ++#define ____LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + ++#ifndef __ASSEMBLY__ ++extern unsigned char __LOAD_PHYSICAL_ADDR[]; ++#define LOAD_PHYSICAL_ADDR ((unsigned long)__LOAD_PHYSICAL_ADDR) ++#endif ++ + #ifdef CONFIG_X86_64 + #define BOOT_HEAP_SIZE 0x7000 + #define BOOT_STACK_SIZE 0x4000 +diff -urNp a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h +--- a/arch/x86/include/asm/cache.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/cache.h 2009-05-24 18:10:24.977025805 -0700 +@@ -6,6 +6,7 @@ + #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + + #define __read_mostly __attribute__((__section__(".data.read_mostly"))) ++#define __read_only __attribute__((__section__(".data.read_only"))) + + #ifdef CONFIG_X86_VSMP + /* vSMP Internode cacheline shift */ +diff -urNp a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h +--- a/arch/x86/include/asm/checksum_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/checksum_32.h 2009-05-24 18:10:24.978065181 -0700 +@@ -31,6 +31,14 @@ asmlinkage __wsum csum_partial_copy_gene + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + ++asmlinkage __wsum csum_partial_copy_generic_to_user(const void *src, void *dst, ++ int len, __wsum sum, ++ int *src_err_ptr, int *dst_err_ptr); ++ ++asmlinkage __wsum csum_partial_copy_generic_from_user(const void *src, void *dst, ++ int len, __wsum sum, ++ int *src_err_ptr, int *dst_err_ptr); ++ + /* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. 
+@@ -50,7 +58,7 @@ static inline __wsum csum_partial_copy_f + int *err_ptr) + { + might_sleep(); +- return csum_partial_copy_generic((__force void *)src, dst, ++ return csum_partial_copy_generic_from_user((__force void *)src, dst, + len, sum, err_ptr, NULL); + } + +@@ -177,7 +185,7 @@ static inline __wsum csum_and_copy_to_us + { + might_sleep(); + if (access_ok(VERIFY_WRITE, dst, len)) +- return csum_partial_copy_generic(src, (__force void *)dst, ++ return csum_partial_copy_generic_to_user(src, (__force void *)dst, + len, sum, NULL, err_ptr); + + if (len) +diff -urNp a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +--- a/arch/x86/include/asm/cpufeature.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/cpufeature.h 2009-05-24 18:10:24.978065181 -0700 +@@ -80,7 +80,6 @@ + #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ + #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ + #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +-#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ + #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ + #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ + #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ +diff -urNp a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h +--- a/arch/x86/include/asm/desc.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/desc.h 2009-05-24 18:10:24.978994216 -0700 +@@ -16,6 +16,7 @@ static inline void fill_ldt(struct desc_ + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; ++ desc->type |= info->seg_not_present ^ 1; + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; +@@ -32,16 +33,12 @@ static inline void fill_ldt(struct desc_ + } + + extern struct desc_ptr idt_descr; +-extern gate_desc idt_table[]; +- 
+-struct gdt_page { +- struct desc_struct gdt[GDT_ENTRIES]; +-} __attribute__((aligned(PAGE_SIZE))); +-DECLARE_PER_CPU(struct gdt_page, gdt_page); ++extern gate_desc idt_table[256]; + ++extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)]; + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + { +- return per_cpu(gdt_page, cpu).gdt; ++ return cpu_gdt_table[cpu]; + } + + #ifdef CONFIG_X86_64 +@@ -115,19 +112,48 @@ static inline void paravirt_free_ldt(str + static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + memcpy(&idt[entry], gate, sizeof(*gate)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, + const void *desc) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + memcpy(&ldt[entry], desc, 8); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, + const void *desc, int type) + { + unsigned int size; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + switch (type) { + case DESC_TSS: + size = sizeof(tss_desc); +@@ -139,7 +165,17 @@ static inline void native_write_gdt_entr + size = sizeof(struct desc_struct); + break; + } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + memcpy(&gdt[entry], desc, size); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, +@@ -211,7 +247,19 @@ static inline void native_set_ldt(const + + static inline void native_load_tr_desc(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + asm 
volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_load_gdt(const struct desc_ptr *dtr) +@@ -246,8 +294,19 @@ static inline void native_load_tls(struc + unsigned int i; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + #define _LDT_empty(info) \ +@@ -381,6 +440,18 @@ static inline void set_system_intr_gate_ + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); + } + ++#ifdef CONFIG_X86_32 ++static inline void set_user_cs(unsigned long base, unsigned long limit, int cpu) ++{ ++ struct desc_struct d; ++ ++ if (likely(limit)) ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ pack_descriptor(&d, base, limit, 0xFB, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_CS, &d, DESCTYPE_S); ++} ++#endif ++ + #else + /* + * GET_DESC_BASE reads the descriptor base of the specified segment. +diff -urNp a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h +--- a/arch/x86/include/asm/e820.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/e820.h 2009-05-24 18:10:24.978994216 -0700 +@@ -134,7 +134,7 @@ extern char *memory_setup(void); + #define ISA_END_ADDRESS 0x100000 + #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) + +-#define BIOS_BEGIN 0x000a0000 ++#define BIOS_BEGIN 0x000c0000 + #define BIOS_END 0x00100000 + + #ifdef __KERNEL__ +diff -urNp a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h +--- a/arch/x86/include/asm/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/elf.h 2009-05-24 18:10:24.980016272 -0700 +@@ -252,7 +252,25 @@ extern int force_personality32; + the loader. 
We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + ++#ifdef CONFIG_PAX_SEGMEXEC ++#define ELF_ET_DYN_BASE ((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? SEGMEXEC_TASK_SIZE/3*2 : TASK_SIZE/3*2) ++#else + #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++#ifdef CONFIG_X86_32 ++#define PAX_ELF_ET_DYN_BASE 0x10000000UL ++ ++#define PAX_DELTA_MMAP_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) ++#define PAX_DELTA_STACK_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) ++#else ++#define PAX_ELF_ET_DYN_BASE 0x400000UL ++ ++#define PAX_DELTA_MMAP_LEN ((test_thread_flag(TIF_IA32)) ? 16 : 32) ++#define PAX_DELTA_STACK_LEN ((test_thread_flag(TIF_IA32)) ? 16 : 32) ++#endif ++#endif + + /* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, +@@ -304,8 +322,7 @@ do { \ + #define ARCH_DLINFO \ + do { \ + if (vdso_enabled) \ +- NEW_AUX_ENT(AT_SYSINFO_EHDR, \ +- (unsigned long)current->mm->context.vdso); \ ++ NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso);\ + } while (0) + + #define AT_SYSINFO 32 +@@ -316,7 +333,7 @@ do { \ + + #endif /* !CONFIG_X86_32 */ + +-#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) ++#define VDSO_CURRENT_BASE (current->mm->context.vdso) + + #define VDSO_ENTRY \ + ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) +@@ -330,7 +347,4 @@ extern int arch_setup_additional_pages(s + extern int syscall32_setup_pages(struct linux_binprm *, int exstack); + #define compat_arch_setup_additional_pages syscall32_setup_pages + +-extern unsigned long arch_randomize_brk(struct mm_struct *mm); +-#define arch_randomize_brk arch_randomize_brk +- + #endif /* _ASM_X86_ELF_H */ +diff -urNp a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h +--- a/arch/x86/include/asm/futex.h 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/arch/x86/include/asm/futex.h 2009-05-24 18:10:24.980016272 -0700 +@@ -11,6 +11,40 @@ + #include <asm/processor.h> + #include <asm/system.h> + ++#ifdef CONFIG_X86_32 ++#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ ++ asm volatile( \ ++ "movw\t%w6, %%ds\n" \ ++ "1:\t" insn "\n" \ ++ "2:\tpushl\t%%ss\n" \ ++ "\tpopl\t%%ds\n" \ ++ "\t.section .fixup,\"ax\"\n" \ ++ "3:\tmov\t%3, %1\n" \ ++ "\tjmp\t2b\n" \ ++ "\t.previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ ++ : "i" (-EFAULT), "0" (oparg), "1" (0), "r" (__USER_DS)) ++ ++#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ ++ asm volatile("movw\t%w7, %%es\n" \ ++ "1:\tmovl\t%%es:%2, %0\n" \ ++ "\tmovl\t%0, %3\n" \ ++ "\t" insn "\n" \ ++ "2:\t" LOCK_PREFIX "cmpxchgl %3, %%es:%2\n"\ ++ "\tjnz\t1b\n" \ ++ "3:\tpushl\t%%ss\n" \ ++ "\tpopl\t%%es\n" \ ++ "\t.section .fixup,\"ax\"\n" \ ++ "4:\tmov\t%5, %1\n" \ ++ "\tjmp\t3b\n" \ ++ "\t.previous\n" \ ++ _ASM_EXTABLE(1b, 4b) \ ++ _ASM_EXTABLE(2b, 4b) \ ++ : "=&a" (oldval), "=&r" (ret), \ ++ "+m" (*uaddr), "=&r" (tem) \ ++ : "r" (oparg), "i" (-EFAULT), "1" (0), "r" (__USER_DS)) ++#else + #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\t" insn "\n" \ + "2:\t.section .fixup,\"ax\"\n" \ +@@ -36,8 +70,9 @@ + : "=&a" (oldval), "=&r" (ret), \ + "+m" (*uaddr), "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "1" (0)) ++#endif + +-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) ++static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) + { + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; +@@ -61,11 +96,20 @@ static inline int futex_atomic_op_inuser + + switch (op) { + case FUTEX_OP_SET: ++#ifdef CONFIG_X86_32 ++ __futex_atomic_op1("xchgl %0, %%ds:%2", ret, oldval, uaddr, oparg); ++#else + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); ++#endif + break; + case FUTEX_OP_ADD: ++#ifdef CONFIG_X86_32 ++ 
__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %%ds:%2", ret, oldval, ++ uaddr, oparg); ++#else + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); ++#endif + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); +@@ -109,7 +153,7 @@ static inline int futex_atomic_op_inuser + return ret; + } + +-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, ++static inline int futex_atomic_cmpxchg_inatomic(u32 __user *uaddr, int oldval, + int newval) + { + +@@ -122,14 +166,27 @@ static inline int futex_atomic_cmpxchg_i + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + +- asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" ++ asm volatile( ++#ifdef CONFIG_X86_32 ++ "\tmovw %w5, %%ds\n" ++ "1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" ++ "2:\tpushl %%ss\n" ++ "\tpopl %%ds\n" ++ "\t.section .fixup, \"ax\"\n" ++#else ++ "1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + "2:\t.section .fixup, \"ax\"\n" ++#endif + "3:\tmov %2, %0\n" + "\tjmp 2b\n" + "\t.previous\n" + _ASM_EXTABLE(1b, 3b) + : "=a" (oldval), "+m" (*uaddr) ++#ifdef CONFIG_X86_32 ++ : "i" (-EFAULT), "r" (newval), "0" (oldval), "r" (__USER_DS) ++#else + : "i" (-EFAULT), "r" (newval), "0" (oldval) ++#endif + : "memory" + ); + +diff -urNp a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h +--- a/arch/x86/include/asm/i387.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/i387.h 2009-05-24 18:10:24.981046570 -0700 +@@ -197,13 +197,8 @@ static inline void restore_fpu(struct ta + } + + /* We need a safe address that is cheap to find and that is already +- in L1 during context switch. The best choices are unfortunately +- different for UP and SMP */ +-#ifdef CONFIG_SMP +-#define safe_address (__per_cpu_offset[0]) +-#else +-#define safe_address (kstat_cpu(0).cpustat.user) +-#endif ++ in L1 during context switch. 
*/ ++#define safe_address (init_tss[smp_processor_id()].x86_tss.sp0) + + /* + * These must be called with preempt disabled +diff -urNp a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h +--- a/arch/x86/include/asm/io_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/io_64.h 2009-05-24 18:10:24.981046570 -0700 +@@ -158,6 +158,17 @@ static inline void *phys_to_virt(unsigne + } + #endif + ++#define ARCH_HAS_VALID_PHYS_ADDR_RANGE ++static inline int valid_phys_addr_range (unsigned long addr, size_t count) ++{ ++ return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1 << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; ++} ++ ++static inline int valid_mmap_phys_addr_range (unsigned long pfn, size_t count) ++{ ++ return (pfn + (count >> PAGE_SHIFT)) < (1 << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; ++} ++ + /* + * Change "struct page" to physical address. + */ +diff -urNp a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h +--- a/arch/x86/include/asm/irqflags.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/irqflags.h 2009-05-24 18:10:24.981046570 -0700 +@@ -141,6 +141,8 @@ static inline unsigned long __raw_local_ + #define INTERRUPT_RETURN iret + #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit + #define GET_CR0_INTO_EAX movl %cr0, %eax ++#define GET_CR0_INTO_EDX movl %cr0, %edx ++#define SET_CR0_FROM_EDX movl %edx, %cr0 + #endif + + +diff -urNp a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h +--- a/arch/x86/include/asm/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/kmap_types.h 2009-05-24 18:10:24.982176035 -0700 +@@ -21,7 +21,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +--- a/arch/x86/include/asm/kvm_host.h 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/arch/x86/include/asm/kvm_host.h 2009-05-24 18:10:24.982176035 -0700 +@@ -481,7 +481,7 @@ struct kvm_x86_ops { + int (*get_tdp_level)(void); + }; + +-extern struct kvm_x86_ops *kvm_x86_ops; ++extern const struct kvm_x86_ops *kvm_x86_ops; + + int kvm_mmu_module_init(void); + void kvm_mmu_module_exit(void); +diff -urNp a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h +--- a/arch/x86/include/asm/linkage.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/linkage.h 2009-05-24 18:10:24.983186499 -0700 +@@ -7,6 +7,11 @@ + #ifdef CONFIG_X86_64 + #define __ALIGN .p2align 4,,15 + #define __ALIGN_STR ".p2align 4,,15" ++#else ++#ifdef CONFIG_X86_ALIGNMENT_16 ++#define __ALIGN .align 16,0x90 ++#define __ALIGN_STR ".align 16,0x90" ++#endif + #endif + + #ifdef CONFIG_X86_32 +@@ -52,10 +57,5 @@ + + #endif + +-#ifdef CONFIG_X86_ALIGNMENT_16 +-#define __ALIGN .align 16,0x90 +-#define __ALIGN_STR ".align 16,0x90" +-#endif +- + #endif /* _ASM_X86_LINKAGE_H */ + +diff -urNp a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h +--- a/arch/x86/include/asm/local.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/local.h 2009-05-24 18:10:24.983186499 -0700 +@@ -18,26 +18,90 @@ typedef struct { + + static inline void local_inc(local_t *l) + { +- asm volatile(_ASM_INC "%0" ++ asm volatile(_ASM_INC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_DEC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter)); + } + + static inline void local_dec(local_t *l) + { +- asm volatile(_ASM_DEC "%0" ++ asm volatile(_ASM_DEC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_INC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif 
++ + : "+m" (l->a.counter)); + } + + static inline void local_add(long i, local_t *l) + { +- asm volatile(_ASM_ADD "%1,%0" ++ asm volatile(_ASM_ADD "%1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_SUB "%1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter) + : "ir" (i)); + } + + static inline void local_sub(long i, local_t *l) + { +- asm volatile(_ASM_SUB "%1,%0" ++ asm volatile(_ASM_SUB "%1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_ADD "%1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (l->a.counter) + : "ir" (i)); + } +@@ -55,7 +119,24 @@ static inline int local_sub_and_test(lon + { + unsigned char c; + +- asm volatile(_ASM_SUB "%2,%0; sete %1" ++ asm volatile(_ASM_SUB "%2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_ADD "%2,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -73,7 +154,24 @@ static inline int local_dec_and_test(loc + { + unsigned char c; + +- asm volatile(_ASM_DEC "%0; sete %1" ++ asm volatile(_ASM_DEC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_INC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -91,7 +189,24 @@ static inline int local_inc_and_test(loc + { + unsigned char c; + +- asm volatile(_ASM_INC "%0; 
sete %1" ++ asm volatile(_ASM_INC "%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_DEC "%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sete %1\n" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +@@ -110,7 +225,24 @@ static inline int local_add_negative(lon + { + unsigned char c; + +- asm volatile(_ASM_ADD "%2,%0; sets %1" ++ asm volatile(_ASM_ADD "%2,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_SUB "%2,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "sets %1\n" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +@@ -133,7 +265,23 @@ static inline long local_add_return(long + #endif + /* Modern 486+ processor */ + __i = i; +- asm volatile(_ASM_XADD "%0, %1;" ++ asm volatile(_ASM_XADD "%0, %1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ _ASM_MOV "%0,%1\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; +diff -urNp a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/mach-default/apm.h +--- a/arch/x86/include/asm/mach-default/apm.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/mach-default/apm.h 2009-05-24 18:10:24.984194658 -0700 +@@ -34,7 +34,7 @@ static inline void apm_bios_call_asm(u32 + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" ++ "lcall *%%ss:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" +@@ -58,7 +58,7 @@ static inline u8 apm_bios_call_simple_as + __asm__ 
__volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" ++ "lcall *%%ss:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" +diff -urNp a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h +--- a/arch/x86/include/asm/mman.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/mman.h 2009-05-24 18:10:24.984194658 -0700 +@@ -17,4 +17,14 @@ + #define MCL_CURRENT 1 /* lock all current mappings */ + #define MCL_FUTURE 2 /* lock all future mappings */ + ++#ifdef __KERNEL__ ++#ifndef __ASSEMBLY__ ++#ifdef CONFIG_X86_32 ++#define arch_mmap_check i386_mmap_check ++int i386_mmap_check(unsigned long addr, unsigned long len, ++ unsigned long flags); ++#endif ++#endif ++#endif ++ + #endif /* _ASM_X86_MMAN_H */ +diff -urNp a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h +--- a/arch/x86/include/asm/mmu.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/mmu.h 2009-05-24 18:10:24.985085004 -0700 +@@ -9,10 +9,23 @@ + * we put the segment information here. 
+ */ + typedef struct { +- void *ldt; ++ struct desc_struct *ldt; + int size; + struct mutex lock; +- void *vdso; ++ unsigned long vdso; ++ ++#ifdef CONFIG_X86_32 ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ unsigned long user_cs_base; ++ unsigned long user_cs_limit; ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ cpumask_t cpu_user_cs_mask; ++#endif ++ ++#endif ++#endif ++ + } mm_context_t; + + #ifdef CONFIG_SMP +diff -urNp a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h +--- a/arch/x86/include/asm/mmu_context_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/mmu_context_32.h 2009-05-24 18:10:24.985085004 -0700 +@@ -15,11 +15,15 @@ static inline void switch_mm(struct mm_s + struct task_struct *tsk) + { + int cpu = smp_processor_id(); ++#ifdef CONFIG_SMP ++ int tlbstate = TLBSTATE_OK; ++#endif + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); + #ifdef CONFIG_SMP ++ tlbstate = x86_read_percpu(cpu_tlbstate.state); + per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; + per_cpu(cpu_tlbstate, cpu).active_mm = next; + #endif +@@ -33,6 +37,26 @@ static inline void switch_mm(struct mm_s + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ if (!nx_enabled) { ++ smp_mb__before_clear_bit(); ++ cpu_clear(cpu, prev->context.cpu_user_cs_mask); ++ smp_mb__after_clear_bit(); ++ cpu_set(cpu, next->context.cpu_user_cs_mask); ++ } ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (unlikely(prev->context.user_cs_base != next->context.user_cs_base || ++ prev->context.user_cs_limit != next->context.user_cs_limit ++#ifdef CONFIG_SMP ++ || tlbstate != TLBSTATE_OK ++#endif ++ )) ++ set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); ++#endif ++ + } + #ifdef CONFIG_SMP + else { 
+@@ -45,6 +69,19 @@ static inline void switch_mm(struct mm_s + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled) ++ cpu_set(cpu, next->context.cpu_user_cs_mask); ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!((next->pax_flags & MF_PAX_PAGEEXEC) && nx_enabled)) ++#endif ++ set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); ++#endif ++ + } + } + #endif +diff -urNp a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h +--- a/arch/x86/include/asm/module.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/module.h 2009-05-24 18:10:24.985085004 -0700 +@@ -74,7 +74,12 @@ struct mod_arch_specific {}; + # else + # define MODULE_STACKSIZE "" + # endif +-# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE ++# ifdef CONFIG_GRKERNSEC ++# define MODULE_GRSEC "GRSECURITY " ++# else ++# define MODULE_GRSEC "" ++# endif ++# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE MODULE_GRSEC + #endif + + #endif /* _ASM_X86_MODULE_H */ +diff -urNp a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h +--- a/arch/x86/include/asm/page_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/page_32.h 2009-05-24 18:10:24.986013759 -0700 +@@ -13,6 +13,23 @@ + */ + #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) + ++#ifdef CONFIG_PAX_KERNEXEC ++#ifndef __ASSEMBLY__ ++extern unsigned char MODULES_VADDR[]; ++extern unsigned char MODULES_END[]; ++extern unsigned char KERNEL_TEXT_OFFSET[]; ++#define ktla_ktva(addr) (addr + (unsigned long)KERNEL_TEXT_OFFSET) ++#define ktva_ktla(addr) (addr - (unsigned long)KERNEL_TEXT_OFFSET) ++#endif ++#else ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++#define CONFIG_ARCH_TRACK_EXEC_LIMIT 1 ++#endif ++ + #ifdef CONFIG_4KSTACKS + #define THREAD_ORDER 0 + #else +diff -urNp 
a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h +--- a/arch/x86/include/asm/page_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/page_64.h 2009-05-24 18:10:24.986013759 -0700 +@@ -49,6 +49,9 @@ + #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) + #define __START_KERNEL_map _AC(0xffffffff80000000, UL) + ++#define ktla_ktva(addr) (addr) ++#define ktva_ktla(addr) (addr) ++ + /* See Documentation/x86_64/mm.txt for a description of the memory map. */ + #define __PHYSICAL_MASK_SHIFT 46 + #define __VIRTUAL_MASK_SHIFT 48 +@@ -101,5 +104,6 @@ extern void init_extra_mapping_wb(unsign + #define pfn_valid(pfn) ((pfn) < max_pfn) + #endif + ++#define nx_enabled (1) + + #endif /* _ASM_X86_PAGE_64_H */ +diff -urNp a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h +--- a/arch/x86/include/asm/paravirt.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/paravirt.h 2009-05-24 18:10:24.986977433 -0700 +@@ -1557,7 +1557,7 @@ static inline unsigned long __raw_local_ + #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) + #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +-#define PARA_INDIRECT(addr) *%cs:addr ++#define PARA_INDIRECT(addr) *%ss:addr + #endif + + #define INTERRUPT_RETURN \ +diff -urNp a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h +--- a/arch/x86/include/asm/pda.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pda.h 2009-05-24 18:10:24.995209407 -0700 +@@ -16,11 +16,9 @@ struct x8664_pda { + unsigned long oldrsp; /* 24 user rsp for system call */ + int irqcount; /* 32 Irq nesting counter. Starts -1 */ + unsigned int cpunumber; /* 36 Logical CPU number */ +-#ifdef CONFIG_CC_STACKPROTECTOR + unsigned long stack_canary; /* 40 stack canary value */ + /* gcc-ABI: this canary MUST be at + offset 40!!! 
*/ +-#endif + char *irqstackptr; + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ +diff -urNp a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h +--- a/arch/x86/include/asm/percpu.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/percpu.h 2009-05-24 18:10:24.996028240 -0700 +@@ -93,6 +93,12 @@ DECLARE_PER_CPU(struct x8664_pda, pda); + + #define __my_cpu_offset x86_read_percpu(this_cpu_off) + ++#include <asm-generic/sections.h> ++#include <linux/threads.h> ++#define __per_cpu_offset __per_cpu_offset ++extern unsigned long __per_cpu_offset[NR_CPUS]; ++#define per_cpu_offset(x) (__per_cpu_offset[x] + (unsigned long)__per_cpu_start) ++ + /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ + #define __percpu_seg "%%fs:" + +diff -urNp a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h +--- a/arch/x86/include/asm/pgalloc.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgalloc.h 2009-05-24 18:10:24.996028240 -0700 +@@ -52,7 +52,7 @@ static inline void pmd_populate_kernel(s + pmd_t *pmd, pte_t *pte) + { + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); +- set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); ++ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + + static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, +diff -urNp a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h +--- a/arch/x86/include/asm/pgtable-2level.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgtable-2level.h 2009-05-24 18:10:24.997021594 -0700 +@@ -18,7 +18,19 @@ static inline void native_set_pte(pte_t + + static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + *pmdp = pmd; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_set_pte_atomic(pte_t *ptep, pte_t 
pte) +diff -urNp a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h +--- a/arch/x86/include/asm/pgtable-3level.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgtable-3level.h 2009-05-24 18:10:24.997021594 -0700 +@@ -70,12 +70,36 @@ static inline void native_set_pte_atomic + + static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_set_pud(pud_t *pudp, pud_t pud) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + /* +diff -urNp a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +--- a/arch/x86/include/asm/pgtable.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgtable.h 2009-05-24 18:10:24.998209582 -0700 +@@ -14,12 +14,11 @@ + #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ + #define _PAGE_BIT_PAT 7 /* on 4KB pages */ + #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ ++#define _PAGE_BIT_SPECIAL 9 /* special mappings, no associated struct page */ + #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ + #define _PAGE_BIT_UNUSED3 11 + #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SPECIAL + #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + + #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +@@ -31,7 +30,6 @@ + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define 
_PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) + #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) + #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) + #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +@@ -43,7 +41,7 @@ + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) + #else +-#define _PAGE_NX (_AT(pteval_t, 0)) ++#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #endif + + /* If _PAGE_PRESENT is clear, we use these: */ +@@ -83,6 +81,9 @@ + #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + ++#define PAGE_READONLY_NOEXEC PAGE_READONLY ++#define PAGE_SHARED_NOEXEC PAGE_SHARED ++ + #define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) + #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) +@@ -94,7 +95,7 @@ + #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) + #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) + #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_RO | _PAGE_PCD | _PAGE_PWT | _PAGE_USER) + #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) +@@ -153,7 +154,7 @@ + * bits are combined, this will alow user to access the high address mapped + * VDSO in the presence of CONFIG_COMPAT_VDSO + */ +-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ ++#define PTE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ + #define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ + #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other 
attributes) */ + #endif +@@ -170,10 +171,17 @@ extern unsigned long empty_zero_page[PAG + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + ++extern pteval_t __supported_pte_mask; ++ + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ ++static inline int pte_user(pte_t pte) ++{ ++ return pte_val(pte) & _PAGE_USER; ++} ++ + static inline int pte_dirty(pte_t pte) + { + return pte_flags(pte) & _PAGE_DIRTY; +@@ -242,9 +250,29 @@ static inline pte_t pte_wrprotect(pte_t + return __pte(pte_val(pte) & ~_PAGE_RW); + } + ++static inline pte_t pte_mkread(pte_t pte) ++{ ++ return __pte(pte_val(pte) | _PAGE_USER); ++} ++ + static inline pte_t pte_mkexec(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_NX); ++#ifdef CONFIG_X86_PAE ++ if (__supported_pte_mask & _PAGE_NX) ++ return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); ++ else ++#endif ++ return __pte(pte_val(pte) | _PAGE_USER); ++} ++ ++static inline pte_t pte_exprotect(pte_t pte) ++{ ++#ifdef CONFIG_X86_PAE ++ if (__supported_pte_mask & _PAGE_NX) ++ return __pte(pte_val(pte) | _PAGE_NX); ++ else ++#endif ++ return __pte(pte_val(pte) & ~_PAGE_USER); + } + + static inline pte_t pte_mkdirty(pte_t pte) +@@ -287,8 +315,6 @@ static inline pte_t pte_mkspecial(pte_t + return __pte(pte_val(pte) | _PAGE_SPECIAL); + } + +-extern pteval_t __supported_pte_mask; +- + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) + { + return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | +@@ -552,7 +578,19 @@ static inline void ptep_set_wrprotect(st + */ + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { +- memcpy(dst, src, count * sizeof(pgd_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(dst, src, count * sizeof(pgd_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + +diff -urNp a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h 
+--- a/arch/x86/include/asm/pgtable_32.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgtable_32.h 2009-05-24 18:10:24.998209582 -0700 +@@ -25,8 +25,6 @@ + struct mm_struct; + struct vm_area_struct; + +-extern pgd_t swapper_pg_dir[1024]; +- + static inline void pgtable_cache_init(void) { } + static inline void check_pgt_cache(void) { } + void paging_init(void); +@@ -46,6 +44,11 @@ extern void set_pmd_pfn(unsigned long, u + # include <asm/pgtable-2level-defs.h> + #endif + ++extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; ++#ifdef CONFIG_X86_PAE ++extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD]; ++#endif ++ + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +@@ -83,7 +86,7 @@ extern void set_pmd_pfn(unsigned long, u + #undef TEST_ACCESS_OK + + /* The boot page tables (all created as a single array) */ +-extern unsigned long pg0[]; ++extern pte_t pg0[]; + + #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) + +@@ -175,6 +178,9 @@ do { \ + + #endif /* !__ASSEMBLY__ */ + ++#define HAVE_ARCH_UNMAPPED_AREA ++#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN ++ + /* + * kern_addr_valid() is (1) for FLATMEM and (0) for + * SPARSEMEM and DISCONTIGMEM +diff -urNp a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +--- a/arch/x86/include/asm/pgtable_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/pgtable_64.h 2009-05-24 18:10:24.998991192 -0700 +@@ -15,9 +15,12 @@ + + extern pud_t level3_kernel_pgt[512]; + extern pud_t level3_ident_pgt[512]; ++extern pud_t level3_vmalloc_pgt[512]; ++extern pud_t level3_vmemmap_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pmd_t level2_fixmap_pgt[512]; +-extern pmd_t level2_ident_pgt[512]; ++extern pmd_t level2_ident_pgt[512*4]; ++extern pte_t level1_fixmap_pgt[512]; + extern pgd_t init_level4_pgt[]; + + #define swapper_pg_dir init_level4_pgt +@@ -106,7 +109,19 @@ static inline pte_t native_ptep_get_and_ + + static inline 
void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + *pmdp = pmd; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + static inline void native_pmd_clear(pmd_t *pmd) +@@ -158,17 +173,17 @@ static inline void native_pgd_clear(pgd_ + + static inline int pgd_bad(pgd_t pgd) + { +- return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + static inline int pud_bad(pud_t pud) + { +- return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + static inline int pmd_bad(pmd_t pmd) + { +- return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; ++ return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; + } + + #define pte_none(x) (!pte_val((x))) +diff -urNp a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +--- a/arch/x86/include/asm/processor.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/processor.h 2009-05-24 18:10:24.998991192 -0700 +@@ -271,7 +271,7 @@ struct tss_struct { + + } ____cacheline_aligned; + +-DECLARE_PER_CPU(struct tss_struct, init_tss); ++extern struct tss_struct init_tss[NR_CPUS]; + + /* + * Save the original ist values for checking stack pointers during debugging +@@ -822,11 +822,20 @@ static inline void spin_lock_prefetch(co + * User space process size: 3GB (default). 
+ */ + #define TASK_SIZE PAGE_OFFSET ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++#define SEGMEXEC_TASK_SIZE (TASK_SIZE / 2) ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++#define STACK_TOP ((current->mm->pax_flags & MF_PAX_SEGMEXEC)?SEGMEXEC_TASK_SIZE:TASK_SIZE) ++#else + #define STACK_TOP TASK_SIZE +-#define STACK_TOP_MAX STACK_TOP ++#endif ++#define STACK_TOP_MAX TASK_SIZE + + #define INIT_THREAD { \ +- .sp0 = sizeof(init_stack) + (long)&init_stack, \ ++ .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ +@@ -841,7 +850,7 @@ static inline void spin_lock_prefetch(co + */ + #define INIT_TSS { \ + .x86_tss = { \ +- .sp0 = sizeof(init_stack) + (long)&init_stack, \ ++ .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ +@@ -852,11 +861,7 @@ static inline void spin_lock_prefetch(co + extern unsigned long thread_saved_pc(struct task_struct *tsk); + + #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) +-#define KSTK_TOP(info) \ +-({ \ +- unsigned long *__ptr = (unsigned long *)(info); \ +- (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ +-}) ++#define KSTK_TOP(info) ((info)->task.thread.sp0) + + /* + * The below -8 is to reserve 8 bytes on top of the ring0 stack. +@@ -871,7 +876,7 @@ extern unsigned long thread_saved_pc(str + #define task_pt_regs(task) \ + ({ \ + struct pt_regs *__regs__; \ +- __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ ++ __regs__ = (struct pt_regs *)((task)->thread.sp0); \ + __regs__ - 1; \ + }) + +@@ -887,7 +892,7 @@ extern unsigned long thread_saved_pc(str + * space during mmap's. + */ + #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ +- 0xc0000000 : 0xFFFFe000) ++ 0xc0000000 : 0xFFFFf000) + + #define TASK_SIZE (test_thread_flag(TIF_IA32) ? 
\ + IA32_PAGE_OFFSET : TASK_SIZE64) +@@ -924,6 +929,10 @@ extern void start_thread(struct pt_regs + */ + #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + ++#ifdef CONFIG_PAX_SEGMEXEC ++#define SEGMEXEC_TASK_UNMAPPED_BASE (PAGE_ALIGN(SEGMEXEC_TASK_SIZE / 3)) ++#endif ++ + #define KSTK_EIP(task) (task_pt_regs(task)->ip) + + /* Get/set a process' ability to use the timestamp counter instruction */ +diff -urNp a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h +--- a/arch/x86/include/asm/ptrace.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/ptrace.h 2009-05-24 18:10:24.999971975 -0700 +@@ -187,28 +187,29 @@ static inline unsigned long regs_return_ + } + + /* +- * user_mode_vm(regs) determines whether a register set came from user mode. ++ * user_mode(regs) determines whether a register set came from user mode. + * This is true if V8086 mode was enabled OR if the register set was from + * protected mode with RPL-3 CS value. This tricky test checks that with + * one comparison. Many places in the kernel can bypass this full check +- * if they have already ruled out V8086 mode, so user_mode(regs) can be used. ++ * if they have already ruled out V8086 mode, so user_mode_novm(regs) can ++ * be used. 
+ */ +-static inline int user_mode(struct pt_regs *regs) ++static inline int user_mode_novm(struct pt_regs *regs) + { + #ifdef CONFIG_X86_32 + return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; + #else +- return !!(regs->cs & 3); ++ return !!(regs->cs & SEGMENT_RPL_MASK); + #endif + } + +-static inline int user_mode_vm(struct pt_regs *regs) ++static inline int user_mode(struct pt_regs *regs) + { + #ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= + USER_RPL; + #else +- return user_mode(regs); ++ return user_mode_novm(regs); + #endif + } + +diff -urNp a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h +--- a/arch/x86/include/asm/reboot.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/reboot.h 2009-05-24 18:10:24.999971975 -0700 +@@ -16,6 +16,6 @@ extern struct machine_ops machine_ops; + + void native_machine_crash_shutdown(struct pt_regs *regs); + void native_machine_shutdown(void); +-void machine_real_restart(const unsigned char *code, int length); ++void machine_real_restart(const unsigned char *code, unsigned int length); + + #endif /* _ASM_X86_REBOOT_H */ +diff -urNp a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h +--- a/arch/x86/include/asm/rwsem.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/rwsem.h 2009-05-24 18:10:25.000979087 -0700 +@@ -106,10 +106,26 @@ static inline void __down_read(struct rw + { + asm volatile("# beginning down_read\n\t" + LOCK_PREFIX " incl (%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl (%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* adds 0x00000001, returns the old value */ +- " jns 1f\n" ++ " jns 2f\n" + " call call_rwsem_down_read_failed\n" +- "1:\n\t" ++ "2:\n\t" + "# ending down_read\n\t" + : "+m" (sem->count) + : "a" (sem) +@@ -124,13 
+140,29 @@ static inline int __down_read_trylock(st + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" + " movl %0,%1\n\t" +- "1:\n\t" ++ "2:\n\t" + " movl %1,%2\n\t" + " addl %3,%2\n\t" +- " jle 2f\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "subl %3,%2\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ " jle 3f\n\t" + LOCK_PREFIX " cmpxchgl %2,%0\n\t" +- " jnz 1b\n\t" +- "2:\n\t" ++ " jnz 2b\n\t" ++ "3:\n\t" + "# ending __down_read_trylock\n\t" + : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "i" (RWSEM_ACTIVE_READ_BIAS) +@@ -148,12 +180,28 @@ static inline void __down_write_nested(s + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" + /* was the count 0 before? 
*/ +- " jz 1f\n" ++ " jz 2f\n" + " call call_rwsem_down_write_failed\n" +- "1:\n" ++ "2:\n" + "# ending down_write" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) +@@ -186,10 +234,26 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" + LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* subtracts 1, returns the old value */ +- " jns 1f\n\t" ++ " jns 2f\n\t" + " call call_rwsem_wake\n" +- "1:\n" ++ "2:\n" + "# ending __up_read\n" + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (tmp) +@@ -204,11 +268,27 @@ static inline void __up_write(struct rw_ + asm volatile("# beginning __up_write\n\t" + " movl %2,%%edx\n\t" + LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %%edx,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* tries to transition + 0xffff0001 -> 0x00000000 */ +- " jz 1f\n" ++ " jz 2f\n" + " call call_rwsem_wake\n" +- "1:\n\t" ++ "2:\n\t" + "# ending __up_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) +@@ -222,10 +302,26 @@ static inline void __downgrade_write(str + { + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX " addl %2,(%%eax)\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %2,(%%eax)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ +- " jns 1f\n\t" ++ " jns 2f\n\t" + " 
call call_rwsem_downgrade_wake\n" +- "1:\n\t" ++ "2:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_WAITING_BIAS) +@@ -237,7 +333,23 @@ static inline void __downgrade_write(str + */ + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "addl %1,%0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (sem->count) + : "ir" (delta)); + } +@@ -249,7 +361,23 @@ static inline int rwsem_atomic_update(in + { + int tmp = delta; + +- asm volatile(LOCK_PREFIX "xadd %0,%1" ++ asm volatile(LOCK_PREFIX "xadd %0,%1\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ "movl %0,%1\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+r" (tmp), "+m" (sem->count) + : : "memory"); + +diff -urNp a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h +--- a/arch/x86/include/asm/segment.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/segment.h 2009-05-24 18:10:25.001976841 -0700 +@@ -88,13 +88,19 @@ + #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) + #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + +-#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) ++#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) + #ifdef CONFIG_SMP + #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) + #else + #define __KERNEL_PERCPU 0 + #endif + ++#define GDT_ENTRY_PCIBIOS_CS (GDT_ENTRY_KERNEL_BASE + 16) ++#define __PCIBIOS_CS (GDT_ENTRY_PCIBIOS_CS * 8) ++ ++#define GDT_ENTRY_PCIBIOS_DS (GDT_ENTRY_KERNEL_BASE + 17) ++#define __PCIBIOS_DS (GDT_ENTRY_PCIBIOS_DS * 8) ++ + #define 
GDT_ENTRY_DOUBLEFAULT_TSS 31 + + /* +@@ -132,7 +138,7 @@ + */ + + /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +-#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) ++#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xFFFCU) == PNP_CS32 || ((x) & 0xFFFCU) == PNP_CS16) + + + #else +diff -urNp a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h +--- a/arch/x86/include/asm/spinlock.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/spinlock.h 2009-05-24 18:10:25.003061541 -0700 +@@ -310,18 +310,50 @@ static inline int __raw_write_can_lock(r + static inline void __raw_read_lock(raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" +- "jns 1f\n" +- "call __read_lock_failed\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" + "1:\n" ++ LOCK_PREFIX " addl $1,(%0)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "jns 2f\n" ++ "call __read_lock_failed\n\t" ++ "2:\n" + ::LOCK_PTR_REG (rw) : "memory"); + } + + static inline void __raw_write_lock(raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" +- "jz 1f\n" +- "call __write_lock_failed\n\t" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" + "1:\n" ++ LOCK_PREFIX " addl %1,(%0)\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ "jz 2f\n" ++ "call __write_lock_failed\n\t" ++ "2:\n" + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); + } + +@@ -348,12 +380,45 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); ++ asm volatile(LOCK_PREFIX "incl %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int 
$4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "decl %0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ ++ :"+m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile(LOCK_PREFIX "addl %1, %0" ++ asm volatile(LOCK_PREFIX "addl %1, %0\n" ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#ifdef CONFIG_X86_32 ++ "into\n0:\n" ++#else ++ "jno 0f\n" ++ "int $4\n0:\n" ++#endif ++ ".pushsection .fixup,\"ax\"\n" ++ "1:\n" ++ LOCK_PREFIX "subl %1,%0\n" ++ "jmp 0b\n" ++ ".popsection\n" ++ _ASM_EXTABLE(0b, 1b) ++#endif ++ + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); + } + +diff -urNp a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h +--- a/arch/x86/include/asm/system.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/system.h 2009-05-24 18:10:25.003061541 -0700 +@@ -95,6 +95,8 @@ do { \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,%%gs:%P[pda_canary]\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ +@@ -106,7 +108,9 @@ do { \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ +- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ ++ [task_canary] "i" (offsetof(struct task_struct, stack_canary)), \ ++ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ ++ [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ + : "memory", "cc" __EXTRA_CLOBBER) + #endif + +@@ -169,7 +173,7 @@ static inline unsigned long get_limit(un + { + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); +- return __limit + 1; ++ return __limit; + } + + static inline void native_clts(void) +@@ -295,6 +299,21 @@ static inline void 
native_wbinvd(void) + + #define stts() write_cr0(read_cr0() | X86_CR0_TS) + ++#define pax_open_kernel(cr0) \ ++do { \ ++ typecheck(unsigned long, cr0); \ ++ preempt_disable(); \ ++ cr0 = read_cr0(); \ ++ write_cr0(cr0 & ~X86_CR0_WP); \ ++} while (0) ++ ++#define pax_close_kernel(cr0) \ ++do { \ ++ typecheck(unsigned long, cr0); \ ++ write_cr0(cr0); \ ++ preempt_enable_no_resched(); \ ++} while (0) ++ + #endif /* __KERNEL__ */ + + static inline void clflush(volatile void *__p) +@@ -309,7 +328,7 @@ void enable_hlt(void); + + void cpu_idle_wait(void); + +-extern unsigned long arch_align_stack(unsigned long sp); ++#define arch_align_stack(x) ((x) & ~0xfUL) + extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + + void default_idle(void); +diff -urNp a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +--- a/arch/x86/include/asm/uaccess.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/uaccess.h 2009-05-24 18:10:25.004210979 -0700 +@@ -10,6 +10,7 @@ + #include <linux/string.h> + #include <asm/asm.h> + #include <asm/page.h> ++#include <asm/segment.h> + + #define VERIFY_READ 0 + #define VERIFY_WRITE 1 +@@ -29,7 +30,12 @@ + + #define get_ds() (KERNEL_DS) + #define get_fs() (current_thread_info()->addr_limit) ++#ifdef CONFIG_X86_32 ++void __set_fs(mm_segment_t x, int cpu); ++void set_fs(mm_segment_t x); ++#else + #define set_fs(x) (current_thread_info()->addr_limit = (x)) ++#endif + + #define segment_eq(a, b) ((a).seg == (b).seg) + +@@ -186,9 +192,12 @@ extern int __get_user_bad(void); + + #ifdef CONFIG_X86_32 + #define __put_user_u64(x, addr, err) \ +- asm volatile("1: movl %%eax,0(%2)\n" \ +- "2: movl %%edx,4(%2)\n" \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: movl %%eax,%%ds:0(%2)\n" \ ++ "2: movl %%edx,%%ds:4(%2)\n" \ + "3:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ +@@ -196,7 +205,8 @@ extern int __get_user_bad(void); + _ASM_EXTABLE(1b, 
4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (err) \ +- : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) ++ : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err), \ ++ "r"(__USER_DS)) + + #define __put_user_x8(x, ptr, __ret_pu) \ + asm volatile("call __put_user_8" : "=a" (__ret_pu) \ +@@ -336,6 +346,22 @@ do { \ + } \ + } while (0) + ++#ifdef CONFIG_X86_32 ++#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: mov"itype" %%ds:%2,%"rtype"1\n" \ ++ "2:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ "3: movl %3,%0\n" \ ++ " xor"itype" %"rtype"1,%"rtype"1\n" \ ++ " jmp 2b\n" \ ++ ".previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r" (err), ltype (x) \ ++ : "m" (__m(addr)), "i" (errret), "0" (err), "r"(__USER_DS)) ++#else + #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ +@@ -347,6 +373,7 @@ do { \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) ++#endif + + #define __put_user_nocheck(x, ptr, size) \ + ({ \ +@@ -373,6 +400,22 @@ struct __large_struct { unsigned long bu + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
+ */ ++#ifdef CONFIG_X86_32 ++#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ ++ asm volatile(" movw %w5,%%ds\n" \ ++ "1: mov"itype" %"rtype"1,%%ds:%2\n" \ ++ "2:\n" \ ++ " pushl %%ss\n" \ ++ " popl %%ds\n" \ ++ ".section .fixup,\"ax\"\n" \ ++ "3: movl %3,%0\n" \ ++ " jmp 2b\n" \ ++ ".previous\n" \ ++ _ASM_EXTABLE(1b, 3b) \ ++ : "=r"(err) \ ++ : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err),\ ++ "r"(__USER_DS)) ++#else + #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ +@@ -383,6 +426,7 @@ struct __large_struct { unsigned long bu + _ASM_EXTABLE(1b, 3b) \ + : "=r"(err) \ + : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) ++#endif + /** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. +@@ -443,6 +487,7 @@ extern struct movsl_mask { + + #define ARCH_HAS_NOCACHE_UACCESS 1 + ++#define ARCH_HAS_SORT_EXTABLE + #ifdef CONFIG_X86_32 + # include "uaccess_32.h" + #else +diff -urNp a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h +--- a/arch/x86/include/asm/uaccess_64.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/include/asm/uaccess_64.h 2009-05-24 18:10:25.004210979 -0700 +@@ -10,6 +10,8 @@ + #include <linux/lockdep.h> + #include <asm/page.h> + ++#define set_fs(x) (current_thread_info()->addr_limit = (x)) ++ + /* + * Copy To/From Userspace + */ +diff -urNp a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c +--- a/arch/x86/kernel/acpi/boot.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/acpi/boot.c 2009-05-24 18:10:25.005209641 -0700 +@@ -1645,7 +1645,7 @@ static struct dmi_system_id __initdata a + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, +- {} ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + /* +diff -urNp a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S +--- a/arch/x86/kernel/acpi/realmode/wakeup.S 2009-05-02 
11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/acpi/realmode/wakeup.S 2009-05-24 18:10:25.006209629 -0700 +@@ -104,7 +104,7 @@ _start: + movl %eax, %ecx + orl %edx, %ecx + jz 1f +- movl $0xc0000080, %ecx ++ mov $MSR_EFER, %ecx + wrmsr + 1: + +diff -urNp a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c +--- a/arch/x86/kernel/acpi/sleep.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/acpi/sleep.c 2009-05-24 18:10:25.006209629 -0700 +@@ -11,11 +11,12 @@ + #include <linux/cpumask.h> + #include <asm/segment.h> + #include <asm/desc.h> ++#include <asm/e820.h> + + #include "realmode/wakeup.h" + #include "sleep.h" + +-unsigned long acpi_wakeup_address; ++unsigned long acpi_wakeup_address = 0x2000; + unsigned long acpi_realmode_flags; + + /* address in low memory of the wakeup routine. */ +@@ -37,6 +38,10 @@ int acpi_save_state_mem(void) + { + struct wakeup_header *header; + ++#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) && defined(CONFIG_PAX_KERNEXEC) ++ unsigned long cr0; ++#endif ++ + if (!acpi_realmode) { + printk(KERN_ERR "Could not allocate memory during boot, " + "S3 disabled\n"); +@@ -99,8 +104,18 @@ int acpi_save_state_mem(void) + header->trampoline_segment = setup_trampoline() >> 4; + #ifdef CONFIG_SMP + stack_start.sp = temp_stack + sizeof(temp_stack); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #endif + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +@@ -133,14 +148,8 @@ void __init acpi_reserve_bootmem(void) + return; + } + +- acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); +- +- if (!acpi_realmode) { +- printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); +- return; +- } +- +- acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); ++ reserve_early(acpi_wakeup_address, acpi_wakeup_address + 
WAKEUP_SIZE, "ACPI Wakeup Code"); ++ acpi_realmode = (unsigned long)__va(acpi_wakeup_address);; + } + + +diff -urNp a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S +--- a/arch/x86/kernel/acpi/wakeup_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/acpi/wakeup_32.S 2009-05-24 18:10:25.007209478 -0700 +@@ -30,13 +30,11 @@ wakeup_pmode_return: + # and restore the stack ... but you need gdt for this to work + movl saved_context_esp, %esp + +- movl %cs:saved_magic, %eax +- cmpl $0x12345678, %eax ++ cmpl $0x12345678, saved_magic + jne bogus_magic + + # jump to place where we left off +- movl saved_eip, %eax +- jmp *%eax ++ jmp *(saved_eip) + + bogus_magic: + jmp bogus_magic +diff -urNp a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +--- a/arch/x86/kernel/alternative.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/alternative.c 2009-05-24 18:10:25.007209478 -0700 +@@ -393,7 +393,7 @@ void apply_paravirt(struct paravirt_patc + + BUG_ON(p->len > MAX_PATCH_LEN); + /* prep the buffer with the original instructions */ +- memcpy(insnbuf, p->instr, p->len); ++ memcpy(insnbuf, ktla_ktva(p->instr), p->len); + used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); + +@@ -473,11 +473,26 @@ void __init alternative_instructions(voi + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. 
+ */ +-void *text_poke_early(void *addr, const void *opcode, size_t len) ++void *__kprobes text_poke_early(void *addr, const void *opcode, size_t len) + { + unsigned long flags; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + local_irq_save(flags); +- memcpy(addr, opcode, len); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(ktla_ktva(addr), opcode, len); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + local_irq_restore(flags); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but +@@ -498,33 +513,27 @@ void *text_poke_early(void *addr, const + */ + void *__kprobes text_poke(void *addr, const void *opcode, size_t len) + { +- unsigned long flags; +- char *vaddr; +- int nr_pages = 2; ++ unsigned char *vaddr = ktla_ktva(addr); + struct page *pages[2]; +- int i; ++ size_t i; ++ ++ if (!core_kernel_text((unsigned long)addr) + +- if (!core_kernel_text((unsigned long)addr)) { +- pages[0] = vmalloc_to_page(addr); +- pages[1] = vmalloc_to_page(addr + PAGE_SIZE); ++#if defined(CONFIG_X86_32) && defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ && (vaddr < MODULES_VADDR || MODULES_END < vaddr) ++#endif ++ ++ ) { ++ pages[0] = vmalloc_to_page(vaddr); ++ pages[1] = vmalloc_to_page(vaddr + PAGE_SIZE); + } else { +- pages[0] = virt_to_page(addr); ++ pages[0] = virt_to_page(vaddr); + WARN_ON(!PageReserved(pages[0])); +- pages[1] = virt_to_page(addr + PAGE_SIZE); ++ pages[1] = virt_to_page(vaddr + PAGE_SIZE); + } + BUG_ON(!pages[0]); +- if (!pages[1]) +- nr_pages = 1; +- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); +- BUG_ON(!vaddr); +- local_irq_save(flags); +- memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); +- local_irq_restore(flags); +- vunmap(vaddr); +- sync_core(); +- /* Could also do a CLFLUSH here to speed up CPU recovery; but +- that causes hangs on some VIA CPUs. 
*/ ++ text_poke_early(addr, opcode, len); + for (i = 0; i < len; i++) +- BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); ++ BUG_ON((vaddr)[i] != ((unsigned char *)opcode)[i]); + return addr; + } +diff -urNp a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c +--- a/arch/x86/kernel/apm_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/apm_32.c 2009-05-24 18:10:25.008098147 -0700 +@@ -407,7 +407,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitq + static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); + static struct apm_user *user_list; + static DEFINE_SPINLOCK(user_list_lock); +-static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; ++static const struct desc_struct bad_bios_desc = { { { 0, 0x00409300 } } }; + + static const char driver_version[] = "1.16ac"; /* no spaces */ + +@@ -602,19 +602,42 @@ static u8 apm_bios_call(u32 func, u32 eb + struct desc_struct save_desc_40; + struct desc_struct *gdt; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + cpus = apm_save_cpus(); + + cpu = get_cpu(); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + apm_irq_save(flags); + APM_DO_SAVE_SEGS; + apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + apm_restore_cpus(cpus); + +@@ -645,19 +668,42 @@ static u8 apm_bios_call_simple(u32 func, + struct desc_struct save_desc_40; + struct desc_struct *gdt; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + cpus = apm_save_cpus(); + + cpu = get_cpu(); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC 
++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + apm_irq_save(flags); + APM_DO_SAVE_SEGS; + error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + gdt[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + apm_restore_cpus(cpus); + return error; +@@ -929,7 +975,7 @@ recalc: + + static void apm_power_off(void) + { +- unsigned char po_bios_call[] = { ++ const unsigned char po_bios_call[] = { + 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ + 0x8e, 0xd0, /* movw ax,ss */ + 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ +@@ -1876,7 +1922,10 @@ static const struct file_operations apm_ + static struct miscdevice apm_device = { + APM_MINOR_DEV, + "apm_bios", +- &apm_bios_fops ++ &apm_bios_fops, ++ {NULL, NULL}, ++ NULL, ++ NULL + }; + + +@@ -2197,7 +2246,7 @@ static struct dmi_system_id __initdata a + { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, + }, + +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + /* +@@ -2215,6 +2264,10 @@ static int __init apm_init(void) + struct desc_struct *gdt; + int err; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + dmi_check_system(apm_dmi_table); + + if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { +@@ -2288,9 +2341,18 @@ static int __init apm_init(void) + * This is for buggy BIOS's that refer to (real mode) segment 0x40 + * even though they are called in protected mode. + */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); + _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * Set up the long jump entry point to the APM BIOS, which is called + * from inline assembly. 
+@@ -2309,6 +2371,11 @@ static int __init apm_init(void) + * code to that CPU. + */ + gdt = get_cpu_gdt_table(0); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_base(gdt[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt[APM_CS_16 >> 3], +@@ -2316,6 +2383,10 @@ static int __init apm_init(void) + set_base(gdt[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + proc_create("apm", 0, NULL, &apm_file_ops); + + kapmd_task = kthread_create(apm, NULL, "kapmd"); +diff -urNp a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c +--- a/arch/x86/kernel/asm-offsets_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/asm-offsets_32.c 2009-05-24 18:10:25.009211410 -0700 +@@ -100,6 +100,7 @@ void foo(void) + DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); + DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); + DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); ++ DEFINE(PERCPU_MODULE_RESERVE, PERCPU_MODULE_RESERVE); + + OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + +@@ -113,6 +114,7 @@ void foo(void) + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); ++ OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0); + #endif + + #ifdef CONFIG_XEN +diff -urNp a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c +--- a/arch/x86/kernel/asm-offsets_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/asm-offsets_64.c 2009-05-24 18:10:25.010210840 -0700 +@@ -122,6 +122,7 @@ int main(void) + ENTRY(cr8); + BLANK(); + #undef ENTRY ++ DEFINE(TSS_size, sizeof(struct tss_struct)); + DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); + BLANK(); + DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); +diff -urNp a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +--- a/arch/x86/kernel/cpu/common.c 2009-05-02 
11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/common.c 2009-05-24 18:10:25.011209990 -0700 +@@ -2,7 +2,6 @@ + #include <linux/kernel.h> + #include <linux/sched.h> + #include <linux/string.h> +-#include <linux/bootmem.h> + #include <linux/bitops.h> + #include <linux/module.h> + #include <linux/kgdb.h> +@@ -41,59 +40,6 @@ + + static struct cpu_dev *this_cpu __cpuinitdata; + +-#ifdef CONFIG_X86_64 +-/* We need valid kernel segments for data and code in long mode too +- * IRET will check the segment types kkeil 2000/10/28 +- * Also sysret mandates a special GDT layout +- */ +-/* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ +-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +-} }; +-#else +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, +- /* +- * Segments used for calling PnP BIOS have byte granularity. +- * They code segments and data segments have fixed 64k limits, +- * the transfer segment sizes are set at run time. 
+- */ +- /* 32-bit code */ +- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, +- /* 16-bit code */ +- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, +- /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, +- /* +- * The APM segments have byte granularity and their bases +- * are set at run time. All have 64k limits. +- */ +- /* 32-bit code */ +- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, +- /* 16-bit code */ +- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, +- /* data */ +- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, +- +- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, +- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +-} }; +-#endif +-EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +- + #ifdef CONFIG_X86_32 + static int cachesize_override __cpuinitdata = -1; + static int disable_x86_serial_nr __cpuinitdata = 1; +@@ -227,7 +173,7 @@ void switch_to_new_gdt(void) + { + struct desc_ptr gdt_descr; + +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); ++ gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + #ifdef CONFIG_X86_32 +@@ -687,6 +633,10 @@ static void __cpuinit identify_cpu(struc + * we do "generic changes." + */ + ++#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) ++ setup_clear_cpu_cap(X86_FEATURE_SEP); ++#endif ++ + /* If the model name is still unset, do table lookup. 
*/ + if (!c->x86_model_id[0]) { + char *p; +@@ -854,13 +804,13 @@ static __init int setup_disablecpuid(cha + } + __setup("clearcpuid=", setup_disablecpuid); + +-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; ++cpumask_t cpu_initialized = CPU_MASK_NONE; + + #ifdef CONFIG_X86_64 + struct x8664_pda **_cpu_pda __read_mostly; + EXPORT_SYMBOL(_cpu_pda); + +-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; ++struct desc_ptr idt_descr __read_only = { 256 * 16 - 1, (unsigned long) idt_table }; + + char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +@@ -959,7 +909,7 @@ struct pt_regs * __cpuinit idle_regs(str + void __cpuinit cpu_init(void) + { + int cpu = stack_smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; +@@ -1080,7 +1030,7 @@ void __cpuinit cpu_init(void) + { + int cpu = smp_processor_id(); + struct task_struct *curr = current; +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { +diff -urNp a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 2009-05-24 18:10:25.011209990 -0700 +@@ -561,7 +561,7 @@ static const struct dmi_system_id sw_any + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +diff -urNp a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c 2009-05-24 18:10:25.012210328 -0700 +@@ -225,7 
+225,7 @@ static struct cpu_model models[] = + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, + +- { NULL, } ++ { NULL, NULL, 0, NULL} + }; + #undef _BANIAS + #undef BANIAS +diff -urNp a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +--- a/arch/x86/kernel/cpu/intel.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/intel.c 2009-05-24 18:10:25.013209967 -0700 +@@ -85,7 +85,7 @@ static void __cpuinit trap_init_f00f_bug + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ +- idt_descr.address = fix_to_virt(FIX_F00F_IDT); ++ idt_descr.address = (struct desc_struct *)fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); + } + #endif +diff -urNp a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c +--- a/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/mcheck/mce_64.c 2009-05-24 18:10:25.013209967 -0700 +@@ -678,6 +678,7 @@ static struct miscdevice mce_log_device + MISC_MCELOG_MINOR, + "mcelog", + &mce_chrdev_ops, ++ {NULL, NULL}, NULL, NULL + }; + + static unsigned long old_cr4 __initdata; +diff -urNp a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c +--- a/arch/x86/kernel/cpu/mtrr/generic.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/cpu/mtrr/generic.c 2009-05-24 18:10:25.014210375 -0700 +@@ -31,11 +31,11 @@ static struct fixed_range_block fixed_ra + { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ + { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ + { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ +- {} ++ { 0, 0 } + }; + + static unsigned long smp_changes_mask; +-static struct mtrr_state mtrr_state = {}; ++static struct mtrr_state mtrr_state; + static int mtrr_state_set; + u64 mtrr_tom2; + +diff -urNp a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c +--- a/arch/x86/kernel/crash.c 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/arch/x86/kernel/crash.c 2009-05-24 18:10:25.014210375 -0700 +@@ -59,7 +59,7 @@ static int crash_nmi_callback(struct not + local_irq_disable(); + + #ifdef CONFIG_X86_32 +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + crash_fixup_ss_esp(&fixed_regs, regs); + regs = &fixed_regs; + } +diff -urNp a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c +--- a/arch/x86/kernel/doublefault_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/doublefault_32.c 2009-05-24 18:10:25.015209456 -0700 +@@ -11,7 +11,7 @@ + + #define DOUBLEFAULT_STACKSIZE (1024) + static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; +-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) ++#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) + + #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) + +@@ -21,7 +21,7 @@ static void doublefault_fn(void) + unsigned long gdt, tss; + + store_gdt(&gdt_desc); +- gdt = gdt_desc.address; ++ gdt = (unsigned long)gdt_desc.address; + + printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); + +@@ -60,10 +60,10 @@ struct tss_struct doublefault_tss __cach + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, +- .es = __USER_DS, ++ .es = __KERNEL_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, +- .ds = __USER_DS, ++ .ds = __KERNEL_DS, + .fs = __KERNEL_PERCPU, + + .__cr3 = __pa_nodebug(swapper_pg_dir), +diff -urNp a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c +--- a/arch/x86/kernel/dumpstack_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/dumpstack_32.c 2009-05-24 18:10:25.016209654 -0700 +@@ -215,7 +215,7 @@ void dump_stack(void) + #endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", +- current->pid, current->comm, print_tainted(), ++ task_pid_nr(current), current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), 
+ init_utsname()->version); +@@ -238,11 +238,12 @@ void show_registers(struct pt_regs *regs + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; ++ unsigned long cs_base = get_desc_base(&get_cpu_gdt_table(smp_processor_id())[(0xffff & regs->cs) >> 3]); + + printk(KERN_EMERG "Stack:\n"); + show_stack_log_lvl(NULL, regs, &regs->sp, +@@ -250,10 +251,10 @@ void show_registers(struct pt_regs *regs + + printk(KERN_EMERG "Code: "); + +- ip = (u8 *)regs->ip - code_prologue; ++ ip = (u8 *)regs->ip - code_prologue + cs_base; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at IP */ +- ip = (u8 *)regs->ip; ++ ip = (u8 *)regs->ip + cs_base; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { +@@ -262,7 +263,7 @@ void show_registers(struct pt_regs *regs + printk(" Bad EIP value."); + break; + } +- if (ip == (u8 *)regs->ip) ++ if (ip == (u8 *)regs->ip + cs_base) + printk("<%02x> ", c); + else + printk("%02x ", c); +@@ -275,6 +276,7 @@ int is_valid_bugaddr(unsigned long ip) + { + unsigned short ud2; + ++ ip = ktla_ktva(ip); + if (ip < PAGE_OFFSET) + return 0; + if (probe_kernel_address((unsigned short *)ip, ud2)) +@@ -410,7 +412,7 @@ die_nmi(char *str, struct pt_regs *regs, + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } +diff -urNp a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c +--- a/arch/x86/kernel/dumpstack_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/dumpstack_64.c 2009-05-24 18:10:25.017209851 -0700 +@@ -361,7 +361,7 @@ void dump_stack(void) + #endif + + printk("Pid: %d, comm: 
%.20s %s %s %.*s\n", +- current->pid, current->comm, print_tainted(), ++ task_pid_nr(current), current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +diff -urNp a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c +--- a/arch/x86/kernel/efi_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/efi_32.c 2009-05-24 18:10:25.018209910 -0700 +@@ -38,70 +38,38 @@ + */ + + static unsigned long efi_rt_eflags; +-static pgd_t efi_bak_pg_dir_pointer[2]; ++static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS]; + +-void efi_call_phys_prelog(void) ++void __init efi_call_phys_prelog(void) + { +- unsigned long cr4; +- unsigned long temp; + struct desc_ptr gdt_descr; + + local_irq_save(efi_rt_eflags); + +- /* +- * If I don't have PAE, I should just duplicate two entries in page +- * directory. If I have PAE, I just need to duplicate one entry in +- * page directory. +- */ +- cr4 = read_cr4_safe(); + +- if (cr4 & X86_CR4_PAE) { +- efi_bak_pg_dir_pointer[0].pgd = +- swapper_pg_dir[pgd_index(0)].pgd; +- swapper_pg_dir[0].pgd = +- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; +- } else { +- efi_bak_pg_dir_pointer[0].pgd = +- swapper_pg_dir[pgd_index(0)].pgd; +- efi_bak_pg_dir_pointer[1].pgd = +- swapper_pg_dir[pgd_index(0x400000)].pgd; +- swapper_pg_dir[pgd_index(0)].pgd = +- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; +- temp = PAGE_OFFSET + 0x400000; +- swapper_pg_dir[pgd_index(0x400000)].pgd = +- swapper_pg_dir[pgd_index(temp)].pgd; +- } ++ clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); ++ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, ++ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + + /* + * After the lock is released, the original page table is restored. 
+ */ + __flush_tlb_all(); + +- gdt_descr.address = __pa(get_cpu_gdt_table(0)); ++ gdt_descr.address = (struct desc_struct *)__pa(get_cpu_gdt_table(0)); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + } + +-void efi_call_phys_epilog(void) ++void __init efi_call_phys_epilog(void) + { +- unsigned long cr4; + struct desc_ptr gdt_descr; + +- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); ++ gdt_descr.address = get_cpu_gdt_table(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + +- cr4 = read_cr4_safe(); +- +- if (cr4 & X86_CR4_PAE) { +- swapper_pg_dir[pgd_index(0)].pgd = +- efi_bak_pg_dir_pointer[0].pgd; +- } else { +- swapper_pg_dir[pgd_index(0)].pgd = +- efi_bak_pg_dir_pointer[0].pgd; +- swapper_pg_dir[pgd_index(0x400000)].pgd = +- efi_bak_pg_dir_pointer[1].pgd; +- } ++ clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); + + /* + * After the lock is released, the original page table is restored. +diff -urNp a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S +--- a/arch/x86/kernel/efi_stub_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/efi_stub_32.S 2009-05-24 18:10:25.019209619 -0700 +@@ -6,6 +6,7 @@ + */ + + #include <linux/linkage.h> ++#include <linux/init.h> + #include <asm/page.h> + + /* +@@ -20,7 +21,7 @@ + * service functions will comply with gcc calling convention, too. + */ + +-.text ++__INIT + ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been +@@ -36,9 +37,7 @@ ENTRY(efi_call_phys) + * The mapping of lower virtual memory has been created in prelog and + * epilog. + */ +- movl $1f, %edx +- subl $__PAGE_OFFSET, %edx +- jmp *%edx ++ jmp 1f-__PAGE_OFFSET + 1: + + /* +@@ -47,14 +46,8 @@ ENTRY(efi_call_phys) + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. 
+ */ +- popl %edx +- movl %edx, saved_return_addr +- /* get the function pointer into ECX*/ +- popl %ecx +- movl %ecx, efi_rt_function_ptr +- movl $2f, %edx +- subl $__PAGE_OFFSET, %edx +- pushl %edx ++ popl (saved_return_addr) ++ popl (efi_rt_function_ptr) + + /* + * 3. Clear PG bit in %CR0. +@@ -73,9 +66,8 @@ ENTRY(efi_call_phys) + /* + * 5. Call the physical function. + */ +- jmp *%ecx ++ call *(efi_rt_function_ptr-__PAGE_OFFSET) + +-2: + /* + * 6. After EFI runtime service returns, control will return to + * following instruction. We'd better readjust stack pointer first. +@@ -88,34 +80,27 @@ ENTRY(efi_call_phys) + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 +- jmp 1f +-1: ++ + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ +- movl $1f, %edx +- jmp *%edx ++ jmp 1f+__PAGE_OFFSET + 1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ +- leal efi_rt_function_ptr, %edx +- movl (%edx), %ecx +- pushl %ecx ++ pushl (efi_rt_function_ptr) + + /* +- * 10. Push the saved return address onto the stack and return. ++ * 10. Return to the saved return address. 
+ */ +- leal saved_return_addr, %edx +- movl (%edx), %ecx +- pushl %ecx +- ret ++ jmpl *(saved_return_addr) + .previous + +-.data ++__INITDATA + saved_return_addr: + .long 0 + efi_rt_function_ptr: +diff -urNp a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S +--- a/arch/x86/kernel/entry_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/entry_32.S 2009-05-24 18:10:25.020209887 -0700 +@@ -101,7 +101,7 @@ + #define resume_userspace_sig resume_userspace + #endif + +-#define SAVE_ALL \ ++#define __SAVE_ALL(_DS) \ + cld; \ + pushl %fs; \ + CFI_ADJUST_CFA_OFFSET 4;\ +@@ -133,12 +133,26 @@ + pushl %ebx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebx, 0;\ +- movl $(__USER_DS), %edx; \ ++ movl $(_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ + movl $(__KERNEL_PERCPU), %edx; \ + movl %edx, %fs + ++#ifdef CONFIG_PAX_KERNEXEC ++#define SAVE_ALL \ ++ __SAVE_ALL(__KERNEL_DS); \ ++ GET_CR0_INTO_EDX; \ ++ movl %edx, %esi; \ ++ orl $X86_CR0_WP, %edx; \ ++ xorl %edx, %esi; \ ++ SET_CR0_FROM_EDX ++#elif defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) ++#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) ++#else ++#define SAVE_ALL __SAVE_ALL(__USER_DS) ++#endif ++ + #define RESTORE_INT_REGS \ + popl %ebx; \ + CFI_ADJUST_CFA_OFFSET -4;\ +@@ -229,6 +243,11 @@ ENTRY(ret_from_fork) + CFI_ADJUST_CFA_OFFSET 4 + popfl + CFI_ADJUST_CFA_OFFSET -4 ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ xorl %esi, %esi ++#endif ++ + jmp syscall_exit + CFI_ENDPROC + END(ret_from_fork) +@@ -252,7 +271,17 @@ check_userspace: + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ jae resume_userspace ++ ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++ jmp resume_kernel ++#else + jb resume_kernel # not returning to v8086 or userspace ++#endif + + ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT +@@ -314,10 +343,9 @@ sysenter_past_esp: + /*CFI_REL_OFFSET cs, 0*/ + /* + * 
Push current_thread_info()->sysenter_return to the stack. +- * A tiny bit of offset fixup is necessary - 4*4 means the 4 words +- * pushed above; +8 corresponds to copy_thread's esp0 setting. + */ +- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++ GET_THREAD_INFO(%ebp) ++ pushl TI_sysenter_return(%ebp) + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 + +@@ -330,9 +358,19 @@ sysenter_past_esp: + * Load the potential sixth argument from user stack. + * Careful about security. + */ ++ movl PT_OLDESP(%esp),%ebp ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ mov PT_OLDSS(%esp),%ds ++1: movl %ds:(%ebp),%ebp ++ push %ss ++ pop %ds ++#else + cmpl $__PAGE_OFFSET-3,%ebp + jae syscall_fault + 1: movl (%ebp),%ebp ++#endif ++ + movl %ebp,PT_EBP(%esp) + .section __ex_table,"a" + .align 4 +@@ -356,12 +394,23 @@ sysenter_do_call: + testw $_TIF_ALLWORK_MASK, %cx + jne sysexit_audit + sysenter_exit: ++ ++#ifdef CONFIG_PAX_RANDKSTACK ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call pax_randomize_kstack ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++#endif ++ + /* if something modifies registers it must also disable sysexit */ + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON + 1: mov PT_FS(%esp), %fs ++2: mov PT_DS(%esp), %ds ++3: mov PT_ES(%esp), %es + ENABLE_INTERRUPTS_SYSEXIT + + #ifdef CONFIG_AUDITSYSCALL +@@ -404,11 +453,17 @@ sysexit_audit: + + CFI_ENDPROC + .pushsection .fixup,"ax" +-2: movl $0,PT_FS(%esp) ++4: movl $0,PT_FS(%esp) ++ jmp 1b ++5: movl $0,PT_DS(%esp) ++ jmp 1b ++6: movl $0,PT_ES(%esp) + jmp 1b + .section __ex_table,"a" + .align 4 +- .long 1b,2b ++ .long 1b,4b ++ .long 2b,5b ++ .long 3b,6b + .popsection + ENDPROC(ia32_sysenter_target) + +@@ -438,6 +493,10 @@ syscall_exit: + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + ++#ifdef CONFIG_PAX_RANDKSTACK ++ call pax_randomize_kstack ++#endif ++ + restore_all: + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: PT_OLDSS(%esp) contains the 
wrong/random values if we +@@ -531,25 +590,19 @@ work_resched: + + work_notifysig: # deal with pending signals and + # notify-resume requests ++ movl %esp, %eax + #ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) +- movl %esp, %eax +- jne work_notifysig_v86 # returning to kernel-space or ++ jz 1f # returning to kernel-space or + # vm86-space +- xorl %edx, %edx +- call do_notify_resume +- jmp resume_userspace_sig + +- ALIGN +-work_notifysig_v86: + pushl %ecx # save ti_flags for do_notify_resume + CFI_ADJUST_CFA_OFFSET 4 + call save_v86_state # %eax contains pt_regs pointer + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + movl %eax, %esp +-#else +- movl %esp, %eax ++1: + #endif + xorl %edx, %edx + call do_notify_resume +@@ -584,6 +637,10 @@ END(syscall_exit_work) + + RING0_INT_FRAME # can't unwind into user space anyway + syscall_fault: ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ push %ss ++ pop %ds ++#endif + GET_THREAD_INFO(%ebp) + movl $-EFAULT,PT_EAX(%esp) + jmp resume_userspace +@@ -595,17 +652,24 @@ syscall_badsys: + END(syscall_badsys) + CFI_ENDPROC + +-#define FIXUP_ESPFIX_STACK \ +- /* since we are on a wrong stack, we cant make it a C code :( */ \ +- PER_CPU(gdt_page, %ebx); \ +- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +- addl %esp, %eax; \ +- pushl $__KERNEL_DS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- lss (%esp), %esp; \ ++.macro FIXUP_ESPFIX_STACK ++ /* since we are on a wrong stack, we cant make it a C code :( */ ++#ifdef CONFIG_SMP ++ movl PER_CPU_VAR(cpu_number), %ebx; ++ shll $PAGE_SHIFT_asm, %ebx; ++ addl $cpu_gdt_table, %ebx; ++#else ++ movl $cpu_gdt_table, %ebx; ++#endif ++ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); ++ addl %esp, %eax; ++ pushl $__KERNEL_DS; ++ CFI_ADJUST_CFA_OFFSET 4; ++ pushl %eax; ++ CFI_ADJUST_CFA_OFFSET 4; ++ lss (%esp), %esp; + CFI_ADJUST_CFA_OFFSET -8; ++.endm + #define UNWIND_ESPFIX_STACK \ + movl %ss, %eax; \ + /* see if on espfix stack */ \ 
+@@ -622,7 +686,7 @@ END(syscall_badsys) + * Build the entry stubs and pointer table with + * some assembler magic. + */ +-.section .rodata,"a" ++.section .rodata,"a",@progbits + ENTRY(interrupt) + .text + +@@ -722,12 +786,21 @@ error_code: + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ movl %edx, %esi ++ orl $X86_CR0_WP, %edx ++ xorl %edx, %esi ++ SET_CR0_FROM_EDX ++#endif ++ + movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ +- movl $(__USER_DS), %ecx ++ movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + TRACE_IRQS_OFF +@@ -853,6 +926,13 @@ nmi_stack_correct: + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++#endif ++ + jmp restore_nocheck_notrace + CFI_ENDPROC + +@@ -894,6 +974,13 @@ nmi_espfix_stack: + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ GET_CR0_INTO_EDX ++ xorl %esi, %edx ++ SET_CR0_FROM_EDX ++#endif ++ + RESTORE_REGS + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 +@@ -1206,7 +1293,6 @@ END(mcount) + #endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_TRACER */ + +-.section .rodata,"a" + #include "syscall_table_32.S" + + syscall_table_size=(.-sys_call_table) +diff -urNp a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S +--- a/arch/x86/kernel/entry_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/entry_64.S 2009-05-24 18:10:25.021210853 -0700 +@@ -911,7 +911,8 @@ END(spurious_interrupt) + xorl %ebx,%ebx + 1: + .if \ist +- movq %gs:pda_data_offset, %rbp ++ imul $TSS_size, %gs:pda_cpunumber, %ebp ++ lea init_tss(%rbp), %rbp + .endif + .if \irqtrace + 
TRACE_IRQS_OFF +@@ -920,11 +921,11 @@ END(spurious_interrupt) + movq ORIG_RAX(%rsp),%rsi + movq $-1,ORIG_RAX(%rsp) + .if \ist +- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + .endif + call \sym + .if \ist +- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + .endif + DISABLE_INTERRUPTS(CLBR_NONE) + .if \irqtrace +diff -urNp a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c +--- a/arch/x86/kernel/ftrace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/ftrace.c 2009-05-24 18:10:25.021210853 -0700 +@@ -95,9 +95,9 @@ int ftrace_update_ftrace_func(ftrace_fun + unsigned char old[MCOUNT_INSN_SIZE], *new; + int ret; + +- memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); ++ memcpy(old, (void *)ktla_ktva((unsigned long)ftrace_call), MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); +- ret = ftrace_modify_code(ip, old, new); ++ ret = ftrace_modify_code(ktla_ktva(ip), old, new); + + return ret; + } +diff -urNp a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c +--- a/arch/x86/kernel/head32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/head32.c 2009-05-24 18:10:25.021210853 -0700 +@@ -12,10 +12,11 @@ + #include <asm/sections.h> + #include <asm/e820.h> + #include <asm/bios_ebda.h> ++#include <asm/boot.h> + + void __init i386_start_kernel(void) + { +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); ++ reserve_early(LOAD_PHYSICAL_ADDR, __pa_symbol(&_end), "TEXT DATA BSS"); + + #ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ +diff -urNp a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c +--- a/arch/x86/kernel/head64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/head64.c 2009-05-24 18:10:25.022209724 -0700 +@@ -93,6 +93,8 @@ void __init x86_64_start_kernel(char * r + /* clear bss before set_intr_gate with 
early_idt_handler */ + clear_bss(); + ++ x86_64_init_pda(); ++ + /* Make NULL pointers segfault */ + zap_identity_mappings(); + +@@ -111,8 +113,6 @@ void __init x86_64_start_kernel(char * r + if (console_loglevel == 10) + early_printk("Kernel alive\n"); + +- x86_64_init_pda(); +- + x86_64_start_reservations(real_mode_data); + } + +diff -urNp a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S +--- a/arch/x86/kernel/head_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/head_32.S 2009-05-24 18:10:25.023209433 -0700 +@@ -19,6 +19,7 @@ + #include <asm/asm-offsets.h> + #include <asm/setup.h> + #include <asm/processor-flags.h> ++#include <asm/msr-index.h> + + /* Physical address */ + #define pa(X) ((X) - __PAGE_OFFSET) +@@ -64,17 +65,22 @@ LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) + LOW_PAGES = LOW_PAGES + 0x1000000 + #endif + +-#if PTRS_PER_PMD > 1 +-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD +-#else +-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) +-#endif ++PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PTE) + BOOTBITMAP_SIZE = LOW_PAGES / 8 + ALLOCATOR_SLOP = 4 + + INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm + + /* ++ * Real beginning of normal "text" segment ++ */ ++ENTRY(stext) ++ENTRY(_stext) ++ ++.section .text.startup,"ax",@progbits ++ ljmp $(__BOOT_CS),$phys_startup_32 ++ ++/* + * 32-bit kernel entrypoint; only used by the boot CPU. On entry, + * %esi points to the real-mode code as a 32-bit pointer. + * CS and DS must be 4 GB flat segments, but we don't depend on +@@ -82,6 +88,12 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + + * can. 
+ */ + .section .text.head,"ax",@progbits ++ ++#ifdef CONFIG_PAX_KERNEXEC ++/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ ++.fill 4096,1,0xcc ++#endif ++ + ENTRY(startup_32) + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + us to not reload segments */ +@@ -99,6 +111,56 @@ ENTRY(startup_32) + movl %eax,%gs + 2: + ++ movl $pa(cpu_gdt_table),%edi ++ movl $__per_cpu_start,%eax ++ movw %ax,__KERNEL_PERCPU + 2(%edi) ++ rorl $16,%eax ++ movb %al,__KERNEL_PERCPU + 4(%edi) ++ movb %ah,__KERNEL_PERCPU + 7(%edi) ++ movl $__per_cpu_end + PERCPU_MODULE_RESERVE - 1,%eax ++ subl $__per_cpu_start,%eax ++ movw %ax,__KERNEL_PERCPU + 0(%edi) ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ /* check for VMware */ ++ movl $0x564d5868,%eax ++ xorl %ebx,%ebx ++ movl $0xa,%ecx ++ movl $0x5658,%edx ++ in (%dx),%eax ++ cmpl $0x564d5868,%ebx ++ jz 2f ++ ++ movl $NR_CPUS,%ecx ++ movl $pa(cpu_gdt_table),%edi ++1: ++ movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),GDT_ENTRY_KERNEL_DS * 8 + 4(%edi) ++ addl $PAGE_SIZE_asm,%edi ++ loop 1b ++2: ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ movl $pa(boot_gdt),%edi ++ movl $KERNEL_TEXT_OFFSET,%eax ++ movw %ax,__BOOT_CS + 2(%edi) ++ rorl $16,%eax ++ movb %al,__BOOT_CS + 4(%edi) ++ movb %ah,__BOOT_CS + 7(%edi) ++ rorl $16,%eax ++ ++ movl $NR_CPUS,%ecx ++ movl $pa(cpu_gdt_table),%edi ++1: ++ movw %ax,__KERNEL_CS + 2(%edi) ++ rorl $16,%eax ++ movb %al,__KERNEL_CS + 4(%edi) ++ movb %ah,__KERNEL_CS + 7(%edi) ++ rorl $16,%eax ++ addl $PAGE_SIZE_asm,%edi ++ loop 1b ++#endif ++ + /* + * Clear BSS first so that there are no surprises... 
+ */ +@@ -142,9 +204,7 @@ ENTRY(startup_32) + cmpl $num_subarch_entries, %eax + jae bad_subarch + +- movl pa(subarch_entries)(,%eax,4), %eax +- subl $__PAGE_OFFSET, %eax +- jmp *%eax ++ jmp *pa(subarch_entries)(,%eax,4) + + bad_subarch: + WEAK(lguest_entry) +@@ -156,9 +216,9 @@ WEAK(xen_entry) + __INITDATA + + subarch_entries: +- .long default_entry /* normal x86/PC */ +- .long lguest_entry /* lguest hypervisor */ +- .long xen_entry /* Xen hypervisor */ ++ .long pa(default_entry) /* normal x86/PC */ ++ .long pa(lguest_entry) /* lguest hypervisor */ ++ .long pa(xen_entry) /* Xen hypervisor */ + num_subarch_entries = (. - subarch_entries) / 4 + .previous + #endif /* CONFIG_PARAVIRT */ +@@ -220,8 +280,7 @@ default_entry: + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ +- movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax +- movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) ++ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_pmd+0x1000*KPMDS-8) + #else /* Not PAE */ + + page_pde_offset = (__PAGE_OFFSET >> 20); +@@ -253,8 +312,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ +- movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax +- movl %eax,pa(swapper_pg_dir+0xffc) ++ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_dir+0xffc) + #endif + jmp 3f + /* +@@ -318,13 +376,16 @@ ENTRY(startup_32_smp) + jnc 6f + + /* Setup EFER (Extended Feature Enable Register) */ +- movl $0xc0000080, %ecx ++ movl $MSR_EFER, %ecx + rdmsr + + btsl $11, %eax + /* Make changes effective */ + wrmsr + ++ btsl $_PAGE_BIT_NX-32,pa(__supported_pte_mask+4) ++ movl $1,pa(nx_enabled) ++ + 6: + + /* +@@ -350,9 +411,7 @@ ENTRY(startup_32_smp) + + #ifdef CONFIG_SMP + cmpb $0, ready +- jz 1f /* Initial CPU cleans BSS */ +- jmp checkCPUtype +-1: ++ jnz checkCPUtype /* Initial CPU cleans BSS */ + #endif /* CONFIG_SMP */ + + /* +@@ -429,12 +488,12 @@ is386: movl $2,%ecx # set MP + ljmp 
$(__KERNEL_CS),$1f + 1: movl $(__KERNEL_DS),%eax # reload all the segment registers + movl %eax,%ss # after changing gdt. +- movl %eax,%fs # gets reset once there's real percpu +- +- movl $(__USER_DS),%eax # DS/ES contains default USER segment + movl %eax,%ds + movl %eax,%es + ++ movl $(__KERNEL_PERCPU), %eax ++ movl %eax,%fs # set this cpu's percpu ++ + xorl %eax,%eax # Clear GS and LDT + movl %eax,%gs + lldt %ax +@@ -444,12 +503,6 @@ is386: movl $2,%ecx # set MP + #ifdef CONFIG_SMP + movb ready, %cl + movb $1, ready +- cmpb $0,%cl # the first CPU calls start_kernel +- je 1f +- movl $(__KERNEL_PERCPU), %eax +- movl %eax,%fs # set this cpu's percpu +- movl (stack_start), %esp +-1: + #endif /* CONFIG_SMP */ + jmp *(initial_code) + +@@ -535,15 +588,15 @@ early_page_fault: + jmp early_fault + + early_fault: +- cld + #ifdef CONFIG_PRINTK ++ cmpl $2,%ss:early_recursion_flag ++ je hlt_loop ++ incl %ss:early_recursion_flag ++ cld + pusha + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es +- cmpl $2,early_recursion_flag +- je hlt_loop +- incl early_recursion_flag + movl %cr2,%eax + pushl %eax + pushl %edx /* trapno */ +@@ -553,8 +606,8 @@ early_fault: + #else + call printk + #endif +-#endif + call dump_stack ++#endif + hlt_loop: + hlt + jmp hlt_loop +@@ -562,8 +615,11 @@ hlt_loop: + /* This is the default interrupt "handler" :-) */ + ALIGN + ignore_int: +- cld + #ifdef CONFIG_PRINTK ++ cmpl $2,%ss:early_recursion_flag ++ je hlt_loop ++ incl %ss:early_recursion_flag ++ cld + pushl %eax + pushl %ecx + pushl %edx +@@ -572,9 +628,6 @@ ignore_int: + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es +- cmpl $2,early_recursion_flag +- je hlt_loop +- incl early_recursion_flag + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) +@@ -599,36 +652,41 @@ ignore_int: + ENTRY(initial_code) + .long i386_start_kernel + +-.section .text +-/* +- * Real beginning of normal "text" segment +- */ +-ENTRY(stext) +-ENTRY(_stext) +- + /* + * BSS section + */ +-.section 
".bss.page_aligned","wa" +- .align PAGE_SIZE_asm + #ifdef CONFIG_X86_PAE ++.section .swapper_pg_pmd,"a",@progbits + swapper_pg_pmd: + .fill 1024*KPMDS,4,0 + #else ++.section .swapper_pg_dir,"a",@progbits + ENTRY(swapper_pg_dir) + .fill 1024,4,0 + #endif + swapper_pg_fixmap: + .fill 1024,4,0 ++ ++.section .empty_zero_page,"a",@progbits + ENTRY(empty_zero_page) + .fill 4096,1,0 ++ ++/* ++ * The IDT has to be page-aligned to simplify the Pentium ++ * F0 0F bug workaround.. We have a special link segment ++ * for this. ++ */ ++.section .idt,"a",@progbits ++ENTRY(idt_table) ++ .fill 256,8,0 ++ + /* + * This starts the data section. + */ ++.data ++ + #ifdef CONFIG_X86_PAE +-.section ".data.page_aligned","wa" +- /* Page-aligned for the benefit of paravirt? */ +- .align PAGE_SIZE_asm ++.section .swapper_pg_dir,"a",@progbits + ENTRY(swapper_pg_dir) + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ + # if KPMDS == 3 +@@ -651,11 +709,12 @@ ENTRY(swapper_pg_dir) + + .data + ENTRY(stack_start) +- .long init_thread_union+THREAD_SIZE ++ .long init_thread_union+THREAD_SIZE-8 + .long __BOOT_DS + + ready: .byte 0 + ++.section .rodata,"a",@progbits + early_recursion_flag: + .long 0 + +@@ -691,7 +750,7 @@ fault_msg: + .word 0 # 32 bit align gdt_desc.address + boot_gdt_descr: + .word __BOOT_DS+7 +- .long boot_gdt - __PAGE_OFFSET ++ .long pa(boot_gdt) + + .word 0 # 32-bit align idt_desc.address + idt_descr: +@@ -702,7 +761,7 @@ idt_descr: + .word 0 # 32 bit align gdt_desc.address + ENTRY(early_gdt_descr) + .word GDT_ENTRIES*8-1 +- .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ ++ .long cpu_gdt_table /* Overwritten for secondary CPUs */ + + /* + * The boot_gdt must mirror the equivalent in setup.S and is +@@ -711,5 +770,59 @@ ENTRY(early_gdt_descr) + .align L1_CACHE_BYTES + ENTRY(boot_gdt) + .fill GDT_ENTRY_BOOT_CS,8,0 +- .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ +- .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++ .quad 
0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ ++ ++ .align PAGE_SIZE_asm ++ENTRY(cpu_gdt_table) ++ .rept NR_CPUS ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x0000000000000000 /* 0x0b reserved */ ++ .quad 0x0000000000000000 /* 0x13 reserved */ ++ .quad 0x0000000000000000 /* 0x1b reserved */ ++ .quad 0x0000000000000000 /* 0x20 unused */ ++ .quad 0x0000000000000000 /* 0x28 unused */ ++ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ ++ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ ++ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ ++ .quad 0x0000000000000000 /* 0x4b reserved */ ++ .quad 0x0000000000000000 /* 0x53 reserved */ ++ .quad 0x0000000000000000 /* 0x5b reserved */ ++ ++ .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ ++ .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ ++ .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ ++ ++ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ ++ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ ++ ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * The code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ .quad 0x00409b000000ffff /* 0x90 32-bit code */ ++ .quad 0x00009b000000ffff /* 0x98 16-bit code */ ++ .quad 0x000093000000ffff /* 0xa0 16-bit data */ ++ .quad 0x0000930000000000 /* 0xa8 16-bit data */ ++ .quad 0x0000930000000000 /* 0xb0 16-bit data */ ++ ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. 
++ */ ++ .quad 0x00409b000000ffff /* 0xb8 APM CS code */ ++ .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ ++ .quad 0x004093000000ffff /* 0xc8 APM DS data */ ++ ++ .quad 0x00c0930000000000 /* 0xd0 - ESPFIX SS */ ++ .quad 0x0040930000000000 /* 0xd8 - PERCPU */ ++ .quad 0x0000000000000000 /* 0xe0 - PCIBIOS_CS */ ++ .quad 0x0000000000000000 /* 0xe8 - PCIBIOS_DS */ ++ .quad 0x0000000000000000 /* 0xf0 - unused */ ++ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ ++ /* Be sure this is zeroed to avoid false validations in Xen */ ++ .fill PAGE_SIZE_asm - GDT_SIZE,1,0 ++ .endr +diff -urNp a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +--- a/arch/x86/kernel/head_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/head_64.S 2009-05-24 18:10:25.024209631 -0700 +@@ -38,6 +38,10 @@ L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET + L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) + L4_START_KERNEL = pgd_index(__START_KERNEL_map) + L3_START_KERNEL = pud_index(__START_KERNEL_map) ++L4_VMALLOC_START = pgd_index(VMALLOC_START) ++L3_VMALLOC_START = pud_index(VMALLOC_START) ++L4_VMEMMAP_START = pgd_index(VMEMMAP_START) ++L3_VMEMMAP_START = pud_index(VMEMMAP_START) + + .text + .section .text.head +@@ -85,35 +89,22 @@ startup_64: + */ + addq %rbp, init_level4_pgt + 0(%rip) + addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) ++ addq %rbp, init_level4_pgt + (L4_VMALLOC_START*8)(%rip) ++ addq %rbp, init_level4_pgt + (L4_VMEMMAP_START*8)(%rip) + addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) + + addq %rbp, level3_ident_pgt + 0(%rip) ++ addq %rbp, level3_ident_pgt + 8(%rip) ++ addq %rbp, level3_ident_pgt + 16(%rip) ++ addq %rbp, level3_ident_pgt + 24(%rip) + +- addq %rbp, level3_kernel_pgt + (510*8)(%rip) +- addq %rbp, level3_kernel_pgt + (511*8)(%rip) ++ addq %rbp, level3_vmemmap_pgt + (L3_VMEMMAP_START*8)(%rip) + +- addq %rbp, level2_fixmap_pgt + (506*8)(%rip) ++ addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8)(%rip) ++ addq %rbp, 
level3_kernel_pgt + (L3_START_KERNEL*8+8)(%rip) + +- /* Add an Identity mapping if I am above 1G */ +- leaq _text(%rip), %rdi +- andq $PMD_PAGE_MASK, %rdi +- +- movq %rdi, %rax +- shrq $PUD_SHIFT, %rax +- andq $(PTRS_PER_PUD - 1), %rax +- jz ident_complete +- +- leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx +- leaq level3_ident_pgt(%rip), %rbx +- movq %rdx, 0(%rbx, %rax, 8) +- +- movq %rdi, %rax +- shrq $PMD_SHIFT, %rax +- andq $(PTRS_PER_PMD - 1), %rax +- leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx +- leaq level2_spare_pgt(%rip), %rbx +- movq %rdx, 0(%rbx, %rax, 8) +-ident_complete: ++ addq %rbp, level2_fixmap_pgt + (506*8)(%rip) ++ addq %rbp, level2_fixmap_pgt + (507*8)(%rip) + + /* + * Fixup the kernel text+data virtual addresses. Note that +@@ -187,6 +178,10 @@ ENTRY(secondary_startup_64) + btl $20,%edi /* No Execute supported? */ + jnc 1f + btsl $_EFER_NX, %eax ++ leaq init_level4_pgt(%rip), %rdi ++ btsq $_PAGE_BIT_NX, 8*L4_PAGE_OFFSET(%rdi) ++ btsq $_PAGE_BIT_NX, 8*L4_VMALLOC_START(%rdi) ++ btsq $_PAGE_BIT_NX, 8*L4_VMEMMAP_START(%rdi) + 1: wrmsr /* Make changes effective */ + + /* Setup cr0 */ +@@ -257,16 +252,16 @@ ENTRY(secondary_startup_64) + .align 8 + ENTRY(initial_code) + .quad x86_64_start_kernel +- __FINITDATA + + ENTRY(stack_start) + .quad init_thread_union+THREAD_SIZE-8 + .word 0 ++ __FINITDATA + + bad_address: + jmp bad_address + +- .section ".init.text","ax" ++ __INIT + #ifdef CONFIG_EARLY_PRINTK + .globl early_idt_handlers + early_idt_handlers: +@@ -311,18 +306,23 @@ ENTRY(early_idt_handler) + #endif /* EARLY_PRINTK */ + 1: hlt + jmp 1b ++ .previous + + #ifdef CONFIG_EARLY_PRINTK ++ __INITDATA + early_recursion_flag: + .long 0 ++ .previous + ++ .section .rodata,"a",@progbits + early_idt_msg: + .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" + early_idt_ripmsg: + .asciz "RIP %s\n" +-#endif /* CONFIG_EARLY_PRINTK */ + .previous ++#endif /* CONFIG_EARLY_PRINTK */ + ++ .section 
.rodata,"a",@progbits + .balign PAGE_SIZE + + #define NEXT_PAGE(name) \ +@@ -347,13 +347,27 @@ NEXT_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE ++ .org init_level4_pgt + L4_VMALLOC_START*8, 0 ++ .quad level3_vmalloc_pgt - __START_KERNEL_map + _KERNPG_TABLE ++ .org init_level4_pgt + L4_VMEMMAP_START*8, 0 ++ .quad level3_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_START_KERNEL*8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + + NEXT_PAGE(level3_ident_pgt) + .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +- .fill 511,8,0 ++ .quad level2_ident_pgt + PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .quad level2_ident_pgt + 2*PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .quad level2_ident_pgt + 3*PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE ++ .fill 508,8,0 ++ ++NEXT_PAGE(level3_vmalloc_pgt) ++ .fill 512,8,0 ++ ++NEXT_PAGE(level3_vmemmap_pgt) ++ .fill L3_VMEMMAP_START,8,0 ++ .quad level2_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE + + NEXT_PAGE(level3_kernel_pgt) + .fill L3_START_KERNEL,8,0 +@@ -361,20 +375,27 @@ NEXT_PAGE(level3_kernel_pgt) + .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE + .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE + ++NEXT_PAGE(level2_vmemmap_pgt) ++ .fill 512,8,0 ++ + NEXT_PAGE(level2_fixmap_pgt) + .fill 506,8,0 + .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE +- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ +- .fill 5,8,0 ++ .quad level1_vsyscall_pgt - __START_KERNEL_map + _PAGE_TABLE ++ /* 6MB reserved for vsyscalls + a 2MB hole = 3 + 1 entries */ ++ .fill 4,8,0 + + NEXT_PAGE(level1_fixmap_pgt) + .fill 512,8,0 + +-NEXT_PAGE(level2_ident_pgt) +- /* Since I easily can, map the first 1G. 
++NEXT_PAGE(level1_vsyscall_pgt) ++ .fill 512,8,0 ++ ++ /* Since I easily can, map the first 4G. + * Don't set NX because code runs from these pages. + */ +- PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) ++NEXT_PAGE(level2_ident_pgt) ++ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 4*PTRS_PER_PMD) + + NEXT_PAGE(level2_kernel_pgt) + /* +@@ -387,32 +408,48 @@ NEXT_PAGE(level2_kernel_pgt) + * If you want to increase this then increase MODULES_VADDR + * too.) + */ +- PMDS(0, __PAGE_KERNEL_LARGE_EXEC, +- KERNEL_IMAGE_SIZE/PMD_SIZE) +- +-NEXT_PAGE(level2_spare_pgt) +- .fill 512, 8, 0 ++ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) + + #undef PMDS + #undef NEXT_PAGE + +- .data ++ .align PAGE_SIZE ++ENTRY(cpu_gdt_table) ++ .rept NR_CPUS ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ ++ .quad 0x00af9b000000ffff /* __KERNEL_CS */ ++ .quad 0x00cf93000000ffff /* __KERNEL_DS */ ++ .quad 0x00cffb000000ffff /* __USER32_CS */ ++ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ ++ .quad 0x00affb000000ffff /* __USER_CS */ ++ .quad 0x0 /* unused */ ++ .quad 0,0 /* TSS */ ++ .quad 0,0 /* LDT */ ++ .quad 0,0,0 /* three TLS descriptors */ ++ .quad 0x0000f40000000000 /* node/CPU stored in limit */ ++ /* asm/segment.h:GDT_ENTRIES must match this */ ++ ++ /* zero the remaining page */ ++ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 ++ .endr ++ + .align 16 + .globl early_gdt_descr + early_gdt_descr: + .word GDT_ENTRIES*8-1 +- .quad per_cpu__gdt_page ++ .quad cpu_gdt_table + + ENTRY(phys_base) + /* This must match the first entry in level2_kernel_pgt */ + .quad 0x0000000000000000 + + #include "../../x86/xen/xen-head.S" +- +- .section .bss, "aw", @nobits ++ ++ .section .rodata,"a",@progbits + .align L1_CACHE_BYTES + ENTRY(idt_table) +- .skip 256 * 16 ++ .fill 512,8,0 + + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +diff -urNp a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c +--- 
a/arch/x86/kernel/i386_ksyms_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/i386_ksyms_32.c 2009-05-24 18:10:25.024209631 -0700 +@@ -10,8 +10,12 @@ + EXPORT_SYMBOL(mcount); + #endif + ++EXPORT_SYMBOL_GPL(cpu_gdt_table); ++ + /* Networking helper routines. */ + EXPORT_SYMBOL(csum_partial_copy_generic); ++EXPORT_SYMBOL(csum_partial_copy_generic_to_user); ++EXPORT_SYMBOL(csum_partial_copy_generic_from_user); + + EXPORT_SYMBOL(__get_user_1); + EXPORT_SYMBOL(__get_user_2); +@@ -26,3 +30,7 @@ EXPORT_SYMBOL(strstr); + + EXPORT_SYMBOL(csum_partial); + EXPORT_SYMBOL(empty_zero_page); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++EXPORT_SYMBOL(KERNEL_TEXT_OFFSET); ++#endif +diff -urNp a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c +--- a/arch/x86/kernel/init_task.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/init_task.c 2009-05-24 18:10:25.025209829 -0700 +@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; +- ++struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... 
NR_CPUS-1] = INIT_TSS }; ++EXPORT_SYMBOL(init_tss); +diff -urNp a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c +--- a/arch/x86/kernel/ioport.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/ioport.c 2009-05-24 18:10:25.025209829 -0700 +@@ -6,6 +6,7 @@ + #include <linux/sched.h> + #include <linux/kernel.h> + #include <linux/capability.h> ++#include <linux/security.h> + #include <linux/errno.h> + #include <linux/types.h> + #include <linux/ioport.h> +@@ -41,6 +42,12 @@ asmlinkage long sys_ioperm(unsigned long + + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) + return -EINVAL; ++#ifdef CONFIG_GRKERNSEC_IO ++ if (turn_on) { ++ gr_handle_ioperm(); ++ return -EPERM; ++ } ++#endif + if (turn_on && !capable(CAP_SYS_RAWIO)) + return -EPERM; + +@@ -67,7 +74,7 @@ asmlinkage long sys_ioperm(unsigned long + * because the ->io_bitmap_max value must match the bitmap + * contents: + */ +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + + set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); + +@@ -122,8 +129,13 @@ static int do_iopl(unsigned int level, s + return -EINVAL; + /* Trying to gain more privileges? 
*/ + if (level > old) { ++#ifdef CONFIG_GRKERNSEC_IO ++ gr_handle_iopl(); ++ return -EPERM; ++#else + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; ++#endif + } + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); + +diff -urNp a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c +--- a/arch/x86/kernel/irq_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/irq_32.c 2009-05-24 18:10:25.025209829 -0700 +@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struc + return 0; + + /* build the stack frame on the IRQ stack */ +- isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); ++ isp = (u32 *) ((char*)irqctx + sizeof(*irqctx) - 8); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; + +@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void) + irqctx->tinfo.previous_esp = current_stack_pointer; + + /* build the stack frame on the softirq stack */ +- isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx) - 8); + + call_on_stack(__do_softirq, isp); + /* +diff -urNp a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c +--- a/arch/x86/kernel/kprobes.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/kprobes.c 2009-05-24 18:10:25.026209678 -0700 +@@ -166,9 +166,24 @@ static void __kprobes set_jmp_op(void *f + char op; + s32 raddr; + } __attribute__((packed)) * jop; +- jop = (struct __arch_jmp_op *)from; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++ jop = (struct __arch_jmp_op *)(ktla_ktva(from)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); + jop->op = RELATIVEJUMP_INSTRUCTION; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } + + /* +@@ -342,16 +357,29 @@ static void __kprobes fix_riprel(struct + + static void __kprobes arch_copy_kprobe(struct kprobe *p) + { +- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * 
sizeof(kprobe_opcode_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ memcpy(p->ainsn.insn, ktla_ktva(p->addr), MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif + + fix_riprel(p); + +- if (can_boost(p->addr)) ++ if (can_boost(ktla_ktva(p->addr))) + p->ainsn.boostable = 0; + else + p->ainsn.boostable = -1; + +- p->opcode = *p->addr; ++ p->opcode = *(ktla_ktva(p->addr)); + } + + int __kprobes arch_prepare_kprobe(struct kprobe *p) +@@ -428,7 +456,7 @@ static void __kprobes prepare_singlestep + if (p->opcode == BREAKPOINT_INSTRUCTION) + regs->ip = (unsigned long)p->addr; + else +- regs->ip = (unsigned long)p->ainsn.insn; ++ regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); + } + + void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, +@@ -449,7 +477,7 @@ static void __kprobes setup_singlestep(s + if (p->ainsn.boostable == 1 && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); +- regs->ip = (unsigned long)p->ainsn.insn; ++ regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); + preempt_enable_no_resched(); + return; + } +@@ -519,7 +547,7 @@ static int __kprobes kprobe_handler(stru + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); +- if (*addr != BREAKPOINT_INSTRUCTION) { ++ if (*(kprobe_opcode_t *)ktla_ktva((unsigned long)addr) != BREAKPOINT_INSTRUCTION) { + /* + * The breakpoint instruction was removed right + * after we hit it. 
Another cpu has removed +@@ -770,7 +798,7 @@ static void __kprobes resume_execution(s + struct pt_regs *regs, struct kprobe_ctlblk *kcb) + { + unsigned long *tos = stack_addr(regs); +- unsigned long copy_ip = (unsigned long)p->ainsn.insn; ++ unsigned long copy_ip = ktva_ktla((unsigned long)p->ainsn.insn); + unsigned long orig_ip = (unsigned long)p->addr; + kprobe_opcode_t *insn = p->ainsn.insn; + +@@ -953,7 +981,7 @@ int __kprobes kprobe_exceptions_notify(s + struct die_args *args = data; + int ret = NOTIFY_DONE; + +- if (args->regs && user_mode_vm(args->regs)) ++ if (args->regs && user_mode(args->regs)) + return ret; + + switch (val) { +diff -urNp a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c +--- a/arch/x86/kernel/ldt.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/ldt.c 2009-05-24 18:10:25.027210784 -0700 +@@ -66,13 +66,13 @@ static int alloc_ldt(mm_context_t *pc, i + if (reload) { + #ifdef CONFIG_SMP + preempt_disable(); +- load_LDT(pc); ++ load_LDT_nolock(pc); + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) + smp_call_function(flush_ldt, current->mm, 1); + preempt_enable(); + #else +- load_LDT(pc); ++ load_LDT_nolock(pc); + #endif + } + if (oldsize) { +@@ -94,7 +94,7 @@ static inline int copy_ldt(mm_context_t + return err; + + for(i = 0; i < old->size; i++) +- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); ++ write_ldt_entry(new->ldt, i, old->ldt + i); + return 0; + } + +@@ -115,6 +115,24 @@ int init_new_context(struct task_struct + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } ++ ++ if (tsk == current) { ++ mm->context.vdso = ~0UL; ++ ++#ifdef CONFIG_X86_32 ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ mm->context.user_cs_base = 0UL; ++ mm->context.user_cs_limit = ~0UL; ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) ++ cpus_clear(mm->context.cpu_user_cs_mask); ++#endif ++ ++#endif ++#endif ++ ++ } ++ + return 
retval; + } + +@@ -229,6 +247,13 @@ static int write_ldt(void __user *ptr, u + } + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { ++ error = -EINVAL; ++ goto out_unlock; ++ } ++#endif ++ + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; +diff -urNp a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c +--- a/arch/x86/kernel/machine_kexec_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/machine_kexec_32.c 2009-05-24 18:10:25.027210784 -0700 +@@ -34,7 +34,7 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) ++static void set_idt(struct desc_struct *newidt, __u16 limit) + { + struct desc_ptr curidt; + +@@ -46,7 +46,7 @@ static void set_idt(void *newidt, __u16 + } + + +-static void set_gdt(void *newgdt, __u16 limit) ++static void set_gdt(struct desc_struct *newgdt, __u16 limit) + { + struct desc_ptr curgdt; + +@@ -145,7 +145,7 @@ void machine_kexec(struct kimage *image) + } + + control_page = page_address(image->control_code_page); +- memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); ++ memcpy(control_page, (void *)ktla_ktva((unsigned long)relocate_kernel), KEXEC_CONTROL_CODE_MAX_SIZE); + + relocate_kernel_ptr = control_page; + page_list[PA_CONTROL_PAGE] = __pa(control_page); +diff -urNp a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c +--- a/arch/x86/kernel/module_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/module_32.c 2009-05-24 18:10:25.028210842 -0700 +@@ -23,6 +23,9 @@ + #include <linux/kernel.h> + #include <linux/bug.h> + ++#include <asm/desc.h> ++#include <asm/pgtable.h> ++ + #if 0 + #define DEBUGP printk + #else +@@ -33,9 +36,31 @@ void *module_alloc(unsigned long size) + { + if (size == 0) + return NULL; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ return __vmalloc(size, 
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); ++#else + return vmalloc_exec(size); ++#endif ++ + } + ++#ifdef CONFIG_PAX_KERNEXEC ++void *module_alloc_exec(unsigned long size) ++{ ++ struct vm_struct *area; ++ ++ if (size == 0) ++ return NULL; ++ ++ area = __get_vm_area(size, VM_ALLOC, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); ++ if (area) ++ return area->addr; ++ ++ return NULL; ++} ++EXPORT_SYMBOL(module_alloc_exec); ++#endif + + /* Free memory returned from module_alloc */ + void module_free(struct module *mod, void *module_region) +@@ -45,6 +70,45 @@ void module_free(struct module *mod, voi + table entries. */ + } + ++#ifdef CONFIG_PAX_KERNEXEC ++void module_free_exec(struct module *mod, void *module_region) ++{ ++ struct vm_struct **p, *tmp; ++ ++ if (!module_region) ++ return; ++ ++ if ((PAGE_SIZE-1) & (unsigned long)module_region) { ++ printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); ++ WARN_ON(1); ++ return; ++ } ++ ++ write_lock(&vmlist_lock); ++ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) ++ if (tmp->addr == module_region) ++ break; ++ ++ if (tmp) { ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++ memset(tmp->addr, 0xCC, tmp->size); ++ pax_close_kernel(cr0); ++ ++ *p = tmp->next; ++ kfree(tmp); ++ } ++ write_unlock(&vmlist_lock); ++ ++ if (!tmp) { ++ printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", ++ module_region); ++ WARN_ON(1); ++ } ++} ++#endif ++ + /* We don't need anything special. 
*/ + int module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, +@@ -63,14 +127,20 @@ int apply_relocate(Elf32_Shdr *sechdrs, + unsigned int i; + Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; +- uint32_t *location; ++ uint32_t *plocation, location; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ +- location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr +- + rel[i].r_offset; ++ plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; ++ location = (uint32_t)plocation; ++ if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) ++ plocation = ktla_ktva((void *)plocation); + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr +@@ -78,12 +148,32 @@ int apply_relocate(Elf32_Shdr *sechdrs, + + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_386_32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + /* We add the value into the location given */ +- *location += sym->st_value; ++ *plocation += sym->st_value; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + case R_386_PC32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + /* Add the value, subtract its postition */ +- *location += sym->st_value - (uint32_t)location; ++ *plocation += sym->st_value - location; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", +diff -urNp a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c +--- a/arch/x86/kernel/module_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/module_64.c 2009-05-24 18:10:25.028210842 -0700 +@@ -40,7 +40,7 @@ void 
module_free(struct module *mod, voi + table entries. */ + } + +-void *module_alloc(unsigned long size) ++static void *__module_alloc(unsigned long size, pgprot_t prot) + { + struct vm_struct *area; + +@@ -54,8 +54,31 @@ void *module_alloc(unsigned long size) + if (!area) + return NULL; + +- return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); ++ return __vmalloc_area(area, GFP_KERNEL | __GFP_ZERO, prot); ++} ++ ++#ifdef CONFIG_PAX_KERNEXEC ++void *module_alloc(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL); ++} ++ ++void module_free_exec(struct module *mod, void *module_region) ++{ ++ module_free(mod, module_region); ++} ++ ++void *module_alloc_exec(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL_RX); + } ++#else ++void *module_alloc(unsigned long size) ++{ ++ return __module_alloc(size, PAGE_KERNEL_EXEC); ++} ++#endif ++ + #endif + + /* We don't need anything special. */ +@@ -77,7 +100,11 @@ int apply_relocate_add(Elf64_Shdr *sechd + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + void *loc; +- u64 val; ++ u64 val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); +@@ -101,21 +128,61 @@ int apply_relocate_add(Elf64_Shdr *sechd + case R_X86_64_NONE: + break; + case R_X86_64_64: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u64 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + case R_X86_64_32: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + if (val != *(u32 *)loc) + goto overflow; + break; + case R_X86_64_32S: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(s32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + if ((s64)val != *(s32 *)loc) + goto overflow; + 
break; + case R_X86_64_PC32: + val -= (u64)loc; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(u32 *)loc = val; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #if 0 + if ((s64)val != *(s32 *)loc) + goto overflow; +diff -urNp a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c +--- a/arch/x86/kernel/paravirt-spinlocks.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/paravirt-spinlocks.c 2009-05-24 18:10:25.029210132 -0700 +@@ -13,7 +13,7 @@ default_spin_lock_flags(raw_spinlock_t * + __raw_spin_lock(lock); + } + +-struct pv_lock_ops pv_lock_ops = { ++struct pv_lock_ops pv_lock_ops __read_only = { + #ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, +diff -urNp a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c +--- a/arch/x86/kernel/paravirt.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/paravirt.c 2009-05-24 18:10:25.030209842 -0700 +@@ -44,7 +44,7 @@ void _paravirt_nop(void) + { + } + +-static void __init default_banner(void) ++static void default_banner(void) + { + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + pv_info.name); +@@ -164,7 +164,7 @@ unsigned paravirt_patch_insns(void *insn + if (insn_len > len || start == NULL) + insn_len = len; + else +- memcpy(insnbuf, start, insn_len); ++ memcpy(insnbuf, ktla_ktva(start), insn_len); + + return insn_len; + } +@@ -292,21 +292,21 @@ void arch_flush_lazy_cpu_mode(void) + preempt_enable(); + } + +-struct pv_info pv_info = { ++struct pv_info pv_info __read_only = { + .name = "bare hardware", + .paravirt_enabled = 0, + .kernel_rpl = 0, + .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ + }; + +-struct pv_init_ops pv_init_ops = { ++struct pv_init_ops pv_init_ops __read_only = { + .patch = native_patch, + .banner = default_banner, + .arch_setup = paravirt_nop, + .memory_setup = machine_specific_memory_setup, + }; + 
+-struct pv_time_ops pv_time_ops = { ++struct pv_time_ops pv_time_ops __read_only = { + .time_init = hpet_time_init, + .get_wallclock = native_get_wallclock, + .set_wallclock = native_set_wallclock, +@@ -314,7 +314,7 @@ struct pv_time_ops pv_time_ops = { + .get_tsc_khz = native_calibrate_tsc, + }; + +-struct pv_irq_ops pv_irq_ops = { ++struct pv_irq_ops pv_irq_ops __read_only = { + .init_IRQ = native_init_IRQ, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, +@@ -327,7 +327,7 @@ struct pv_irq_ops pv_irq_ops = { + #endif + }; + +-struct pv_cpu_ops pv_cpu_ops = { ++struct pv_cpu_ops pv_cpu_ops __read_only = { + .cpuid = native_cpuid, + .get_debugreg = native_get_debugreg, + .set_debugreg = native_set_debugreg, +@@ -389,7 +389,7 @@ struct pv_cpu_ops pv_cpu_ops = { + }, + }; + +-struct pv_apic_ops pv_apic_ops = { ++struct pv_apic_ops pv_apic_ops __read_only = { + #ifdef CONFIG_X86_LOCAL_APIC + .setup_boot_clock = setup_boot_APIC_clock, + .setup_secondary_clock = setup_secondary_APIC_clock, +@@ -397,7 +397,7 @@ struct pv_apic_ops pv_apic_ops = { + #endif + }; + +-struct pv_mmu_ops pv_mmu_ops = { ++struct pv_mmu_ops pv_mmu_ops __read_only = { + #ifndef CONFIG_X86_64 + .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_setup_done = native_pagetable_setup_done, +diff -urNp a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +--- a/arch/x86/kernel/process_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/process_32.c 2009-05-24 18:10:25.030209842 -0700 +@@ -65,8 +65,10 @@ asmlinkage void ret_from_fork(void) __as + DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; + EXPORT_PER_CPU_SYMBOL(current_task); + ++#ifdef CONFIG_SMP + DEFINE_PER_CPU(int, cpu_number); + EXPORT_PER_CPU_SYMBOL(cpu_number); ++#endif + + /* + * Return saved PC of a blocked thread. 
+@@ -74,6 +76,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); + unsigned long thread_saved_pc(struct task_struct *tsk) + { + return ((unsigned long *)tsk->thread.sp)[3]; ++//XXX return tsk->thread.eip; + } + + #ifndef CONFIG_SMP +@@ -131,7 +134,7 @@ void __show_regs(struct pt_regs *regs, i + unsigned short ss, gs; + const char *board; + +- if (user_mode_vm(regs)) { ++ if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + savesegment(gs, gs); +@@ -212,8 +215,8 @@ int kernel_thread(int (*fn)(void *), voi + regs.bx = (unsigned long) fn; + regs.dx = (unsigned long) arg; + +- regs.ds = __USER_DS; +- regs.es = __USER_DS; ++ regs.ds = __KERNEL_DS; ++ regs.es = __KERNEL_DS; + regs.fs = __KERNEL_PERCPU; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; +@@ -235,7 +238,7 @@ void exit_thread(void) + struct task_struct *tsk = current; + struct thread_struct *t = &tsk->thread; + int cpu = get_cpu(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; +@@ -264,6 +267,7 @@ void flush_thread(void) + { + struct task_struct *tsk = current; + ++ loadsegment(gs, 0); + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; +@@ -303,7 +307,7 @@ int copy_thread(int nr, unsigned long cl + struct task_struct *tsk; + int err; + +- childregs = task_pt_regs(p); ++ childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; + *childregs = *regs; + childregs->ax = 0; + childregs->sp = sp; +@@ -332,6 +336,7 @@ int copy_thread(int nr, unsigned long cl + * Set a new TLS for the child thread? + */ + if (clone_flags & CLONE_SETTLS) ++//XXX needs set_fs()? 
+ err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); + +@@ -553,7 +558,7 @@ struct task_struct * __switch_to(struct + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + +@@ -581,6 +586,11 @@ struct task_struct * __switch_to(struct + */ + savesegment(gs, prev->gs); + ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ if (!segment_eq(task_thread_info(prev_p)->addr_limit, task_thread_info(next_p)->addr_limit)) ++ __set_fs(task_thread_info(next_p)->addr_limit, cpu); ++#endif ++ + /* + * Load the per-thread Thread-Local Storage descriptor. + */ +@@ -719,15 +729,27 @@ unsigned long get_wchan(struct task_stru + return 0; + } + +-unsigned long arch_align_stack(unsigned long sp) ++#ifdef CONFIG_PAX_RANDKSTACK ++asmlinkage void pax_randomize_kstack(void) + { +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() % 8192; +- return sp & ~0xf; +-} ++ struct thread_struct *thread = &current->thread; ++ unsigned long time; + +-unsigned long arch_randomize_brk(struct mm_struct *mm) +-{ +- unsigned long range_end = mm->brk + 0x02000000; +- return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; ++ if (!randomize_va_space) ++ return; ++ ++ rdtscl(time); ++ ++ /* P4 seems to return a 0 LSB, ignore it */ ++#ifdef CONFIG_MPENTIUM4 ++ time &= 0x1EUL; ++ time <<= 2; ++#else ++ time &= 0xFUL; ++ time <<= 3; ++#endif ++ ++ thread->sp0 ^= time; ++ load_sp0(init_tss + smp_processor_id(), thread); + } ++#endif +diff -urNp a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +--- a/arch/x86/kernel/process_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/process_64.c 2009-05-24 18:10:25.031209550 -0700 +@@ -88,7 +88,7 @@ static void __exit_idle(void) + void exit_idle(void) + { + /* idle loop has pid 0 */ +- if (current->pid) ++ if (task_pid_nr(current)) + return; + __exit_idle(); + } +@@ -109,6 +109,8 @@ static inline void play_dead(void) + void cpu_idle(void) + { + current_thread_info()->status |= TS_POLLING; ++ current->stack_canary = pax_get_random_long(); ++ write_pda(stack_canary, current->stack_canary); + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_stop_sched_tick(1); +@@ -153,7 +155,7 @@ void __show_regs(struct pt_regs *regs, i + printk("\n"); + print_modules(); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", +- current->pid, current->comm, print_tainted(), ++ task_pid_nr(current), current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +@@ -223,7 +225,7 @@ void exit_thread(void) + struct thread_struct *t = &me->thread; + + if (me->thread.io_bitmap_ptr) { +- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); ++ struct tss_struct *tss = init_tss + get_cpu(); + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; +@@ -558,7 +560,7 @@ __switch_to(struct task_struct *prev_p, + struct thread_struct *prev = &prev_p->thread; + struct thread_struct *next = &next_p->thread; + int cpu = smp_processor_id(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + unsigned 
fsindex, gsindex; + + /* we're going to use this soon, after a few expensive things */ +@@ -647,7 +649,6 @@ __switch_to(struct task_struct *prev_p, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - PDA_STACKOFFSET); + #ifdef CONFIG_CC_STACKPROTECTOR +- write_pda(stack_canary, next_p->stack_canary); + /* + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement +@@ -746,12 +747,11 @@ unsigned long get_wchan(struct task_stru + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack = (unsigned long)task_stack_page(p); +- if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) ++ if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE-8-sizeof(u64)) + return 0; + fp = *(u64 *)(p->thread.sp); + do { +- if (fp < (unsigned long)stack || +- fp >= (unsigned long)stack+THREAD_SIZE) ++ if (fp < stack || fp > stack+THREAD_SIZE-8-sizeof(u64)) + return 0; + ip = *(u64 *)(fp+8); + if (!in_sched_functions(ip)) +@@ -860,16 +860,3 @@ long sys_arch_prctl(int code, unsigned l + { + return do_arch_prctl(current, code, addr); + } +- +-unsigned long arch_align_stack(unsigned long sp) +-{ +- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +- sp -= get_random_int() % 8192; +- return sp & ~0xf; +-} +- +-unsigned long arch_randomize_brk(struct mm_struct *mm) +-{ +- unsigned long range_end = mm->brk + 0x02000000; +- return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +-} +diff -urNp a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c +--- a/arch/x86/kernel/ptrace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/ptrace.c 2009-05-24 18:10:25.032210587 -0700 +@@ -1502,7 +1502,7 @@ void send_sigtrap(struct task_struct *ts + info.si_code = si_code; + + /* User-mode ip? */ +- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; ++ info.si_addr = user_mode(regs) ? 
(void __user *) regs->ip : NULL; + + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +diff -urNp a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c +--- a/arch/x86/kernel/reboot.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/reboot.c 2009-05-24 18:10:25.033209597 -0700 +@@ -28,7 +28,7 @@ void (*pm_power_off)(void); + EXPORT_SYMBOL(pm_power_off); + + static const struct desc_ptr no_idt = {}; +-static int reboot_mode; ++static unsigned short reboot_mode; + enum reboot_type reboot_type = BOOT_KBD; + int reboot_force; + +@@ -210,7 +210,7 @@ static struct dmi_system_id __initdata r + DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), + }, + }, +- { } ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + static int __init reboot_init(void) +@@ -226,12 +226,12 @@ core_initcall(reboot_init); + controller to pulse the CPU reset line, which is more thorough, but + doesn't work with at least one type of 486 motherboard. It is easy + to stop this code working; hence the copious comments. */ +-static const unsigned long long +-real_mode_gdt_entries [3] = ++static struct desc_struct ++real_mode_gdt_entries [3] __read_only = + { +- 0x0000000000000000ULL, /* Null descriptor */ +- 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ +- 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ ++ {{{0x00000000, 0x00000000}}}, /* Null descriptor */ ++ {{{0x0000ffff, 0x00009b00}}}, /* 16-bit real-mode 64k code at 0x00000000 */ ++ {{{0x0100ffff, 0x00009300}}} /* 16-bit real-mode 64k data at 0x00000100 */ + }; + + static const struct desc_ptr +@@ -280,7 +280,7 @@ static const unsigned char jump_to_bios + * specified by the code and length parameters. + * We assume that length will aways be less that 100! 
+ */ +-void machine_real_restart(const unsigned char *code, int length) ++void machine_real_restart(const unsigned char *code, unsigned int length) + { + local_irq_disable(); + +@@ -300,8 +300,8 @@ void machine_real_restart(const unsigned + /* Remap the kernel at virtual address zero, as well as offset zero + from the kernel segment. This assumes the kernel segment starts at + virtual address PAGE_OFFSET. */ +- memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, +- sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); ++ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, ++ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + + /* + * Use `swapper_pg_dir' as our page directory. +@@ -313,16 +313,15 @@ void machine_real_restart(const unsigned + boot)". This seems like a fairly standard thing that gets set by + REBOOT.COM programs, and the previous reset routine did this + too. */ +- *((unsigned short *)0x472) = reboot_mode; ++ *(unsigned short *)(__va(0x472)) = reboot_mode; + + /* For the switch to real mode, copy some code to low memory. It has + to be in the first 64k because it is running in 16-bit mode, and it + has to have the same physical and virtual address, because it turns + off paging. Copy it near the end of the first page, out of the way + of BIOS variables. */ +- memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), +- real_mode_switch, sizeof (real_mode_switch)); +- memcpy((void *)(0x1000 - 100), code, length); ++ memcpy(__va(0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); ++ memcpy(__va(0x1000 - 100), code, length); + + /* Set up the IDT for real mode. 
*/ + load_idt(&real_mode_idt); +diff -urNp a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +--- a/arch/x86/kernel/setup.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/setup.c 2009-05-24 18:10:25.033209597 -0700 +@@ -738,6 +738,7 @@ void start_periodic_check_for_corruption + } + #endif + ++#ifdef CONFIG_X86_RESERVE_LOW_64K + static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) + { + printk(KERN_NOTICE +@@ -749,6 +750,7 @@ static int __init dmi_low_memory_corrupt + + return 0; + } ++#endif + + /* List of systems that have known low memory corruption BIOS problems */ + static struct dmi_system_id __initdata bad_bios_dmi_table[] = { +@@ -845,8 +847,8 @@ void __init setup_arch(char **cmdline_p) + + if (!boot_params.hdr.root_flags) + root_mountflags &= ~MS_RDONLY; +- init_mm.start_code = (unsigned long) _text; +- init_mm.end_code = (unsigned long) _etext; ++ init_mm.start_code = ktla_ktva((unsigned long) _text); ++ init_mm.end_code = ktla_ktva((unsigned long) _etext); + init_mm.end_data = (unsigned long) _edata; + #ifdef CONFIG_X86_32 + init_mm.brk = init_pg_tables_end + PAGE_OFFSET; +@@ -854,9 +856,9 @@ void __init setup_arch(char **cmdline_p) + init_mm.brk = (unsigned long) &_end; + #endif + +- code_resource.start = virt_to_phys(_text); +- code_resource.end = virt_to_phys(_etext)-1; +- data_resource.start = virt_to_phys(_etext); ++ code_resource.start = virt_to_phys(ktla_ktva(_text)); ++ code_resource.end = virt_to_phys(ktla_ktva(_etext))-1; ++ data_resource.start = virt_to_phys(_data); + data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; +diff -urNp a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c +--- a/arch/x86/kernel/setup_percpu.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/setup_percpu.c 2009-05-24 18:10:25.034209446 -0700 +@@ -179,7 +179,11 @@ void __init setup_per_cpu_areas(void) + cpu, node, 
__pa(ptr)); + } + #endif ++#ifdef CONFIG_X86_32 ++ __per_cpu_offset[cpu] = ptr - __per_cpu_start; ++#else + per_cpu_offset(cpu) = ptr - __per_cpu_start; ++#endif + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + } + +diff -urNp a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c +--- a/arch/x86/kernel/signal_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/signal_32.c 2009-05-24 18:10:25.035210971 -0700 +@@ -335,7 +335,7 @@ get_sigframe(struct k_sigaction *ka, str + * Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. + */ +- sp = ((sp + 4) & -16ul) - 4; ++ sp = ((sp - 12) & -16ul) - 4; + + return (void __user *) sp; + } +@@ -367,9 +367,9 @@ __setup_frame(int sig, struct k_sigactio + } + + if (current->mm->context.vdso) +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); ++ restorer = (void __user *)VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); + else +- restorer = &frame->retcode; ++ restorer = (void __user *)&frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + +@@ -442,7 +442,7 @@ static int __setup_rt_frame(int sig, str + return -EFAULT; + + /* Set up to return from userspace. */ +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); ++ restorer = (void __user *)VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(restorer, &frame->pretcode); +@@ -612,7 +612,7 @@ static void do_signal(struct pt_regs *re + * X86_32: vm86 regs switched out by assembly code before reaching + * here, so testing against kernel CS suffices. 
+ */ +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + return; + + if (current_thread_info()->status & TS_RESTORE_SIGMASK) +diff -urNp a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c +--- a/arch/x86/kernel/signal_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/signal_64.c 2009-05-24 18:10:25.036210191 -0700 +@@ -239,8 +239,8 @@ static int __setup_rt_frame(int sig, str + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); + err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); + if (sizeof(*set) == 16) { +- __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); +- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); ++ err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); ++ err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + } else + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + +diff -urNp a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +--- a/arch/x86/kernel/smpboot.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/smpboot.c 2009-05-24 18:10:25.037210180 -0700 +@@ -814,6 +814,11 @@ static int __cpuinit do_boot_cpu(int api + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + INIT_WORK(&c_idle.work, do_fork_idle); + + #ifdef CONFIG_X86_64 +@@ -864,7 +869,17 @@ do_rest: + cpu_pda(cpu)->pcurrent = c_idle.idle; + clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + #endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + initial_code = (unsigned long)start_secondary; + stack_start.sp = (void *) c_idle.idle->thread.sp; + +diff -urNp a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c +--- a/arch/x86/kernel/smpcommon.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/smpcommon.c 2009-05-24 
18:10:25.037210180 -0700 +@@ -3,9 +3,10 @@ + */ + #include <linux/module.h> + #include <asm/smp.h> ++#include <asm/sections.h> + + #ifdef CONFIG_X86_32 +-DEFINE_PER_CPU(unsigned long, this_cpu_off); ++DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_start; + EXPORT_PER_CPU_SYMBOL(this_cpu_off); + + /* +@@ -15,16 +16,19 @@ EXPORT_PER_CPU_SYMBOL(this_cpu_off); + */ + __cpuinit void init_gdt(int cpu) + { +- struct desc_struct gdt; ++ struct desc_struct d, *gdt = get_cpu_gdt_table(cpu); ++ unsigned long base, limit; + +- pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, +- 0x2 | DESCTYPE_S, 0x8); +- gdt.s = 1; ++ base = per_cpu_offset(cpu); ++ limit = PERCPU_ENOUGH_ROOM - 1; ++ if (limit < 64*1024) ++ pack_descriptor(&d, base, limit, 0x80 | DESCTYPE_S | 0x3, 0x4); ++ else ++ pack_descriptor(&d, base, limit >> PAGE_SHIFT, 0x80 | DESCTYPE_S | 0x3, 0xC); + +- write_gdt_entry(get_cpu_gdt_table(cpu), +- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); ++ write_gdt_entry(gdt, GDT_ENTRY_PERCPU, &d, DESCTYPE_S); + +- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; ++ per_cpu(this_cpu_off, cpu) = base; + per_cpu(cpu_number, cpu) = cpu; + } + #endif +diff -urNp a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c +--- a/arch/x86/kernel/step.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/step.c 2009-05-24 18:10:25.038209470 -0700 +@@ -23,22 +23,20 @@ unsigned long convert_ip_to_linear(struc + * and APM bios ones we just ignore here. 
+ */ + if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { +- u32 *desc; ++ struct desc_struct *desc; + unsigned long base; + +- seg &= ~7UL; ++ seg >>= 3; + + mutex_lock(&child->mm->context.lock); +- if (unlikely((seg >> 3) >= child->mm->context.size)) +- addr = -1L; /* bogus selector, access would fault */ ++ if (unlikely(seg >= child->mm->context.size)) ++ addr = -EINVAL; + else { +- desc = child->mm->context.ldt + seg; +- base = ((desc[0] >> 16) | +- ((desc[1] & 0xff) << 16) | +- (desc[1] & 0xff000000)); ++ desc = &child->mm->context.ldt[seg]; ++ base = (desc->a >> 16) | ((desc->b & 0xff) << 16) | (desc->b & 0xff000000); + + /* 16-bit code segment? */ +- if (!((desc[1] >> 22) & 1)) ++ if (!((desc->b >> 22) & 1)) + addr &= 0xffff; + addr += base; + } +@@ -54,6 +52,9 @@ static int is_setting_trap_flag(struct t + unsigned char opcode[15]; + unsigned long addr = convert_ip_to_linear(child, regs); + ++ if (addr == -EINVAL) ++ return 0; ++ + copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); + for (i = 0; i < copied; i++) { + switch (opcode[i]) { +@@ -75,7 +76,7 @@ static int is_setting_trap_flag(struct t + + #ifdef CONFIG_X86_64 + case 0x40 ... 
0x4f: +- if (regs->cs != __USER_CS) ++ if ((regs->cs & 0xffff) != __USER_CS) + /* 32-bit mode: register increment */ + return 0; + /* 64-bit mode: REX prefix */ +diff -urNp a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c +--- a/arch/x86/kernel/sys_i386_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/sys_i386_32.c 2009-05-24 18:10:25.038209470 -0700 +@@ -24,6 +24,21 @@ + + #include <asm/syscalls.h> + ++int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) ++{ ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size || addr > pax_task_size - len) ++ return -EINVAL; ++ ++ return 0; ++} ++ + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +@@ -83,6 +98,205 @@ out: + return err; + } + ++unsigned long ++arch_get_unmapped_area(struct file *filp, unsigned long addr, ++ unsigned long len, unsigned long pgoff, unsigned long flags) ++{ ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ unsigned long start_addr, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size) ++ return -ENOMEM; ++ ++ if (flags & MAP_FIXED) ++ return addr; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ ++ if (addr) { ++ addr = PAGE_ALIGN(addr); ++ vma = find_vma(mm, addr); ++ if (pax_task_size - len >= addr && ++ (!vma || addr + len <= vma->vm_start)) ++ return addr; ++ } ++ if (len > mm->cached_hole_size) { ++ start_addr = addr = mm->free_area_cache; ++ } else { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ } ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && 
(flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { ++ start_addr = 0x00110000UL; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ start_addr += mm->delta_mmap & 0x03FFF000UL; ++#endif ++ ++ if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) ++ start_addr = addr = mm->mmap_base; ++ else ++ addr = start_addr; ++ } ++#endif ++ ++full_search: ++ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { ++ /* At this point: (!vma || addr < vma->vm_end). */ ++ if (pax_task_size - len < addr) { ++ /* ++ * Start a new search - just in case we missed ++ * some holes. ++ */ ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ goto full_search; ++ } ++ return -ENOMEM; ++ } ++ if (!vma || addr + len <= vma->vm_start) { ++ /* ++ * Remember the place where we stopped the search: ++ */ ++ mm->free_area_cache = addr + len; ++ return addr; ++ } ++ if (addr + mm->cached_hole_size < vma->vm_start) ++ mm->cached_hole_size = vma->vm_start - addr; ++ addr = vma->vm_end; ++ if (mm->start_brk <= addr && addr < mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; ++ goto full_search; ++ } ++ } ++} ++ ++unsigned long ++arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, ++ const unsigned long len, const unsigned long pgoff, ++ const unsigned long flags) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm = current->mm; ++ unsigned long base = mm->mmap_base, addr = addr0, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ /* requested length too big for entire address space */ ++ if (len > pax_task_size) ++ return -ENOMEM; ++ ++ if (flags & MAP_FIXED) ++ return addr; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) ++ goto bottomup; ++#endif ++ ++#ifdef 
CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ ++ /* requesting a specific address */ ++ if (addr) { ++ addr = PAGE_ALIGN(addr); ++ vma = find_vma(mm, addr); ++ if (pax_task_size - len >= addr && ++ (!vma || addr + len <= vma->vm_start)) ++ return addr; ++ } ++ ++ /* check if free_area_cache is useful for us */ ++ if (len <= mm->cached_hole_size) { ++ mm->cached_hole_size = 0; ++ mm->free_area_cache = mm->mmap_base; ++ } ++ ++ /* either no address requested or can't fit in requested address hole */ ++ addr = mm->free_area_cache; ++ ++ /* make sure it can fit in the remaining address space */ ++ if (addr > len) { ++ vma = find_vma(mm, addr-len); ++ if (!vma || addr <= vma->vm_start) ++ /* remember the address as a hint for next time */ ++ return (mm->free_area_cache = addr-len); ++ } ++ ++ if (mm->mmap_base < len) ++ goto bottomup; ++ ++ addr = mm->mmap_base-len; ++ ++ do { ++ /* ++ * Lookup failure means no vma is above this address, ++ * else if new region fits below vma->vm_start, ++ * return with success: ++ */ ++ vma = find_vma(mm, addr); ++ if (!vma || addr+len <= vma->vm_start) ++ /* remember the address as a hint for next time */ ++ return (mm->free_area_cache = addr); ++ ++ /* remember the largest hole we saw so far */ ++ if (addr + mm->cached_hole_size < vma->vm_start) ++ mm->cached_hole_size = vma->vm_start - addr; ++ ++ /* try just below the current vma->vm_start */ ++ addr = vma->vm_start-len; ++ } while (len < vma->vm_start); ++ ++bottomup: ++ /* ++ * A failed mmap() very likely causes application failure, ++ * so fall back to the bottom-up function here. This scenario ++ * can happen with large stack limits and large mmap() ++ * allocations. 
++ */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; ++ mm->cached_hole_size = ~0UL; ++ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); ++ /* ++ * Restore the topdown base: ++ */ ++ mm->mmap_base = base; ++ mm->free_area_cache = base; ++ mm->cached_hole_size = ~0UL; ++ ++ return addr; ++} + + struct sel_arg_struct { + unsigned long n; +diff -urNp a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c +--- a/arch/x86/kernel/sys_x86_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/sys_x86_64.c 2009-05-24 18:10:25.039209598 -0700 +@@ -47,8 +47,8 @@ out: + return error; + } + +-static void find_start_end(unsigned long flags, unsigned long *begin, +- unsigned long *end) ++static void find_start_end(struct mm_struct *mm, unsigned long flags, ++ unsigned long *begin, unsigned long *end) + { + if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { + unsigned long new_begin; +@@ -67,7 +67,7 @@ static void find_start_end(unsigned long + *begin = new_begin; + } + } else { +- *begin = TASK_UNMAPPED_BASE; ++ *begin = mm->mmap_base; + *end = TASK_SIZE; + } + } +@@ -84,11 +84,15 @@ arch_get_unmapped_area(struct file *filp + if (flags & MAP_FIXED) + return addr; + +- find_start_end(flags, &begin, &end); ++ find_start_end(mm, flags, &begin, &end); + + if (len > end) + return -ENOMEM; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); +@@ -143,7 +147,7 @@ arch_get_unmapped_area_topdown(struct fi + { + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; +- unsigned long addr = addr0; ++ unsigned long base = mm->mmap_base, addr = addr0; + + /* 
requested length too big for entire address space */ + if (len > TASK_SIZE) +@@ -156,6 +160,10 @@ arch_get_unmapped_area_topdown(struct fi + if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) + goto bottomup; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); +@@ -213,13 +221,21 @@ bottomup: + * can happen with large stack limits and large mmap() + * allocations. + */ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; +- mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ +- mm->free_area_cache = mm->mmap_base; ++ mm->mmap_base = base; ++ mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +diff -urNp a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S +--- a/arch/x86/kernel/syscall_table_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/syscall_table_32.S 2009-05-24 18:10:25.039209598 -0700 +@@ -1,3 +1,4 @@ ++.section .rodata,"a",@progbits + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ + .long sys_exit +diff -urNp a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c +--- a/arch/x86/kernel/time_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/time_32.c 2009-05-24 18:10:25.040209936 -0700 +@@ -47,22 +47,32 @@ unsigned long profile_pc(struct pt_regs + unsigned long pc = instruction_pointer(regs); + + #ifdef CONFIG_SMP +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++ if (!user_mode(regs) && in_lock_functions(pc)) { + #ifdef CONFIG_FRAME_POINTER +- return *(unsigned long *)(regs->bp + sizeof(long)); ++ return ktla_ktva(*(unsigned long 
*)(regs->bp + sizeof(long))); + #else + unsigned long *sp = (unsigned long *)&regs->sp; + + /* Return address is either directly at stack pointer + or above a saved flags. Eflags has bits 22-31 zero, + kernel addresses don't. */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ return ktla_ktva(sp[0]); ++#else + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; + #endif ++ ++#endif + } + #endif ++ ++ if (!user_mode(regs)) ++ pc = ktla_ktva(pc); ++ + return pc; + } + EXPORT_SYMBOL(profile_pc); +diff -urNp a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c +--- a/arch/x86/kernel/time_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/time_64.c 2009-05-24 18:10:25.040209936 -0700 +@@ -34,7 +34,7 @@ unsigned long profile_pc(struct pt_regs + /* Assume the lock function has either no stack frame or a copy + of flags from PUSHF + Eflags always has bits 22 and up cleared unlike kernel addresses. */ +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++ if (!user_mode(regs) && in_lock_functions(pc)) { + #ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->bp + sizeof(long)); + #else +diff -urNp a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c +--- a/arch/x86/kernel/tlb_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/tlb_32.c 2009-05-24 18:10:25.040209936 -0700 +@@ -5,7 +5,7 @@ + #include <asm/tlbflush.h> + + DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) +- ____cacheline_aligned = { &init_mm, 0, }; ++ ____cacheline_aligned = { &init_mm, 0, {0} }; + + /* must come after the send_IPI functions above for inlining */ + #include <mach_ipi.h> +diff -urNp a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c +--- a/arch/x86/kernel/tls.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/tls.c 2009-05-24 18:10:25.041209645 -0700 +@@ -85,6 +85,11 @@ int do_set_thread_area(struct task_struc + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((p->mm->pax_flags & 
MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) ++ return -EINVAL; ++#endif ++ + set_tls_desc(p, idx, &info, 1); + + return 0; +diff -urNp a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +--- a/arch/x86/kernel/traps.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/traps.c 2009-05-24 18:10:25.041209645 -0700 +@@ -79,14 +79,6 @@ asmlinkage int system_call(void); + + /* Do we ignore FPU interrupts ? */ + char ignore_fpu_irq; +- +-/* +- * The IDT has to be page-aligned to simplify the Pentium +- * F0 0F bug workaround.. We have a special link segment +- * for this. +- */ +-gate_desc idt_table[256] +- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; + #endif + + static int ignore_nmis; +@@ -121,7 +113,7 @@ static inline void preempt_conditional_c + static inline void + die_if_kernel(const char *str, struct pt_regs *regs, long err) + { +- if (!user_mode_vm(regs)) ++ if (!user_mode(regs)) + die(str, regs, err); + } + +@@ -138,7 +130,7 @@ static int lazy_iobitmap_copy(void) + int cpu; + + cpu = get_cpu(); +- tss = &per_cpu(init_tss, cpu); ++ tss = init_tss + cpu; + thread = &current->thread; + + if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && +@@ -174,7 +166,7 @@ do_trap(int trapnr, int signr, char *str + struct task_struct *tsk = current; + + #ifdef CONFIG_X86_32 +- if (regs->flags & X86_VM_MASK) { ++ if (v8086_mode(regs)) { + /* + * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * On nmi (interrupt 2), do_trap should not be called. 
+@@ -185,7 +177,7 @@ do_trap(int trapnr, int signr, char *str + } + #endif + +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto kernel_trap; + + #ifdef CONFIG_X86_32 +@@ -208,7 +200,7 @@ trap_signal: + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", +- tsk->comm, tsk->pid, str, ++ tsk->comm, task_pid_nr(tsk), str, + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); +@@ -227,6 +219,12 @@ kernel_trap: + tsk->thread.trap_no = trapnr; + die(str, regs, error_code); + } ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ if (trapnr == 4) ++ pax_report_refcount_overflow(regs); ++#endif ++ + return; + + #ifdef CONFIG_X86_32 +@@ -318,14 +316,30 @@ do_general_protection(struct pt_regs *re + return; + } + +- if (regs->flags & X86_VM_MASK) ++ if (v8086_mode(regs)) + goto gp_in_vm86; + #endif + + tsk = current; +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto gp_in_kernel; + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ if (!nx_enabled && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) { ++ struct mm_struct *mm = tsk->mm; ++ unsigned long limit; ++ ++ down_write(&mm->mmap_sem); ++ limit = mm->context.user_cs_limit; ++ if (limit < TASK_SIZE) { ++ track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC); ++ up_write(&mm->mmap_sem); ++ return; ++ } ++ up_write(&mm->mmap_sem); ++ } ++#endif ++ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + +@@ -358,6 +372,13 @@ gp_in_kernel: + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ if ((regs->cs & 0xFFFF) == __KERNEL_CS) ++ die("PAX: suspicious general protection fault", regs, error_code); ++ else ++#endif ++ + die("general protection fault", regs, error_code); + } + +@@ -604,7 +625,7 @@ dotraplinkage void __kprobes do_debug(st + } + + #ifdef CONFIG_X86_32 +- if (regs->flags & X86_VM_MASK) ++ if 
(v8086_mode(regs)) + goto debug_vm86; + #endif + +@@ -616,7 +637,7 @@ dotraplinkage void __kprobes do_debug(st + * kernel space (but re-enable TF when returning to user mode). + */ + if (condition & DR_STEP) { +- if (!user_mode(regs)) ++ if (!user_mode_novm(regs)) + goto clear_TF_reenable; + } + +@@ -808,7 +829,7 @@ do_simd_coprocessor_error(struct pt_regs + * Handle strange cache flush from user space exception + * in all other cases. This is undocumented behaviour. + */ +- if (regs->flags & X86_VM_MASK) { ++ if (v8086_mode(regs)) { + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); + return; + } +@@ -837,19 +858,14 @@ do_spurious_interrupt_bug(struct pt_regs + #ifdef CONFIG_X86_32 + unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) + { +- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; +- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; ++ struct desc_struct ss; + + /* Set up base for espfix segment */ +- desc &= 0x00f0ff0000000000ULL; +- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | +- ((((__u64)base) << 32) & 0xff00000000000000ULL) | +- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | +- (lim_pages & 0xffff); +- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; ++ pack_descriptor(&ss, base, lim_pages, 0x93, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(smp_processor_id()), GDT_ENTRY_ESPFIX_SS, &ss, DESCTYPE_S); + + return new_kesp; + } +diff -urNp a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c +--- a/arch/x86/kernel/tsc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/tsc.c 2009-05-24 18:10:25.042209424 -0700 +@@ -728,7 +728,7 @@ static struct dmi_system_id __initdata b + DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), + }, + }, +- {} ++ { NULL, NULL, {{0, {0}}}, NULL} + }; + + /* +diff -urNp a/arch/x86/kernel/vm86_32.c 
b/arch/x86/kernel/vm86_32.c +--- a/arch/x86/kernel/vm86_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/vm86_32.c 2009-05-24 18:10:25.043209901 -0700 +@@ -148,7 +148,7 @@ struct pt_regs *save_v86_state(struct ke + do_exit(SIGSEGV); + } + +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + current->thread.sp0 = current->thread.saved_sp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, &current->thread); +@@ -325,7 +325,7 @@ static void do_sys_vm86(struct kernel_vm + tsk->thread.saved_fs = info->regs32->fs; + savesegment(gs, tsk->thread.saved_gs); + +- tss = &per_cpu(init_tss, get_cpu()); ++ tss = init_tss + get_cpu(); + tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; +diff -urNp a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c +--- a/arch/x86/kernel/vmi_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/vmi_32.c 2009-05-24 18:10:25.044209820 -0700 +@@ -102,18 +102,43 @@ static unsigned patch_internal(int call, + { + u64 reloc; + struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + reloc = call_vrom_long_func(vmi_rom, get_reloc, call); + switch(rel->type) { + case VMI_RELOCATION_CALL_REL: + BUG_ON(len < 5); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(char *)insnbuf = MNEM_CALL; + patch_offset(insnbuf, ip, (unsigned long)rel->eip); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + return 5; + + case VMI_RELOCATION_JUMP_REL: + BUG_ON(len < 5); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + *(char *)insnbuf = MNEM_JMP; + patch_offset(insnbuf, ip, (unsigned long)rel->eip); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + return 5; + + case VMI_RELOCATION_NOP: +@@ -526,14 +551,14 @@ static void vmi_set_pud(pud_t *pudp, pud + + static void vmi_pte_clear(struct 
mm_struct *mm, unsigned long addr, pte_t *ptep) + { +- const pte_t pte = { .pte = 0 }; ++ const pte_t pte = __pte(0ULL); + vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); + vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); + } + + static void vmi_pmd_clear(pmd_t *pmd) + { +- const pte_t pte = { .pte = 0 }; ++ const pte_t pte = __pte(0ULL); + vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); + vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); + } +@@ -562,8 +587,8 @@ vmi_startup_ipi_hook(int phys_apicid, un + ap.ss = __KERNEL_DS; + ap.esp = (unsigned long) start_esp; + +- ap.ds = __USER_DS; +- ap.es = __USER_DS; ++ ap.ds = __KERNEL_DS; ++ ap.es = __KERNEL_DS; + ap.fs = __KERNEL_PERCPU; + ap.gs = 0; + +@@ -758,12 +783,20 @@ static inline int __init activate_vmi(vo + u64 reloc; + const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + if (call_vrom_func(vmi_rom, vmi_init) != 0) { + printk(KERN_ERR "VMI ROM failed to initialize!"); + return 0; + } + savesegment(cs, kernel_cs); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + pv_info.paravirt_enabled = 1; + pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.name = "vmi"; +@@ -954,6 +987,10 @@ static inline int __init activate_vmi(vo + + para_fill(pv_irq_ops.safe_halt, Halt); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * Alternative instruction rewriting doesn't happen soon enough + * to convert VMI_IRET to a call instead of a jump; so we have +diff -urNp a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S +--- a/arch/x86/kernel/vmlinux_32.lds.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/vmlinux_32.lds.S 2009-05-24 18:10:25.045210367 -0700 +@@ -15,6 +15,20 @@ + #include <asm/page.h> + #include <asm/cache.h> + #include <asm/boot.h> ++#include <asm/segment.h> ++ ++#ifdef CONFIG_X86_PAE ++#define 
PMD_SHIFT 21 ++#else ++#define PMD_SHIFT 22 ++#endif ++#define PMD_SIZE (1 << PMD_SHIFT) ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#define __KERNEL_TEXT_OFFSET (__PAGE_OFFSET + (((____LOAD_PHYSICAL_ADDR + 2*(PMD_SIZE - 1)) - 1) & ~(PMD_SIZE - 1))) ++#else ++#define __KERNEL_TEXT_OFFSET 0 ++#endif + + OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") + OUTPUT_ARCH(i386) +@@ -22,81 +36,23 @@ ENTRY(phys_startup_32) + jiffies = jiffies_64; + + PHDRS { +- text PT_LOAD FLAGS(5); /* R_E */ +- data PT_LOAD FLAGS(7); /* RWE */ +- note PT_NOTE FLAGS(0); /* ___ */ ++ initdata PT_LOAD FLAGS(6); /* RW_ */ ++ percpu PT_LOAD FLAGS(6); /* RW_ */ ++ inittext PT_LOAD FLAGS(5); /* R_E */ ++ text PT_LOAD FLAGS(5); /* R_E */ ++ rodata PT_LOAD FLAGS(4); /* R__ */ ++ data PT_LOAD FLAGS(6); /* RW_ */ ++ note PT_NOTE FLAGS(0); /* ___ */ + } + SECTIONS + { +- . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; +- phys_startup_32 = startup_32 - LOAD_OFFSET; +- +- .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { +- _text = .; /* Text and read-only data */ +- *(.text.head) +- } :text = 0x9090 +- +- /* read-only */ +- .text : AT(ADDR(.text) - LOAD_OFFSET) { +- . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ +- *(.text.page_aligned) +- TEXT_TEXT +- SCHED_TEXT +- LOCK_TEXT +- KPROBES_TEXT +- *(.fixup) +- *(.gnu.warning) +- _etext = .; /* End of text section */ +- } :text = 0x9090 +- +- NOTES :text :note +- +- . = ALIGN(16); /* Exception table */ +- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { +- __start___ex_table = .; +- *(__ex_table) +- __stop___ex_table = .; +- } :text = 0x9090 +- +- RODATA +- +- /* writeable */ +- . = ALIGN(PAGE_SIZE); +- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ +- DATA_DATA +- CONSTRUCTORS +- } :data +- +- . = ALIGN(PAGE_SIZE); +- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { +- __nosave_begin = .; +- *(.data.nosave) +- . = ALIGN(PAGE_SIZE); +- __nosave_end = .; +- } ++ . = LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR; + +- . 
= ALIGN(PAGE_SIZE); +- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +- *(.data.page_aligned) +- *(.data.idt) +- } +- +- . = ALIGN(32); +- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +- *(.data.cacheline_aligned) +- } +- +- /* rarely changed data like cpu maps */ +- . = ALIGN(32); +- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { +- *(.data.read_mostly) +- _edata = .; /* End of data section */ +- } +- +- . = ALIGN(THREAD_SIZE); /* init_task */ +- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { +- *(.data.init_task) +- } ++ .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { ++ __LOAD_PHYSICAL_ADDR = . - LOAD_OFFSET; ++ phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; ++ *(.text.startup) ++ } :initdata + + /* might get freed after init */ + . = ALIGN(PAGE_SIZE); +@@ -114,14 +70,8 @@ SECTIONS + . = ALIGN(PAGE_SIZE); + + /* will be freed after init */ +- . = ALIGN(PAGE_SIZE); /* Init code and data */ +- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { +- __init_begin = .; +- _sinittext = .; +- INIT_TEXT +- _einittext = .; +- } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { ++ __init_begin = .; + INIT_DATA + } + . = ALIGN(16); +@@ -161,11 +111,6 @@ SECTIONS + *(.parainstructions) + __parainstructions_end = .; + } +- /* .exit.text is discard at runtime, not link time, to deal with references +- from .altinstructions and .eh_frame */ +- .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { +- EXIT_TEXT +- } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } +@@ -178,18 +123,138 @@ SECTIONS + } + #endif + . = ALIGN(PAGE_SIZE); +- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { +- __per_cpu_start = .; +- *(.data.percpu.page_aligned) ++ per_cpu_start = .; ++ .data.percpu (0) : AT(ADDR(.data.percpu) - LOAD_OFFSET + per_cpu_start) { ++ __per_cpu_start = . 
+ per_cpu_start; ++ LONG(0) + *(.data.percpu) + *(.data.percpu.shared_aligned) +- __per_cpu_end = .; +- } ++ . = ALIGN(PAGE_SIZE); ++ *(.data.percpu.page_aligned) ++ __per_cpu_end = . + per_cpu_start; ++ } :percpu ++ . += per_cpu_start; + . = ALIGN(PAGE_SIZE); + /* freed after init ends here */ + ++ . = ALIGN(PAGE_SIZE); /* Init code and data */ ++ .init.text (. - __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ _sinittext = .; ++ INIT_TEXT ++ _einittext = .; ++ } :inittext ++ ++ /* .exit.text is discard at runtime, not link time, to deal with references ++ from .altinstructions and .eh_frame */ ++ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ EXIT_TEXT ++ } ++ ++ .filler : AT(ADDR(.filler) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ BYTE(0) ++ . = ALIGN(2*PMD_SIZE) - 1; ++ } ++ ++ /* freed after init ends here */ ++ ++ .text.head : AT(ADDR(.text.head) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ __init_end = . + __KERNEL_TEXT_OFFSET; ++ KERNEL_TEXT_OFFSET = . + __KERNEL_TEXT_OFFSET; ++ _text = .; /* Text and read-only data */ ++ *(.text.head) ++ } :text = 0x9090 ++ ++ /* read-only */ ++ .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { ++ . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ ++ *(.text.page_aligned) ++ TEXT_TEXT ++ SCHED_TEXT ++ LOCK_TEXT ++ KPROBES_TEXT ++ *(.fixup) ++ *(.gnu.warning) ++ _etext = .; /* End of text section */ ++ } :text = 0x9090 ++ ++ . += __KERNEL_TEXT_OFFSET; ++ ++ . = ALIGN(4096); ++ NOTES :rodata :note ++ ++ . = ALIGN(16); /* Exception table */ ++ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { ++ __start___ex_table = .; ++ *(__ex_table) ++ __stop___ex_table = .; ++ } :rodata ++ ++ RO_DATA(PAGE_SIZE) ++ ++ . = ALIGN(PAGE_SIZE); ++ .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { ++ *(.idt) ++ . 
= ALIGN(PAGE_SIZE); ++ *(.empty_zero_page) ++ *(.swapper_pg_pmd) ++ *(.swapper_pg_dir) ++ ++#if defined(CONFIG_PAX_KERNEXEC) && !defined(CONFIG_MODULES) ++ . = ALIGN(PMD_SIZE); ++#endif ++ ++ } ++ ++#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_MODULES) ++ . = ALIGN(PAGE_SIZE); ++ .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { ++ MODULES_VADDR = .; ++ BYTE(0) ++ . += (6 * 1024 * 1024); ++ . = ALIGN(PMD_SIZE); ++ MODULES_END = . - 1; ++ } ++#endif ++ ++ /* writeable */ ++ . = ALIGN(PAGE_SIZE); ++ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ ++ _data = .; ++ DATA_DATA ++ CONSTRUCTORS ++ } :data ++ ++ . = ALIGN(PAGE_SIZE); ++ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { ++ __nosave_begin = .; ++ *(.data.nosave) ++ . = ALIGN(PAGE_SIZE); ++ __nosave_end = .; ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { ++ *(.data.page_aligned) ++ } ++ ++ . = ALIGN(32); ++ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { ++ *(.data.cacheline_aligned) ++ } ++ ++ /* rarely changed data like cpu maps */ ++ . = ALIGN(32); ++ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { ++ *(.data.read_mostly) ++ _edata = .; /* End of data section */ ++ } ++ ++ . 
= ALIGN(THREAD_SIZE); /* init_task */ ++ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { ++ *(.data.init_task) ++ } ++ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { +- __init_end = .; + __bss_start = .; /* BSS */ + *(.bss.page_aligned) + *(.bss) +diff -urNp a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S +--- a/arch/x86/kernel/vmlinux_64.lds.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/vmlinux_64.lds.S 2009-05-24 18:10:25.045210367 -0700 +@@ -16,7 +16,7 @@ jiffies_64 = jiffies; + _proxy_pda = 1; + PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ +- data PT_LOAD FLAGS(7); /* RWE */ ++ data PT_LOAD FLAGS(6); /* RW_ */ + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ +@@ -49,17 +49,20 @@ SECTIONS + __stop___ex_table = .; + } :text = 0x9090 + +- RODATA ++ RO_DATA(PAGE_SIZE) + ++#ifdef CONFIG_PAX_KERNEXEC ++ . = ALIGN(2*1024*1024); /* Align data segment to PMD size boundary */ ++#else + . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ ++#endif + /* Data */ ++ _data = .; + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + } :data + +- _edata = .; /* End of data section */ +- + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +@@ -70,9 +73,27 @@ SECTIONS + *(.data.read_mostly) + } + ++ . = ALIGN(THREAD_SIZE); /* init_task */ ++ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { ++ *(.data.init_task) ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { ++ *(.data.page_aligned) ++ } ++ ++ . = ALIGN(PAGE_SIZE); ++ __nosave_begin = .; ++ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } ++ . 
= ALIGN(PAGE_SIZE); ++ __nosave_end = .; ++ ++ _edata = .; /* End of data section */ ++ + #define VSYSCALL_ADDR (-10*1024*1024) +-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) ++#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data_nosave) + SIZEOF(.data_nosave) + 4095) & ~(4095)) ++#define VSYSCALL_VIRT_ADDR ((ADDR(.data_nosave) + SIZEOF(.data_nosave) + 4095) & ~(4095)) + + #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) + #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) +@@ -120,23 +141,13 @@ SECTIONS + #undef VVIRT_OFFSET + #undef VVIRT + +- . = ALIGN(THREAD_SIZE); /* init_task */ +- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { +- *(.data.init_task) +- }:data.init +- +- . = ALIGN(PAGE_SIZE); +- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +- *(.data.page_aligned) +- } +- + /* might get freed after init */ + . = ALIGN(PAGE_SIZE); + __smp_alt_begin = .; + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) +- } ++ } :data.init + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + __smp_alt_end = .; +@@ -212,16 +223,11 @@ SECTIONS + . = ALIGN(PAGE_SIZE); + __init_end = .; + +- . = ALIGN(PAGE_SIZE); +- __nosave_begin = .; +- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } +- . = ALIGN(PAGE_SIZE); +- __nosave_end = .; +- + __bss_start = .; /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + *(.bss.page_aligned) + *(.bss) ++ . 
= ALIGN(2*1024*1024); + } + __bss_stop = .; + +diff -urNp a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c +--- a/arch/x86/kernel/vsyscall_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kernel/vsyscall_64.c 2009-05-24 18:10:25.046209657 -0700 +@@ -236,13 +236,13 @@ static ctl_table kernel_table2[] = { + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = vsyscall_sysctl_change }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static ctl_table kernel_root_table2[] = { + { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, + .child = kernel_table2 }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + #endif + +diff -urNp a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +--- a/arch/x86/kvm/svm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kvm/svm.c 2009-05-24 18:10:25.047209645 -0700 +@@ -1521,7 +1521,19 @@ static void reload_tss(struct kvm_vcpu * + int cpu = raw_smp_processor_id(); + + struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + load_TR_desc(); + } + +@@ -1927,7 +1939,7 @@ static int get_npt_level(void) + #endif + } + +-static struct kvm_x86_ops svm_x86_ops = { ++static const struct kvm_x86_ops svm_x86_ops = { + .cpu_has_kvm_support = has_svm, + .disabled_by_bios = is_disabled, + .hardware_setup = svm_hardware_setup, +diff -urNp a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +--- a/arch/x86/kvm/vmx.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kvm/vmx.c 2009-05-24 18:10:25.048209704 -0700 +@@ -122,7 +122,7 @@ static struct vmcs_config { + u32 vmentry_ctrl; + } vmcs_config; + +-struct vmx_capability { ++static struct vmx_capability { + u32 ept; + u32 vpid; + } vmx_capability; +@@ -491,9 +491,23 
@@ static void reload_tss(void) + struct descriptor_table gdt; + struct desc_struct *descs; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + kvm_get_gdt(&gdt); + descs = (void *)gdt.base; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + load_TR_desc(); + } + +@@ -2165,7 +2179,7 @@ static int vmx_vcpu_setup(struct vcpu_vm + vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ + + asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); +- vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ ++ vmcs_writel(HOST_RIP, ktla_ktva(kvm_vmx_return)); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); +@@ -3266,6 +3280,12 @@ static void vmx_vcpu_run(struct kvm_vcpu + "jmp .Lkvm_vmx_return \n\t" + ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" + ".Lkvm_vmx_return: " ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ "ljmp %[cs],$.Lkvm_vmx_return2\n\t" ++ ".Lkvm_vmx_return2: " ++#endif ++ + /* Save guest registers, load host registers, keep flags */ + "xchg %0, (%%"R"sp) \n\t" + "mov %%"R"ax, %c[rax](%0) \n\t" +@@ -3312,6 +3332,11 @@ static void vmx_vcpu_run(struct kvm_vcpu + [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), + #endif + [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ ,[cs]"i"(__KERNEL_CS) ++#endif ++ + : "cc", "memory" + , R"bx", R"di", R"si" + #ifdef CONFIG_X86_64 +@@ -3330,7 +3355,7 @@ static void vmx_vcpu_run(struct kvm_vcpu + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; + +- asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); ++ asm("mov %0, %%ds; mov %0, %%es" : : "r"(__KERNEL_DS)); + vmx->launched = 1; + + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 
+@@ -3454,7 +3479,7 @@ static int get_ept_level(void) + return VMX_EPT_DEFAULT_GAW + 1; + } + +-static struct kvm_x86_ops vmx_x86_ops = { ++static const struct kvm_x86_ops vmx_x86_ops = { + .cpu_has_kvm_support = cpu_has_kvm_support, + .disabled_by_bios = vmx_disabled_by_bios, + .hardware_setup = hardware_setup, +diff -urNp a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +--- a/arch/x86/kvm/x86.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/kvm/x86.c 2009-05-24 18:10:25.050209541 -0700 +@@ -68,41 +68,41 @@ static u64 __read_mostly efer_reserved_b + static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); + +-struct kvm_x86_ops *kvm_x86_ops; ++const struct kvm_x86_ops *kvm_x86_ops; + EXPORT_SYMBOL_GPL(kvm_x86_ops); + + struct kvm_stats_debugfs_item debugfs_entries[] = { +- { "pf_fixed", VCPU_STAT(pf_fixed) }, +- { "pf_guest", VCPU_STAT(pf_guest) }, +- { "tlb_flush", VCPU_STAT(tlb_flush) }, +- { "invlpg", VCPU_STAT(invlpg) }, +- { "exits", VCPU_STAT(exits) }, +- { "io_exits", VCPU_STAT(io_exits) }, +- { "mmio_exits", VCPU_STAT(mmio_exits) }, +- { "signal_exits", VCPU_STAT(signal_exits) }, +- { "irq_window", VCPU_STAT(irq_window_exits) }, +- { "nmi_window", VCPU_STAT(nmi_window_exits) }, +- { "halt_exits", VCPU_STAT(halt_exits) }, +- { "halt_wakeup", VCPU_STAT(halt_wakeup) }, +- { "hypercalls", VCPU_STAT(hypercalls) }, +- { "request_irq", VCPU_STAT(request_irq_exits) }, +- { "irq_exits", VCPU_STAT(irq_exits) }, +- { "host_state_reload", VCPU_STAT(host_state_reload) }, +- { "efer_reload", VCPU_STAT(efer_reload) }, +- { "fpu_reload", VCPU_STAT(fpu_reload) }, +- { "insn_emulation", VCPU_STAT(insn_emulation) }, +- { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, +- { "irq_injections", VCPU_STAT(irq_injections) }, +- { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, +- { "mmu_pte_write", VM_STAT(mmu_pte_write) }, +- { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, +- { "mmu_pde_zapped", 
VM_STAT(mmu_pde_zapped) }, +- { "mmu_flooded", VM_STAT(mmu_flooded) }, +- { "mmu_recycled", VM_STAT(mmu_recycled) }, +- { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, +- { "mmu_unsync", VM_STAT(mmu_unsync) }, +- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, +- { "largepages", VM_STAT(lpages) }, ++ { "pf_fixed", VCPU_STAT(pf_fixed), NULL }, ++ { "pf_guest", VCPU_STAT(pf_guest), NULL }, ++ { "tlb_flush", VCPU_STAT(tlb_flush), NULL }, ++ { "invlpg", VCPU_STAT(invlpg), NULL }, ++ { "exits", VCPU_STAT(exits), NULL }, ++ { "io_exits", VCPU_STAT(io_exits), NULL }, ++ { "mmio_exits", VCPU_STAT(mmio_exits), NULL }, ++ { "signal_exits", VCPU_STAT(signal_exits), NULL }, ++ { "irq_window", VCPU_STAT(irq_window_exits), NULL }, ++ { "nmi_window", VCPU_STAT(nmi_window_exits), NULL }, ++ { "halt_exits", VCPU_STAT(halt_exits), NULL }, ++ { "halt_wakeup", VCPU_STAT(halt_wakeup), NULL }, ++ { "hypercalls", VCPU_STAT(hypercalls), NULL }, ++ { "request_irq", VCPU_STAT(request_irq_exits), NULL }, ++ { "irq_exits", VCPU_STAT(irq_exits), NULL }, ++ { "host_state_reload", VCPU_STAT(host_state_reload), NULL }, ++ { "efer_reload", VCPU_STAT(efer_reload), NULL }, ++ { "fpu_reload", VCPU_STAT(fpu_reload), NULL }, ++ { "insn_emulation", VCPU_STAT(insn_emulation), NULL }, ++ { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail), NULL }, ++ { "irq_injections", VCPU_STAT(irq_injections), NULL }, ++ { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped), NULL }, ++ { "mmu_pte_write", VM_STAT(mmu_pte_write), NULL }, ++ { "mmu_pte_updated", VM_STAT(mmu_pte_updated), NULL }, ++ { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped), NULL }, ++ { "mmu_flooded", VM_STAT(mmu_flooded), NULL }, ++ { "mmu_recycled", VM_STAT(mmu_recycled), NULL }, ++ { "mmu_cache_miss", VM_STAT(mmu_cache_miss), NULL }, ++ { "mmu_unsync", VM_STAT(mmu_unsync), NULL }, ++ { "remote_tlb_flush", VM_STAT(remote_tlb_flush), NULL }, ++ { "largepages", VM_STAT(lpages), NULL }, + { NULL } + }; + +@@ -1307,7 +1307,7 @@ static int 
kvm_vcpu_ioctl_set_lapic(stru + static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) + { +- if (irq->irq < 0 || irq->irq >= 256) ++ if (irq->irq >= 256) + return -EINVAL; + if (irqchip_in_kernel(vcpu->kvm)) + return -ENXIO; +@@ -2512,10 +2512,10 @@ int kvm_emulate_pio_string(struct kvm_vc + } + EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); + +-int kvm_arch_init(void *opaque) ++int kvm_arch_init(const void *opaque) + { + int r; +- struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; ++ const struct kvm_x86_ops *ops = (const struct kvm_x86_ops *)opaque; + + if (kvm_x86_ops) { + printk(KERN_ERR "kvm: already loaded the other module\n"); +diff -urNp a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S +--- a/arch/x86/lib/checksum_32.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/checksum_32.S 2009-05-24 18:10:25.052006085 -0700 +@@ -28,7 +28,8 @@ + #include <linux/linkage.h> + #include <asm/dwarf2.h> + #include <asm/errno.h> +- ++#include <asm/segment.h> ++ + /* + * computes a partial checksum, e.g. 
for TCP/UDP fragments + */ +@@ -304,9 +305,22 @@ unsigned int csum_partial_copy_generic ( + + #define ARGBASE 16 + #define FP 12 +- +-ENTRY(csum_partial_copy_generic) ++ ++ENTRY(csum_partial_copy_generic_to_user) + CFI_STARTPROC ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp csum_partial_copy_generic ++ ++ENTRY(csum_partial_copy_generic_from_user) ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ ++ENTRY(csum_partial_copy_generic) + subl $4,%esp + CFI_ADJUST_CFA_OFFSET 4 + pushl %edi +@@ -331,7 +345,7 @@ ENTRY(csum_partial_copy_generic) + jmp 4f + SRC(1: movw (%esi), %bx ) + addl $2, %esi +-DST( movw %bx, (%edi) ) ++DST( movw %bx, %es:(%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +@@ -343,30 +357,30 @@ DST( movw %bx, (%edi) ) + SRC(1: movl (%esi), %ebx ) + SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, (%edi) ) ++DST( movl %ebx, %es:(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 4(%edi) ) ++DST( movl %edx, %es:4(%edi) ) + + SRC( movl 8(%esi), %ebx ) + SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 8(%edi) ) ++DST( movl %ebx, %es:8(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 12(%edi) ) ++DST( movl %edx, %es:12(%edi) ) + + SRC( movl 16(%esi), %ebx ) + SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 16(%edi) ) ++DST( movl %ebx, %es:16(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 20(%edi) ) ++DST( movl %edx, %es:20(%edi) ) + + SRC( movl 24(%esi), %ebx ) + SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +-DST( movl %ebx, 24(%edi) ) ++DST( movl %ebx, %es:24(%edi) ) + adcl %edx, %eax +-DST( movl %edx, 28(%edi) ) ++DST( movl %edx, %es:28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi +@@ -380,7 +394,7 @@ DST( movl %edx, 28(%edi) ) + shrl $2, %edx # This clears CF + SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +-DST( movl %ebx, (%edi) ) ++DST( movl %ebx, %es:(%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx +@@ -392,12 
+406,12 @@ DST( movl %ebx, (%edi) ) + jb 5f + SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +-DST( movw %cx, (%edi) ) ++DST( movw %cx, %es:(%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx + SRC(5: movb (%esi), %cl ) +-DST( movb %cl, (%edi) ) ++DST( movb %cl, %es:(%edi) ) + 6: addl %ecx, %eax + adcl $0, %eax + 7: +@@ -408,7 +422,7 @@ DST( movb %cl, (%edi) ) + + 6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + + # zero the complete destination - computing the rest + # is too much work +@@ -421,11 +435,19 @@ DST( movb %cl, (%edi) ) + + 6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr +- movl $-EFAULT,(%ebx) ++ movl $-EFAULT,%ss:(%ebx) + jmp 5000b + + .previous + ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx +@@ -439,26 +461,41 @@ DST( movb %cl, (%edi) ) + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC +-ENDPROC(csum_partial_copy_generic) ++ENDPROC(csum_partial_copy_generic_to_user) + + #else + + /* Version for PentiumII/PPro */ + + #define ROUND1(x) \ ++ nop; nop; nop; \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ +- DST(movl %ebx, x(%edi) ) ; ++ DST(movl %ebx, %es:x(%edi)) ; + + #define ROUND(x) \ ++ nop; nop; nop; \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ +- DST(movl %ebx, x(%edi) ) ; ++ DST(movl %ebx, %es:x(%edi)) ; + + #define ARGBASE 12 +- +-ENTRY(csum_partial_copy_generic) ++ ++ENTRY(csum_partial_copy_generic_to_user) + CFI_STARTPROC ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp csum_partial_copy_generic ++ ++ENTRY(csum_partial_copy_generic_from_user) ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ ++ENTRY(csum_partial_copy_generic) + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 +@@ -482,7 +519,7 @@ 
ENTRY(csum_partial_copy_generic) + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx +- lea 3f(%ebx,%ebx), %ebx ++ lea 3f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + 1: addl $64,%esi +@@ -503,19 +540,19 @@ ENTRY(csum_partial_copy_generic) + jb 5f + SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +-DST( movw %dx, (%edi) ) ++DST( movw %dx, %es:(%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx + 5: + SRC( movb (%esi), %dl ) +-DST( movb %dl, (%edi) ) ++DST( movb %dl, %es:(%edi) ) + 6: addl %edx, %eax + adcl $0, %eax + 7: + .section .fixup, "ax" + 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len +@@ -523,10 +560,18 @@ DST( movb %dl, (%edi) ) + rep; stosb + jmp 7b + 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr +- movl $-EFAULT, (%ebx) ++ movl $-EFAULT, %ss:(%ebx) + jmp 7b + .previous + ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ popl %es ++ CFI_ADJUST_CFA_OFFSET -4 + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi +@@ -538,7 +583,7 @@ DST( movb %dl, (%edi) ) + CFI_RESTORE ebx + ret + CFI_ENDPROC +-ENDPROC(csum_partial_copy_generic) ++ENDPROC(csum_partial_copy_generic_to_user) + + #undef ROUND + #undef ROUND1 +diff -urNp a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S +--- a/arch/x86/lib/clear_page_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/clear_page_64.S 2009-05-24 18:10:25.052006085 -0700 +@@ -44,7 +44,7 @@ ENDPROC(clear_page) + + #include <asm/cpufeature.h> + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp <disp8> */ + .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ + 2: +diff -urNp a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S +--- a/arch/x86/lib/copy_page_64.S 
2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/copy_page_64.S 2009-05-24 18:10:25.055406562 -0700 +@@ -104,7 +104,7 @@ ENDPROC(copy_page) + + #include <asm/cpufeature.h> + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp <disp8> */ + .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ + 2: +diff -urNp a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S +--- a/arch/x86/lib/copy_user_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/copy_user_64.S 2009-05-24 18:10:25.056209611 -0700 +@@ -21,7 +21,7 @@ + .byte 0xe9 /* 32bit jump */ + .long \orig-1f /* by default jump to orig */ + 1: +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 2: .byte 0xe9 /* near jump with 32bit immediate */ + .long \alt-1b /* offset */ /* or alternatively to alt */ + .previous +@@ -106,6 +106,8 @@ ENDPROC(__copy_from_user_inatomic) + ENTRY(bad_from_user) + bad_from_user: + CFI_STARTPROC ++ testl %edx,%edx ++ js bad_to_user + movl %edx,%ecx + xorl %eax,%eax + rep +diff -urNp a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S +--- a/arch/x86/lib/getuser.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/getuser.S 2009-05-24 18:10:25.056209611 -0700 +@@ -33,6 +33,7 @@ + #include <asm/asm-offsets.h> + #include <asm/thread_info.h> + #include <asm/asm.h> ++#include <asm/segment.h> + + .text + ENTRY(__get_user_1) +@@ -40,7 +41,19 @@ ENTRY(__get_user_1) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 1: movzb (%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -53,7 +66,19 @@ ENTRY(__get_user_2) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 2: movzwl -1(%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ 
pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -66,7 +91,19 @@ ENTRY(__get_user_4) + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 3: mov -3(%_ASM_AX),%edx ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %eax,%eax + ret + CFI_ENDPROC +@@ -89,6 +126,12 @@ ENDPROC(__get_user_8) + + bad_get_user: + CFI_STARTPROC ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ pop %ds ++#endif ++ + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX + ret +diff -urNp a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S +--- a/arch/x86/lib/memcpy_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/memcpy_64.S 2009-05-24 18:10:25.057209600 -0700 +@@ -114,7 +114,7 @@ ENDPROC(__memcpy) + /* Some CPUs run faster using the string copy instructions. + It is also a lot simpler. Use this when possible */ + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp <disp8> */ + .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ + 2: +diff -urNp a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S +--- a/arch/x86/lib/memset_64.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/memset_64.S 2009-05-24 18:10:25.057209600 -0700 +@@ -118,7 +118,7 @@ ENDPROC(__memset) + + #include <asm/cpufeature.h> + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement,"a" + 1: .byte 0xeb /* jmp <disp8> */ + .byte (memset_c - memset) - (2f - 1b) /* offset */ + 2: +diff -urNp a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c +--- a/arch/x86/lib/mmx_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/mmx_32.c 2009-05-24 18:10:25.058209378 -0700 +@@ -29,6 +29,7 @@ void *_mmx_memcpy(void *to, const void * + { + void *p; + int i; ++ unsigned long cr0; + + if (unlikely(in_interrupt())) + return __memcpy(to, from, len); +@@ -39,44 +40,72 @@ void *_mmx_memcpy(void *to, const void * + kernel_fpu_begin(); 
+ + __asm__ __volatile__ ( +- "1: prefetch (%0)\n" /* This set is 28 bytes */ +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" /* This set is 28 bytes */ ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from)); ++ : "=&r" (cr0) : "r" (from) : "ax"); + + for ( ; i > 5; i--) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movq 8(%0), %%mm1\n" +- " movq 16(%0), %%mm2\n" +- " movq 24(%0), %%mm3\n" +- " movq %%mm0, (%1)\n" +- " movq %%mm1, 8(%1)\n" +- " movq %%mm2, 16(%1)\n" +- " movq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm0\n" +- " movq 40(%0), %%mm1\n" +- " movq 48(%0), %%mm2\n" +- " movq 56(%0), %%mm3\n" +- " movq %%mm0, 32(%1)\n" +- " movq %%mm1, 40(%1)\n" +- " movq %%mm2, 48(%1)\n" +- " movq %%mm3, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movq 8(%1), %%mm1\n" ++ " movq 16(%1), %%mm2\n" ++ " movq 24(%1), %%mm3\n" ++ " movq %%mm0, (%2)\n" ++ " movq %%mm1, 8(%2)\n" ++ " movq %%mm2, 16(%2)\n" ++ " movq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm0\n" ++ " movq 40(%1), %%mm1\n" ++ " movq 48(%1), %%mm2\n" ++ " movq 56(%1), %%mm3\n" ++ " movq %%mm0, 32(%2)\n" ++ " movq %%mm1, 40(%2)\n" ++ " movq %%mm2, 48(%2)\n" ++ " movq %%mm3, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ 
++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from), "r" (to) : "memory"); ++ : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +@@ -158,6 +187,7 @@ static void fast_clear_page(void *page) + static void fast_copy_page(void *to, void *from) + { + int i; ++ unsigned long cr0; + + kernel_fpu_begin(); + +@@ -166,42 +196,70 @@ static void fast_copy_page(void *to, voi + * but that is for later. -AV + */ + __asm__ __volatile__( +- "1: prefetch (%0)\n" +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from)); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); + + for (i = 0; i < (4096-320)/64; i++) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movntq %%mm0, (%1)\n" +- " movq 8(%0), %%mm1\n" +- " movntq %%mm1, 8(%1)\n" +- " movq 16(%0), %%mm2\n" +- " movntq %%mm2, 16(%1)\n" +- " movq 24(%0), %%mm3\n" +- " movntq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm4\n" +- " movntq %%mm4, 32(%1)\n" +- " movq 40(%0), %%mm5\n" +- " movntq %%mm5, 40(%1)\n" +- " movq 48(%0), %%mm6\n" +- " movntq %%mm6, 48(%1)\n" +- " movq 56(%0), %%mm7\n" +- " movntq %%mm7, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movntq %%mm0, (%2)\n" ++ " movq 8(%1), %%mm1\n" ++ " movntq %%mm1, 
8(%2)\n" ++ " movq 16(%1), %%mm2\n" ++ " movntq %%mm2, 16(%2)\n" ++ " movq 24(%1), %%mm3\n" ++ " movntq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm4\n" ++ " movntq %%mm4, 32(%2)\n" ++ " movq 40(%1), %%mm5\n" ++ " movntq %%mm5, 40(%2)\n" ++ " movq 48(%1), %%mm6\n" ++ " movntq %%mm6, 48(%2)\n" ++ " movq 56(%1), %%mm7\n" ++ " movntq %%mm7, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +@@ -280,47 +338,76 @@ static void fast_clear_page(void *page) + static void fast_copy_page(void *to, void *from) + { + int i; ++ unsigned long cr0; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( +- "1: prefetch (%0)\n" +- " prefetch 64(%0)\n" +- " prefetch 128(%0)\n" +- " prefetch 192(%0)\n" +- " prefetch 256(%0)\n" ++ "1: prefetch (%1)\n" ++ " prefetch 64(%1)\n" ++ " prefetch 128(%1)\n" ++ " prefetch 192(%1)\n" ++ " prefetch 256(%1)\n" + "2: \n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ "3: \n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" +- _ASM_EXTABLE(1b, 3b) : : "r" (from)); ++ _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); + + for (i = 0; i < 4096/64; i++) { + __asm__ __volatile__ ( +- "1: prefetch 320(%0)\n" +- "2: movq (%0), %%mm0\n" +- " movq 8(%0), %%mm1\n" +- " movq 16(%0), 
%%mm2\n" +- " movq 24(%0), %%mm3\n" +- " movq %%mm0, (%1)\n" +- " movq %%mm1, 8(%1)\n" +- " movq %%mm2, 16(%1)\n" +- " movq %%mm3, 24(%1)\n" +- " movq 32(%0), %%mm0\n" +- " movq 40(%0), %%mm1\n" +- " movq 48(%0), %%mm2\n" +- " movq 56(%0), %%mm3\n" +- " movq %%mm0, 32(%1)\n" +- " movq %%mm1, 40(%1)\n" +- " movq %%mm2, 48(%1)\n" +- " movq %%mm3, 56(%1)\n" ++ "1: prefetch 320(%1)\n" ++ "2: movq (%1), %%mm0\n" ++ " movq 8(%1), %%mm1\n" ++ " movq 16(%1), %%mm2\n" ++ " movq 24(%1), %%mm3\n" ++ " movq %%mm0, (%2)\n" ++ " movq %%mm1, 8(%2)\n" ++ " movq %%mm2, 16(%2)\n" ++ " movq %%mm3, 24(%2)\n" ++ " movq 32(%1), %%mm0\n" ++ " movq 40(%1), %%mm1\n" ++ " movq 48(%1), %%mm2\n" ++ " movq 56(%1), %%mm3\n" ++ " movq %%mm0, 32(%2)\n" ++ " movq %%mm1, 40(%2)\n" ++ " movq %%mm2, 48(%2)\n" ++ " movq %%mm3, 56(%2)\n" + ".section .fixup, \"ax\"\n" +- "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ "3:\n" ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %%cr0, %0\n" ++ " movl %0, %%eax\n" ++ " andl $0xFFFEFFFF, %%eax\n" ++ " movl %%eax, %%cr0\n" ++#endif ++ ++ " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ " movl %0, %%cr0\n" ++#endif ++ + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) +- : : "r" (from), "r" (to) : "memory"); ++ : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); + + from += 64; + to += 64; +diff -urNp a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S +--- a/arch/x86/lib/putuser.S 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/putuser.S 2009-05-24 18:10:25.058209378 -0700 +@@ -15,6 +15,7 @@ + #include <asm/thread_info.h> + #include <asm/errno.h> + #include <asm/asm.h> ++#include <asm/segment.h> + + + /* +@@ -39,7 +40,19 @@ ENTRY(__put_user_1) + ENTER + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 1: movb %al,(%_ASM_CX) ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_1) +@@ -50,7 +63,19 
@@ ENTRY(__put_user_2) + sub $1,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 2: movw %ax,(%_ASM_CX) ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_2) +@@ -61,7 +86,19 @@ ENTRY(__put_user_4) + sub $3,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 3: movl %eax,(%_ASM_CX) ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_4) +@@ -72,16 +109,34 @@ ENTRY(__put_user_8) + sub $7,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user ++ ++#ifdef CONFIG_X86_32 ++ pushl $(__USER_DS) ++ popl %ds ++#endif ++ + 4: mov %_ASM_AX,(%_ASM_CX) + #ifdef CONFIG_X86_32 + 5: movl %edx,4(%_ASM_CX) + #endif ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + xor %eax,%eax + EXIT + ENDPROC(__put_user_8) + + bad_put_user: + CFI_STARTPROC ++ ++#ifdef CONFIG_X86_32 ++ pushl %ss ++ popl %ds ++#endif ++ + movl $-EFAULT,%eax + EXIT + END(bad_put_user) +diff -urNp a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c +--- a/arch/x86/lib/usercopy_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/lib/usercopy_32.c 2009-05-24 18:10:25.059209297 -0700 +@@ -36,31 +36,38 @@ static inline int __movsl_is_ok(unsigned + * Copy a null terminated string from userspace. 
+ */ + +-#define __do_strncpy_from_user(dst, src, count, res) \ +-do { \ +- int __d0, __d1, __d2; \ +- might_sleep(); \ +- __asm__ __volatile__( \ +- " testl %1,%1\n" \ +- " jz 2f\n" \ +- "0: lodsb\n" \ +- " stosb\n" \ +- " testb %%al,%%al\n" \ +- " jz 1f\n" \ +- " decl %1\n" \ +- " jnz 0b\n" \ +- "1: subl %1,%0\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "3: movl %5,%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- _ASM_EXTABLE(0b,3b) \ +- : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ +- "=&D" (__d2) \ +- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ +- : "memory"); \ +-} while (0) ++static long __do_strncpy_from_user(char *dst, const char __user *src, long count) ++{ ++ int __d0, __d1, __d2; ++ long res = -EFAULT; ++ ++ might_sleep(); ++ __asm__ __volatile__( ++ " movw %w10,%%ds\n" ++ " testl %1,%1\n" ++ " jz 2f\n" ++ "0: lodsb\n" ++ " stosb\n" ++ " testb %%al,%%al\n" ++ " jz 1f\n" ++ " decl %1\n" ++ " jnz 0b\n" ++ "1: subl %1,%0\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "3: movl %5,%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ _ASM_EXTABLE(0b,3b) ++ : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), ++ "=&D" (__d2) ++ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst), ++ "r"(__USER_DS) ++ : "memory"); ++ return res; ++} + + /** + * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. 
+@@ -85,9 +92,7 @@ do { \ + long + __strncpy_from_user(char *dst, const char __user *src, long count) + { +- long res; +- __do_strncpy_from_user(dst, src, count, res); +- return res; ++ return __do_strncpy_from_user(dst, src, count); + } + EXPORT_SYMBOL(__strncpy_from_user); + +@@ -114,7 +119,7 @@ strncpy_from_user(char *dst, const char + { + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) +- __do_strncpy_from_user(dst, src, count, res); ++ res = __do_strncpy_from_user(dst, src, count); + return res; + } + EXPORT_SYMBOL(strncpy_from_user); +@@ -123,24 +128,30 @@ EXPORT_SYMBOL(strncpy_from_user); + * Zero Userspace + */ + +-#define __do_clear_user(addr,size) \ +-do { \ +- int __d0; \ +- might_sleep(); \ +- __asm__ __volatile__( \ +- "0: rep; stosl\n" \ +- " movl %2,%0\n" \ +- "1: rep; stosb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "3: lea 0(%2,%0,4),%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- _ASM_EXTABLE(0b,3b) \ +- _ASM_EXTABLE(1b,2b) \ +- : "=&c"(size), "=&D" (__d0) \ +- : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +-} while (0) ++static unsigned long __do_clear_user(void __user *addr, unsigned long size) ++{ ++ int __d0; ++ ++ might_sleep(); ++ __asm__ __volatile__( ++ " movw %w6,%%es\n" ++ "0: rep; stosl\n" ++ " movl %2,%0\n" ++ "1: rep; stosb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" ++ ".section .fixup,\"ax\"\n" ++ "3: lea 0(%2,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ _ASM_EXTABLE(0b,3b) ++ _ASM_EXTABLE(1b,2b) ++ : "=&c"(size), "=&D" (__d0) ++ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0), ++ "r"(__USER_DS)); ++ return size; ++} + + /** + * clear_user: - Zero a block of memory in user space. 
+@@ -157,7 +168,7 @@ clear_user(void __user *to, unsigned lon + { + might_sleep(); + if (access_ok(VERIFY_WRITE, to, n)) +- __do_clear_user(to, n); ++ n = __do_clear_user(to, n); + return n; + } + EXPORT_SYMBOL(clear_user); +@@ -176,8 +187,7 @@ EXPORT_SYMBOL(clear_user); + unsigned long + __clear_user(void __user *to, unsigned long n) + { +- __do_clear_user(to, n); +- return n; ++ return __do_clear_user(to, n); + } + EXPORT_SYMBOL(__clear_user); + +@@ -200,14 +210,17 @@ long strnlen_user(const char __user *s, + might_sleep(); + + __asm__ __volatile__( ++ " movw %w8,%%es\n" + " testl %0, %0\n" + " jz 3f\n" +- " andl %0,%%ecx\n" ++ " movl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" +@@ -219,7 +232,7 @@ long strnlen_user(const char __user *s, + " .long 0b,2b\n" + ".previous" + :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) +- :"0" (n), "1" (s), "2" (0), "3" (mask) ++ :"0" (n), "1" (s), "2" (0), "3" (mask), "r" (__USER_DS) + :"cc"); + return res & mask; + } +@@ -227,10 +240,11 @@ EXPORT_SYMBOL(strnlen_user); + + #ifdef CONFIG_X86_INTEL_USERCOPY + static unsigned long +-__copy_user_intel(void __user *to, const void *from, unsigned long size) ++__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) + { + int d0, d1; + __asm__ __volatile__( ++ " movw %w6, %%es\n" + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -239,36 +253,36 @@ __copy_user_intel(void __user *to, const + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" +- "5: movl %%eax, 0(%3)\n" +- "6: movl %%edx, 4(%3)\n" ++ "5: movl %%eax, %%es:0(%3)\n" ++ "6: movl %%edx, %%es:4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" +- "9: movl %%eax, 8(%3)\n" +- "10: movl %%edx, 12(%3)\n" ++ "9: movl %%eax, %%es:8(%3)\n" ++ "10: movl %%edx, %%es:12(%3)\n" + "11: movl 
16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" +- "13: movl %%eax, 16(%3)\n" +- "14: movl %%edx, 20(%3)\n" ++ "13: movl %%eax, %%es:16(%3)\n" ++ "14: movl %%edx, %%es:20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" +- "17: movl %%eax, 24(%3)\n" +- "18: movl %%edx, 28(%3)\n" ++ "17: movl %%eax, %%es:24(%3)\n" ++ "18: movl %%edx, %%es:28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" +- "21: movl %%eax, 32(%3)\n" +- "22: movl %%edx, 36(%3)\n" ++ "21: movl %%eax, %%es:32(%3)\n" ++ "22: movl %%edx, %%es:36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" +- "25: movl %%eax, 40(%3)\n" +- "26: movl %%edx, 44(%3)\n" ++ "25: movl %%eax, %%es:40(%3)\n" ++ "26: movl %%edx, %%es:44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" +- "29: movl %%eax, 48(%3)\n" +- "30: movl %%edx, 52(%3)\n" ++ "29: movl %%eax, %%es:48(%3)\n" ++ "30: movl %%edx, %%es:52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" +- "33: movl %%eax, 56(%3)\n" +- "34: movl %%edx, 60(%3)\n" ++ "33: movl %%eax, %%es:56(%3)\n" ++ "34: movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -282,6 +296,8 @@ __copy_user_intel(void __user *to, const + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" +@@ -328,7 +344,117 @@ __copy_user_intel(void __user *to, const + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) ++ : "eax", "edx", "memory"); ++ return size; ++} ++ ++static unsigned long ++__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) ++{ ++ int d0, d1; ++ __asm__ __volatile__( ++ " movw %w6, %%ds\n" ++ " .align 2,0x90\n" ++ "1: movl 32(%4), %%eax\n" ++ " cmpl $67, %0\n" ++ " jbe 3f\n" ++ "2: movl 64(%4), %%eax\n" ++ " .align 
2,0x90\n" ++ "3: movl 0(%4), %%eax\n" ++ "4: movl 4(%4), %%edx\n" ++ "5: movl %%eax, %%es:0(%3)\n" ++ "6: movl %%edx, %%es:4(%3)\n" ++ "7: movl 8(%4), %%eax\n" ++ "8: movl 12(%4),%%edx\n" ++ "9: movl %%eax, %%es:8(%3)\n" ++ "10: movl %%edx, %%es:12(%3)\n" ++ "11: movl 16(%4), %%eax\n" ++ "12: movl 20(%4), %%edx\n" ++ "13: movl %%eax, %%es:16(%3)\n" ++ "14: movl %%edx, %%es:20(%3)\n" ++ "15: movl 24(%4), %%eax\n" ++ "16: movl 28(%4), %%edx\n" ++ "17: movl %%eax, %%es:24(%3)\n" ++ "18: movl %%edx, %%es:28(%3)\n" ++ "19: movl 32(%4), %%eax\n" ++ "20: movl 36(%4), %%edx\n" ++ "21: movl %%eax, %%es:32(%3)\n" ++ "22: movl %%edx, %%es:36(%3)\n" ++ "23: movl 40(%4), %%eax\n" ++ "24: movl 44(%4), %%edx\n" ++ "25: movl %%eax, %%es:40(%3)\n" ++ "26: movl %%edx, %%es:44(%3)\n" ++ "27: movl 48(%4), %%eax\n" ++ "28: movl 52(%4), %%edx\n" ++ "29: movl %%eax, %%es:48(%3)\n" ++ "30: movl %%edx, %%es:52(%3)\n" ++ "31: movl 56(%4), %%eax\n" ++ "32: movl 60(%4), %%edx\n" ++ "33: movl %%eax, %%es:56(%3)\n" ++ "34: movl %%edx, %%es:60(%3)\n" ++ " addl $-64, %0\n" ++ " addl $64, %4\n" ++ " addl $64, %3\n" ++ " cmpl $63, %0\n" ++ " ja 1b\n" ++ "35: movl %0, %%eax\n" ++ " shrl $2, %0\n" ++ " andl $3, %%eax\n" ++ " cld\n" ++ "99: rep; movsl\n" ++ "36: movl %%eax, %0\n" ++ "37: rep; movsb\n" ++ "100:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "101: lea 0(%%eax,%0,4),%0\n" ++ " jmp 100b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 1b,100b\n" ++ " .long 2b,100b\n" ++ " .long 3b,100b\n" ++ " .long 4b,100b\n" ++ " .long 5b,100b\n" ++ " .long 6b,100b\n" ++ " .long 7b,100b\n" ++ " .long 8b,100b\n" ++ " .long 9b,100b\n" ++ " .long 10b,100b\n" ++ " .long 11b,100b\n" ++ " .long 12b,100b\n" ++ " .long 13b,100b\n" ++ " .long 14b,100b\n" ++ " .long 15b,100b\n" ++ " .long 16b,100b\n" ++ " .long 17b,100b\n" ++ " .long 18b,100b\n" ++ " .long 19b,100b\n" ++ " .long 20b,100b\n" ++ " .long 21b,100b\n" ++ " .long 22b,100b\n" ++ " .long 
23b,100b\n" ++ " .long 24b,100b\n" ++ " .long 25b,100b\n" ++ " .long 26b,100b\n" ++ " .long 27b,100b\n" ++ " .long 28b,100b\n" ++ " .long 29b,100b\n" ++ " .long 30b,100b\n" ++ " .long 31b,100b\n" ++ " .long 32b,100b\n" ++ " .long 33b,100b\n" ++ " .long 34b,100b\n" ++ " .long 35b,100b\n" ++ " .long 36b,100b\n" ++ " .long 37b,100b\n" ++ " .long 99b,101b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (d0), "=&S" (d1) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -338,6 +464,7 @@ __copy_user_zeroing_intel(void *to, cons + { + int d0, d1; + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -346,36 +473,36 @@ __copy_user_zeroing_intel(void *to, cons + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movl %%eax, 0(%3)\n" +- " movl %%edx, 4(%3)\n" ++ " movl %%eax, %%es:0(%3)\n" ++ " movl %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movl %%eax, 8(%3)\n" +- " movl %%edx, 12(%3)\n" ++ " movl %%eax, %%es:8(%3)\n" ++ " movl %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movl %%eax, 16(%3)\n" +- " movl %%edx, 20(%3)\n" ++ " movl %%eax, %%es:16(%3)\n" ++ " movl %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movl %%eax, 24(%3)\n" +- " movl %%edx, 28(%3)\n" ++ " movl %%eax, %%es:24(%3)\n" ++ " movl %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movl %%eax, 32(%3)\n" +- " movl %%edx, 36(%3)\n" ++ " movl %%eax, %%es:32(%3)\n" ++ " movl %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " movl %%eax, 40(%3)\n" +- " movl %%edx, 44(%3)\n" ++ " movl %%eax, %%es:40(%3)\n" ++ " movl %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movl %%eax, 48(%3)\n" +- " movl %%edx, 52(%3)\n" ++ " movl %%eax, %%es:48(%3)\n" ++ " movl %%edx, 
%%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movl %%eax, 56(%3)\n" +- " movl %%edx, 60(%3)\n" ++ " movl %%eax, %%es:56(%3)\n" ++ " movl %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -389,6 +516,8 @@ __copy_user_zeroing_intel(void *to, cons + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" +@@ -423,7 +552,7 @@ __copy_user_zeroing_intel(void *to, cons + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -439,6 +568,7 @@ static unsigned long __copy_user_zeroing + int d0, d1; + + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -447,36 +577,36 @@ static unsigned long __copy_user_zeroing + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movnti %%eax, 0(%3)\n" +- " movnti %%edx, 4(%3)\n" ++ " movnti %%eax, %%es:0(%3)\n" ++ " movnti %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movnti %%eax, 8(%3)\n" +- " movnti %%edx, 12(%3)\n" ++ " movnti %%eax, %%es:8(%3)\n" ++ " movnti %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movnti %%eax, 16(%3)\n" +- " movnti %%edx, 20(%3)\n" ++ " movnti %%eax, %%es:16(%3)\n" ++ " movnti %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movnti %%eax, 24(%3)\n" +- " movnti %%edx, 28(%3)\n" ++ " movnti %%eax, %%es:24(%3)\n" ++ " movnti %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movnti %%eax, 32(%3)\n" +- " movnti %%edx, 36(%3)\n" ++ " movnti %%eax, %%es:32(%3)\n" ++ " movnti %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " 
movnti %%eax, 40(%3)\n" +- " movnti %%edx, 44(%3)\n" ++ " movnti %%eax, %%es:40(%3)\n" ++ " movnti %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movnti %%eax, 48(%3)\n" +- " movnti %%edx, 52(%3)\n" ++ " movnti %%eax, %%es:48(%3)\n" ++ " movnti %%edx, %%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movnti %%eax, 56(%3)\n" +- " movnti %%edx, 60(%3)\n" ++ " movnti %%eax, %%es:56(%3)\n" ++ " movnti %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -491,6 +621,8 @@ static unsigned long __copy_user_zeroing + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" +@@ -525,7 +657,7 @@ static unsigned long __copy_user_zeroing + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -536,6 +668,7 @@ static unsigned long __copy_user_intel_n + int d0, d1; + + __asm__ __volatile__( ++ " movw %w6, %%ds\n" + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" +@@ -544,36 +677,36 @@ static unsigned long __copy_user_intel_n + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" +- " movnti %%eax, 0(%3)\n" +- " movnti %%edx, 4(%3)\n" ++ " movnti %%eax, %%es:0(%3)\n" ++ " movnti %%edx, %%es:4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" +- " movnti %%eax, 8(%3)\n" +- " movnti %%edx, 12(%3)\n" ++ " movnti %%eax, %%es:8(%3)\n" ++ " movnti %%edx, %%es:12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" +- " movnti %%eax, 16(%3)\n" +- " movnti %%edx, 20(%3)\n" ++ " movnti %%eax, %%es:16(%3)\n" ++ " movnti %%edx, %%es:20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" +- " movnti %%eax, 24(%3)\n" +- " movnti %%edx, 28(%3)\n" ++ " movnti %%eax, 
%%es:24(%3)\n" ++ " movnti %%edx, %%es:28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" +- " movnti %%eax, 32(%3)\n" +- " movnti %%edx, 36(%3)\n" ++ " movnti %%eax, %%es:32(%3)\n" ++ " movnti %%edx, %%es:36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" +- " movnti %%eax, 40(%3)\n" +- " movnti %%edx, 44(%3)\n" ++ " movnti %%eax, %%es:40(%3)\n" ++ " movnti %%edx, %%es:44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" +- " movnti %%eax, 48(%3)\n" +- " movnti %%edx, 52(%3)\n" ++ " movnti %%eax, %%es:48(%3)\n" ++ " movnti %%edx, %%es:52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" +- " movnti %%eax, 56(%3)\n" +- " movnti %%edx, 60(%3)\n" ++ " movnti %%eax, %%es:56(%3)\n" ++ " movnti %%edx, %%es:60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" +@@ -588,6 +721,8 @@ static unsigned long __copy_user_intel_n + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: jmp 8b\n" +@@ -616,7 +751,7 @@ static unsigned long __copy_user_intel_n + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) +- : "1"(to), "2"(from), "0"(size) ++ : "1"(to), "2"(from), "0"(size), "r"(__USER_DS) + : "eax", "edx", "memory"); + return size; + } +@@ -629,90 +764,146 @@ static unsigned long __copy_user_intel_n + */ + unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, + unsigned long size); +-unsigned long __copy_user_intel(void __user *to, const void *from, ++unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, ++ unsigned long size); ++unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, + unsigned long size); + unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size); + #endif /* CONFIG_X86_INTEL_USERCOPY */ + + /* Generic arbitrary sized copy. 
*/ +-#define __copy_user(to, from, size) \ +-do { \ +- int __d0, __d1, __d2; \ +- __asm__ __volatile__( \ +- " cmp $7,%0\n" \ +- " jbe 1f\n" \ +- " movl %1,%0\n" \ +- " negl %0\n" \ +- " andl $7,%0\n" \ +- " subl %0,%3\n" \ +- "4: rep; movsb\n" \ +- " movl %3,%0\n" \ +- " shrl $2,%0\n" \ +- " andl $3,%3\n" \ +- " .align 2,0x90\n" \ +- "0: rep; movsl\n" \ +- " movl %3,%0\n" \ +- "1: rep; movsb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "5: addl %3,%0\n" \ +- " jmp 2b\n" \ +- "3: lea 0(%3,%0,4),%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .align 4\n" \ +- " .long 4b,5b\n" \ +- " .long 0b,3b\n" \ +- " .long 1b,2b\n" \ +- ".previous" \ +- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ +- : "3"(size), "0"(size), "1"(to), "2"(from) \ +- : "memory"); \ +-} while (0) +- +-#define __copy_user_zeroing(to, from, size) \ +-do { \ +- int __d0, __d1, __d2; \ +- __asm__ __volatile__( \ +- " cmp $7,%0\n" \ +- " jbe 1f\n" \ +- " movl %1,%0\n" \ +- " negl %0\n" \ +- " andl $7,%0\n" \ +- " subl %0,%3\n" \ +- "4: rep; movsb\n" \ +- " movl %3,%0\n" \ +- " shrl $2,%0\n" \ +- " andl $3,%3\n" \ +- " .align 2,0x90\n" \ +- "0: rep; movsl\n" \ +- " movl %3,%0\n" \ +- "1: rep; movsb\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "5: addl %3,%0\n" \ +- " jmp 6f\n" \ +- "3: lea 0(%3,%0,4),%0\n" \ +- "6: pushl %0\n" \ +- " pushl %%eax\n" \ +- " xorl %%eax,%%eax\n" \ +- " rep; stosb\n" \ +- " popl %%eax\n" \ +- " popl %0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- ".section __ex_table,\"a\"\n" \ +- " .align 4\n" \ +- " .long 4b,5b\n" \ +- " .long 0b,3b\n" \ +- " .long 1b,6b\n" \ +- ".previous" \ +- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ +- : "3"(size), "0"(size), "1"(to), "2"(from) \ +- : "memory"); \ +-} while (0) ++static unsigned long ++__generic_copy_to_user(void __user *to, const void *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%es\n" ++ " cmp $7,%0\n" ++ " 
jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%es\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 2b\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,2b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} ++ ++static unsigned long ++__generic_copy_from_user(void *to, const void __user *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%ds\n" ++ " cmp $7,%0\n" ++ " jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 2b\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,2b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} ++ ++static unsigned long ++__copy_user_zeroing(void *to, const void __user *from, unsigned long size) ++{ ++ int __d0, __d1, __d2; ++ ++ __asm__ __volatile__( ++ " movw %w8,%%ds\n" ++ " cmp $7,%0\n" ++ " jbe 1f\n" ++ " movl %1,%0\n" ++ " negl %0\n" ++ " andl $7,%0\n" ++ " subl %0,%3\n" ++ "4: rep; movsb\n" ++ " movl %3,%0\n" ++ " shrl $2,%0\n" ++ " andl $3,%3\n" ++ " .align 2,0x90\n" ++ "0: 
rep; movsl\n" ++ " movl %3,%0\n" ++ "1: rep; movsb\n" ++ "2:\n" ++ " pushl %%ss\n" ++ " popl %%ds\n" ++ ".section .fixup,\"ax\"\n" ++ "5: addl %3,%0\n" ++ " jmp 6f\n" ++ "3: lea 0(%3,%0,4),%0\n" ++ "6: pushl %0\n" ++ " pushl %%eax\n" ++ " xorl %%eax,%%eax\n" ++ " rep; stosb\n" ++ " popl %%eax\n" ++ " popl %0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ " .align 4\n" ++ " .long 4b,5b\n" ++ " .long 0b,3b\n" ++ " .long 1b,6b\n" ++ ".previous" ++ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) ++ : "3"(size), "0"(size), "1"(to), "2"(from), "r"(__USER_DS) ++ : "memory"); ++ return size; ++} + + unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) +@@ -775,9 +966,9 @@ survive: + } + #endif + if (movsl_is_ok(to, from, n)) +- __copy_user(to, from, n); ++ n = __generic_copy_to_user(to, from, n); + else +- n = __copy_user_intel(to, from, n); ++ n = __generic_copy_to_user_intel(to, from, n); + return n; + } + EXPORT_SYMBOL(__copy_to_user_ll); +@@ -786,7 +977,7 @@ unsigned long __copy_from_user_ll(void * + unsigned long n) + { + if (movsl_is_ok(to, from, n)) +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + else + n = __copy_user_zeroing_intel(to, from, n); + return n; +@@ -797,10 +988,9 @@ unsigned long __copy_from_user_ll_nozero + unsigned long n) + { + if (movsl_is_ok(to, from, n)) +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + else +- n = __copy_user_intel((void __user *)to, +- (const void *)from, n); ++ n = __generic_copy_from_user_intel(to, from, n); + return n; + } + EXPORT_SYMBOL(__copy_from_user_ll_nozero); +@@ -812,9 +1002,9 @@ unsigned long __copy_from_user_ll_nocach + if (n > 64 && cpu_has_xmm2) + n = __copy_user_zeroing_intel_nocache(to, from, n); + else +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + #else +- __copy_user_zeroing(to, from, n); ++ n = __copy_user_zeroing(to, from, n); + #endif + return n; + } 
+@@ -827,9 +1017,9 @@ unsigned long __copy_from_user_ll_nocach + if (n > 64 && cpu_has_xmm2) + n = __copy_user_intel_nocache(to, from, n); + else +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + #else +- __copy_user(to, from, n); ++ n = __generic_copy_from_user(to, from, n); + #endif + return n; + } +@@ -878,8 +1068,35 @@ copy_from_user(void *to, const void __us + { + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); +- else ++ else if ((long)n > 0) + memset(to, 0, n); + return n; + } + EXPORT_SYMBOL(copy_from_user); ++ ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++void __set_fs(mm_segment_t x, int cpu) ++{ ++ unsigned long limit = x.seg; ++ struct desc_struct d; ++ ++ current_thread_info()->addr_limit = x; ++ if (likely(limit)) ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ pack_descriptor(&d, 0UL, limit, 0xF3, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_DS, &d, DESCTYPE_S); ++} ++ ++void set_fs(mm_segment_t x) ++{ ++ __set_fs(x, get_cpu()); ++ put_cpu_no_resched(); ++} ++#else ++void set_fs(mm_segment_t x) ++{ ++ current_thread_info()->addr_limit = x; ++} ++#endif ++ ++EXPORT_SYMBOL(set_fs); +diff -urNp a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c +--- a/arch/x86/mach-voyager/voyager_basic.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mach-voyager/voyager_basic.c 2009-05-24 18:10:25.060209774 -0700 +@@ -123,7 +123,7 @@ int __init voyager_memory_detect(int reg + __u8 cmos[4]; + ClickMap_t *map; + unsigned long map_addr; +- unsigned long old; ++ pte_t old; + + if (region >= CLICK_ENTRIES) { + printk("Voyager: Illegal ClickMap region %d\n", region); +@@ -138,7 +138,7 @@ int __init voyager_memory_detect(int reg + + /* steal page 0 for this */ + old = pg0[0]; +- pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); ++ pg0[0] = __pte((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); + local_flush_tlb(); + /* now clear everything out but page 0 */ + map 
= (ClickMap_t *) (map_addr & (~PAGE_MASK)); +diff -urNp a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c +--- a/arch/x86/mach-voyager/voyager_smp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mach-voyager/voyager_smp.c 2009-05-24 18:10:25.061209693 -0700 +@@ -521,6 +521,10 @@ static void __init do_boot_cpu(__u8 cpu) + __u32 *hijack_vector; + __u32 start_phys_address = setup_trampoline(); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* There's a clever trick to this: The linux trampoline is + * compiled to begin at absolute location zero, so make the + * address zero but have the data segment selector compensate +@@ -540,7 +544,17 @@ static void __init do_boot_cpu(__u8 cpu) + + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; +- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ early_gdt_descr.address = get_cpu_gdt_table(cpu); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + irq_ctx_init(cpu); + + /* Note: Don't modify initial ss override */ +@@ -1154,7 +1168,7 @@ void smp_local_timer_interrupt(void) + per_cpu(prof_counter, cpu); + } + +- update_process_times(user_mode_vm(get_irq_regs())); ++ update_process_times(user_mode(get_irq_regs())); + } + + if (((1 << cpu) & voyager_extended_vic_processors) == 0) +diff -urNp a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c +--- a/arch/x86/mm/extable.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/extable.c 2009-05-24 18:10:25.062210031 -0700 +@@ -1,14 +1,62 @@ + #include <linux/module.h> + #include <linux/spinlock.h> ++#include <linux/sort.h> + #include <asm/uaccess.h> + ++/* ++ * The exception table needs to be sorted so that the binary ++ * search that we use to find entries in it works properly. ++ * This is used both for the kernel exception table and for ++ * the exception tables of modules that get loaded. 
++ */ ++static int cmp_ex(const void *a, const void *b) ++{ ++ const struct exception_table_entry *x = a, *y = b; ++ ++ /* avoid overflow */ ++ if (x->insn > y->insn) ++ return 1; ++ if (x->insn < y->insn) ++ return -1; ++ return 0; ++} ++ ++static void swap_ex(void *a, void *b, int size) ++{ ++ struct exception_table_entry t, *x = a, *y = b; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ ++ t = *x; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ *x = *y; ++ *y = t; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++} ++ ++void sort_extable(struct exception_table_entry *start, ++ struct exception_table_entry *finish) ++{ ++ sort(start, finish - start, sizeof(struct exception_table_entry), ++ cmp_ex, swap_ex); ++} + + int fixup_exception(struct pt_regs *regs) + { + const struct exception_table_entry *fixup; + + #ifdef CONFIG_PNPBIOS +- if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { ++ if (unlikely(!v8086_mode(regs) && SEGMENT_IS_PNP_CODE(regs->cs))) { + extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; + extern u32 pnp_bios_is_utter_crap; + pnp_bios_is_utter_crap = 1; +diff -urNp a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +--- a/arch/x86/mm/fault.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/fault.c 2009-05-24 18:10:25.063209251 -0700 +@@ -26,6 +26,8 @@ + #include <linux/kprobes.h> + #include <linux/uaccess.h> + #include <linux/kdebug.h> ++#include <linux/unistd.h> ++#include <linux/compiler.h> + + #include <asm/system.h> + #include <asm/desc.h> +@@ -67,7 +69,7 @@ static inline int notify_page_fault(stru + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; +@@ -265,6 +267,30 @@ bad: + #endif + } + ++#ifdef CONFIG_PAX_EMUTRAMP ++static int pax_handle_fetch_fault(struct pt_regs *regs); ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++static 
inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ pgd = pgd_offset(mm, address); ++ if (!pgd_present(*pgd)) ++ return NULL; ++ pud = pud_offset(pgd, address); ++ if (!pud_present(*pud)) ++ return NULL; ++ pmd = pmd_offset(pud, address); ++ if (!pmd_present(*pmd)) ++ return NULL; ++ return pmd; ++} ++#endif ++ + #ifdef CONFIG_X86_32 + static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) + { +@@ -351,7 +377,7 @@ static int is_errata93(struct pt_regs *r + static int is_errata100(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && ++ if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) && + (address >> 32)) + return 1; + #endif +@@ -386,14 +412,31 @@ static void show_fault_oops(struct pt_re + #endif + + #ifdef CONFIG_X86_PAE +- if (error_code & PF_INSTR) { ++ if (nx_enabled && (error_code & PF_INSTR)) { + unsigned int level; + pte_t *pte = lookup_address(address, &level); + + if (pte && pte_present(*pte) && !pte_exec(*pte)) + printk(KERN_CRIT "kernel tried to execute " + "NX-protected page - exploit attempt? 
" +- "(uid: %d)\n", current->uid); ++ "(uid: %d, task: %s, pid: %d)\n", ++ current->uid, current->comm, task_pid_nr(current)); ++ } ++#endif ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#ifdef CONFIG_MODULES ++ if (init_mm.start_code <= address && address < (unsigned long)MODULES_END) ++#else ++ if (init_mm.start_code <= address && address < init_mm.end_code) ++#endif ++ { ++ if (current->signal->curr_ip) ++ printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", ++ NIPQUAD(current->signal->curr_ip), current->comm, task_pid_nr(current), current->uid, current->euid); ++ else ++ printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", ++ current->comm, task_pid_nr(current), current->uid, current->euid); + } + #endif + +@@ -585,20 +628,26 @@ void __kprobes do_page_fault(struct pt_r + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; +- unsigned long address; + int write, si_code; + int fault; + #ifdef CONFIG_X86_64 + unsigned long flags; + #endif + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ pte_t *pte; ++ pmd_t *pmd; ++ spinlock_t *ptl; ++ unsigned char pte_mask; ++#endif ++ ++ /* get the address */ ++ const unsigned long address = read_cr2(); ++ + tsk = current; + mm = tsk->mm; + prefetchw(&mm->mmap_sem); + +- /* get the address */ +- address = read_cr2(); +- + si_code = SEGV_MAPERR; + + if (unlikely(kmmio_fault(regs, address))) +@@ -651,7 +700,7 @@ void __kprobes do_page_fault(struct pt_r + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ +- if (user_mode_vm(regs)) { ++ if (user_mode(regs)) { + local_irq_enable(); + error_code |= PF_USER; + } else if (regs->flags & X86_EFLAGS_IF) +@@ -667,7 +716,7 @@ void __kprobes do_page_fault(struct pt_r + * atomic region then we must not take the fault. 
+ */ + if (unlikely(in_atomic() || !mm)) +- goto bad_area_nosemaphore; ++ goto bad_area_nopax; + + again: + /* +@@ -689,10 +738,104 @@ again: + if (!down_read_trylock(&mm->mmap_sem)) { + if ((error_code & PF_USER) == 0 && + !search_exception_tables(regs->ip)) +- goto bad_area_nosemaphore; ++ goto bad_area_nopax; + down_read(&mm->mmap_sem); + } + ++#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) ++ if (nx_enabled || (error_code & (PF_PROT|PF_USER)) != (PF_PROT|PF_USER) || v8086_mode(regs) || ++ !(mm->pax_flags & MF_PAX_PAGEEXEC)) ++ goto not_pax_fault; ++ ++ /* PaX: it's our fault, let's handle it if we can */ ++ ++ /* PaX: take a look at read faults before acquiring any locks */ ++ if (unlikely(!(error_code & PF_WRITE) && (regs->ip == address))) { ++ /* instruction fetch attempt from a protected page in user mode */ ++ up_read(&mm->mmap_sem); ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++ ++ pmd = pax_get_pmd(mm, address); ++ if (unlikely(!pmd)) ++ goto not_pax_fault; ++ ++ pte = pte_offset_map_lock(mm, pmd, address, &ptl); ++ if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { ++ pte_unmap_unlock(pte, ptl); ++ goto not_pax_fault; ++ } ++ ++ if (unlikely((error_code & PF_WRITE) && !pte_write(*pte))) { ++ /* write attempt to a protected page in user mode */ ++ pte_unmap_unlock(pte, ptl); ++ goto not_pax_fault; ++ } ++ ++#ifdef CONFIG_SMP ++ if (likely(address > get_limit(regs->cs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) ++#else ++ if (likely(address > get_limit(regs->cs))) ++#endif ++ { ++ set_pte(pte, pte_mkread(*pte)); ++ __flush_tlb_one(address); ++ pte_unmap_unlock(pte, ptl); ++ up_read(&mm->mmap_sem); ++ return; ++ } ++ ++ pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & PF_WRITE) << (_PAGE_BIT_DIRTY-1)); ++ ++ /* ++ * PaX: fill DTLB with 
user rights and retry ++ */ ++ __asm__ __volatile__ ( ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ "movw %w4,%%es\n" ++#endif ++ "orb %2,(%1)\n" ++#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) ++/* ++ * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's ++ * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* ++ * page fault when examined during a TLB load attempt. this is true not only ++ * for PTEs holding a non-present entry but also present entries that will ++ * raise a page fault (such as those set up by PaX, or the copy-on-write ++ * mechanism). in effect it means that we do *not* need to flush the TLBs ++ * for our target pages since their PTEs are simply not in the TLBs at all. ++ ++ * the best thing in omitting it is that we gain around 15-20% speed in the ++ * fast path of the page fault handler and can get rid of tracing since we ++ * can no longer flush unintended entries. ++ */ ++ "invlpg (%0)\n" ++#endif ++ "testb $0,%%es:(%0)\n" ++ "xorb %3,(%1)\n" ++#ifdef CONFIG_PAX_MEMORY_UDEREF ++ "pushl %%ss\n" ++ "popl %%es\n" ++#endif ++ : ++ : "r" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER), "r" (__USER_DS) ++ : "memory", "cc"); ++ pte_unmap_unlock(pte, ptl); ++ up_read(&mm->mmap_sem); ++ return; ++ ++not_pax_fault: ++#endif ++ + vma = find_vma(mm, address); + if (!vma) + goto bad_area; +@@ -700,16 +843,20 @@ again: + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (error_code & PF_USER) { +- /* +- * Accessing the stack below %sp is always a bug. +- * The large cushion allows instructions like enter +- * and pusha to work. ("enter $65535,$31" pushes +- * 32 pointers and then decrements %sp by 65535.) +- */ +- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) +- goto bad_area; +- } ++ /* ++ * Accessing the stack below %sp is always a bug. ++ * The large cushion allows instructions like enter ++ * and pusha to work. 
("enter $65535,$31" pushes ++ * 32 pointers and then decrements %sp by 65535.) ++ */ ++ if (address + 65536 + 32 * sizeof(unsigned long) < task_pt_regs(tsk)->sp) ++ goto bad_area; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end - SEGMEXEC_TASK_SIZE - 1 < address - SEGMEXEC_TASK_SIZE - 1) ++ goto bad_area; ++#endif ++ + if (expand_stack(vma, address)) + goto bad_area; + /* +@@ -719,6 +866,8 @@ again: + good_area: + si_code = SEGV_ACCERR; + write = 0; ++ if (nx_enabled && (error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) ++ goto bad_area; + switch (error_code & (PF_PROT|PF_WRITE)) { + default: /* 3: write, present */ + /* fall through */ +@@ -773,6 +922,54 @@ bad_area: + up_read(&mm->mmap_sem); + + bad_area_nosemaphore: ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (mm && (error_code & PF_USER)) { ++ unsigned long ip = regs->ip; ++ ++ if (v8086_mode(regs)) ++ ip = ((regs->cs & 0xffff) << 4) + (regs->ip & 0xffff); ++ ++ /* ++ * It's possible to have interrupts off here. 
++ */ ++ local_irq_enable(); ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && ++ ((nx_enabled && (error_code & PF_INSTR)) || (!(error_code & (PF_PROT | PF_WRITE)) && regs->ip == address))) { ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && !(error_code & (PF_PROT | PF_WRITE)) && (regs->ip + SEGMEXEC_TASK_SIZE == address)) { ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ switch (pax_handle_fetch_fault(regs)) { ++ case 2: ++ return; ++ } ++#endif ++ ++ pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); ++ do_group_exit(SIGKILL); ++ } ++#endif ++ ++ } ++#endif ++ ++bad_area_nopax: + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* +@@ -851,7 +1048,7 @@ no_context: + #ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ do_group_exit(SIGKILL); + #else + if (__die("Oops", regs, error_code)) + regs = NULL; +@@ -944,3 +1141,174 @@ void vmalloc_sync_all(void) + } + #endif + } ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++static int pax_handle_fetch_fault_32(struct pt_regs *regs) ++{ ++ int err; ++ ++ do { /* PaX: gcc trampoline emulation #1 */ ++ unsigned char mov1, mov2; ++ unsigned short jmp; ++ unsigned int addr1, addr2; ++ ++#ifdef CONFIG_X86_64 ++ if ((regs->ip + 11) >> 32) ++ break; ++#endif ++ ++ err = get_user(mov1, (unsigned char __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); ++ err |= get_user(mov2, (unsigned char __user *)(regs->ip + 5)); ++ err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); ++ err |= get_user(jmp, (unsigned short __user *)(regs->ip + 10)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xB9 && mov2 == 0xB8 && jmp == 0xE0FF) { ++ regs->cx = addr1; ++ regs->ax = addr2; ++ 
regs->ip = addr2; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: gcc trampoline emulation #2 */ ++ unsigned char mov, jmp; ++ unsigned int addr1, addr2; ++ ++#ifdef CONFIG_X86_64 ++ if ((regs->ip + 9) >> 32) ++ break; ++#endif ++ ++ err = get_user(mov, (unsigned char __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); ++ err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); ++ err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); ++ ++ if (err) ++ break; ++ ++ if (mov == 0xB9 && jmp == 0xE9) { ++ regs->cx = addr1; ++ regs->ip = (unsigned int)(regs->ip + addr2 + 10); ++ return 2; ++ } ++ } while (0); ++ ++ return 1; /* PaX in action */ ++} ++ ++#ifdef CONFIG_X86_64 ++static int pax_handle_fetch_fault_64(struct pt_regs *regs) ++{ ++ int err; ++ ++ do { /* PaX: gcc trampoline emulation #1 */ ++ unsigned short mov1, mov2, jmp1; ++ unsigned char jmp2; ++ unsigned int addr1; ++ unsigned long addr2; ++ ++ err = get_user(mov1, (unsigned short __user *)regs->ip); ++ err |= get_user(addr1, (unsigned int __user *)(regs->ip + 2)); ++ err |= get_user(mov2, (unsigned short __user *)(regs->ip + 6)); ++ err |= get_user(addr2, (unsigned long __user *)(regs->ip + 8)); ++ err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 16)); ++ err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 18)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xBB41 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { ++ regs->r11 = addr1; ++ regs->r10 = addr2; ++ regs->ip = addr1; ++ return 2; ++ } ++ } while (0); ++ ++ do { /* PaX: gcc trampoline emulation #2 */ ++ unsigned short mov1, mov2, jmp1; ++ unsigned char jmp2; ++ unsigned long addr1, addr2; ++ ++ err = get_user(mov1, (unsigned short __user *)regs->ip); ++ err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); ++ err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); ++ err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); ++ err |= get_user(jmp1, 
(unsigned short __user *)(regs->ip + 20)); ++ err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 22)); ++ ++ if (err) ++ break; ++ ++ if (mov1 == 0xBB49 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { ++ regs->r11 = addr1; ++ regs->r10 = addr2; ++ regs->ip = addr1; ++ return 2; ++ } ++ } while (0); ++ ++ return 1; /* PaX in action */ ++} ++#endif ++ ++/* ++ * PaX: decide what to do with offenders (regs->ip = fault address) ++ * ++ * returns 1 when task should be killed ++ * 2 when gcc trampoline was detected ++ */ ++static int pax_handle_fetch_fault(struct pt_regs *regs) ++{ ++ if (v8086_mode(regs)) ++ return 1; ++ ++ if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) ++ return 1; ++ ++#ifdef CONFIG_X86_32 ++ return pax_handle_fetch_fault_32(regs); ++#else ++ if (regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) ++ return pax_handle_fetch_fault_32(regs); ++ else ++ return pax_handle_fetch_fault_64(regs); ++#endif ++} ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++void pax_report_insns(void *pc, void *sp) ++{ ++ long i; ++ ++ printk(KERN_ERR "PAX: bytes at PC: "); ++ for (i = 0; i < 20; i++) { ++ unsigned char c; ++ if (get_user(c, (unsigned char __user *)pc+i)) ++ printk(KERN_CONT "?? "); ++ else ++ printk(KERN_CONT "%02x ", c); ++ } ++ printk("\n"); ++ ++ printk(KERN_ERR "PAX: bytes at SP-%lu: ", (unsigned long)sizeof(long)); ++ for (i = -1; i < 80 / sizeof(long); i++) { ++ unsigned long c; ++ if (get_user(c, (unsigned long __user *)sp+i)) ++#ifdef CONFIG_X86_32 ++ printk(KERN_CONT "???????? "); ++#else ++ printk(KERN_CONT "???????????????? 
"); ++#endif ++ else ++ printk(KERN_CONT "%0*lx ", 2 * (int)sizeof(long), c); ++ } ++ printk("\n"); ++} ++#endif +diff -urNp a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c +--- a/arch/x86/mm/highmem_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/highmem_32.c 2009-05-24 18:10:25.064210077 -0700 +@@ -74,6 +74,10 @@ void *kmap_atomic_prot(struct page *page + enum fixed_addresses idx; + unsigned long vaddr; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + +@@ -85,7 +89,17 @@ void *kmap_atomic_prot(struct page *page + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte-idx))); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(kmap_pte-idx, mk_pte(page, prot)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +@@ -101,15 +115,29 @@ void kunmap_atomic(void *kvaddr, enum km + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. 
+ */ +- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) ++ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + kpte_clear_flush(kmap_pte-idx, vaddr); +- else { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ } else { + #ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr < PAGE_OFFSET); + BUG_ON(vaddr >= (unsigned long)high_memory); +@@ -128,11 +156,25 @@ void *kmap_atomic_pfn(unsigned long pfn, + enum fixed_addresses idx; + unsigned long vaddr; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +diff -urNp a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c +--- a/arch/x86/mm/hugetlbpage.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/hugetlbpage.c 2009-05-24 18:10:25.064210077 -0700 +@@ -263,13 +263,18 @@ static unsigned long hugetlb_get_unmappe + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; +- unsigned long start_addr; ++ unsigned long start_addr, pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif + + if (len > mm->cached_hole_size) { +- start_addr = mm->free_area_cache; ++ start_addr = mm->free_area_cache; + } else { +- start_addr = TASK_UNMAPPED_BASE; +- mm->cached_hole_size = 0; ++ start_addr = mm->mmap_base; ++ mm->cached_hole_size = 0; + } + + full_search: +@@ -277,13 +282,13 @@ full_search: + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). 
*/ +- if (TASK_SIZE - len < addr) { ++ if (pax_task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ +- if (start_addr != TASK_UNMAPPED_BASE) { +- start_addr = TASK_UNMAPPED_BASE; ++ if (start_addr != mm->mmap_base) { ++ start_addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +@@ -306,9 +311,8 @@ static unsigned long hugetlb_get_unmappe + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma, *prev_vma; +- unsigned long base = mm->mmap_base, addr = addr0; ++ unsigned long base = mm->mmap_base, addr; + unsigned long largest_hole = mm->cached_hole_size; +- int first_time = 1; + + /* don't allow allocations above current base */ + if (mm->free_area_cache > base) +@@ -318,7 +322,7 @@ static unsigned long hugetlb_get_unmappe + largest_hole = 0; + mm->free_area_cache = base; + } +-try_again: ++ + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; +@@ -360,22 +364,26 @@ try_again: + + fail: + /* +- * if hint left us with no space for the requested +- * mapping then try again: +- */ +- if (first_time) { +- mm->free_area_cache = base; +- largest_hole = 0; +- first_time = 0; +- goto try_again; +- } +- /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ +- mm->free_area_cache = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = hugetlb_get_unmapped_area_bottomup(file, addr0, + len, pgoff, flags); +@@ -383,6 +391,7 @@ fail: + /* + * Restore the topdown base: + */ ++ mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + +@@ -396,10 +405,17 @@ hugetlb_get_unmapped_area(struct file *f + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; ++ unsigned long pax_task_size = TASK_SIZE; + + if (len & ~huge_page_mask(h)) + return -EINVAL; +- if (len > TASK_SIZE) ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) { +@@ -411,7 +427,7 @@ hugetlb_get_unmapped_area(struct file *f + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); +- if (TASK_SIZE - len >= addr && ++ if (pax_task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } +diff -urNp a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c +--- a/arch/x86/mm/init_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/init_32.c 2009-05-24 18:10:25.065209368 -0700 +@@ -49,6 +49,7 @@ + #include <asm/setup.h> + #include <asm/cacheflush.h> + #include <asm/smp.h> ++#include <asm/desc.h> + + unsigned int __VMALLOC_RESERVE = 128 << 20; + +@@ -82,35 +83,6 @@ static __init void *alloc_low_page(unsig + } + + /* +- * Creates a middle page table and puts a pointer to it in the +- * given global directory entry. 
This only returns the gd entry +- * in non-PAE compilation mode, since the middle layer is folded. +- */ +-static pmd_t * __init one_md_table_init(pgd_t *pgd) +-{ +- pud_t *pud; +- pmd_t *pmd_table; +- +-#ifdef CONFIG_X86_PAE +- unsigned long phys; +- if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { +- if (after_init_bootmem) +- pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); +- else +- pmd_table = (pmd_t *)alloc_low_page(&phys); +- paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); +- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); +- pud = pud_offset(pgd, 0); +- BUG_ON(pmd_table != pmd_offset(pud, 0)); +- } +-#endif +- pud = pud_offset(pgd, 0); +- pmd_table = pmd_offset(pud, 0); +- +- return pmd_table; +-} +- +-/* + * Create a page table and place a pointer to it in a middle page + * directory entry: + */ +@@ -132,7 +104,11 @@ static pte_t * __init one_page_table_ini + } + + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); ++#else + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); ++#endif + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + } + +@@ -154,6 +130,7 @@ page_table_range_init(unsigned long star + int pgd_idx, pmd_idx; + unsigned long vaddr; + pgd_t *pgd; ++ pud_t *pud; + pmd_t *pmd; + + vaddr = start; +@@ -162,8 +139,13 @@ page_table_range_init(unsigned long star + pgd = pgd_base + pgd_idx; + + for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { +- pmd = one_md_table_init(pgd); +- pmd = pmd + pmd_index(vaddr); ++ pud = pud_offset(pgd, vaddr); ++ pmd = pmd_offset(pud, vaddr); ++ ++#ifdef CONFIG_X86_PAE ++ paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT); ++#endif ++ + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); + pmd++, pmd_idx++) { + one_page_table_init(pmd); +@@ -174,11 +156,23 @@ page_table_range_init(unsigned long star + } + } + +-static inline int 
is_kernel_text(unsigned long addr) ++static inline int is_kernel_text(unsigned long start, unsigned long end) + { +- if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) +- return 1; +- return 0; ++ unsigned long etext; ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ etext = ktva_ktla((unsigned long)&MODULES_END); ++#else ++ etext = (unsigned long)&_etext; ++#endif ++ ++ if ((start > ktla_ktva(etext) || ++ end <= ktla_ktva((unsigned long)_stext)) && ++ (start > ktla_ktva((unsigned long)_einittext) || ++ end <= ktla_ktva((unsigned long)_sinittext)) && ++ (start > (unsigned long)__va(0xfffff) || end <= (unsigned long)__va(0xc0000))) ++ return 0; ++ return 1; + } + + /* +@@ -191,9 +185,10 @@ static void __init kernel_physical_mappi + unsigned long end_pfn, + int use_pse) + { +- int pgd_idx, pmd_idx, pte_ofs; ++ unsigned int pgd_idx, pmd_idx, pte_ofs; + unsigned long pfn; + pgd_t *pgd; ++ pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned pages_2m, pages_4k; +@@ -223,8 +218,13 @@ repeat: + pfn = start_pfn; + pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); + pgd = pgd_base + pgd_idx; +- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { +- pmd = one_md_table_init(pgd); ++ for (; pgd_idx < PTRS_PER_PGD && pfn < max_low_pfn; pgd++, pgd_idx++) { ++ pud = pud_offset(pgd, 0); ++ pmd = pmd_offset(pud, 0); ++ ++#ifdef CONFIG_X86_PAE ++ paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT); ++#endif + + if (pfn >= end_pfn) + continue; +@@ -236,14 +236,13 @@ repeat: + #endif + for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; + pmd++, pmd_idx++) { +- unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; ++ unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; + + /* + * Map with big pages if possible, otherwise + * create normal page tables: + */ + if (use_pse) { +- unsigned int addr2; + pgprot_t prot = PAGE_KERNEL_LARGE; + /* + * first pass will use the same initial +@@ -253,11 +252,7 @@ repeat: + __pgprot(PTE_IDENT_ATTR | + _PAGE_PSE); + +- addr2 
= (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + +- PAGE_OFFSET + PAGE_SIZE-1; +- +- if (is_kernel_text(addr) || +- is_kernel_text(addr2)) ++ if (is_kernel_text(address, address + PMD_SIZE)) + prot = PAGE_KERNEL_LARGE_EXEC; + + pages_2m++; +@@ -274,7 +269,7 @@ repeat: + pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); + pte += pte_ofs; + for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; +- pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { ++ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { + pgprot_t prot = PAGE_KERNEL; + /* + * first pass will use the same initial +@@ -282,7 +277,7 @@ repeat: + */ + pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); + +- if (is_kernel_text(addr)) ++ if (is_kernel_text(address, address + PAGE_SIZE)) + prot = PAGE_KERNEL_EXEC; + + pages_4k++; +@@ -327,7 +322,13 @@ repeat: + */ + int devmem_is_allowed(unsigned long pagenr) + { +- if (pagenr <= 256) ++ if (!pagenr) ++ return 1; ++#ifdef CONFIG_VM86 ++ if (pagenr < (ISA_START_ADDRESS >> PAGE_SHIFT)) ++ return 1; ++#endif ++ if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; + if (!page_is_ram(pagenr)) + return 1; +@@ -460,7 +461,7 @@ void __init native_pagetable_setup_start + + pud = pud_offset(pgd, va); + pmd = pmd_offset(pud, va); +- if (!pmd_present(*pmd)) ++ if (!pmd_present(*pmd) || pmd_huge(*pmd)) + break; + + pte = pte_offset_kernel(pmd, va); +@@ -512,9 +513,7 @@ static void __init early_ioremap_page_ta + + static void __init pagetable_init(void) + { +- pgd_t *pgd_base = swapper_pg_dir; +- +- permanent_kmaps_init(pgd_base); ++ permanent_kmaps_init(swapper_pg_dir); + } + + #ifdef CONFIG_ACPI_SLEEP +@@ -522,12 +521,12 @@ static void __init pagetable_init(void) + * ACPI suspend needs this for resume, because things like the intel-agp + * driver might have split up a kernel 4MB mapping. 
+ */ +-char swsusp_pg_dir[PAGE_SIZE] ++pgd_t swsusp_pg_dir[PTRS_PER_PGD] + __attribute__ ((aligned(PAGE_SIZE))); + + static inline void save_pg_dir(void) + { +- memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); ++ clone_pgd_range(swsusp_pg_dir, swapper_pg_dir, PTRS_PER_PGD); + } + #else /* !CONFIG_ACPI_SLEEP */ + static inline void save_pg_dir(void) +@@ -557,13 +556,11 @@ void zap_low_mappings(void) + + int nx_enabled; + +-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); ++pteval_t __supported_pte_mask __read_only = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); + EXPORT_SYMBOL_GPL(__supported_pte_mask); + + #ifdef CONFIG_X86_PAE + +-static int disable_nx __initdata; +- + /* + * noexec = on|off + * +@@ -572,40 +569,33 @@ static int disable_nx __initdata; + * on Enable + * off Disable + */ ++#if !defined(CONFIG_PAX_PAGEEXEC) + static int __init noexec_setup(char *str) + { + if (!str || !strcmp(str, "on")) { +- if (cpu_has_nx) { +- __supported_pte_mask |= _PAGE_NX; +- disable_nx = 0; +- } ++ if (cpu_has_nx) ++ nx_enabled = 1; + } else { +- if (!strcmp(str, "off")) { +- disable_nx = 1; +- __supported_pte_mask &= ~_PAGE_NX; +- } else { ++ if (!strcmp(str, "off")) ++ nx_enabled = 0; ++ else + return -EINVAL; +- } + } + + return 0; + } + early_param("noexec", noexec_setup); ++#endif + + static void __init set_nx(void) + { +- unsigned int v[4], l, h; ++ if (!nx_enabled && cpu_has_nx) { ++ unsigned l, h; + +- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { +- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); +- +- if ((v[3] & (1 << 20)) && !disable_nx) { +- rdmsr(MSR_EFER, l, h); +- l |= EFER_NX; +- wrmsr(MSR_EFER, l, h); +- nx_enabled = 1; +- __supported_pte_mask |= _PAGE_NX; +- } ++ __supported_pte_mask &= ~_PAGE_NX; ++ rdmsr(MSR_EFER, l, h); ++ l &= ~EFER_NX; ++ wrmsr(MSR_EFER, l, h); + } + } + #endif +@@ -988,7 +978,7 @@ void __init mem_init(void) + set_highmem_pages_init(); + + codesize = (unsigned long) &_etext - (unsigned 
long) &_text; +- datasize = (unsigned long) &_edata - (unsigned long) &_etext; ++ datasize = (unsigned long) &_edata - (unsigned long) &_data; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); +@@ -1034,10 +1024,10 @@ void __init mem_init(void) + ((unsigned long)&__init_end - + (unsigned long)&__init_begin) >> 10, + +- (unsigned long)&_etext, (unsigned long)&_edata, +- ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, ++ (unsigned long)&_data, (unsigned long)&_edata, ++ ((unsigned long)&_edata - (unsigned long)&_data) >> 10, + +- (unsigned long)&_text, (unsigned long)&_etext, ++ ktla_ktva((unsigned long)&_text), ktla_ktva((unsigned long)&_etext), + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + + #ifdef CONFIG_HIGHMEM +@@ -1166,6 +1156,46 @@ void free_init_pages(char *what, unsigne + + void free_initmem(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ /* PaX: limit KERNEL_CS to actual size */ ++ unsigned long addr, limit; ++ struct desc_struct d; ++ int cpu; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++#ifdef CONFIG_MODULES ++ limit = ktva_ktla((unsigned long)&MODULES_END); ++#else ++ limit = (unsigned long)&_etext; ++#endif ++ limit = (limit - 1UL) >> PAGE_SHIFT; ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ pack_descriptor(&d, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, &d, DESCTYPE_S); ++ } ++ ++ /* PaX: make KERNEL_CS read-only */ ++ for (addr = ktla_ktva((unsigned long)&_text); addr < (unsigned long)&_data; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); ++ } ++#ifdef CONFIG_X86_PAE ++ for (addr = (unsigned long)&__init_begin; addr < (unsigned long)&__init_end; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); 
++ pmd = pmd_offset(pud, addr); ++ set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++#endif ++ flush_tlb_all(); ++#endif ++ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +diff -urNp a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +--- a/arch/x86/mm/init_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/init_64.c 2009-05-24 18:10:25.066209496 -0700 +@@ -175,6 +175,10 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + pmd_t *pmd; + pte_t *pte; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pud = pud_page + pud_index(vaddr); + if (pud_none(*pud)) { + pmd = (pmd_t *) spp_getpage(); +@@ -196,8 +200,17 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + } + + pte = pte_offset_kernel(pmd, vaddr); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + set_pte(pte, new_pte); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * It's enough to flush this one mapping. 
+ * (PGE mappings get flushed as well) +@@ -238,14 +251,12 @@ static void __init __init_extra_mapping( + pgd = pgd_offset_k((unsigned long)__va(phys)); + if (pgd_none(*pgd)) { + pud = (pud_t *) spp_getpage(); +- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | +- _PAGE_USER)); ++ set_pgd(pgd, __pgd(__pa(pud) | _PAGE_TABLE)); + } + pud = pud_offset(pgd, (unsigned long)__va(phys)); + if (pud_none(*pud)) { + pmd = (pmd_t *) spp_getpage(); +- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | +- _PAGE_USER)); ++ set_pud(pud, __pud(__pa(pmd) | _PAGE_TABLE)); + } + pmd = pmd_offset(pud, phys); + BUG_ON(!pmd_none(*pmd)); +@@ -886,7 +897,9 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + */ + int devmem_is_allowed(unsigned long pagenr) + { +- if (pagenr <= 256) ++ if (!pagenr) ++ return 1; ++ if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; + if (!page_is_ram(pagenr)) + return 1; +@@ -977,6 +990,39 @@ void free_init_pages(char *what, unsigne + + void free_initmem(void) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long addr, end; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ /* PaX: make kernel code/rodata read-only, rest non-executable */ ++ for (addr = __START_KERNEL_map; addr < __START_KERNEL_map + KERNEL_IMAGE_SIZE; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ if ((unsigned long)_text <= addr && addr < (unsigned long)_data) ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); ++ else ++ set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++ ++ addr = (unsigned long)__va(__pa(__START_KERNEL_map)); ++ end = addr + KERNEL_IMAGE_SIZE; ++ for (; addr < end; addr += PMD_SIZE) { ++ pgd = pgd_offset_k(addr); ++ pud = pud_offset(pgd, addr); ++ pmd = pmd_offset(pud, addr); ++ if ((unsigned long)__va(__pa(_text)) <= addr && addr < (unsigned long)__va(__pa(_data))) ++ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); ++ else ++ 
set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); ++ } ++ ++ flush_tlb_all(); ++#endif ++ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +@@ -1149,7 +1195,7 @@ int in_gate_area_no_task(unsigned long a + + const char *arch_vma_name(struct vm_area_struct *vma) + { +- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) ++ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; + if (vma == &gate_vma) + return "[vsyscall]"; +diff -urNp a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c +--- a/arch/x86/mm/ioremap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/ioremap.c 2009-05-24 18:10:25.067209694 -0700 +@@ -114,8 +114,8 @@ int page_is_ram(unsigned long pagenr) + * Second special case: Some BIOSen report the PC BIOS + * area (640->1Mb) as ram even though it is not. + */ +- if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && +- pagenr < (BIOS_END >> PAGE_SHIFT)) ++ if (pagenr >= (ISA_START_ADDRESS >> PAGE_SHIFT) && ++ pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 0; + + for (i = 0; i < e820.nr_map; i++) { +@@ -293,6 +293,8 @@ static void __iomem *__ioremap_caller(re + break; + } + ++ prot = canon_pgprot(prot); ++ + /* + * Ok, go for it.. 
+ */ +@@ -508,7 +510,7 @@ static int __init early_ioremap_debug_se + early_param("early_ioremap_debug", early_ioremap_debug_setup); + + static __initdata int after_paging_init; +-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; ++static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __aligned(PAGE_SIZE); + + static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) + { +@@ -523,7 +525,11 @@ static inline pmd_t * __init early_iorem + + static inline pte_t * __init early_ioremap_pte(unsigned long addr) + { ++#ifdef CONFIG_X86_32 + return &bm_pte[pte_index(addr)]; ++#else ++ return &level1_fixmap_pgt[pte_index(addr)]; ++#endif + } + + void __init early_ioremap_init(void) +@@ -534,8 +540,10 @@ void __init early_ioremap_init(void) + printk(KERN_INFO "early_ioremap_init()\n"); + + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); ++#ifdef CONFIG_X86_32 + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); ++#endif + + /* + * The boot-ioremap range spans multiple pmds, for which +diff -urNp a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c +--- a/arch/x86/mm/mmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/mmap.c 2009-05-24 18:10:25.068209333 -0700 +@@ -36,7 +36,7 @@ + * Leave an at least ~128 MB hole. 
+ */ + #define MIN_GAP (128*1024*1024) +-#define MAX_GAP (TASK_SIZE/6*5) ++#define MAX_GAP (pax_task_size/6*5) + + /* + * True on X86_32 or when emulating IA32 on X86_64 +@@ -81,27 +81,40 @@ static unsigned long mmap_rnd(void) + return rnd << PAGE_SHIFT; + } + +-static unsigned long mmap_base(void) ++static unsigned long mmap_base(struct mm_struct *mm) + { + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; ++ unsigned long pax_task_size = TASK_SIZE; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + +- return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); ++ return PAGE_ALIGN(pax_task_size - gap - mmap_rnd()); + } + + /* + * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 + * does, but not when emulating X86_32 + */ +-static unsigned long mmap_legacy_base(void) ++static unsigned long mmap_legacy_base(struct mm_struct *mm) + { +- if (mmap_is_ia32()) ++ if (mmap_is_ia32()) { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) ++ return SEGMEXEC_TASK_UNMAPPED_BASE; ++ else ++#endif ++ + return TASK_UNMAPPED_BASE; +- else ++ } else + return TASK_UNMAPPED_BASE + mmap_rnd(); + } + +@@ -112,11 +125,23 @@ static unsigned long mmap_legacy_base(vo + void arch_pick_mmap_layout(struct mm_struct *mm) + { + if (mmap_is_legacy()) { +- mm->mmap_base = mmap_legacy_base(); ++ mm->mmap_base = mmap_legacy_base(mm); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { +- mm->mmap_base = mmap_base(); ++ mm->mmap_base = mmap_base(mm); ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack; ++#endif ++ + mm->get_unmapped_area = 
arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +diff -urNp a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c +--- a/arch/x86/mm/numa_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/numa_32.c 2009-05-24 18:10:25.068209333 -0700 +@@ -98,7 +98,6 @@ unsigned long node_memmap_size_bytes(int + } + #endif + +-extern unsigned long find_max_low_pfn(void); + extern unsigned long highend_pfn, highstart_pfn; + + #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) +diff -urNp a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c +--- a/arch/x86/mm/pageattr-test.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/pageattr-test.c 2009-05-24 18:10:25.068209333 -0700 +@@ -36,7 +36,7 @@ enum { + + static int pte_testbit(pte_t pte) + { +- return pte_flags(pte) & _PAGE_UNUSED1; ++ return pte_flags(pte) & _PAGE_CPA_TEST; + } + + struct split_state { +diff -urNp a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +--- a/arch/x86/mm/pageattr.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/pageattr.c 2009-05-24 18:10:25.069210230 -0700 +@@ -20,6 +20,7 @@ + #include <asm/pgalloc.h> + #include <asm/proto.h> + #include <asm/pat.h> ++#include <asm/desc.h> + + /* + * The current flushing context - we pass it instead of 5 arguments: +@@ -259,9 +260,10 @@ static inline pgprot_t static_protection + * Does not cover __inittext since that is gone later on. On + * 64bit we do not enforce !NX on the low mapping + */ +- if (within(address, (unsigned long)_text, (unsigned long)_etext)) ++ if (within(address, ktla_ktva((unsigned long)_text), ktla_ktva((unsigned long)_etext))) + pgprot_val(forbidden) |= _PAGE_NX; + ++#ifdef CONFIG_DEBUG_RODATA + /* + * The .rodata section needs to be read-only. Using the pfn + * catches all aliases. 
+@@ -269,6 +271,7 @@ static inline pgprot_t static_protection + if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, + __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) + pgprot_val(forbidden) |= _PAGE_RW; ++#endif + + prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); + +@@ -321,8 +324,20 @@ EXPORT_SYMBOL_GPL(lookup_address); + */ + static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ + /* change init_mm */ + set_pte_atomic(kpte, pte); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + #ifdef CONFIG_X86_32 + if (!SHARED_KERNEL_PMD) { + struct page *page; +diff -urNp a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c +--- a/arch/x86/mm/pat.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/pat.c 2009-05-24 18:10:25.070209799 -0700 +@@ -203,7 +203,7 @@ chk_conflict(struct memtype *new, struct + + conflict: + printk(KERN_INFO "%s:%d conflicting memory types " +- "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, ++ "%Lx-%Lx %s<->%s\n", current->comm, task_pid_nr(current), new->start, + new->end, cattr_name(new->type), cattr_name(entry->type)); + return -EBUSY; + } +@@ -476,7 +476,7 @@ int free_memtype(u64 start, u64 end) + + if (err) { + printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", +- current->comm, current->pid, start, end); ++ current->comm, task_pid_nr(current), start, end); + } + + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); +@@ -578,7 +578,7 @@ int phys_mem_access_prot_allowed(struct + free_memtype(offset, offset + size); + printk(KERN_INFO + "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", +- current->comm, current->pid, ++ current->comm, task_pid_nr(current), + cattr_name(flags), + offset, (unsigned long long)(offset + size)); + return 0; +@@ -599,7 +599,7 @@ void map_devmem(unsigned long pfn, unsig + if (flags != want_flags) { + printk(KERN_INFO + 
"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", +- current->comm, current->pid, ++ current->comm, task_pid_nr(current), + cattr_name(want_flags), + addr, (unsigned long long)(addr + size), + cattr_name(flags)); +diff -urNp a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c +--- a/arch/x86/mm/pgtable_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/mm/pgtable_32.c 2009-05-24 18:10:25.070209799 -0700 +@@ -31,6 +31,10 @@ void set_pte_vaddr(unsigned long vaddr, + pmd_t *pmd; + pte_t *pte; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + BUG(); +@@ -47,11 +51,20 @@ void set_pte_vaddr(unsigned long vaddr, + return; + } + pte = pte_offset_kernel(pmd, vaddr); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + if (pte_val(pteval)) + set_pte_present(&init_mm, vaddr, pte, pteval); + else + pte_clear(&init_mm, vaddr, pte); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* + * It's enough to flush this one mapping. 
+ * (PGE mappings get flushed as well) +diff -urNp a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c +--- a/arch/x86/oprofile/backtrace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/oprofile/backtrace.c 2009-05-24 18:10:25.071209718 -0700 +@@ -37,7 +37,7 @@ static void backtrace_address(void *data + unsigned int *depth = data; + + if ((*depth)--) +- oprofile_add_trace(addr); ++ oprofile_add_trace(ktla_ktva(addr)); + } + + static struct stacktrace_ops backtrace_ops = { +@@ -78,7 +78,7 @@ x86_backtrace(struct pt_regs * const reg + struct frame_head *head = (struct frame_head *)frame_pointer(regs); + unsigned long stack = kernel_trap_sp(regs); + +- if (!user_mode_vm(regs)) { ++ if (!user_mode(regs)) { + if (depth) + dump_trace(NULL, regs, (unsigned long *)stack, 0, + &backtrace_ops, &depth); +diff -urNp a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c +--- a/arch/x86/oprofile/op_model_p4.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/oprofile/op_model_p4.c 2009-05-24 18:10:25.071209718 -0700 +@@ -48,7 +48,7 @@ static inline void setup_num_counters(vo + #endif + } + +-static int inline addr_increment(void) ++static inline int addr_increment(void) + { + #ifdef CONFIG_SMP + return smp_num_siblings == 2 ? 
2 : 1; +diff -urNp a/arch/x86/pci/common.c b/arch/x86/pci/common.c +--- a/arch/x86/pci/common.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/pci/common.c 2009-05-24 18:10:25.072209706 -0700 +@@ -362,7 +362,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + }, + }, +- {} ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + void __init dmi_check_pciprobe(void) +diff -urNp a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c +--- a/arch/x86/pci/fixup.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/pci/fixup.c 2009-05-24 18:10:25.073210603 -0700 +@@ -365,7 +365,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_NAME, "MS-6702E"), + }, + }, +- {} ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + /* +@@ -436,7 +436,7 @@ static struct dmi_system_id __devinitdat + DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +diff -urNp a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c +--- a/arch/x86/pci/irq.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/pci/irq.c 2009-05-24 18:10:25.074209264 -0700 +@@ -544,7 +544,7 @@ static __init int intel_router_probe(str + static struct pci_device_id __initdata pirq_440gx[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, +- { }, ++ { PCI_DEVICE(0, 0) } + }; + + /* 440GX has a proprietary PIRQ router -- don't use it */ +@@ -1148,7 +1148,7 @@ static struct dmi_system_id __initdata p + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + int __init pcibios_irq_init(void) +diff -urNp a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c +--- a/arch/x86/pci/pcbios.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/pci/pcbios.c 2009-05-24 18:10:25.074209264 
-0700 +@@ -57,50 +57,120 @@ union bios32 { + static struct { + unsigned long address; + unsigned short segment; +-} bios32_indirect = { 0, __KERNEL_CS }; ++} bios32_indirect __read_only = { 0, __PCIBIOS_CS }; + + /* + * Returns the entry point for the given service, NULL on error + */ + +-static unsigned long bios32_service(unsigned long service) ++static unsigned long __devinit bios32_service(unsigned long service) + { + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; ++ struct desc_struct d, *gdt; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + local_irq_save(flags); +- __asm__("lcall *(%%edi); cld" ++ ++ gdt = get_cpu_gdt_table(smp_processor_id()); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x9B, 0xC); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); ++ pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x93, 0xC); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ __asm__("movw %w7, %%ds; lcall *(%%edi); push %%ss; pop %%ds; cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), +- "D" (&bios32_indirect)); ++ "D" (&bios32_indirect), ++ "r"(__PCIBIOS_DS) ++ : "memory"); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ gdt[GDT_ENTRY_PCIBIOS_CS].a = 0; ++ gdt[GDT_ENTRY_PCIBIOS_CS].b = 0; ++ gdt[GDT_ENTRY_PCIBIOS_DS].a = 0; ++ gdt[GDT_ENTRY_PCIBIOS_DS].b = 0; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + local_irq_restore(flags); + + switch (return_code) { +- case 0: +- return address + entry; +- case 0x80: /* Not present */ +- printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); +- return 0; +- default: /* Shouldn't happen */ +- printk(KERN_WARNING 
"bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", +- service, return_code); ++ case 0: { ++ int cpu; ++ unsigned char flags; ++ ++ printk(KERN_INFO "bios32_service: base:%08lx length:%08lx entry:%08lx\n", address, length, entry); ++ if (address >= 0xFFFF0 || length > 0x100000 - address || length <= entry) { ++ printk(KERN_WARNING "bios32_service: not valid\n"); + return 0; ++ } ++ address = address + PAGE_OFFSET; ++ length += 16UL; /* some BIOSs underreport this... */ ++ flags = 4; ++ if (length >= 64*1024*1024) { ++ length >>= PAGE_SHIFT; ++ flags |= 8; ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ gdt = get_cpu_gdt_table(cpu); ++ pack_descriptor(&d, address, length, 0x9b, flags); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); ++ pack_descriptor(&d, address, length, 0x93, flags); ++ write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ return entry; ++ } ++ case 0x80: /* Not present */ ++ printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); ++ return 0; ++ default: /* Shouldn't happen */ ++ printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", ++ service, return_code); ++ return 0; + } + } + + static struct { + unsigned long address; + unsigned short segment; +-} pci_indirect = { 0, __KERNEL_CS }; ++} pci_indirect __read_only = { 0, __PCIBIOS_CS }; + +-static int pci_bios_present; ++static int pci_bios_present __read_only; + + static int __devinit check_pcibios(void) + { +@@ -109,11 +179,13 @@ static int __devinit check_pcibios(void) + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { +- pci_indirect.address = pcibios_entry + PAGE_OFFSET; ++ pci_indirect.address = pcibios_entry; + + local_irq_save(flags); +- __asm__( +- "lcall *(%%edi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%edi); cld\n\t" 
++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -122,7 +194,8 @@ static int __devinit check_pcibios(void) + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), +- "D" (&pci_indirect) ++ "D" (&pci_indirect), ++ "r" (__PCIBIOS_DS) + : "memory"); + local_irq_restore(flags); + +@@ -166,7 +239,10 @@ static int pci_bios_read(unsigned int se + + switch (len) { + case 1: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -175,7 +251,8 @@ static int pci_bios_read(unsigned int se + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + /* + * Zero-extend the result beyond 8 bits, do not trust the + * BIOS having done it: +@@ -183,7 +260,10 @@ static int pci_bios_read(unsigned int se + *value &= 0xff; + break; + case 2: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -192,7 +272,8 @@ static int pci_bios_read(unsigned int se + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + /* + * Zero-extend the result beyond 16 bits, do not trust the + * BIOS having done it: +@@ -200,7 +281,10 @@ static int pci_bios_read(unsigned int se + *value &= 0xffff; + break; + case 4: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -209,7 +293,8 @@ static int pci_bios_read(unsigned int se + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + } + +@@ -232,7 +317,10 @@ static int 
pci_bios_write(unsigned int s + + switch (len) { + case 1: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -241,10 +329,14 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + case 2: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -253,10 +345,14 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + case 4: +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w6, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -265,7 +361,8 @@ static int pci_bios_write(unsigned int s + "c" (value), + "b" (bx), + "D" ((long)reg), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + break; + } + +@@ -369,10 +466,13 @@ struct irq_routing_table * pcibios_get_i + + DBG("PCI: Fetching IRQ routing table... 
"); + __asm__("push %%es\n\t" ++ "movw %w8, %%ds\n\t" + "push %%ds\n\t" + "pop %%es\n\t" +- "lcall *(%%esi); cld\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" + "pop %%es\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -383,7 +483,8 @@ struct irq_routing_table * pcibios_get_i + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect), +- "m" (opt) ++ "m" (opt), ++ "r" (__PCIBIOS_DS) + : "memory"); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) +@@ -407,7 +508,10 @@ int pcibios_set_irq_routing(struct pci_d + { + int ret; + +- __asm__("lcall *(%%esi); cld\n\t" ++ __asm__("movw %w5, %%ds\n\t" ++ "lcall *%%ss:(%%esi); cld\n\t" ++ "push %%ss\n\t" ++ "pop %%ds\n" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" +@@ -415,7 +519,8 @@ int pcibios_set_irq_routing(struct pci_d + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), +- "S" (&pci_indirect)); ++ "S" (&pci_indirect), ++ "r" (__PCIBIOS_DS)); + return !(ret & 0xff00); + } + EXPORT_SYMBOL(pcibios_set_irq_routing); +diff -urNp a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c +--- a/arch/x86/power/cpu_32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/power/cpu_32.c 2009-05-24 18:10:25.075209602 -0700 +@@ -67,7 +67,7 @@ static void do_fpu_end(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + cpu; + + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be +diff -urNp a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c +--- a/arch/x86/power/cpu_64.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/power/cpu_64.c 2009-05-24 18:10:25.075209602 -0700 +@@ -143,7 +143,11 @@ void restore_processor_state(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); ++ struct tss_struct *t = init_tss + 
cpu; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + /* + * This just modifies memory; should not be necessary. But... This +@@ -152,8 +156,16 @@ static void fix_processor_context(void) + */ + set_tss_desc(cpu, t); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + syscall_init(); /* This sets MSR_*STAR and related */ + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ +diff -urNp a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c +--- a/arch/x86/vdso/vdso32-setup.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/vdso/vdso32-setup.c 2009-05-24 18:10:25.076209310 -0700 +@@ -226,7 +226,7 @@ static inline void map_compat_vdso(int m + void enable_sep_cpu(void) + { + int cpu = get_cpu(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); ++ struct tss_struct *tss = init_tss + cpu; + + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); +@@ -249,7 +249,7 @@ static int __init gate_vma_init(void) + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; +- gate_vma.vm_page_prot = __P101; ++ gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags); + /* + * Make sure the vDSO gets into every core dump. 
+ * Dumping its contents makes post-mortem fully interpretable later +@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct l + if (compat) + addr = VDSO_HIGH_BASE; + else { +- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); ++ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; +@@ -358,7 +358,7 @@ int arch_setup_additional_pages(struct l + goto up_fail; + } + +- current->mm->context.vdso = (void *)addr; ++ current->mm->context.vdso = addr; + current_thread_info()->sysenter_return = + VDSO32_SYMBOL(addr, SYSENTER_RETURN); + +@@ -384,7 +384,7 @@ static ctl_table abi_table2[] = { + .mode = 0644, + .proc_handler = proc_dointvec + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static ctl_table abi_root_table2[] = { +@@ -394,7 +394,7 @@ static ctl_table abi_root_table2[] = { + .mode = 0555, + .child = abi_table2 + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static __init int ia32_binfmt_init(void) +@@ -409,8 +409,14 @@ __initcall(ia32_binfmt_init); + + const char *arch_vma_name(struct vm_area_struct *vma) + { +- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) ++ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma->vm_mm && vma->vm_mirror && vma->vm_mirror->vm_start == vma->vm_mm->context.vdso) ++ return "[vdso]"; ++#endif ++ + return NULL; + } + +@@ -419,7 +425,7 @@ struct vm_area_struct *get_gate_vma(stru + struct mm_struct *mm = tsk->mm; + + /* Check to see if this task was created in compat vdso mode */ +- if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) ++ if (mm && mm->context.vdso == VDSO_HIGH_BASE) + return &gate_vma; + return NULL; + } +diff -urNp a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c +--- a/arch/x86/vdso/vma.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/vdso/vma.c 2009-05-24 18:10:25.077210347 -0700 +@@ 
-123,7 +123,7 @@ int arch_setup_additional_pages(struct l + if (ret) + goto up_fail; + +- current->mm->context.vdso = (void *)addr; ++ current->mm->context.vdso = addr; + up_fail: + up_write(&mm->mmap_sem); + return ret; +diff -urNp a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +--- a/arch/x86/xen/enlighten.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/xen/enlighten.c 2009-05-24 18:10:25.078210056 -0700 +@@ -318,7 +318,7 @@ static void xen_set_ldt(const void *addr + static void xen_load_gdt(const struct desc_ptr *dtr) + { + unsigned long *frames; +- unsigned long va = dtr->address; ++ unsigned long va = (unsigned long)dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + int f; +@@ -333,7 +333,7 @@ static void xen_load_gdt(const struct de + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; + +- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { ++ for (f = 0; va < (unsigned long)dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = virt_to_mfn(va); + make_lowmem_page_readonly((void *)va); + } +@@ -441,7 +441,7 @@ static void xen_write_idt_entry(gate_des + + preempt_disable(); + +- start = __get_cpu_var(idt_desc).address; ++ start = (unsigned long)__get_cpu_var(idt_desc).address; + end = start + __get_cpu_var(idt_desc).size + 1; + + xen_mc_flush(); +@@ -1526,6 +1526,8 @@ static __init pgd_t *xen_setup_kernel_pa + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); ++ convert_pfn_mfn(level3_vmalloc_pgt); ++ convert_pfn_mfn(level3_vmemmap_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); +@@ -1544,9 +1546,12 @@ static __init pgd_t *xen_setup_kernel_pa + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_vmalloc_pgt, 
PAGE_KERNEL_RO); ++ set_page_prot(level3_vmemmap_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, +diff -urNp a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c +--- a/arch/x86/xen/smp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/arch/x86/xen/smp.c 2009-05-24 18:10:25.078210056 -0700 +@@ -171,11 +171,6 @@ static void __init xen_smp_prepare_boot_ + { + BUG_ON(smp_processor_id() != 0); + native_smp_prepare_boot_cpu(); +- +- /* We've switched to the "real" per-cpu gdt, so make sure the +- old memory can be recycled */ +- make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); +- + xen_setup_vcpu_info_placement(); + } + +@@ -231,8 +226,8 @@ cpu_initialize_context(unsigned int cpu, + gdt = get_cpu_gdt_table(cpu); + + ctxt->flags = VGCF_IN_KERNEL; +- ctxt->user_regs.ds = __USER_DS; +- ctxt->user_regs.es = __USER_DS; ++ ctxt->user_regs.ds = __KERNEL_DS; ++ ctxt->user_regs.es = __KERNEL_DS; + ctxt->user_regs.ss = __KERNEL_DS; + #ifdef CONFIG_X86_32 + ctxt->user_regs.fs = __KERNEL_PERCPU; +diff -urNp a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c +--- a/crypto/async_tx/async_tx.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/crypto/async_tx/async_tx.c 2009-05-24 18:10:25.079209137 -0700 +@@ -358,8 +358,8 @@ async_tx_init(void) + err: + printk(KERN_ERR "async_tx: initialization failure\n"); + +- while (--cap >= 0) +- free_percpu(channel_table[cap]); ++ while (cap) ++ free_percpu(channel_table[--cap]); + + return 1; + } +diff -urNp a/crypto/lrw.c b/crypto/lrw.c +--- a/crypto/lrw.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/crypto/lrw.c 2009-05-24 18:10:25.079209137 -0700 +@@ -54,7 +54,7 @@ static int setkey(struct crypto_tfm *par + struct priv *ctx = crypto_tfm_ctx(parent); + struct crypto_cipher *child = ctx->child; + int err, i; +- 
be128 tmp = { 0 }; ++ be128 tmp = { 0, 0 }; + int bsize = crypto_cipher_blocksize(child); + + crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); +diff -urNp a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c +--- a/drivers/acpi/blacklist.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/acpi/blacklist.c 2009-05-24 18:10:25.080210801 -0700 +@@ -71,7 +71,7 @@ static struct acpi_blacklist_item acpi_b + {"IBM ", "TP600E ", 0x00000105, ACPI_SIG_DSDT, less_than_or_equal, + "Incorrect _ADR", 1}, + +- {""} ++ {"", "", 0, 0, 0, all_versions, 0} + }; + + #if CONFIG_ACPI_BLACKLIST_YEAR +diff -urNp a/drivers/acpi/osl.c b/drivers/acpi/osl.c +--- a/drivers/acpi/osl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/acpi/osl.c 2009-05-24 18:10:25.080210801 -0700 +@@ -483,6 +483,8 @@ acpi_os_read_memory(acpi_physical_addres + void __iomem *virt_addr; + + virt_addr = ioremap(phys_addr, width); ++ if (!virt_addr) ++ return AE_NO_MEMORY; + if (!value) + value = &dummy; + +@@ -511,6 +513,8 @@ acpi_os_write_memory(acpi_physical_addre + void __iomem *virt_addr; + + virt_addr = ioremap(phys_addr, width); ++ if (!virt_addr) ++ return AE_NO_MEMORY; + + switch (width) { + case 8: +diff -urNp a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c +--- a/drivers/acpi/processor_core.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/acpi/processor_core.c 2009-05-24 18:10:25.081209323 -0700 +@@ -678,7 +678,7 @@ static int __cpuinit acpi_processor_star + return 0; + } + +- BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); ++ BUG_ON(pr->id >= nr_cpu_ids); + + /* + * Buggy BIOS check +diff -urNp a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c +--- a/drivers/acpi/processor_idle.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/acpi/processor_idle.c 2009-05-24 18:10:25.082210498 -0700 +@@ -181,7 +181,7 @@ static struct dmi_system_id __cpuinitdat + DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), + 
DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, + (void *)2}, +- {}, ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL}, + }; + + static inline u32 ticks_elapsed(u32 t1, u32 t2) +diff -urNp a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c +--- a/drivers/acpi/tables/tbfadt.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/acpi/tables/tbfadt.c 2009-05-24 18:10:25.083209649 -0700 +@@ -48,7 +48,7 @@ + ACPI_MODULE_NAME("tbfadt") + + /* Local prototypes */ +-static void inline ++static inline void + acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, + u8 byte_width, u64 address); + +@@ -122,7 +122,7 @@ static struct acpi_fadt_info fadt_info_t + * + ******************************************************************************/ + +-static void inline ++static inline void + acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, + u8 byte_width, u64 address) + { +diff -urNp a/drivers/ata/ahci.c b/drivers/ata/ahci.c +--- a/drivers/ata/ahci.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ata/ahci.c 2009-05-24 18:10:25.084209218 -0700 +@@ -606,7 +606,7 @@ static const struct pci_device_id ahci_p + { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff, board_ahci }, + +- { } /* terminate list */ ++ { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ + }; + + +diff -urNp a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c +--- a/drivers/ata/ata_piix.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ata/ata_piix.c 2009-05-24 18:10:25.085209626 -0700 +@@ -289,7 +289,7 @@ static const struct pci_device_id piix_p + { 0x8086, 0x3b2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (PCH) */ + { 0x8086, 0x3b2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, +- { } /* terminate list */ ++ { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ + }; + + static struct pci_driver piix_pci_driver = { +@@ -593,7 +593,7 @@ static const struct ich_laptop ich_lapto + { 
0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ + { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ + /* end marker */ +- { 0, } ++ { 0, 0, 0 } + }; + + /** +@@ -1052,7 +1052,7 @@ static int piix_broken_suspend(void) + }, + }, + +- { } /* terminate list */ ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } /* terminate list */ + }; + static const char *oemstrs[] = { + "Tecra M3,", +diff -urNp a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +--- a/drivers/ata/libata-core.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ata/libata-core.c 2009-05-24 18:10:25.088209521 -0700 +@@ -807,7 +807,7 @@ static const struct ata_xfer_ent { + { ATA_SHIFT_PIO, ATA_NR_PIO_MODES, XFER_PIO_0 }, + { ATA_SHIFT_MWDMA, ATA_NR_MWDMA_MODES, XFER_MW_DMA_0 }, + { ATA_SHIFT_UDMA, ATA_NR_UDMA_MODES, XFER_UDMA_0 }, +- { -1, }, ++ { -1, 0, 0 } + }; + + /** +@@ -2983,7 +2983,7 @@ static const struct ata_timing ata_timin + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, + +- { 0xFF } ++ { 0xFF, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + #define ENOUGH(v, unit) (((v)-1)/(unit)+1) +@@ -4149,7 +4149,7 @@ static const struct ata_blacklist_entry + { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, + + /* End Marker */ +- { } ++ { NULL, NULL, 0 } + }; + + static int strn_pattern_cmp(const char *patt, const char *name, int wildchar) +diff -urNp a/drivers/atm/adummy.c b/drivers/atm/adummy.c +--- a/drivers/atm/adummy.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/adummy.c 2009-05-24 18:10:25.089210069 -0700 +@@ -77,7 +77,7 @@ adummy_send(struct atm_vcc *vcc, struct + vcc->pop(vcc, skb); + else + dev_kfree_skb_any(skb); +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + + return 0; + } +diff -urNp a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c +--- a/drivers/atm/ambassador.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/ambassador.c 2009-05-24 18:10:25.090209219 -0700 +@@ -453,7 +453,7 @@ static 
void tx_complete (amb_dev * dev, + PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx); + + // VC layer stats +- atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); ++ atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); + + // free the descriptor + kfree (tx_descr); +@@ -494,7 +494,7 @@ static void rx_complete (amb_dev * dev, + dump_skb ("<<<", vc, skb); + + // VC layer stats +- atomic_inc(&atm_vcc->stats->rx); ++ atomic_inc_unchecked(&atm_vcc->stats->rx); + __net_timestamp(skb); + // end of our responsability + atm_vcc->push (atm_vcc, skb); +@@ -509,7 +509,7 @@ static void rx_complete (amb_dev * dev, + } else { + PRINTK (KERN_INFO, "dropped over-size frame"); + // should we count this? +- atomic_inc(&atm_vcc->stats->rx_drop); ++ atomic_inc_unchecked(&atm_vcc->stats->rx_drop); + } + + } else { +@@ -1349,7 +1349,7 @@ static int amb_send (struct atm_vcc * at + } + + if (check_area (skb->data, skb->len)) { +- atomic_inc(&atm_vcc->stats->tx_err); ++ atomic_inc_unchecked(&atm_vcc->stats->tx_err); + return -ENOMEM; // ? 
+ } + +diff -urNp a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c +--- a/drivers/atm/atmtcp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/atmtcp.c 2009-05-24 18:10:25.091209767 -0700 +@@ -206,7 +206,7 @@ static int atmtcp_v_send(struct atm_vcc + if (vcc->pop) vcc->pop(vcc,skb); + else dev_kfree_skb(skb); + if (dev_data) return 0; +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + return -ENOLINK; + } + size = skb->len+sizeof(struct atmtcp_hdr); +@@ -214,7 +214,7 @@ static int atmtcp_v_send(struct atm_vcc + if (!new_skb) { + if (vcc->pop) vcc->pop(vcc,skb); + else dev_kfree_skb(skb); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + return -ENOBUFS; + } + hdr = (void *) skb_put(new_skb,sizeof(struct atmtcp_hdr)); +@@ -225,8 +225,8 @@ static int atmtcp_v_send(struct atm_vcc + if (vcc->pop) vcc->pop(vcc,skb); + else dev_kfree_skb(skb); + out_vcc->push(out_vcc,new_skb); +- atomic_inc(&vcc->stats->tx); +- atomic_inc(&out_vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->tx); ++ atomic_inc_unchecked(&out_vcc->stats->rx); + return 0; + } + +@@ -300,7 +300,7 @@ static int atmtcp_c_send(struct atm_vcc + out_vcc = find_vcc(dev, ntohs(hdr->vpi), ntohs(hdr->vci)); + read_unlock(&vcc_sklist_lock); + if (!out_vcc) { +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + goto done; + } + skb_pull(skb,sizeof(struct atmtcp_hdr)); +@@ -312,8 +312,8 @@ static int atmtcp_c_send(struct atm_vcc + __net_timestamp(new_skb); + skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len); + out_vcc->push(out_vcc,new_skb); +- atomic_inc(&vcc->stats->tx); +- atomic_inc(&out_vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->tx); ++ atomic_inc_unchecked(&out_vcc->stats->rx); + done: + if (vcc->pop) vcc->pop(vcc,skb); + else dev_kfree_skb(skb); +diff -urNp a/drivers/atm/eni.c b/drivers/atm/eni.c +--- a/drivers/atm/eni.c 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/drivers/atm/eni.c 2009-05-24 18:10:25.092209337 -0700 +@@ -525,7 +525,7 @@ static int rx_aal0(struct atm_vcc *vcc) + DPRINTK(DEV_LABEL "(itf %d): trashing empty cell\n", + vcc->dev->number); + length = 0; +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + } + else { + length = ATM_CELL_SIZE-1; /* no HEC */ +@@ -580,7 +580,7 @@ static int rx_aal5(struct atm_vcc *vcc) + size); + } + eff = length = 0; +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + } + else { + size = (descr & MID_RED_COUNT)*(ATM_CELL_PAYLOAD >> 2); +@@ -597,7 +597,7 @@ static int rx_aal5(struct atm_vcc *vcc) + "(VCI=%d,length=%ld,size=%ld (descr 0x%lx))\n", + vcc->dev->number,vcc->vci,length,size << 2,descr); + length = eff = 0; +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + } + } + skb = eff ? atm_alloc_charge(vcc,eff << 2,GFP_ATOMIC) : NULL; +@@ -770,7 +770,7 @@ rx_dequeued++; + vcc->push(vcc,skb); + pushed++; + } +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + wake_up(&eni_dev->rx_wait); + } +@@ -1227,7 +1227,7 @@ static void dequeue_tx(struct atm_dev *d + PCI_DMA_TODEVICE); + if (vcc->pop) vcc->pop(vcc,skb); + else dev_kfree_skb_irq(skb); +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + wake_up(&eni_dev->tx_wait); + dma_complete++; + } +diff -urNp a/drivers/atm/firestream.c b/drivers/atm/firestream.c +--- a/drivers/atm/firestream.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/firestream.c 2009-05-24 18:10:25.094210850 -0700 +@@ -748,7 +748,7 @@ static void process_txdone_queue (struct + } + } + +- atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); ++ atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); + + fs_dprintk (FS_DEBUG_TXMEM, "i"); + fs_dprintk (FS_DEBUG_ALLOC, "Free t-skb: %p\n", skb); +@@ -815,7 +815,7 @@ static void process_incoming (struct fs_ + #endif + skb_put (skb, qe->p1 & 0xffff); + ATM_SKB(skb)->vcc = atm_vcc; +- 
atomic_inc(&atm_vcc->stats->rx); ++ atomic_inc_unchecked(&atm_vcc->stats->rx); + __net_timestamp(skb); + fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); + atm_vcc->push (atm_vcc, skb); +@@ -836,12 +836,12 @@ static void process_incoming (struct fs_ + kfree (pe); + } + if (atm_vcc) +- atomic_inc(&atm_vcc->stats->rx_drop); ++ atomic_inc_unchecked(&atm_vcc->stats->rx_drop); + break; + case 0x1f: /* Reassembly abort: no buffers. */ + /* Silently increment error counter. */ + if (atm_vcc) +- atomic_inc(&atm_vcc->stats->rx_drop); ++ atomic_inc_unchecked(&atm_vcc->stats->rx_drop); + break; + default: /* Hmm. Haven't written the code to handle the others yet... -- REW */ + printk (KERN_WARNING "Don't know what to do with RX status %x: %s.\n", +diff -urNp a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c +--- a/drivers/atm/fore200e.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/fore200e.c 2009-05-24 18:10:25.095209511 -0700 +@@ -931,9 +931,9 @@ fore200e_tx_irq(struct fore200e* fore200 + #endif + /* check error condition */ + if (*entry->status & STATUS_ERROR) +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + else +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + } + } + +@@ -1082,7 +1082,7 @@ fore200e_push_rpd(struct fore200e* fore2 + if (skb == NULL) { + DPRINTK(2, "unable to alloc new skb, rx PDU length = %d\n", pdu_len); + +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + return -ENOMEM; + } + +@@ -1125,14 +1125,14 @@ fore200e_push_rpd(struct fore200e* fore2 + + dev_kfree_skb_any(skb); + +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + return -ENOMEM; + } + + ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); + + vcc->push(vcc, skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); + +@@ -1210,7 +1210,7 @@ 
fore200e_rx_irq(struct fore200e* fore200 + DPRINTK(2, "damaged PDU on %d.%d.%d\n", + fore200e->atm_dev->number, + entry->rpd->atm_header.vpi, entry->rpd->atm_header.vci); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + } + } + +@@ -1655,7 +1655,7 @@ fore200e_send(struct atm_vcc *vcc, struc + goto retry_here; + } + +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + + fore200e->tx_sat++; + DPRINTK(2, "tx queue of device %s is saturated, PDU dropped - heartbeat is %08x\n", +diff -urNp a/drivers/atm/he.c b/drivers/atm/he.c +--- a/drivers/atm/he.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/he.c 2009-05-24 18:10:25.097210186 -0700 +@@ -1728,7 +1728,7 @@ he_service_rbrq(struct he_dev *he_dev, i + + if (RBRQ_HBUF_ERR(he_dev->rbrq_head)) { + hprintk("HBUF_ERR! (cid 0x%x)\n", cid); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + goto return_host_buffers; + } + +@@ -1761,7 +1761,7 @@ he_service_rbrq(struct he_dev *he_dev, i + RBRQ_LEN_ERR(he_dev->rbrq_head) + ? 
"LEN_ERR" : "", + vcc->vpi, vcc->vci); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + goto return_host_buffers; + } + +@@ -1820,7 +1820,7 @@ he_service_rbrq(struct he_dev *he_dev, i + vcc->push(vcc, skb); + spin_lock(&he_dev->global_lock); + +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + return_host_buffers: + ++pdus_assembled; +@@ -2673,7 +2673,7 @@ he_send(struct atm_vcc *vcc, struct sk_b + __enqueue_tpd(he_dev, tpd, cid); + spin_unlock_irqrestore(&he_dev->global_lock, flags); + +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + + return 0; + } +diff -urNp a/drivers/atm/horizon.c b/drivers/atm/horizon.c +--- a/drivers/atm/horizon.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/horizon.c 2009-05-24 18:10:25.098210385 -0700 +@@ -1033,7 +1033,7 @@ static void rx_schedule (hrz_dev * dev, + { + struct atm_vcc * vcc = ATM_SKB(skb)->vcc; + // VC layer stats +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + __net_timestamp(skb); + // end of our responsability + vcc->push (vcc, skb); +@@ -1185,7 +1185,7 @@ static void tx_schedule (hrz_dev * const + dev->tx_iovec = NULL; + + // VC layer stats +- atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); ++ atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); + + // free the skb + hrz_kfree_skb (skb); +diff -urNp a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c +--- a/drivers/atm/idt77252.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/idt77252.c 2009-05-24 18:10:25.100209314 -0700 +@@ -810,7 +810,7 @@ drain_scq(struct idt77252_dev *card, str + else + dev_kfree_skb(skb); + +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + } + + atomic_dec(&scq->used); +@@ -1073,13 +1073,13 @@ dequeue_rx(struct idt77252_dev *card, st + if ((sb = dev_alloc_skb(64)) == NULL) { + printk("%s: Can't allocate buffers for aal0.\n", + card->name); +- atomic_add(i, &vcc->stats->rx_drop); ++ 
atomic_add_unchecked(i, &vcc->stats->rx_drop); + break; + } + if (!atm_charge(vcc, sb->truesize)) { + RXPRINTK("%s: atm_charge() dropped aal0 packets.\n", + card->name); +- atomic_add(i - 1, &vcc->stats->rx_drop); ++ atomic_add_unchecked(i - 1, &vcc->stats->rx_drop); + dev_kfree_skb(sb); + break; + } +@@ -1096,7 +1096,7 @@ dequeue_rx(struct idt77252_dev *card, st + ATM_SKB(sb)->vcc = vcc; + __net_timestamp(sb); + vcc->push(vcc, sb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + cell += ATM_CELL_PAYLOAD; + } +@@ -1133,13 +1133,13 @@ dequeue_rx(struct idt77252_dev *card, st + "(CDC: %08x)\n", + card->name, len, rpp->len, readl(SAR_REG_CDC)); + recycle_rx_pool_skb(card, rpp); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + return; + } + if (stat & SAR_RSQE_CRC) { + RXPRINTK("%s: AAL5 CRC error.\n", card->name); + recycle_rx_pool_skb(card, rpp); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + return; + } + if (skb_queue_len(&rpp->queue) > 1) { +@@ -1150,7 +1150,7 @@ dequeue_rx(struct idt77252_dev *card, st + RXPRINTK("%s: Can't alloc RX skb.\n", + card->name); + recycle_rx_pool_skb(card, rpp); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + return; + } + if (!atm_charge(vcc, skb->truesize)) { +@@ -1169,7 +1169,7 @@ dequeue_rx(struct idt77252_dev *card, st + __net_timestamp(skb); + + vcc->push(vcc, skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + return; + } +@@ -1191,7 +1191,7 @@ dequeue_rx(struct idt77252_dev *card, st + __net_timestamp(skb); + + vcc->push(vcc, skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + if (skb->truesize > SAR_FB_SIZE_3) + add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); +@@ -1303,14 +1303,14 @@ idt77252_rx_raw(struct idt77252_dev *car + if (vcc->qos.aal != ATM_AAL0) { + RPRINTK("%s: raw cell for non AAL0 vc %u.%u\n", + card->name, vpi, vci); +- 
atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + goto drop; + } + + if ((sb = dev_alloc_skb(64)) == NULL) { + printk("%s: Can't allocate buffers for AAL0.\n", + card->name); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + goto drop; + } + +@@ -1329,7 +1329,7 @@ idt77252_rx_raw(struct idt77252_dev *car + ATM_SKB(sb)->vcc = vcc; + __net_timestamp(sb); + vcc->push(vcc, sb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + + drop: + skb_pull(queue, 64); +@@ -1954,13 +1954,13 @@ idt77252_send_skb(struct atm_vcc *vcc, s + + if (vc == NULL) { + printk("%s: NULL connection in send().\n", card->name); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb(skb); + return -EINVAL; + } + if (!test_bit(VCF_TX, &vc->flags)) { + printk("%s: Trying to transmit on a non-tx VC.\n", card->name); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb(skb); + return -EINVAL; + } +@@ -1972,14 +1972,14 @@ idt77252_send_skb(struct atm_vcc *vcc, s + break; + default: + printk("%s: Unsupported AAL: %d\n", card->name, vcc->qos.aal); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb(skb); + return -EINVAL; + } + + if (skb_shinfo(skb)->nr_frags != 0) { + printk("%s: No scatter-gather yet.\n", card->name); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb(skb); + return -EINVAL; + } +@@ -1987,7 +1987,7 @@ idt77252_send_skb(struct atm_vcc *vcc, s + + err = queue_skb(card, vc, skb, oam); + if (err) { +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb(skb); + return err; + } +@@ -2010,7 +2010,7 @@ idt77252_send_oam(struct atm_vcc *vcc, v + skb = dev_alloc_skb(64); + if (!skb) { + printk("%s: Out of memory in send_oam().\n", card->name); +- atomic_inc(&vcc->stats->tx_err); ++ 
atomic_inc_unchecked(&vcc->stats->tx_err); + return -ENOMEM; + } + atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); +diff -urNp a/drivers/atm/iphase.c b/drivers/atm/iphase.c +--- a/drivers/atm/iphase.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/iphase.c 2009-05-24 18:10:25.102209151 -0700 +@@ -1125,7 +1125,7 @@ static int rx_pkt(struct atm_dev *dev) + status = (u_short) (buf_desc_ptr->desc_mode); + if (status & (RX_CER | RX_PTE | RX_OFL)) + { +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + IF_ERR(printk("IA: bad packet, dropping it");) + if (status & RX_CER) { + IF_ERR(printk(" cause: packet CRC error\n");) +@@ -1148,7 +1148,7 @@ static int rx_pkt(struct atm_dev *dev) + len = dma_addr - buf_addr; + if (len > iadev->rx_buf_sz) { + printk("Over %d bytes sdu received, dropped!!!\n", iadev->rx_buf_sz); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + goto out_free_desc; + } + +@@ -1298,7 +1298,7 @@ static void rx_dle_intr(struct atm_dev * + ia_vcc = INPH_IA_VCC(vcc); + if (ia_vcc == NULL) + { +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + dev_kfree_skb_any(skb); + atm_return(vcc, atm_guess_pdu2truesize(len)); + goto INCR_DLE; +@@ -1310,7 +1310,7 @@ static void rx_dle_intr(struct atm_dev * + if ((length > iadev->rx_buf_sz) || (length > + (skb->len - sizeof(struct cpcs_trailer)))) + { +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + IF_ERR(printk("rx_dle_intr: Bad AAL5 trailer %d (skb len %d)", + length, skb->len);) + dev_kfree_skb_any(skb); +@@ -1326,7 +1326,7 @@ static void rx_dle_intr(struct atm_dev * + + IF_RX(printk("rx_dle_intr: skb push");) + vcc->push(vcc,skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + iadev->rx_pkt_cnt++; + } + INCR_DLE: +@@ -2921,7 +2921,7 @@ static int ia_pkt_tx (struct atm_vcc *vc + if ((desc == 0) || (desc > iadev->num_tx_desc)) + { + 
IF_ERR(printk(DEV_LABEL "invalid desc for send: %d\n", desc);) +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + if (vcc->pop) + vcc->pop(vcc, skb); + else +@@ -3026,7 +3026,7 @@ static int ia_pkt_tx (struct atm_vcc *vc + ATM_DESC(skb) = vcc->vci; + skb_queue_tail(&iadev->tx_dma_q, skb); + +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + iadev->tx_pkt_cnt++; + /* Increment transaction counter */ + writel(2, iadev->dma+IPHASE5575_TX_COUNTER); +diff -urNp a/drivers/atm/lanai.c b/drivers/atm/lanai.c +--- a/drivers/atm/lanai.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/lanai.c 2009-05-24 18:10:25.103038389 -0700 +@@ -1305,7 +1305,7 @@ static void lanai_send_one_aal5(struct l + vcc_tx_add_aal5_trailer(lvcc, skb->len, 0, 0); + lanai_endtx(lanai, lvcc); + lanai_free_skb(lvcc->tx.atmvcc, skb); +- atomic_inc(&lvcc->tx.atmvcc->stats->tx); ++ atomic_inc_unchecked(&lvcc->tx.atmvcc->stats->tx); + } + + /* Try to fill the buffer - don't call unless there is backlog */ +@@ -1428,7 +1428,7 @@ static void vcc_rx_aal5(struct lanai_vcc + ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; + __net_timestamp(skb); + lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); +- atomic_inc(&lvcc->rx.atmvcc->stats->rx); ++ atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx); + out: + lvcc->rx.buf.ptr = end; + cardvcc_write(lvcc, endptr, vcc_rxreadptr); +@@ -1670,7 +1670,7 @@ static int handle_service(struct lanai_d + DPRINTK("(itf %d) got RX service entry 0x%X for non-AAL5 " + "vcc %d\n", lanai->number, (unsigned int) s, vci); + lanai->stats.service_rxnotaal5++; +- atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); ++ atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); + return 0; + } + if (likely(!(s & (SERVICE_TRASH | SERVICE_STREAM | SERVICE_CRCERR)))) { +@@ -1682,7 +1682,7 @@ static int handle_service(struct lanai_d + int bytes; + read_unlock(&vcc_sklist_lock); + DPRINTK("got trashed rx pdu on vci %d\n", vci); +- 
atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); ++ atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); + lvcc->stats.x.aal5.service_trash++; + bytes = (SERVICE_GET_END(s) * 16) - + (((unsigned long) lvcc->rx.buf.ptr) - +@@ -1694,7 +1694,7 @@ static int handle_service(struct lanai_d + } + if (s & SERVICE_STREAM) { + read_unlock(&vcc_sklist_lock); +- atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); ++ atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); + lvcc->stats.x.aal5.service_stream++; + printk(KERN_ERR DEV_LABEL "(itf %d): Got AAL5 stream " + "PDU on VCI %d!\n", lanai->number, vci); +@@ -1702,7 +1702,7 @@ static int handle_service(struct lanai_d + return 0; + } + DPRINTK("got rx crc error on vci %d\n", vci); +- atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); ++ atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); + lvcc->stats.x.aal5.service_rxcrc++; + lvcc->rx.buf.ptr = &lvcc->rx.buf.start[SERVICE_GET_END(s) * 4]; + cardvcc_write(lvcc, SERVICE_GET_END(s), vcc_rxreadptr); +diff -urNp a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c +--- a/drivers/atm/nicstar.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/nicstar.c 2009-05-24 18:10:25.105209116 -0700 +@@ -1723,7 +1723,7 @@ static int ns_send(struct atm_vcc *vcc, + if ((vc = (vc_map *) vcc->dev_data) == NULL) + { + printk("nicstar%d: vcc->dev_data == NULL on ns_send().\n", card->index); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb_any(skb); + return -EINVAL; + } +@@ -1731,7 +1731,7 @@ static int ns_send(struct atm_vcc *vcc, + if (!vc->tx) + { + printk("nicstar%d: Trying to transmit on a non-tx VC.\n", card->index); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb_any(skb); + return -EINVAL; + } +@@ -1739,7 +1739,7 @@ static int ns_send(struct atm_vcc *vcc, + if (vcc->qos.aal != ATM_AAL5 && vcc->qos.aal != ATM_AAL0) + { + printk("nicstar%d: Only AAL0 and AAL5 are supported.\n", card->index); +- 
atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb_any(skb); + return -EINVAL; + } +@@ -1747,7 +1747,7 @@ static int ns_send(struct atm_vcc *vcc, + if (skb_shinfo(skb)->nr_frags != 0) + { + printk("nicstar%d: No scatter-gather yet.\n", card->index); +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb_any(skb); + return -EINVAL; + } +@@ -1792,11 +1792,11 @@ static int ns_send(struct atm_vcc *vcc, + + if (push_scqe(card, vc, scq, &scqe, skb) != 0) + { +- atomic_inc(&vcc->stats->tx_err); ++ atomic_inc_unchecked(&vcc->stats->tx_err); + dev_kfree_skb_any(skb); + return -EIO; + } +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + + return 0; + } +@@ -2111,14 +2111,14 @@ static void dequeue_rx(ns_dev *card, ns_ + { + printk("nicstar%d: Can't allocate buffers for aal0.\n", + card->index); +- atomic_add(i,&vcc->stats->rx_drop); ++ atomic_add_unchecked(i,&vcc->stats->rx_drop); + break; + } + if (!atm_charge(vcc, sb->truesize)) + { + RXPRINTK("nicstar%d: atm_charge() dropped aal0 packets.\n", + card->index); +- atomic_add(i-1,&vcc->stats->rx_drop); /* already increased by 1 */ ++ atomic_add_unchecked(i-1,&vcc->stats->rx_drop); /* already increased by 1 */ + dev_kfree_skb_any(sb); + break; + } +@@ -2133,7 +2133,7 @@ static void dequeue_rx(ns_dev *card, ns_ + ATM_SKB(sb)->vcc = vcc; + __net_timestamp(sb); + vcc->push(vcc, sb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + cell += ATM_CELL_PAYLOAD; + } + +@@ -2152,7 +2152,7 @@ static void dequeue_rx(ns_dev *card, ns_ + if (iovb == NULL) + { + printk("nicstar%d: Out of iovec buffers.\n", card->index); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + recycle_rx_buf(card, skb); + return; + } +@@ -2182,7 +2182,7 @@ static void dequeue_rx(ns_dev *card, ns_ + else if (NS_SKB(iovb)->iovcnt >= NS_MAX_IOVECS) + { + printk("nicstar%d: received too big AAL5 
SDU.\n", card->index); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS); + NS_SKB(iovb)->iovcnt = 0; + iovb->len = 0; +@@ -2202,7 +2202,7 @@ static void dequeue_rx(ns_dev *card, ns_ + printk("nicstar%d: Expected a small buffer, and this is not one.\n", + card->index); + which_list(card, skb); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + recycle_rx_buf(card, skb); + vc->rx_iov = NULL; + recycle_iov_buf(card, iovb); +@@ -2216,7 +2216,7 @@ static void dequeue_rx(ns_dev *card, ns_ + printk("nicstar%d: Expected a large buffer, and this is not one.\n", + card->index); + which_list(card, skb); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, + NS_SKB(iovb)->iovcnt); + vc->rx_iov = NULL; +@@ -2240,7 +2240,7 @@ static void dequeue_rx(ns_dev *card, ns_ + printk(" - PDU size mismatch.\n"); + else + printk(".\n"); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, + NS_SKB(iovb)->iovcnt); + vc->rx_iov = NULL; +@@ -2256,7 +2256,7 @@ static void dequeue_rx(ns_dev *card, ns_ + if (!atm_charge(vcc, skb->truesize)) + { + push_rxbufs(card, skb); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + } + else + { +@@ -2268,7 +2268,7 @@ static void dequeue_rx(ns_dev *card, ns_ + ATM_SKB(skb)->vcc = vcc; + __net_timestamp(skb); + vcc->push(vcc, skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + } + else if (NS_SKB(iovb)->iovcnt == 2) /* One small plus one large buffer */ +@@ -2283,7 +2283,7 @@ static void dequeue_rx(ns_dev *card, ns_ + if (!atm_charge(vcc, sb->truesize)) + { + push_rxbufs(card, sb); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + } + else + { 
+@@ -2295,7 +2295,7 @@ static void dequeue_rx(ns_dev *card, ns_ + ATM_SKB(sb)->vcc = vcc; + __net_timestamp(sb); + vcc->push(vcc, sb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + + push_rxbufs(card, skb); +@@ -2306,7 +2306,7 @@ static void dequeue_rx(ns_dev *card, ns_ + if (!atm_charge(vcc, skb->truesize)) + { + push_rxbufs(card, skb); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + } + else + { +@@ -2320,7 +2320,7 @@ static void dequeue_rx(ns_dev *card, ns_ + ATM_SKB(skb)->vcc = vcc; + __net_timestamp(skb); + vcc->push(vcc, skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + + push_rxbufs(card, sb); +@@ -2342,7 +2342,7 @@ static void dequeue_rx(ns_dev *card, ns_ + if (hb == NULL) + { + printk("nicstar%d: Out of huge buffers.\n", card->index); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, + NS_SKB(iovb)->iovcnt); + vc->rx_iov = NULL; +@@ -2393,7 +2393,7 @@ static void dequeue_rx(ns_dev *card, ns_ + } + else + dev_kfree_skb_any(hb); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + } + else + { +@@ -2427,7 +2427,7 @@ static void dequeue_rx(ns_dev *card, ns_ + #endif /* NS_USE_DESTRUCTORS */ + __net_timestamp(hb); + vcc->push(vcc, hb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + } + +diff -urNp a/drivers/atm/suni.c b/drivers/atm/suni.c +--- a/drivers/atm/suni.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/suni.c 2009-05-24 18:10:25.106209664 -0700 +@@ -49,7 +49,7 @@ static DEFINE_SPINLOCK(sunis_lock); + + + #define ADD_LIMITED(s,v) \ +- atomic_add((v),&stats->s); \ ++ atomic_add_unchecked((v),&stats->s); \ + if (atomic_read(&stats->s) < 0) atomic_set(&stats->s,INT_MAX); + + +diff -urNp a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c +--- a/drivers/atm/uPD98402.c 2009-05-02 
11:54:43.000000000 -0700 ++++ b/drivers/atm/uPD98402.c 2009-05-24 18:10:25.107038273 -0700 +@@ -41,7 +41,7 @@ static int fetch_stats(struct atm_dev *d + struct sonet_stats tmp; + int error = 0; + +- atomic_add(GET(HECCT),&PRIV(dev)->sonet_stats.uncorr_hcs); ++ atomic_add_unchecked(GET(HECCT),&PRIV(dev)->sonet_stats.uncorr_hcs); + sonet_copy_stats(&PRIV(dev)->sonet_stats,&tmp); + if (arg) error = copy_to_user(arg,&tmp,sizeof(tmp)); + if (zero && !error) { +@@ -160,7 +160,7 @@ static int uPD98402_ioctl(struct atm_dev + + + #define ADD_LIMITED(s,v) \ +- { atomic_add(GET(v),&PRIV(dev)->sonet_stats.s); \ ++ { atomic_add_unchecked(GET(v),&PRIV(dev)->sonet_stats.s); \ + if (atomic_read(&PRIV(dev)->sonet_stats.s) < 0) \ + atomic_set(&PRIV(dev)->sonet_stats.s,INT_MAX); } + +@@ -193,7 +193,7 @@ static void uPD98402_int(struct atm_dev + if (reason & uPD98402_INT_PFM) stat_event(dev); + if (reason & uPD98402_INT_PCO) { + (void) GET(PCOCR); /* clear interrupt cause */ +- atomic_add(GET(HECCT), ++ atomic_add_unchecked(GET(HECCT), + &PRIV(dev)->sonet_stats.uncorr_hcs); + } + if ((reason & uPD98402_INT_RFO) && +diff -urNp a/drivers/atm/zatm.c b/drivers/atm/zatm.c +--- a/drivers/atm/zatm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/atm/zatm.c 2009-05-24 18:10:25.108209641 -0700 +@@ -458,7 +458,7 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy + } + if (!size) { + dev_kfree_skb_irq(skb); +- if (vcc) atomic_inc(&vcc->stats->rx_err); ++ if (vcc) atomic_inc_unchecked(&vcc->stats->rx_err); + continue; + } + if (!atm_charge(vcc,skb->truesize)) { +@@ -468,7 +468,7 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy + skb->len = size; + ATM_SKB(skb)->vcc = vcc; + vcc->push(vcc,skb); +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + } + zout(pos & 0xffff,MTA(mbx)); + #if 0 /* probably a stupid idea */ +@@ -732,7 +732,7 @@ if (*ZATM_PRV_DSC(skb) != (uPD98401_TXPD + skb_queue_head(&zatm_vcc->backlog,skb); + break; + } +- atomic_inc(&vcc->stats->tx); ++ 
atomic_inc_unchecked(&vcc->stats->tx); + wake_up(&zatm_vcc->tx_wait); + } + +diff -urNp a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c +--- a/drivers/char/agp/frontend.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/agp/frontend.c 2009-05-24 18:10:25.109209419 -0700 +@@ -824,7 +824,7 @@ static int agpioc_reserve_wrap(struct ag + if (copy_from_user(&reserve, arg, sizeof(struct agp_region))) + return -EFAULT; + +- if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment)) ++ if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment_priv)) + return -EFAULT; + + client = agp_find_client_by_pid(reserve.pid); +diff -urNp a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c +--- a/drivers/char/agp/intel-agp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/agp/intel-agp.c 2009-05-24 18:10:25.111210654 -0700 +@@ -2369,7 +2369,7 @@ static struct pci_device_id agp_intel_pc + ID(PCI_DEVICE_ID_INTEL_Q45_HB), + ID(PCI_DEVICE_ID_INTEL_G45_HB), + ID(PCI_DEVICE_ID_INTEL_G41_HB), +- { } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, agp_intel_pci_table); +diff -urNp a/drivers/char/hpet.c b/drivers/char/hpet.c +--- a/drivers/char/hpet.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/hpet.c 2009-05-24 18:10:25.111210654 -0700 +@@ -975,7 +975,7 @@ static struct acpi_driver hpet_acpi_driv + }, + }; + +-static struct miscdevice hpet_misc = { HPET_MINOR, "hpet", &hpet_fops }; ++static struct miscdevice hpet_misc = { HPET_MINOR, "hpet", &hpet_fops, {NULL, NULL}, NULL, NULL }; + + static int __init hpet_init(void) + { +diff -urNp a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c +--- a/drivers/char/ipmi/ipmi_msghandler.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/ipmi/ipmi_msghandler.c 2009-05-24 18:10:25.113209304 -0700 +@@ -441,7 +441,7 @@ static DEFINE_MUTEX(smi_watchers_mutex); + + + #define ipmi_inc_stat(intf, stat) \ +- atomic_inc(&(intf)->stats[IPMI_STAT_ ## stat]) 
++ atomic_inc_unchecked(&(intf)->stats[IPMI_STAT_ ## stat]) + #define ipmi_get_stat(intf, stat) \ + ((unsigned int) atomic_read(&(intf)->stats[IPMI_STAT_ ## stat])) + +diff -urNp a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c +--- a/drivers/char/ipmi/ipmi_si_intf.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/ipmi/ipmi_si_intf.c 2009-05-24 18:10:25.115209351 -0700 +@@ -288,7 +288,7 @@ struct smi_info { + }; + + #define smi_inc_stat(smi, stat) \ +- atomic_inc(&(smi)->stats[SI_STAT_ ## stat]) ++ atomic_inc_unchecked(&(smi)->stats[SI_STAT_ ## stat]) + #define smi_get_stat(smi, stat) \ + ((unsigned int) atomic_read(&(smi)->stats[SI_STAT_ ## stat])) + +diff -urNp a/drivers/char/keyboard.c b/drivers/char/keyboard.c +--- a/drivers/char/keyboard.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/keyboard.c 2009-05-24 18:10:25.116209619 -0700 +@@ -635,6 +635,16 @@ static void k_spec(struct vc_data *vc, u + kbd->kbdmode == VC_MEDIUMRAW) && + value != KVAL(K_SAK)) + return; /* SAK is allowed even in raw mode */ ++ ++#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP) ++ { ++ void *func = fn_handler[value]; ++ if (func == fn_show_state || func == fn_show_ptregs || ++ func == fn_show_mem) ++ return; ++ } ++#endif ++ + fn_handler[value](vc); + } + +@@ -1388,7 +1398,7 @@ static const struct input_device_id kbd_ + .evbit = { BIT_MASK(EV_SND) }, + }, + +- { }, /* Terminating entry */ ++ { 0 }, /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE(input, kbd_ids); +diff -urNp a/drivers/char/mem.c b/drivers/char/mem.c +--- a/drivers/char/mem.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/mem.c 2009-05-24 18:10:25.116209619 -0700 +@@ -18,6 +18,7 @@ + #include <linux/raw.h> + #include <linux/tty.h> + #include <linux/capability.h> ++#include <linux/security.h> + #include <linux/ptrace.h> + #include <linux/device.h> + #include <linux/highmem.h> +@@ -35,6 +36,10 @@ + # include <linux/efi.h> + #endif + ++#ifdef 
CONFIG_GRKERNSEC ++extern struct file_operations grsec_fops; ++#endif ++ + /* + * Architectures vary in how they handle caching for addresses + * outside of main memory. +@@ -192,6 +197,11 @@ static ssize_t write_mem(struct file * f + if (!valid_phys_addr_range(p, count)) + return -EFAULT; + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_mem_write(); ++ return -EPERM; ++#endif ++ + written = 0; + + #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED +@@ -350,6 +360,11 @@ static int mmap_mem(struct file * file, + &vma->vm_page_prot)) + return -EINVAL; + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ if (gr_handle_mem_mmap(vma->vm_pgoff << PAGE_SHIFT, vma)) ++ return -EPERM; ++#endif ++ + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, + vma->vm_page_prot); +@@ -588,6 +603,11 @@ static ssize_t write_kmem(struct file * + ssize_t written; + char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ + ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_kmem_write(); ++ return -EPERM; ++#endif ++ + if (p < (unsigned long) high_memory) { + + wrote = count; +@@ -791,6 +811,16 @@ static loff_t memory_lseek(struct file * + + static int open_port(struct inode * inode, struct file * filp) + { ++#ifdef CONFIG_GRKERNSEC_KMEM ++ gr_handle_open_port(); ++ return -EPERM; ++#endif ++ ++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; ++} ++ ++static int open_mem(struct inode * inode, struct file * filp) ++{ + return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; + } + +@@ -798,7 +828,6 @@ static int open_port(struct inode * inod + #define full_lseek null_lseek + #define write_zero write_null + #define read_full read_zero +-#define open_mem open_port + #define open_kmem open_mem + #define open_oldmem open_mem + +@@ -938,6 +967,11 @@ static int memory_open(struct inode * in + filp->f_op = &oldmem_fops; + break; + #endif ++#ifdef CONFIG_GRKERNSEC ++ case 13: ++ filp->f_op = &grsec_fops; ++ break; ++#endif + default: + unlock_kernel(); + return -ENXIO; +@@ -974,6 +1008,9 @@ static const struct { + #ifdef CONFIG_CRASH_DUMP + {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, + #endif ++#ifdef CONFIG_GRKERNSEC ++ {13,"grsec", S_IRUSR | S_IWUGO, &grsec_fops}, ++#endif + }; + + static struct class *mem_class; +diff -urNp a/drivers/char/nvram.c b/drivers/char/nvram.c +--- a/drivers/char/nvram.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/nvram.c 2009-05-24 18:10:25.117209258 -0700 +@@ -433,7 +433,10 @@ static const struct file_operations nvra + static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", +- &nvram_fops ++ &nvram_fops, ++ {NULL, NULL}, ++ NULL, ++ NULL + }; + + static int __init +diff -urNp a/drivers/char/random.c b/drivers/char/random.c +--- a/drivers/char/random.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/random.c 2009-05-24 18:10:25.118209526 -0700 +@@ -249,8 +249,13 @@ + /* + * Configuration information + */ ++#ifdef CONFIG_GRKERNSEC_RANDNET ++#define INPUT_POOL_WORDS 512 ++#define OUTPUT_POOL_WORDS 128 ++#else + #define INPUT_POOL_WORDS 128 + #define OUTPUT_POOL_WORDS 32 ++#endif + #define SEC_XFER_SIZE 512 + + /* +@@ -287,10 +292,17 @@ static struct poolinfo { + int poolwords; + int tap1, tap2, tap3, tap4, tap5; + } poolinfo_table[] = { ++#ifdef CONFIG_GRKERNSEC_RANDNET ++ /* x^512 + x^411 + x^308 + x^208 +x^104 + x + 1 -- 225 */ ++ { 512, 411, 308, 208, 104, 1 }, ++ /* x^128 + x^103 + x^76 + x^51 + x^25 + x + 1 -- 105 */ ++ { 128, 103, 76, 51, 25, 1 }, ++#else 
+ /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ + { 128, 103, 76, 51, 25, 1 }, + /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ + { 32, 26, 20, 14, 7, 1 }, ++#endif + #if 0 + /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ + { 2048, 1638, 1231, 819, 411, 1 }, +@@ -1185,7 +1197,7 @@ EXPORT_SYMBOL(generate_random_uuid); + #include <linux/sysctl.h> + + static int min_read_thresh = 8, min_write_thresh; +-static int max_read_thresh = INPUT_POOL_WORDS * 32; ++static int max_read_thresh = OUTPUT_POOL_WORDS * 32; + static int max_write_thresh = INPUT_POOL_WORDS * 32; + static char sysctl_bootid[16]; + +diff -urNp a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c +--- a/drivers/char/tpm/tpm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/tpm/tpm.c 2009-05-24 18:10:25.120209293 -0700 +@@ -1035,7 +1035,7 @@ ssize_t tpm_write(struct file *file, con + + mutex_lock(&chip->buffer_mutex); + +- if (in_size > TPM_BUFSIZE) ++ if (in_size > (unsigned int)TPM_BUFSIZE) + in_size = TPM_BUFSIZE; + + if (copy_from_user +diff -urNp a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c +--- a/drivers/char/tty_ldisc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/tty_ldisc.c 2009-05-24 18:10:25.120209293 -0700 +@@ -74,7 +74,7 @@ int tty_register_ldisc(int disc, struct + spin_lock_irqsave(&tty_ldisc_lock, flags); + tty_ldiscs[disc] = new_ldisc; + new_ldisc->num = disc; +- new_ldisc->refcount = 0; ++ atomic_set(&new_ldisc->refcount, 0); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + return ret; +@@ -102,7 +102,7 @@ int tty_unregister_ldisc(int disc) + return -EINVAL; + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (tty_ldiscs[disc]->refcount) ++ if (atomic_read(&tty_ldiscs[disc]->refcount)) + ret = -EBUSY; + else + tty_ldiscs[disc] = NULL; +@@ -139,7 +139,7 @@ static int tty_ldisc_try_get(int disc, s + err = -EAGAIN; + else { + /* lock it */ +- ldops->refcount++; ++ atomic_inc(&ldops->refcount); + ld->ops = ldops; + err = 0; + } +@@ 
-196,8 +196,8 @@ static void tty_ldisc_put(struct tty_ldi + + spin_lock_irqsave(&tty_ldisc_lock, flags); + ld = tty_ldiscs[disc]; +- BUG_ON(ld->refcount == 0); +- ld->refcount--; ++ BUG_ON(atomic_read(&ld->refcount) == 0); ++ atomic_dec(&ld->refcount); + module_put(ld->owner); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + } +@@ -264,7 +264,7 @@ const struct file_operations tty_ldiscs_ + + static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) + { +- ld->refcount = 0; ++ atomic_set(&ld->refcount, 0); + tty->ldisc = *ld; + } + +@@ -289,7 +289,7 @@ static int tty_ldisc_try(struct tty_stru + spin_lock_irqsave(&tty_ldisc_lock, flags); + ld = &tty->ldisc; + if (test_bit(TTY_LDISC, &tty->flags)) { +- ld->refcount++; ++ atomic_inc(&ld->refcount); + ret = 1; + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +@@ -316,7 +316,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(str + { + /* wait_event is a macro */ + wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); +- if (tty->ldisc.refcount == 0) ++ if (atomic_read(&tty->ldisc.refcount) == 0) + printk(KERN_ERR "tty_ldisc_ref_wait\n"); + return &tty->ldisc; + } +@@ -360,11 +360,9 @@ void tty_ldisc_deref(struct tty_ldisc *l + BUG_ON(ld == NULL); + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (ld->refcount == 0) ++ if (!atomic_add_unless(&ld->refcount, -1, 0)) + printk(KERN_ERR "tty_ldisc_deref: no references.\n"); +- else +- ld->refcount--; +- if (ld->refcount == 0) ++ if (atomic_read(&ld->refcount) == 0) + wake_up(&tty_ldisc_wait); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + } +@@ -499,8 +497,8 @@ restart: + */ + + spin_lock_irqsave(&tty_ldisc_lock, flags); +- if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { +- if (tty->ldisc.refcount) { ++ if (atomic_read(&tty->ldisc.refcount) || (o_tty && atomic_read(&o_tty->ldisc.refcount))) { ++ if (atomic_read(&tty->ldisc.refcount)) { + /* Free the new ldisc we grabbed. Must drop the lock + first. 
*/ + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +@@ -512,14 +510,14 @@ restart: + * and retries if we made tty_ldisc_wait() smarter. + * That is up for discussion. + */ +- if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) ++ if (wait_event_interruptible(tty_ldisc_wait, atomic_read(&tty->ldisc.refcount) == 0) < 0) + return -ERESTARTSYS; + goto restart; + } +- if (o_tty && o_tty->ldisc.refcount) { ++ if (o_tty && atomic_read(&o_tty->ldisc.refcount)) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + tty_ldisc_put(o_tty->ldisc.ops); +- if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) ++ if (wait_event_interruptible(tty_ldisc_wait, atomic_read(&o_tty->ldisc.refcount) == 0) < 0) + return -ERESTARTSYS; + goto restart; + } +@@ -658,9 +656,9 @@ void tty_ldisc_release(struct tty_struct + * side is zero. + */ + spin_lock_irqsave(&tty_ldisc_lock, flags); +- while (tty->ldisc.refcount) { ++ while (atomic_read(&tty->ldisc.refcount)) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +- wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); ++ wait_event(tty_ldisc_wait, atomic_read(&tty->ldisc.refcount) == 0); + spin_lock_irqsave(&tty_ldisc_lock, flags); + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +diff -urNp a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c +--- a/drivers/char/vt_ioctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/char/vt_ioctl.c 2009-05-24 18:10:25.121209561 -0700 +@@ -96,6 +96,12 @@ do_kdsk_ioctl(int cmd, struct kbentry __ + case KDSKBENT: + if (!perm) + return -EPERM; ++ ++#ifdef CONFIG_GRKERNSEC ++ if (!capable(CAP_SYS_TTY_CONFIG)) ++ return -EPERM; ++#endif ++ + if (!i && v == K_NOSUCHMAP) { + /* deallocate map */ + key_map = key_maps[s]; +@@ -236,6 +242,13 @@ do_kdgkb_ioctl(int cmd, struct kbsentry + goto reterr; + } + ++#ifdef CONFIG_GRKERNSEC ++ if (!capable(CAP_SYS_TTY_CONFIG)) { ++ ret = -EPERM; ++ goto reterr; ++ } ++#endif ++ + q = func_table[i]; + first_free = 
funcbufptr + (funcbufsize - funcbufleft); + for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) +diff -urNp a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h +--- a/drivers/edac/edac_core.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/edac/edac_core.h 2009-05-24 18:10:25.122209340 -0700 +@@ -85,11 +85,11 @@ extern int edac_debug_level; + + #else /* !CONFIG_EDAC_DEBUG */ + +-#define debugf0( ... ) +-#define debugf1( ... ) +-#define debugf2( ... ) +-#define debugf3( ... ) +-#define debugf4( ... ) ++#define debugf0( ... ) do {} while (0) ++#define debugf1( ... ) do {} while (0) ++#define debugf2( ... ) do {} while (0) ++#define debugf3( ... ) do {} while (0) ++#define debugf4( ... ) do {} while (0) + + #endif /* !CONFIG_EDAC_DEBUG */ + +diff -urNp a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c +--- a/drivers/firmware/dmi_scan.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/firmware/dmi_scan.c 2009-05-24 18:10:25.123209259 -0700 +@@ -389,11 +389,6 @@ void __init dmi_scan_machine(void) + } + } + else { +- /* +- * no iounmap() for that ioremap(); it would be a no-op, but +- * it's so early in setup that sucker gets confused into doing +- * what it shouldn't if we actually call it. 
+- */ + p = dmi_ioremap(0xF0000, 0x10000); + if (p == NULL) + goto error; +diff -urNp a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c +--- a/drivers/hwmon/fscpos.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/hwmon/fscpos.c 2009-05-24 18:10:25.124210644 -0700 +@@ -240,7 +240,6 @@ static ssize_t set_pwm(struct i2c_client + unsigned long v = simple_strtoul(buf, NULL, 10); + + /* Range: 0..255 */ +- if (v < 0) v = 0; + if (v > 255) v = 255; + + mutex_lock(&data->update_lock); +diff -urNp a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c +--- a/drivers/hwmon/k8temp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/hwmon/k8temp.c 2009-05-24 18:10:25.124210644 -0700 +@@ -130,7 +130,7 @@ static DEVICE_ATTR(name, S_IRUGO, show_n + + static struct pci_device_id k8temp_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, +- { 0 }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + MODULE_DEVICE_TABLE(pci, k8temp_ids); +diff -urNp a/drivers/hwmon/sis5595.c b/drivers/hwmon/sis5595.c +--- a/drivers/hwmon/sis5595.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/hwmon/sis5595.c 2009-05-24 18:10:25.125210633 -0700 +@@ -698,7 +698,7 @@ static struct sis5595_data *sis5595_upda + + static struct pci_device_id sis5595_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, sis5595_pci_ids); +diff -urNp a/drivers/hwmon/via686a.c b/drivers/hwmon/via686a.c +--- a/drivers/hwmon/via686a.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/hwmon/via686a.c 2009-05-24 18:10:25.126209224 -0700 +@@ -768,7 +768,7 @@ static struct via686a_data *via686a_upda + + static struct pci_device_id via686a_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, via686a_pci_ids); +diff -urNp a/drivers/hwmon/vt8231.c b/drivers/hwmon/vt8231.c +--- a/drivers/hwmon/vt8231.c 2009-05-02 11:54:43.000000000 -0700 
++++ b/drivers/hwmon/vt8231.c 2009-05-24 18:10:25.126209224 -0700 +@@ -698,7 +698,7 @@ static struct platform_driver vt8231_dri + + static struct pci_device_id vt8231_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231_4) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, vt8231_pci_ids); +diff -urNp a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c +--- a/drivers/hwmon/w83791d.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/hwmon/w83791d.c 2009-05-24 18:10:25.127209841 -0700 +@@ -327,8 +327,8 @@ static int w83791d_detect(struct i2c_cli + struct i2c_board_info *info); + static int w83791d_remove(struct i2c_client *client); + +-static int w83791d_read(struct i2c_client *client, u8 register); +-static int w83791d_write(struct i2c_client *client, u8 register, u8 value); ++static int w83791d_read(struct i2c_client *client, u8 reg); ++static int w83791d_write(struct i2c_client *client, u8 reg, u8 value); + static struct w83791d_data *w83791d_update_device(struct device *dev); + + #ifdef DEBUG +diff -urNp a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c +--- a/drivers/i2c/busses/i2c-i801.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/i2c/busses/i2c-i801.c 2009-05-24 18:10:25.128209341 -0700 +@@ -578,7 +578,7 @@ static struct pci_device_id i801_ids[] = + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PCH_SMBUS) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, i801_ids); +diff -urNp a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c +--- a/drivers/i2c/busses/i2c-piix4.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/i2c/busses/i2c-piix4.c 2009-05-24 18:10:25.129209539 -0700 +@@ -123,7 +123,7 @@ static struct dmi_system_id __devinitdat + .ident = "IBM", + .matches = { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, + }, +- { }, ++ 
{ NULL, NULL, {DMI_MATCH(DMI_NONE, NULL)}, NULL }, + }; + + static int __devinit piix4_setup(struct pci_dev *PIIX4_dev, +@@ -424,7 +424,7 @@ static struct pci_device_id piix4_ids[] + PCI_DEVICE_ID_SERVERWORKS_CSB6) }, + { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, + PCI_DEVICE_ID_SERVERWORKS_HT1000SB) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, piix4_ids); +diff -urNp a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c +--- a/drivers/i2c/busses/i2c-sis630.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/i2c/busses/i2c-sis630.c 2009-05-24 18:10:25.129209539 -0700 +@@ -472,7 +472,7 @@ static struct i2c_adapter sis630_adapter + static struct pci_device_id sis630_ids[] __devinitdata = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503) }, + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, sis630_ids); +diff -urNp a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c +--- a/drivers/i2c/busses/i2c-sis96x.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/i2c/busses/i2c-sis96x.c 2009-05-24 18:10:25.130209527 -0700 +@@ -248,7 +248,7 @@ static struct i2c_adapter sis96x_adapter + + static struct pci_device_id sis96x_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_SMBUS) }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (pci, sis96x_ids); +diff -urNp a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c +--- a/drivers/ieee1394/dv1394.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/dv1394.c 2009-05-24 18:10:25.131209446 -0700 +@@ -739,7 +739,7 @@ static void frame_prepare(struct video_c + based upon DIF section and sequence + */ + +-static void inline ++static inline void + frame_put_packet (struct frame *f, struct packet *p) + { + int section_type = p->data[0] >> 5; /* section type is in bits 5 - 7 */ +@@ -2177,7 +2177,7 @@ static struct ieee1394_device_id dv1394_ + .specifier_id = 
AVC_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = AVC_SW_VERSION_ENTRY & 0xffffff + }, +- { } ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, dv1394_id_table); +diff -urNp a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c +--- a/drivers/ieee1394/eth1394.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/eth1394.c 2009-05-24 18:10:25.132209993 -0700 +@@ -451,7 +451,7 @@ static struct ieee1394_device_id eth1394 + .specifier_id = ETHER1394_GASP_SPECIFIER_ID, + .version = ETHER1394_GASP_VERSION, + }, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, eth1394_id_table); +diff -urNp a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c +--- a/drivers/ieee1394/hosts.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/hosts.c 2009-05-24 18:10:25.133209144 -0700 +@@ -78,6 +78,7 @@ static int dummy_isoctl(struct hpsb_iso + } + + static struct hpsb_host_driver dummy_driver = { ++ .name = "dummy", + .transmit_packet = dummy_transmit_packet, + .devctl = dummy_devctl, + .isoctl = dummy_isoctl +diff -urNp a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c +--- a/drivers/ieee1394/ohci1394.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/ohci1394.c 2009-05-24 18:10:25.134209271 -0700 +@@ -147,9 +147,9 @@ printk(level "%s: " fmt "\n" , OHCI1394_ + printk(level "%s: fw-host%d: " fmt "\n" , OHCI1394_DRIVER_NAME, ohci->host->id , ## args) + + /* Module Parameters */ +-static int phys_dma = 1; ++static int phys_dma; + module_param(phys_dma, int, 0444); +-MODULE_PARM_DESC(phys_dma, "Enable physical DMA (default = 1)."); ++MODULE_PARM_DESC(phys_dma, "Enable physical DMA (default = 0)."); + + static void dma_trm_tasklet(unsigned long data); + static void dma_trm_reset(struct dma_trm_ctx *d); +@@ -3437,7 +3437,7 @@ static struct pci_device_id ohci1394_pci + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, +- { 0, }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + MODULE_DEVICE_TABLE(pci, ohci1394_pci_tbl); +diff 
-urNp a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c +--- a/drivers/ieee1394/raw1394.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/raw1394.c 2009-05-24 18:10:25.136209528 -0700 +@@ -2995,7 +2995,7 @@ static struct ieee1394_device_id raw1394 + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | IEEE1394_MATCH_VERSION, + .specifier_id = CAMERA_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = (CAMERA_SW_VERSION_ENTRY + 2) & 0xffffff}, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, raw1394_id_table); +diff -urNp a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c +--- a/drivers/ieee1394/sbp2.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/sbp2.c 2009-05-24 18:10:25.138209574 -0700 +@@ -290,7 +290,7 @@ static struct ieee1394_device_id sbp2_id + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | IEEE1394_MATCH_VERSION, + .specifier_id = SBP2_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = SBP2_SW_VERSION_ENTRY & 0xffffff}, +- {} ++ { 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(ieee1394, sbp2_id_table); + +@@ -2110,7 +2110,7 @@ MODULE_DESCRIPTION("IEEE-1394 SBP-2 prot + MODULE_SUPPORTED_DEVICE(SBP2_DEVICE_NAME); + MODULE_LICENSE("GPL"); + +-static int sbp2_module_init(void) ++static int __init sbp2_module_init(void) + { + int ret; + +diff -urNp a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c +--- a/drivers/ieee1394/video1394.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/ieee1394/video1394.c 2009-05-24 18:10:25.139209284 -0700 +@@ -1310,7 +1310,7 @@ static struct ieee1394_device_id video13 + .specifier_id = CAMERA_UNIT_SPEC_ID_ENTRY & 0xffffff, + .version = (CAMERA_SW_VERSION_ENTRY + 2) & 0xffffff + }, +- { } ++ { 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(ieee1394, video1394_id_table); +diff -urNp a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c +--- a/drivers/input/keyboard/atkbd.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/keyboard/atkbd.c 2009-05-24 18:10:25.140209831 
-0700 +@@ -1164,7 +1164,7 @@ static struct serio_device_id atkbd_seri + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(serio, atkbd_serio_ids); +diff -urNp a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c +--- a/drivers/input/mouse/lifebook.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/mouse/lifebook.c 2009-05-24 18:10:25.140209831 -0700 +@@ -110,7 +110,7 @@ static const struct dmi_system_id lifebo + DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook B142"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL} + }; + + static psmouse_ret_t lifebook_process_byte(struct psmouse *psmouse) +diff -urNp a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c +--- a/drivers/input/mouse/psmouse-base.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/mouse/psmouse-base.c 2009-05-24 18:10:25.141210099 -0700 +@@ -1378,7 +1378,7 @@ static struct serio_device_id psmouse_se + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(serio, psmouse_serio_ids); +diff -urNp a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c +--- a/drivers/input/mouse/synaptics.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/mouse/synaptics.c 2009-05-24 18:10:25.142209179 -0700 +@@ -417,7 +417,7 @@ static void synaptics_process_packet(str + break; + case 2: + if (SYN_MODEL_PEN(priv->model_id)) +- ; /* Nothing, treat a pen as a single finger */ ++ break; /* Nothing, treat a pen as a single finger */ + break; + case 4 ... 
15: + if (SYN_CAP_PALMDETECT(priv->capabilities)) +@@ -624,7 +624,7 @@ static const struct dmi_system_id toshib + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE M300"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +diff -urNp a/drivers/input/mousedev.c b/drivers/input/mousedev.c +--- a/drivers/input/mousedev.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/mousedev.c 2009-05-24 18:10:25.143209866 -0700 +@@ -1063,7 +1063,7 @@ static struct input_handler mousedev_han + + #ifdef CONFIG_INPUT_MOUSEDEV_PSAUX + static struct miscdevice psaux_mouse = { +- PSMOUSE_MINOR, "psaux", &mousedev_fops ++ PSMOUSE_MINOR, "psaux", &mousedev_fops, {NULL, NULL}, NULL, NULL + }; + static int psaux_registered; + #endif +diff -urNp a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h +--- a/drivers/input/serio/i8042-x86ia64io.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/serio/i8042-x86ia64io.h 2009-05-24 18:10:25.143209866 -0700 +@@ -143,7 +143,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_VERSION, "M606"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + /* +@@ -351,7 +351,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_NAME, "HEL80I"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + #ifdef CONFIG_PNP +@@ -363,7 +363,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + #endif + +@@ -430,7 +430,7 @@ static struct dmi_system_id __initdata i + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 4280"), + }, + }, +- { } ++ { NULL, NULL, {DMI_MATCH(DMI_NONE, {0})}, NULL } + }; + + #endif /* CONFIG_X86 */ +diff -urNp a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c +--- a/drivers/input/serio/serio_raw.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/input/serio/serio_raw.c 
2009-05-24 18:10:25.144213695 -0700 +@@ -378,7 +378,7 @@ static struct serio_device_id serio_raw_ + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, +- { 0 } ++ { 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(serio, serio_raw_serio_ids); +diff -urNp a/drivers/lguest/core.c b/drivers/lguest/core.c +--- a/drivers/lguest/core.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/lguest/core.c 2009-05-24 18:10:25.144213695 -0700 +@@ -80,9 +80,17 @@ static __init int map_switcher(void) + * (SWITCHER_ADDR). We might not get it in theory, but in practice + * it's worked so far. The end address needs +1 because __get_vm_area + * allocates an extra guard page, so we need space for that. */ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, ++ VM_ALLOC | VM_KERNEXEC, SWITCHER_ADDR, SWITCHER_ADDR ++ + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); ++#else + switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, + VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); ++#endif ++ + if (!switcher_vma) { + err = -ENOMEM; + printk("lguest: could not map switcher pages high\n"); +diff -urNp a/drivers/md/bitmap.c b/drivers/md/bitmap.c +--- a/drivers/md/bitmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/md/bitmap.c 2009-05-24 18:10:25.145209354 -0700 +@@ -57,7 +57,7 @@ + # if DEBUG > 0 + # define PRINTK(x...) printk(KERN_DEBUG x) + # else +-# define PRINTK(x...) ++# define PRINTK(x...) 
do {} while (0) + # endif + #endif + +diff -urNp a/drivers/mtd/devices/doc2000.c b/drivers/mtd/devices/doc2000.c +--- a/drivers/mtd/devices/doc2000.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/mtd/devices/doc2000.c 2009-05-24 18:10:25.146209273 -0700 +@@ -777,7 +777,7 @@ static int doc_write(struct mtd_info *mt + + /* The ECC will not be calculated correctly if less than 512 is written */ + /* DBB- +- if (len != 0x200 && eccbuf) ++ if (len != 0x200) + printk(KERN_WARNING + "ECC needs a full sector write (adr: %lx size %lx)\n", + (long) to, (long) len); +diff -urNp a/drivers/mtd/devices/doc2001.c b/drivers/mtd/devices/doc2001.c +--- a/drivers/mtd/devices/doc2001.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/mtd/devices/doc2001.c 2009-05-24 18:10:25.147209262 -0700 +@@ -396,6 +396,8 @@ static int doc_read (struct mtd_info *mt + /* Don't allow read past end of device */ + if (from >= this->totlen) + return -EINVAL; ++ if (!len) ++ return -EINVAL; + + /* Don't allow a single read to cross a 512-byte block boundary */ + if (from + len > ((from | 0x1ff) + 1)) +diff -urNp a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c +--- a/drivers/mtd/devices/slram.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/mtd/devices/slram.c 2009-05-24 18:10:25.147209262 -0700 +@@ -273,7 +273,7 @@ static int parse_cmdline(char *devname, + } + T("slram: devname=%s, devstart=0x%lx, devlength=0x%lx\n", + devname, devstart, devlength); +- if ((devstart < 0) || (devlength < 0) || (devlength % SLRAM_BLK_SZ != 0)) { ++ if (devlength % SLRAM_BLK_SZ != 0) { + E("slram: Illegal start / length parameter.\n"); + return(-EINVAL); + } +diff -urNp a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c +--- a/drivers/mtd/ubi/build.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/mtd/ubi/build.c 2009-05-24 18:10:25.148209669 -0700 +@@ -1104,7 +1104,7 @@ static int __init bytes_str_to_int(const + unsigned long result; + + result = simple_strtoul(str, &endp, 0); +- if (str == endp 
|| result < 0) { ++ if (str == endp) { + printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", + str); + return -EINVAL; +diff -urNp a/drivers/net/eepro100.c b/drivers/net/eepro100.c +--- a/drivers/net/eepro100.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/net/eepro100.c 2009-05-24 18:10:25.150209296 -0700 +@@ -47,7 +47,7 @@ static int rxdmacount /* = 0 */; + # define rx_align(skb) skb_reserve((skb), 2) + # define RxFD_ALIGNMENT __attribute__ ((aligned (2), packed)) + #else +-# define rx_align(skb) ++# define rx_align(skb) do {} while (0) + # define RxFD_ALIGNMENT + #endif + +@@ -2334,33 +2334,33 @@ static void __devexit eepro100_remove_on + } + + static struct pci_device_id eepro100_pci_tbl[] = { +- { PCI_VENDOR_ID_INTEL, 0x1229, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1209, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1029, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1030, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1031, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1032, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1033, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1034, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1035, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1036, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1037, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1038, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1039, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x103A, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x103B, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x103C, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x103D, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x103E, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1050, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1059, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x1227, PCI_ANY_ID, PCI_ANY_ID, 
}, +- { PCI_VENDOR_ID_INTEL, 0x2449, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x2459, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x245D, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x5200, PCI_ANY_ID, PCI_ANY_ID, }, +- { PCI_VENDOR_ID_INTEL, 0x5201, PCI_ANY_ID, PCI_ANY_ID, }, +- { 0,} ++ { PCI_VENDOR_ID_INTEL, 0x1229, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1209, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1029, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1031, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1032, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1033, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1034, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1035, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1036, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1037, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1038, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x103A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x103B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x103C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x103D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x103E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1059, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x1227, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x2449, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x2459, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x245D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 
0x5200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_INTEL, 0x5201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, eepro100_pci_tbl); + +diff -urNp a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c +--- a/drivers/net/irda/vlsi_ir.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/net/irda/vlsi_ir.c 2009-05-24 18:10:25.151209704 -0700 +@@ -906,13 +906,12 @@ static int vlsi_hard_start_xmit(struct s + /* no race - tx-ring already empty */ + vlsi_set_baud(idev, iobase); + netif_wake_queue(ndev); +- } +- else +- ; ++ } else { + /* keep the speed change pending like it would + * for any len>0 packet. tx completion interrupt + * will apply it when the tx ring becomes empty. + */ ++ } + spin_unlock_irqrestore(&idev->lock, flags); + dev_kfree_skb_any(skb); + return 0; +diff -urNp a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c +--- a/drivers/net/pcnet32.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/net/pcnet32.c 2009-05-24 18:10:25.152210042 -0700 +@@ -78,7 +78,7 @@ static int cards_found; + /* + * VLB I/O addresses + */ +-static unsigned int pcnet32_portlist[] __initdata = ++static unsigned int pcnet32_portlist[] __devinitdata = + { 0x300, 0x320, 0x340, 0x360, 0 }; + + static int pcnet32_debug = 0; +diff -urNp a/drivers/net/tg3.h b/drivers/net/tg3.h +--- a/drivers/net/tg3.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/net/tg3.h 2009-05-24 18:10:25.154209320 -0700 +@@ -102,6 +102,7 @@ + #define CHIPREV_ID_5750_A0 0x4000 + #define CHIPREV_ID_5750_A1 0x4001 + #define CHIPREV_ID_5750_A3 0x4003 ++#define CHIPREV_ID_5750_C1 0x4201 + #define CHIPREV_ID_5750_C2 0x4202 + #define CHIPREV_ID_5752_A0_HW 0x5000 + #define CHIPREV_ID_5752_A0 0x6000 +diff -urNp a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c +--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c 2009-05-24 
18:10:25.158209553 -0700 +@@ -785,7 +785,7 @@ static int iwl3945_send_cmd_sync(struct + IWL_ERROR("Error: Response NULL in '%s'\n", + get_cmd_string(cmd->id)); + ret = -EIO; +- goto out; ++ goto cancel; + } + + ret = 0; +diff -urNp a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c +--- a/drivers/oprofile/buffer_sync.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/oprofile/buffer_sync.c 2009-05-24 18:10:25.159209682 -0700 +@@ -361,7 +361,7 @@ static void add_ibs_begin(struct oprofil + if (ibs_cookie == NO_COOKIE) + offset = rip; + if (ibs_cookie == INVALID_COOKIE) { +- atomic_inc(&oprofile_stats.sample_lost_no_mapping); ++ atomic_inc_unchecked(&oprofile_stats.sample_lost_no_mapping); + offset = rip; + } + if (ibs_cookie != last_cookie) { +@@ -408,7 +408,7 @@ static int add_us_sample(struct mm_struc + cookie = lookup_dcookie(mm, s->eip, &offset); + + if (cookie == INVALID_COOKIE) { +- atomic_inc(&oprofile_stats.sample_lost_no_mapping); ++ atomic_inc_unchecked(&oprofile_stats.sample_lost_no_mapping); + return 0; + } + +@@ -436,7 +436,7 @@ add_sample(struct mm_struct *mm, struct + } else if (mm) { + return add_us_sample(mm, s); + } else { +- atomic_inc(&oprofile_stats.sample_lost_no_mm); ++ atomic_inc_unchecked(&oprofile_stats.sample_lost_no_mm); + } + return 0; + } +@@ -619,7 +619,7 @@ void sync_buffer(int cpu) + !add_sample(mm, s, in_kernel)) { + if (state == sb_bt_start) { + state = sb_bt_ignore; +- atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ atomic_inc_unchecked(&oprofile_stats.bt_lost_no_mapping); + } + } + +diff -urNp a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c +--- a/drivers/oprofile/event_buffer.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/oprofile/event_buffer.c 2009-05-24 18:10:25.160209391 -0700 +@@ -42,7 +42,7 @@ static atomic_t buffer_ready = ATOMIC_IN + void add_event_entry(unsigned long value) + { + if (buffer_pos == buffer_size) { +- atomic_inc(&oprofile_stats.event_lost_overflow); ++ 
atomic_inc_unchecked(&oprofile_stats.event_lost_overflow); + return; + } + +diff -urNp a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c +--- a/drivers/pci/hotplug/cpqphp_nvram.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pci/hotplug/cpqphp_nvram.c 2009-05-24 18:10:25.160209391 -0700 +@@ -425,9 +425,13 @@ static u32 store_HRT (void __iomem *rom_ + + void compaq_nvram_init (void __iomem *rom_start) + { ++ ++#ifndef CONFIG_PAX_KERNEXEC + if (rom_start) { + compaq_int15_entry_point = (rom_start + ROM_INT15_PHY_ADDR - ROM_PHY_ADDR); + } ++#endif ++ + dbg("int15 entry = %p\n", compaq_int15_entry_point); + + /* initialize our int15 lock */ +diff -urNp a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c +--- a/drivers/pci/pcie/aer/aerdrv.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pci/pcie/aer/aerdrv.c 2009-05-24 18:10:25.161135911 -0700 +@@ -59,7 +59,7 @@ static struct pcie_port_service_id aer_i + .port_type = PCIE_RC_PORT, + .service_type = PCIE_PORT_SERVICE_AER, + }, +- { /* end: all zeroes */ } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + static struct pci_error_handlers aer_error_handlers = { +diff -urNp a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c +--- a/drivers/pci/pcie/aer/aerdrv_core.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pci/pcie/aer/aerdrv_core.c 2009-05-24 18:10:25.161135911 -0700 +@@ -667,7 +667,7 @@ static void aer_isr_one_error(struct pci + struct aer_err_source *e_src) + { + struct device *s_device; +- struct aer_err_info e_info = {0, 0, 0,}; ++ struct aer_err_info e_info = {0, 0, 0, {0, 0, 0, 0}}; + int i; + u16 id; + +diff -urNp a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c +--- a/drivers/pci/pcie/portdrv_pci.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pci/pcie/portdrv_pci.c 2009-05-24 18:10:25.162209368 -0700 +@@ -263,7 +263,7 @@ static void pcie_portdrv_err_resume(stru + static const struct pci_device_id port_pci_ids[] = { { + /* 
handle any PCI-Express port */ + PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0), +- }, { /* end: all zeroes */ } ++ }, { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, port_pci_ids); + +diff -urNp a/drivers/pci/proc.c b/drivers/pci/proc.c +--- a/drivers/pci/proc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pci/proc.c 2009-05-24 18:10:25.163209426 -0700 +@@ -470,7 +470,16 @@ static const struct file_operations proc + static int __init pci_proc_init(void) + { + struct pci_dev *dev = NULL; ++ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_bus_pci_dir = proc_mkdir_mode("bus/pci", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); ++#endif ++#else + proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); ++#endif + proc_create("devices", 0, proc_bus_pci_dir, + &proc_bus_pci_dev_operations); + proc_initialized = 1; +diff -urNp a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h +--- a/drivers/pcmcia/ti113x.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pcmcia/ti113x.h 2009-05-24 18:10:25.164209345 -0700 +@@ -903,7 +903,7 @@ static struct pci_device_id ene_tune_tbl + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x3410, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + +- {} ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + static void ene_tune_bridge(struct pcmcia_socket *sock, struct pci_bus *bus) +diff -urNp a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c +--- a/drivers/pcmcia/yenta_socket.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pcmcia/yenta_socket.c 2009-05-24 18:10:25.165209194 -0700 +@@ -1366,7 +1366,7 @@ static struct pci_device_id yenta_table + + /* match any cardbus bridge */ + CB_ID(PCI_ANY_ID, PCI_ANY_ID, DEFAULT), +- { /* all zeroes */ } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, yenta_table); + +diff -urNp 
a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c +--- a/drivers/pnp/pnpbios/bioscalls.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pnp/pnpbios/bioscalls.c 2009-05-24 18:10:25.165209194 -0700 +@@ -60,7 +60,7 @@ set_base(gdt[(selname) >> 3], (u32)(addr + set_limit(gdt[(selname) >> 3], size); \ + } while(0) + +-static struct desc_struct bad_bios_desc; ++static struct desc_struct bad_bios_desc __read_only; + + /* + * At some point we want to use this stack frame pointer to unwind +@@ -87,6 +87,10 @@ static inline u16 call_pnp_bios(u16 func + struct desc_struct save_desc_40; + int cpu; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + /* + * PnP BIOSes are generally not terribly re-entrant. + * Also, don't rely on them to save everything correctly. +@@ -96,8 +100,17 @@ static inline u16 call_pnp_bios(u16 func + + cpu = get_cpu(); + save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8]; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* On some boxes IRQ's during PnP BIOS calls are deadly. */ + spin_lock_irqsave(&pnp_bios_lock, flags); + +@@ -134,7 +147,16 @@ static inline u16 call_pnp_bios(u16 func + :"memory"); + spin_unlock_irqrestore(&pnp_bios_lock, flags); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + put_cpu(); + + /* If we get here and this is set then the PnP BIOS faulted on us. 
*/ +@@ -468,16 +490,24 @@ int pnp_bios_read_escd(char *data, u32 n + return status; + } + +-void pnpbios_calls_init(union pnp_bios_install_struct *header) ++void __init pnpbios_calls_init(union pnp_bios_install_struct *header) + { + int i; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + spin_lock_init(&pnp_bios_lock); + pnp_bios_callpoint.offset = header->fields.pm16offset; + pnp_bios_callpoint.segment = PNP_CS16; + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + bad_bios_desc.a = 0; +- bad_bios_desc.b = 0x00409200; ++ bad_bios_desc.b = 0x00409300; + + set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); + _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); +@@ -491,4 +521,9 @@ void pnpbios_calls_init(union pnp_bios_i + set_base(gdt[GDT_ENTRY_PNPBIOS_DS], + __va(header->fields.pm16dseg)); + } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + } +diff -urNp a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c +--- a/drivers/pnp/quirks.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pnp/quirks.c 2009-05-24 18:10:25.166209671 -0700 +@@ -327,7 +327,7 @@ static struct pnp_fixup pnp_fixups[] = { + /* PnP resources that might overlap PCI BARs */ + {"PNP0c01", quirk_system_pci_resources}, + {"PNP0c02", quirk_system_pci_resources}, +- {""} ++ {"", NULL} + }; + + void pnp_fixup_device(struct pnp_dev *dev) +diff -urNp a/drivers/pnp/resource.c b/drivers/pnp/resource.c +--- a/drivers/pnp/resource.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/pnp/resource.c 2009-05-24 18:10:25.166209671 -0700 +@@ -355,7 +355,7 @@ int pnp_check_irq(struct pnp_dev *dev, s + return 1; + + /* check if the resource is valid */ +- if (*irq < 0 || *irq > 15) ++ if (*irq > 15) + return 0; + + /* check if the resource is reserved */ +@@ -419,7 +419,7 @@ int pnp_check_dma(struct pnp_dev *dev, s + return 1; + + /* check if the resource is valid */ +- if (*dma < 0 || *dma == 4 || *dma > 7) ++ if (*dma == 4 || *dma > 7) + return 0; + + 
/* check if the resource is reserved */ +diff -urNp a/drivers/scsi/scsi_logging.h b/drivers/scsi/scsi_logging.h +--- a/drivers/scsi/scsi_logging.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/scsi/scsi_logging.h 2009-05-24 18:10:25.167210567 -0700 +@@ -51,7 +51,7 @@ do { \ + } while (0); \ + } while (0) + #else +-#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) ++#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) do {} while (0) + #endif /* CONFIG_SCSI_LOGGING */ + + /* +diff -urNp a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c +--- a/drivers/serial/8250_pci.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/serial/8250_pci.c 2009-05-24 18:10:25.168209717 -0700 +@@ -3138,7 +3138,7 @@ static struct pci_device_id serial_pci_t + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, + 0xffff00, pbn_default }, +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + static struct pci_driver serial_pci_driver = { +diff -urNp a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c +--- a/drivers/usb/atm/usbatm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/atm/usbatm.c 2009-05-24 18:10:25.169086305 -0700 +@@ -333,7 +333,7 @@ static void usbatm_extract_one_cell(stru + if (printk_ratelimit()) + atm_warn(instance, "%s: OAM not supported (vpi %d, vci %d)!\n", + __func__, vpi, vci); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + return; + } + +@@ -361,7 +361,7 @@ static void usbatm_extract_one_cell(stru + if (length > ATM_MAX_AAL5_PDU) { + atm_rldbg(instance, "%s: bogus length %u (vcc: 0x%p)!\n", + __func__, length, vcc); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + goto out; + } + +@@ -370,14 +370,14 @@ static void usbatm_extract_one_cell(stru + if (sarb->len < pdu_length) { + atm_rldbg(instance, "%s: bogus pdu_length %u (sarb->len: %u, vcc: 0x%p)!\n", + __func__, pdu_length, sarb->len, vcc); +- atomic_inc(&vcc->stats->rx_err); ++ 
atomic_inc_unchecked(&vcc->stats->rx_err); + goto out; + } + + if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) { + atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n", + __func__, vcc); +- atomic_inc(&vcc->stats->rx_err); ++ atomic_inc_unchecked(&vcc->stats->rx_err); + goto out; + } + +@@ -387,7 +387,7 @@ static void usbatm_extract_one_cell(stru + if (printk_ratelimit()) + atm_err(instance, "%s: no memory for skb (length: %u)!\n", + __func__, length); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + goto out; + } + +@@ -412,7 +412,7 @@ static void usbatm_extract_one_cell(stru + + vcc->push(vcc, skb); + +- atomic_inc(&vcc->stats->rx); ++ atomic_inc_unchecked(&vcc->stats->rx); + out: + skb_trim(sarb, 0); + } +@@ -616,7 +616,7 @@ static void usbatm_tx_process(unsigned l + struct atm_vcc *vcc = UDSL_SKB(skb)->atm.vcc; + + usbatm_pop(vcc, skb); +- atomic_inc(&vcc->stats->tx); ++ atomic_inc_unchecked(&vcc->stats->tx); + + skb = skb_dequeue(&instance->sndqueue); + } +diff -urNp a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c +--- a/drivers/usb/class/cdc-acm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/class/cdc-acm.c 2009-05-24 18:10:25.171139986 -0700 +@@ -1388,7 +1388,7 @@ static struct usb_device_id acm_ids[] = + USB_CDC_ACM_PROTO_AT_CDMA) }, + + /* NOTE: COMM/ACM/0xff is likely MSFT RNDIS ... NOT a modem!! 
*/ +- { } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (usb, acm_ids); +diff -urNp a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c +--- a/drivers/usb/class/usblp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/class/usblp.c 2009-05-24 18:10:25.171139986 -0700 +@@ -227,7 +227,7 @@ static const struct quirk_printer_struct + { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */ + { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut <kernel@zut.de> */ + { 0x04b8, 0x0202, USBLP_QUIRK_BAD_CLASS }, /* Seiko Epson Receipt Printer M129C */ +- { 0, 0 } ++ { 0, 0, 0 } + }; + + static int usblp_wwait(struct usblp *usblp, int nonblock); +@@ -1402,7 +1402,7 @@ static struct usb_device_id usblp_ids [] + { USB_INTERFACE_INFO(7, 1, 2) }, + { USB_INTERFACE_INFO(7, 1, 3) }, + { USB_DEVICE(0x04b8, 0x0202) }, /* Seiko Epson Receipt Printer M129C */ +- { } /* Terminating entry */ ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE (usb, usblp_ids); +diff -urNp a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +--- a/drivers/usb/core/hub.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/core/hub.c 2009-05-24 18:10:25.173336693 -0700 +@@ -3194,7 +3194,7 @@ static struct usb_device_id hub_id_table + .bDeviceClass = USB_CLASS_HUB}, + { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, + .bInterfaceClass = USB_CLASS_HUB}, +- { } /* Terminating entry */ ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* Terminating entry */ + }; + + MODULE_DEVICE_TABLE (usb, hub_id_table); +diff -urNp a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c +--- a/drivers/usb/host/ehci-pci.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/host/ehci-pci.c 2009-05-24 18:10:25.174339195 -0700 +@@ -414,7 +414,7 @@ static const struct pci_device_id pci_id + PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_EHCI, ~0), + .driver_data = (unsigned long) &ehci_pci_hc_driver, + }, +- { /* end: all zeroes */ } 
++ { 0, 0, 0, 0, 0, 0, 0 } + }; + MODULE_DEVICE_TABLE(pci, pci_ids); + +diff -urNp a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c +--- a/drivers/usb/host/uhci-hcd.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/host/uhci-hcd.c 2009-05-24 18:10:25.174339195 -0700 +@@ -927,7 +927,7 @@ static const struct pci_device_id uhci_p + /* handle any USB UHCI controller */ + PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_UHCI, ~0), + .driver_data = (unsigned long) &uhci_driver, +- }, { /* end: all zeroes */ } ++ }, { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, uhci_pci_ids); +diff -urNp a/drivers/usb/storage/debug.h b/drivers/usb/storage/debug.h +--- a/drivers/usb/storage/debug.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/storage/debug.h 2009-05-24 18:10:25.175337368 -0700 +@@ -54,9 +54,9 @@ void usb_stor_show_sense( unsigned char + #define US_DEBUGPX(x...) printk( x ) + #define US_DEBUG(x) x + #else +-#define US_DEBUGP(x...) +-#define US_DEBUGPX(x...) +-#define US_DEBUG(x) ++#define US_DEBUGP(x...) do {} while (0) ++#define US_DEBUGPX(x...) 
do {} while (0) ++#define US_DEBUG(x) do {} while (0) + #endif + + #endif +diff -urNp a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c +--- a/drivers/usb/storage/usb.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/usb/storage/usb.c 2009-05-24 18:10:25.175337368 -0700 +@@ -139,7 +139,7 @@ static struct usb_device_id storage_usb_ + #undef COMPLIANT_DEV + #undef USUAL_DEV + /* Terminating entry */ +- { } ++ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE (usb, storage_usb_ids); +@@ -182,7 +182,7 @@ static struct us_unusual_dev us_unusual_ + # undef USUAL_DEV + + /* Terminating entry */ +- { NULL } ++ { NULL, NULL, 0, 0, NULL } + }; + + +diff -urNp a/drivers/uwb/wlp/messages.c b/drivers/uwb/wlp/messages.c +--- a/drivers/uwb/wlp/messages.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/uwb/wlp/messages.c 2009-05-24 18:10:25.177337205 -0700 +@@ -988,7 +988,7 @@ int wlp_parse_f0(struct wlp *wlp, struct + size_t len = skb->len; + size_t used; + ssize_t result; +- struct wlp_nonce enonce, rnonce; ++ struct wlp_nonce enonce = {{0}}, rnonce = {{0}}; + enum wlp_assc_error assc_err; + char enonce_buf[WLP_WSS_NONCE_STRSIZE]; + char rnonce_buf[WLP_WSS_NONCE_STRSIZE]; +diff -urNp a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c +--- a/drivers/video/fbcmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/fbcmap.c 2009-05-24 18:10:25.178336705 -0700 +@@ -250,8 +250,7 @@ int fb_set_user_cmap(struct fb_cmap_user + int rc, size = cmap->len * sizeof(u16); + struct fb_cmap umap; + +- if (cmap->start < 0 || (!info->fbops->fb_setcolreg && +- !info->fbops->fb_setcmap)) ++ if (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap) + return -EINVAL; + + memset(&umap, 0, sizeof(struct fb_cmap)); +diff -urNp a/drivers/video/fbmem.c b/drivers/video/fbmem.c +--- a/drivers/video/fbmem.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/fbmem.c 2009-05-24 18:10:25.179336134 -0700 +@@ -393,7 +393,7 @@ static void fb_do_show_logo(struct fb_in + 
image->dx += image->width + 8; + } + } else if (rotate == FB_ROTATE_UD) { +- for (x = 0; x < num && image->dx >= 0; x++) { ++ for (x = 0; x < num && (__s32)image->dx >= 0; x++) { + info->fbops->fb_imageblit(info, image); + image->dx -= image->width + 8; + } +@@ -405,7 +405,7 @@ static void fb_do_show_logo(struct fb_in + image->dy += image->height + 8; + } + } else if (rotate == FB_ROTATE_CCW) { +- for (x = 0; x < num && image->dy >= 0; x++) { ++ for (x = 0; x < num && (__s32)image->dy >= 0; x++) { + info->fbops->fb_imageblit(info, image); + image->dy -= image->height + 8; + } +@@ -1090,7 +1090,7 @@ static long do_fb_ioctl(struct fb_info * + ret = -EINVAL; + break; + } +- if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) { ++ if (con2fb.framebuffer >= FB_MAX) { + ret = -EINVAL; + break; + } +diff -urNp a/drivers/video/fbmon.c b/drivers/video/fbmon.c +--- a/drivers/video/fbmon.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/fbmon.c 2009-05-24 18:10:25.180336262 -0700 +@@ -45,7 +45,7 @@ + #ifdef DEBUG + #define DPRINTK(fmt, args...) printk(fmt,## args) + #else +-#define DPRINTK(fmt, args...) ++#define DPRINTK(fmt, args...) 
do {} while (0) + #endif + + #define FBMON_FIX_HEADER 1 +diff -urNp a/drivers/video/i810/i810_accel.c b/drivers/video/i810/i810_accel.c +--- a/drivers/video/i810/i810_accel.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/i810/i810_accel.c 2009-05-24 18:10:25.181336391 -0700 +@@ -73,6 +73,7 @@ static inline int wait_for_space(struct + } + } + printk("ringbuffer lockup!!!\n"); ++ printk("head:%u tail:%u iring.size:%u space:%u\n", head, tail, par->iring.size, space); + i810_report_error(mmio); + par->dev_flags |= LOCKUP; + info->pixmap.scan_align = 1; +diff -urNp a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c +--- a/drivers/video/i810/i810_main.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/i810/i810_main.c 2009-05-24 18:10:25.182335611 -0700 +@@ -120,7 +120,7 @@ static struct pci_device_id i810fb_pci_t + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 }, +- { 0 }, ++ { 0, 0, 0, 0, 0, 0, 0 }, + }; + + static struct pci_driver i810fb_driver = { +diff -urNp a/drivers/video/modedb.c b/drivers/video/modedb.c +--- a/drivers/video/modedb.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/modedb.c 2009-05-24 18:10:25.183335320 -0700 +@@ -38,232 +38,232 @@ static const struct fb_videomode modedb[ + { + /* 640x400 @ 70 Hz, 31.5 kHz hsync */ + NULL, 70, 640, 400, 39721, 40, 24, 39, 9, 96, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 60 Hz, 31.5 kHz hsync */ + NULL, 60, 640, 480, 39721, 40, 24, 32, 11, 96, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 56 Hz, 35.15 kHz hsync */ + NULL, 56, 800, 600, 27777, 128, 24, 22, 1, 72, 2, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 87 Hz interlaced, 35.5 kHz hsync */ + NULL, 87, 1024, 768, 22271, 56, 24, 33, 8, 160, 8, +- 0, FB_VMODE_INTERLACED 
++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x400 @ 85 Hz, 37.86 kHz hsync */ + NULL, 85, 640, 400, 31746, 96, 32, 41, 1, 64, 3, +- FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 72 Hz, 36.5 kHz hsync */ + NULL, 72, 640, 480, 31746, 144, 40, 30, 8, 40, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 75 Hz, 37.50 kHz hsync */ + NULL, 75, 640, 480, 31746, 120, 16, 16, 1, 64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 800, 600, 25000, 88, 40, 23, 1, 128, 4, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 85 Hz, 43.27 kHz hsync */ + NULL, 85, 640, 480, 27777, 80, 56, 25, 1, 56, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 89 Hz interlaced, 44 kHz hsync */ + NULL, 89, 1152, 864, 15384, 96, 16, 110, 1, 216, 10, +- 0, FB_VMODE_INTERLACED ++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 800, 600, 20000, 64, 56, 23, 37, 120, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 60 Hz, 48.4 kHz hsync */ + NULL, 60, 1024, 768, 15384, 168, 8, 29, 3, 144, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 640x480 @ 100 Hz, 53.01 kHz hsync */ + NULL, 100, 640, 480, 21834, 96, 32, 36, 8, 96, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 60 Hz, 53.5 kHz hsync */ + NULL, 60, 1152, 864, 11123, 208, 64, 16, 4, 256, 8, +- 0, FB_VMODE_NONINTERLACED ++ 0, 
FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 85 Hz, 55.84 kHz hsync */ + NULL, 85, 800, 600, 16460, 160, 64, 36, 16, 64, 5, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 70 Hz, 56.5 kHz hsync */ + NULL, 70, 1024, 768, 13333, 144, 24, 29, 3, 136, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 87 Hz interlaced, 51 kHz hsync */ + NULL, 87, 1280, 1024, 12500, 56, 16, 128, 1, 216, 12, +- 0, FB_VMODE_INTERLACED ++ 0, FB_VMODE_INTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 800x600 @ 100 Hz, 64.02 kHz hsync */ + NULL, 100, 800, 600, 14357, 160, 64, 30, 4, 64, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 76 Hz, 62.5 kHz hsync */ + NULL, 76, 1024, 768, 11764, 208, 8, 36, 16, 120, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 70 Hz, 62.4 kHz hsync */ + NULL, 70, 1152, 864, 10869, 106, 56, 20, 1, 160, 10, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 61 Hz, 64.2 kHz hsync */ + NULL, 61, 1280, 1024, 9090, 200, 48, 26, 1, 184, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 60Hz, 63.9 kHz hsync */ + NULL, 60, 1400, 1050, 9259, 136, 40, 13, 1, 112, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 75,107 Hz, 82,392 kHz +hsync +vsync*/ + NULL, 75, 1400, 1050, 7190, 120, 56, 23, 10, 112, 13, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1400x1050 @ 60 Hz, ? 
kHz +hsync +vsync*/ + NULL, 60, 1400, 1050, 9259, 128, 40, 12, 0, 112, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 85 Hz, 70.24 kHz hsync */ + NULL, 85, 1024, 768, 10111, 192, 32, 34, 14, 160, 6, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 78 Hz, 70.8 kHz hsync */ + NULL, 78, 1152, 864, 9090, 228, 88, 32, 0, 84, 12, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 70 Hz, 74.59 kHz hsync */ + NULL, 70, 1280, 1024, 7905, 224, 32, 28, 8, 160, 8, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 60Hz, 75.00 kHz hsync */ + NULL, 60, 1600, 1200, 6172, 304, 64, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 84 Hz, 76.0 kHz hsync */ + NULL, 84, 1152, 864, 7407, 184, 312, 32, 0, 128, 12, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 74 Hz, 78.85 kHz hsync */ + NULL, 74, 1280, 1024, 7407, 256, 32, 34, 3, 144, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1024x768 @ 100Hz, 80.21 kHz hsync */ + NULL, 100, 1024, 768, 8658, 192, 32, 21, 3, 192, 10, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 76 Hz, 81.13 kHz hsync */ + NULL, 76, 1280, 1024, 7407, 248, 32, 34, 3, 104, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 70 Hz, 87.50 kHz hsync */ + NULL, 70, 1600, 1200, 5291, 304, 64, 46, 1, 192, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x864 @ 100 Hz, 89.62 kHz hsync */ 
+ NULL, 100, 1152, 864, 7264, 224, 32, 17, 2, 128, 19, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 85 Hz, 91.15 kHz hsync */ + NULL, 85, 1280, 1024, 6349, 224, 64, 44, 1, 160, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 75 Hz, 93.75 kHz hsync */ + NULL, 75, 1600, 1200, 4938, 304, 64, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1680x1050 @ 60 Hz, 65.191 kHz hsync */ + NULL, 60, 1680, 1050, 6848, 280, 104, 30, 3, 176, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1600x1200 @ 85 Hz, 105.77 kHz hsync */ + NULL, 85, 1600, 1200, 4545, 272, 16, 37, 4, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x1024 @ 100 Hz, 107.16 kHz hsync */ + NULL, 100, 1280, 1024, 5502, 256, 32, 26, 7, 128, 15, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1800x1440 @ 64Hz, 96.15 kHz hsync */ + NULL, 64, 1800, 1440, 4347, 304, 96, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1800x1440 @ 70Hz, 104.52 kHz hsync */ + NULL, 70, 1800, 1440, 4000, 304, 96, 46, 1, 192, 3, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 512x384 @ 78 Hz, 31.50 kHz hsync */ + NULL, 78, 512, 384, 49603, 48, 16, 16, 1, 
64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 512x384 @ 85 Hz, 34.38 kHz hsync */ + NULL, 85, 512, 384, 45454, 48, 16, 16, 1, 64, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 320x200 @ 70 Hz, 31.5 kHz hsync, 8:5 aspect ratio */ + NULL, 70, 320, 200, 79440, 16, 16, 20, 4, 48, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 320x240 @ 60 Hz, 31.5 kHz hsync, 4:3 aspect ratio */ + NULL, 60, 320, 240, 79440, 16, 16, 16, 5, 48, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 320x240 @ 72 Hz, 36.5 kHz hsync */ + NULL, 72, 320, 240, 63492, 16, 16, 16, 4, 48, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 56 Hz, 35.2 kHz hsync, 4:3 aspect ratio */ + NULL, 56, 400, 300, 55555, 64, 16, 10, 1, 32, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 400, 300, 50000, 48, 16, 11, 1, 64, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 400x300 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 400, 300, 40000, 32, 24, 11, 19, 64, 3, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 56 Hz, 35.2 kHz hsync, 8:5 aspect ratio */ + NULL, 56, 480, 300, 46176, 80, 16, 10, 1, 40, 1, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 60 Hz, 37.8 kHz hsync */ + NULL, 60, 480, 300, 41858, 56, 16, 11, 1, 80, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 63 Hz, 39.6 kHz hsync */ + NULL, 63, 480, 300, 40000, 56, 16, 11, 1, 80, 2, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 480x300 @ 72 Hz, 48.0 kHz hsync */ + NULL, 72, 480, 300, 33386, 40, 24, 11, 19, 80, 3, +- 0, FB_VMODE_DOUBLE ++ 0, FB_VMODE_DOUBLE, FB_MODE_IS_UNKNOWN + }, { + /* 1920x1200 @ 60 Hz, 74.5 Khz 
hsync */ + NULL, 60, 1920, 1200, 5177, 128, 336, 1, 38, 208, 3, + FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, +- FB_VMODE_NONINTERLACED ++ FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1152x768, 60 Hz, PowerBook G4 Titanium I and II */ + NULL, 60, 1152, 768, 14047, 158, 26, 29, 3, 136, 6, +- FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED ++ FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1366x768, 60 Hz, 47.403 kHz hsync, WXGA 16:9 aspect ratio */ + NULL, 60, 1366, 768, 13806, 120, 10, 14, 3, 32, 5, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, { + /* 1280x800, 60 Hz, 47.403 kHz hsync, WXGA 16:10 aspect ratio */ + NULL, 60, 1280, 800, 12048, 200, 64, 24, 1, 136, 3, +- 0, FB_VMODE_NONINTERLACED ++ 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_UNKNOWN + }, + }; + +diff -urNp a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c +--- a/drivers/video/uvesafb.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/uvesafb.c 2009-05-24 18:10:25.184337264 -0700 +@@ -18,6 +18,7 @@ + #include <linux/fb.h> + #include <linux/io.h> + #include <linux/mutex.h> ++#include <linux/moduleloader.h> + #include <video/edid.h> + #include <video/uvesafb.h> + #ifdef CONFIG_X86 +@@ -117,7 +118,7 @@ static int uvesafb_helper_start(void) + NULL, + }; + +- return call_usermodehelper(v86d_path, argv, envp, 1); ++ return call_usermodehelper(v86d_path, argv, envp, UMH_WAIT_PROC); + } + + /* +@@ -574,10 +575,34 @@ static int __devinit uvesafb_vbe_getpmi( + if ((task->t.regs.eax & 0xffff) != 0x4f || task->t.regs.es < 0xc000) { + par->pmi_setpal = par->ypan = 0; + } else { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#ifdef CONFIG_MODULES ++ unsigned long cr0; ++ ++ par->pmi_code = module_alloc_exec((u16)task->t.regs.ecx); ++#endif ++ if (!par->pmi_code) { ++ par->pmi_setpal = par->ypan = 0; ++ return 0; ++ } ++#endif ++ + par->pmi_base = (u16 *)phys_to_virt(((u32)task->t.regs.es << 4) + + 
task->t.regs.edi); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ pax_open_kernel(cr0); ++ memcpy(par->pmi_code, par->pmi_base, (u16)task->t.regs.ecx); ++ pax_close_kernel(cr0); ++ ++ par->pmi_start = ktva_ktla(par->pmi_code + par->pmi_base[1]); ++ par->pmi_pal = ktva_ktla(par->pmi_code + par->pmi_base[2]); ++#else + par->pmi_start = (u8 *)par->pmi_base + par->pmi_base[1]; + par->pmi_pal = (u8 *)par->pmi_base + par->pmi_base[2]; ++#endif ++ + printk(KERN_INFO "uvesafb: protected mode interface info at " + "%04x:%04x\n", + (u16)task->t.regs.es, (u16)task->t.regs.edi); +@@ -1832,6 +1857,11 @@ out: + if (par->vbe_modes) + kfree(par->vbe_modes); + ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ if (par->pmi_code) ++ module_free_exec(NULL, par->pmi_code); ++#endif ++ + framebuffer_release(info); + return err; + } +@@ -1858,6 +1888,12 @@ static int uvesafb_remove(struct platfor + kfree(par->vbe_state_orig); + if (par->vbe_state_saved) + kfree(par->vbe_state_saved); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ if (par->pmi_code) ++ module_free_exec(NULL, par->pmi_code); ++#endif ++ + } + + framebuffer_release(info); +diff -urNp a/drivers/video/vesafb.c b/drivers/video/vesafb.c +--- a/drivers/video/vesafb.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/drivers/video/vesafb.c 2009-05-24 18:10:25.185337672 -0700 +@@ -9,6 +9,7 @@ + */ + + #include <linux/module.h> ++#include <linux/moduleloader.h> + #include <linux/kernel.h> + #include <linux/errno.h> + #include <linux/string.h> +@@ -53,8 +54,8 @@ static int vram_remap __initdata; /* + static int vram_total __initdata; /* Set total amount of memory */ + static int pmi_setpal __read_mostly = 1; /* pmi for palette changes ??? 
*/ + static int ypan __read_mostly; /* 0..nothing, 1..ypan, 2..ywrap */ +-static void (*pmi_start)(void) __read_mostly; +-static void (*pmi_pal) (void) __read_mostly; ++static void (*pmi_start)(void) __read_only; ++static void (*pmi_pal) (void) __read_only; + static int depth __read_mostly; + static int vga_compat __read_mostly; + /* --------------------------------------------------------------------- */ +@@ -224,6 +225,7 @@ static int __init vesafb_probe(struct pl + unsigned int size_vmode; + unsigned int size_remap; + unsigned int size_total; ++ void *pmi_code = NULL; + + if (screen_info.orig_video_isVGA != VIDEO_TYPE_VLFB) + return -ENODEV; +@@ -266,10 +268,6 @@ static int __init vesafb_probe(struct pl + size_remap = size_total; + vesafb_fix.smem_len = size_remap; + +-#ifndef __i386__ +- screen_info.vesapm_seg = 0; +-#endif +- + if (!request_mem_region(vesafb_fix.smem_start, size_total, "vesafb")) { + printk(KERN_WARNING + "vesafb: cannot reserve video memory at 0x%lx\n", +@@ -302,9 +300,21 @@ static int __init vesafb_probe(struct pl + printk(KERN_INFO "vesafb: mode is %dx%dx%d, linelength=%d, pages=%d\n", + vesafb_defined.xres, vesafb_defined.yres, vesafb_defined.bits_per_pixel, vesafb_fix.line_length, screen_info.pages); + ++#ifdef __i386__ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ pmi_code = module_alloc_exec(screen_info.vesapm_size); ++ if (!pmi_code) ++#elif !defined(CONFIG_PAX_KERNEXEC) ++ if (0) ++#endif ++ ++#endif ++ screen_info.vesapm_seg = 0; ++ + if (screen_info.vesapm_seg) { +- printk(KERN_INFO "vesafb: protected mode interface info at %04x:%04x\n", +- screen_info.vesapm_seg,screen_info.vesapm_off); ++ printk(KERN_INFO "vesafb: protected mode interface info at %04x:%04x %04x bytes\n", ++ screen_info.vesapm_seg,screen_info.vesapm_off,screen_info.vesapm_size); + } + + if (screen_info.vesapm_seg < 0xc000) +@@ -312,9 +322,29 @@ static int __init vesafb_probe(struct pl + + if (ypan || pmi_setpal) { + unsigned short *pmi_base; 
+- pmi_base = (unsigned short*)phys_to_virt(((unsigned long)screen_info.vesapm_seg << 4) + screen_info.vesapm_off); +- pmi_start = (void*)((char*)pmi_base + pmi_base[1]); +- pmi_pal = (void*)((char*)pmi_base + pmi_base[2]); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ unsigned long cr0; ++#endif ++ ++ pmi_base = (unsigned short*)phys_to_virt(((unsigned long)screen_info.vesapm_seg << 4) + screen_info.vesapm_off); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ pax_open_kernel(cr0); ++ memcpy(pmi_code, pmi_base, screen_info.vesapm_size); ++#else ++ pmi_code = pmi_base; ++#endif ++ ++ pmi_start = (void*)((char*)pmi_code + pmi_base[1]); ++ pmi_pal = (void*)((char*)pmi_code + pmi_base[2]); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ pmi_start = ktva_ktla(pmi_start); ++ pmi_pal = ktva_ktla(pmi_pal); ++ pax_close_kernel(cr0); ++#endif ++ + printk(KERN_INFO "vesafb: pmi: set display start = %p, set palette = %p\n",pmi_start,pmi_pal); + if (pmi_base[3]) { + printk(KERN_INFO "vesafb: pmi: ports = "); +@@ -456,6 +486,11 @@ static int __init vesafb_probe(struct pl + info->node, info->fix.id); + return 0; + err: ++ ++#if defined(__i386__) && defined(CONFIG_MODULES) && defined(CONFIG_PAX_KERNEXEC) ++ module_free_exec(NULL, pmi_code); ++#endif ++ + if (info->screen_base) + iounmap(info->screen_base); + framebuffer_release(info); +diff -urNp a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c +--- a/fs/9p/vfs_inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/9p/vfs_inode.c 2009-05-24 18:10:25.186335774 -0700 +@@ -1021,7 +1021,7 @@ static void *v9fs_vfs_follow_link(struct + static void + v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) + { +- char *s = nd_get_link(nd); ++ const char *s = nd_get_link(nd); + + P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, + IS_ERR(s) ? 
"<error>" : s); +diff -urNp a/fs/aio.c b/fs/aio.c +--- a/fs/aio.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/aio.c 2009-05-24 18:10:25.187336950 -0700 +@@ -114,7 +114,7 @@ static int aio_setup_ring(struct kioctx + size += sizeof(struct io_event) * nr_events; + nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT; + +- if (nr_pages < 0) ++ if (nr_pages <= 0) + return -EINVAL; + + nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); +diff -urNp a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c +--- a/fs/autofs4/symlink.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/autofs4/symlink.c 2009-05-24 18:10:25.188336729 -0700 +@@ -15,7 +15,7 @@ + static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) + { + struct autofs_info *ino = autofs4_dentry_ino(dentry); +- nd_set_link(nd, (char *)ino->u.symlink); ++ nd_set_link(nd, ino->u.symlink); + return NULL; + } + +diff -urNp a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c +--- a/fs/befs/linuxvfs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/befs/linuxvfs.c 2009-05-24 18:10:25.189339860 -0700 +@@ -490,7 +490,7 @@ static void befs_put_link(struct dentry + { + befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { +- char *link = nd_get_link(nd); ++ const char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); + } +diff -urNp a/fs/binfmt_aout.c b/fs/binfmt_aout.c +--- a/fs/binfmt_aout.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/binfmt_aout.c 2009-05-24 18:10:25.189339860 -0700 +@@ -16,6 +16,7 @@ + #include <linux/string.h> + #include <linux/fs.h> + #include <linux/file.h> ++#include <linux/security.h> + #include <linux/stat.h> + #include <linux/fcntl.h> + #include <linux/ptrace.h> +@@ -124,18 +125,22 @@ static int aout_core_dump(long signr, st + /* If the size of the dump file exceeds the rlimit, then see what would happen + if we wrote the stack, but not the data area. 
*/ + #ifdef __sparc__ ++ gr_learn_resource(current, RLIMIT_CORE, dump.u_dsize + dump.u_ssize, 1); + if ((dump.u_dsize + dump.u_ssize) > limit) + dump.u_dsize = 0; + #else ++ gr_learn_resource(current, RLIMIT_CORE, (dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE, 1); + if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) + dump.u_dsize = 0; + #endif + + /* Make sure we have enough room to write the stack and data areas. */ + #ifdef __sparc__ ++ gr_learn_resource(current, RLIMIT_CORE, dump.u_ssize, 1); + if (dump.u_ssize > limit) + dump.u_ssize = 0; + #else ++ gr_learn_resource(current, RLIMIT_CORE, (dump.u_ssize + 1) * PAGE_SIZE, 1); + if ((dump.u_ssize + 1) * PAGE_SIZE > limit) + dump.u_ssize = 0; + #endif +@@ -291,6 +296,8 @@ static int load_aout_binary(struct linux + rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + if (rlim >= RLIM_INFINITY) + rlim = ~0; ++ ++ gr_learn_resource(current, RLIMIT_DATA, ex.a_data + ex.a_bss, 1); + if (ex.a_data + ex.a_bss > rlim) + return -ENOMEM; + +@@ -322,6 +329,28 @@ static int load_aout_binary(struct linux + + compute_creds(bprm); + current->flags &= ~PF_FORKNOEXEC; ++ ++#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++ current->mm->pax_flags = 0UL; ++#endif ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(N_FLAGS(ex) & F_PAX_PAGEEXEC)) { ++ current->mm->pax_flags |= MF_PAX_PAGEEXEC; ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ if (N_FLAGS(ex) & F_PAX_EMUTRAMP) ++ current->mm->pax_flags |= MF_PAX_EMUTRAMP; ++#endif ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (!(N_FLAGS(ex) & F_PAX_MPROTECT)) ++ current->mm->pax_flags |= MF_PAX_MPROTECT; ++#endif ++ ++ } ++#endif ++ + #ifdef __sparc__ + if (N_MAGIC(ex) == NMAGIC) { + loff_t pos = fd_offset; +@@ -413,7 +442,7 @@ static int load_aout_binary(struct linux + + down_write(&current->mm->mmap_sem); + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, +- PROT_READ | PROT_WRITE | PROT_EXEC, ++ PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, + fd_offset + 
ex.a_text); + up_write(&current->mm->mmap_sem); +diff -urNp a/fs/binfmt_elf.c b/fs/binfmt_elf.c +--- a/fs/binfmt_elf.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/binfmt_elf.c 2009-05-24 18:10:25.191336904 -0700 +@@ -42,6 +42,10 @@ + #include <asm/param.h> + #include <asm/page.h> + ++#ifdef CONFIG_PAX_SEGMEXEC ++#include <asm/desc.h> ++#endif ++ + static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); + static int load_elf_library(struct file *); + static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, +@@ -57,6 +61,10 @@ static int elf_core_dump(long signr, str + #define elf_core_dump NULL + #endif + ++#ifdef CONFIG_PAX_MPROTECT ++static void elf_handle_mprotect(struct vm_area_struct *vma, unsigned long newflags); ++#endif ++ + #if ELF_EXEC_PAGESIZE > PAGE_SIZE + #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE + #else +@@ -76,6 +84,11 @@ static struct linux_binfmt elf_format = + .load_binary = load_elf_binary, + .load_shlib = load_elf_library, + .core_dump = elf_core_dump, ++ ++#ifdef CONFIG_PAX_MPROTECT ++ .handle_mprotect= elf_handle_mprotect, ++#endif ++ + .min_coredump = ELF_EXEC_PAGESIZE, + .hasvdso = 1 + }; +@@ -84,6 +97,8 @@ static struct linux_binfmt elf_format = + + static int set_brk(unsigned long start, unsigned long end) + { ++ unsigned long e = end; ++ + start = ELF_PAGEALIGN(start); + end = ELF_PAGEALIGN(end); + if (end > start) { +@@ -94,7 +109,7 @@ static int set_brk(unsigned long start, + if (BAD_ADDR(addr)) + return addr; + } +- current->mm->start_brk = current->mm->brk = end; ++ current->mm->start_brk = current->mm->brk = e; + return 0; + } + +@@ -380,10 +395,10 @@ static unsigned long load_elf_interp(str + { + struct elf_phdr *elf_phdata; + struct elf_phdr *eppnt; +- unsigned long load_addr = 0; ++ unsigned long load_addr = 0, pax_task_size = TASK_SIZE; + int load_addr_set = 0; + unsigned long last_bss = 0, elf_bss = 0; +- unsigned long error = ~0UL; ++ unsigned long error = -EINVAL; + unsigned long total_size; 
+ int retval, i, size; + +@@ -429,6 +444,11 @@ static unsigned long load_elf_interp(str + goto out_close; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ + eppnt = elf_phdata; + for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_type == PT_LOAD) { +@@ -472,8 +492,8 @@ static unsigned long load_elf_interp(str + k = load_addr + eppnt->p_vaddr; + if (BAD_ADDR(k) || + eppnt->p_filesz > eppnt->p_memsz || +- eppnt->p_memsz > TASK_SIZE || +- TASK_SIZE - eppnt->p_memsz < k) { ++ eppnt->p_memsz > pax_task_size || ++ pax_task_size - eppnt->p_memsz < k) { + error = -ENOMEM; + goto out_close; + } +@@ -527,6 +547,177 @@ out: + return error; + } + ++#if (defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS)) && defined(CONFIG_PAX_SOFTMODE) ++static unsigned long pax_parse_softmode(const struct elf_phdr * const elf_phdata) ++{ ++ unsigned long pax_flags = 0UL; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (elf_phdata->p_flags & PF_PAGEEXEC) ++ pax_flags |= MF_PAX_PAGEEXEC; ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (elf_phdata->p_flags & PF_SEGMEXEC) ++ pax_flags |= MF_PAX_SEGMEXEC; ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_PAX_SEGMEXEC) ++ if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) == (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ if (nx_enabled) ++ pax_flags &= ~MF_PAX_SEGMEXEC; ++ else ++ pax_flags &= ~MF_PAX_PAGEEXEC; ++ } ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ if (elf_phdata->p_flags & PF_EMUTRAMP) ++ pax_flags |= MF_PAX_EMUTRAMP; ++#endif ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (elf_phdata->p_flags & PF_MPROTECT) ++ pax_flags |= MF_PAX_MPROTECT; ++#endif ++ ++#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) ++ if (randomize_va_space && (elf_phdata->p_flags & PF_RANDMMAP)) ++ pax_flags |= MF_PAX_RANDMMAP; ++#endif ++ ++ return pax_flags; ++} ++#endif ++ ++#ifdef CONFIG_PAX_PT_PAX_FLAGS ++static unsigned long 
pax_parse_hardmode(const struct elf_phdr * const elf_phdata) ++{ ++ unsigned long pax_flags = 0UL; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(elf_phdata->p_flags & PF_NOPAGEEXEC)) ++ pax_flags |= MF_PAX_PAGEEXEC; ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (!(elf_phdata->p_flags & PF_NOSEGMEXEC)) ++ pax_flags |= MF_PAX_SEGMEXEC; ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_PAX_SEGMEXEC) ++ if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) == (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ if (nx_enabled) ++ pax_flags &= ~MF_PAX_SEGMEXEC; ++ else ++ pax_flags &= ~MF_PAX_PAGEEXEC; ++ } ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ if (!(elf_phdata->p_flags & PF_NOEMUTRAMP)) ++ pax_flags |= MF_PAX_EMUTRAMP; ++#endif ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (!(elf_phdata->p_flags & PF_NOMPROTECT)) ++ pax_flags |= MF_PAX_MPROTECT; ++#endif ++ ++#if defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK) ++ if (randomize_va_space && !(elf_phdata->p_flags & PF_NORANDMMAP)) ++ pax_flags |= MF_PAX_RANDMMAP; ++#endif ++ ++ return pax_flags; ++} ++#endif ++ ++#ifdef CONFIG_PAX_EI_PAX ++static unsigned long pax_parse_ei_pax(const struct elfhdr * const elf_ex) ++{ ++ unsigned long pax_flags = 0UL; ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_PAGEEXEC)) ++ pax_flags |= MF_PAX_PAGEEXEC; ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (!(elf_ex->e_ident[EI_PAX] & EF_PAX_SEGMEXEC)) ++ pax_flags |= MF_PAX_SEGMEXEC; ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_PAX_SEGMEXEC) ++ if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) == (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ if (nx_enabled) ++ pax_flags &= ~MF_PAX_SEGMEXEC; ++ else ++ pax_flags &= ~MF_PAX_PAGEEXEC; ++ } ++#endif ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ if ((pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && (elf_ex->e_ident[EI_PAX] & EF_PAX_EMUTRAMP)) ++ pax_flags |= MF_PAX_EMUTRAMP; ++#endif ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if ((pax_flags & 
(MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) && !(elf_ex->e_ident[EI_PAX] & EF_PAX_MPROTECT)) ++ pax_flags |= MF_PAX_MPROTECT; ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++ if (randomize_va_space && !(elf_ex->e_ident[EI_PAX] & EF_PAX_RANDMMAP)) ++ pax_flags |= MF_PAX_RANDMMAP; ++#endif ++ ++ return pax_flags; ++} ++#endif ++ ++#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) ++static long pax_parse_elf_flags(const struct elfhdr * const elf_ex, const struct elf_phdr * const elf_phdata) ++{ ++ unsigned long pax_flags = 0UL; ++ ++#ifdef CONFIG_PAX_PT_PAX_FLAGS ++ unsigned long i; ++#endif ++ ++#ifdef CONFIG_PAX_EI_PAX ++ pax_flags = pax_parse_ei_pax(elf_ex); ++#endif ++ ++#ifdef CONFIG_PAX_PT_PAX_FLAGS ++ for (i = 0UL; i < elf_ex->e_phnum; i++) ++ if (elf_phdata[i].p_type == PT_PAX_FLAGS) { ++ if (((elf_phdata[i].p_flags & PF_PAGEEXEC) && (elf_phdata[i].p_flags & PF_NOPAGEEXEC)) || ++ ((elf_phdata[i].p_flags & PF_SEGMEXEC) && (elf_phdata[i].p_flags & PF_NOSEGMEXEC)) || ++ ((elf_phdata[i].p_flags & PF_EMUTRAMP) && (elf_phdata[i].p_flags & PF_NOEMUTRAMP)) || ++ ((elf_phdata[i].p_flags & PF_MPROTECT) && (elf_phdata[i].p_flags & PF_NOMPROTECT)) || ++ ((elf_phdata[i].p_flags & PF_RANDMMAP) && (elf_phdata[i].p_flags & PF_NORANDMMAP))) ++ return -EINVAL; ++ ++#ifdef CONFIG_PAX_SOFTMODE ++ if (pax_softmode) ++ pax_flags = pax_parse_softmode(&elf_phdata[i]); ++ else ++#endif ++ ++ pax_flags = pax_parse_hardmode(&elf_phdata[i]); ++ break; ++ } ++#endif ++ ++ if (0 > pax_check_flags(&pax_flags)) ++ return -EINVAL; ++ ++ current->mm->pax_flags = pax_flags; ++ return 0; ++} ++#endif ++ + /* + * These are the functions used to load ELF style executables and shared + * libraries. There is no binary dependent code anywhere else. 
+@@ -543,6 +734,11 @@ static unsigned long randomize_stack_top + { + unsigned int random_variable = 0; + ++#ifdef CONFIG_PAX_RANDUSTACK ++ if (randomize_va_space) ++ return stack_top - current->mm->delta_stack; ++#endif ++ + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) { + random_variable = get_random_int() & STACK_RND_MASK; +@@ -561,7 +757,7 @@ static int load_elf_binary(struct linux_ + unsigned long load_addr = 0, load_bias = 0; + int load_addr_set = 0; + char * elf_interpreter = NULL; +- unsigned long error; ++ unsigned long error = 0; + struct elf_phdr *elf_ppnt, *elf_phdata; + unsigned long elf_bss, elf_brk; + int elf_exec_fileno; +@@ -572,11 +768,11 @@ static int load_elf_binary(struct linux_ + unsigned long start_code, end_code, start_data, end_data; + unsigned long reloc_func_desc = 0; + int executable_stack = EXSTACK_DEFAULT; +- unsigned long def_flags = 0; + struct { + struct elfhdr elf_ex; + struct elfhdr interp_elf_ex; + } *loc; ++ unsigned long pax_task_size = TASK_SIZE; + + loc = kmalloc(sizeof(*loc), GFP_KERNEL); + if (!loc) { +@@ -744,11 +940,80 @@ static int load_elf_binary(struct linux_ + + /* OK, This is the point of no return */ + current->flags &= ~PF_FORKNOEXEC; +- current->mm->def_flags = def_flags; ++ ++#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++ current->mm->pax_flags = 0UL; ++#endif ++ ++#ifdef CONFIG_PAX_DLRESOLVE ++ current->mm->call_dl_resolve = 0UL; ++#endif ++ ++#if defined(CONFIG_PPC32) && defined(CONFIG_PAX_EMUSIGRT) ++ current->mm->call_syscall = 0UL; ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++ current->mm->delta_mmap = 0UL; ++ current->mm->delta_stack = 0UL; ++#endif ++ ++ current->mm->def_flags = 0; ++ ++#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) ++ if (0 > pax_parse_elf_flags(&loc->elf_ex, elf_phdata)) { ++ send_sig(SIGKILL, current, 0); ++ goto out_free_dentry; ++ } ++#endif ++ ++#ifdef CONFIG_PAX_HAVE_ACL_FLAGS ++ pax_set_initial_flags(bprm); 
++#elif defined(CONFIG_PAX_HOOK_ACL_FLAGS) ++ if (pax_set_initial_flags_func) ++ (pax_set_initial_flags_func)(bprm); ++#endif ++ ++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++ if ((current->mm->pax_flags & MF_PAX_PAGEEXEC) && !nx_enabled) { ++ current->mm->context.user_cs_limit = PAGE_SIZE; ++ current->mm->def_flags |= VM_PAGEEXEC; ++ } ++#endif ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { ++ current->mm->context.user_cs_base = SEGMEXEC_TASK_SIZE; ++ current->mm->context.user_cs_limit = TASK_SIZE-SEGMEXEC_TASK_SIZE; ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++ } ++#endif ++ ++#if defined(CONFIG_ARCH_TRACK_EXEC_LIMIT) || defined(CONFIG_PAX_SEGMEXEC) ++ if (current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ set_user_cs(current->mm->context.user_cs_base, current->mm->context.user_cs_limit, get_cpu()); ++ put_cpu_no_resched(); ++ } ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++ if (current->mm->pax_flags & MF_PAX_RANDMMAP) { ++ current->mm->delta_mmap = (pax_get_random_long() & ((1UL << PAX_DELTA_MMAP_LEN)-1)) << PAGE_SHIFT; ++ current->mm->delta_stack = (pax_get_random_long() & ((1UL << PAX_DELTA_STACK_LEN)-1)) << PAGE_SHIFT; ++ } ++#endif + + /* Do this immediately, since STACK_TOP as used in setup_arg_pages + may depend on the personality. 
*/ + SET_PERSONALITY(loc->elf_ex); ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (current->mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ executable_stack = EXSTACK_DISABLE_X; ++ current->personality &= ~READ_IMPLIES_EXEC; ++ } else ++#endif ++ + if (elf_read_implies_exec(loc->elf_ex, executable_stack)) + current->personality |= READ_IMPLIES_EXEC; + +@@ -829,6 +1094,20 @@ static int load_elf_binary(struct linux_ + #else + load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + #endif ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ /* PaX: randomize base address at the default exe base if requested */ ++ if ((current->mm->pax_flags & MF_PAX_RANDMMAP) && elf_interpreter) { ++#ifdef CONFIG_SPARC64 ++ load_bias = (pax_get_random_long() & ((1UL << PAX_DELTA_MMAP_LEN) - 1)) << (PAGE_SHIFT+1); ++#else ++ load_bias = (pax_get_random_long() & ((1UL << PAX_DELTA_MMAP_LEN) - 1)) << PAGE_SHIFT; ++#endif ++ load_bias = ELF_PAGESTART(PAX_ELF_ET_DYN_BASE - vaddr + load_bias); ++ elf_flags |= MAP_FIXED; ++ } ++#endif ++ + } + + error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, +@@ -861,9 +1140,9 @@ static int load_elf_binary(struct linux_ + * allowed task size. Note that p_filesz must always be + * <= p_memsz so it is only necessary to check p_memsz. + */ +- if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || +- elf_ppnt->p_memsz > TASK_SIZE || +- TASK_SIZE - elf_ppnt->p_memsz < k) { ++ if (k >= pax_task_size || elf_ppnt->p_filesz > elf_ppnt->p_memsz || ++ elf_ppnt->p_memsz > pax_task_size || ++ pax_task_size - elf_ppnt->p_memsz < k) { + /* set_brk can never work. Avoid overflows. 
*/ + send_sig(SIGKILL, current, 0); + retval = -EINVAL; +@@ -891,6 +1170,11 @@ static int load_elf_binary(struct linux_ + start_data += load_bias; + end_data += load_bias; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (current->mm->pax_flags & MF_PAX_RANDMMAP) ++ elf_brk += PAGE_SIZE + ((pax_get_random_long() & ~PAGE_MASK) << 4); ++#endif ++ + /* Calling set_brk effectively mmaps the pages that we need + * for the bss and break sections. We must do this before + * mapping in the interpreter, to make sure it doesn't wind +@@ -902,9 +1186,11 @@ static int load_elf_binary(struct linux_ + goto out_free_dentry; + } + if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { +- send_sig(SIGSEGV, current, 0); +- retval = -EFAULT; /* Nobody gets to see this, but.. */ +- goto out_free_dentry; ++ /* ++ * This bss-zeroing can fail if the ELF ++ * file specifies odd protections. So ++ * we don't check the return value ++ */ + } + + if (elf_interpreter) { +@@ -1141,8 +1427,10 @@ static int dump_seek(struct file *file, + unsigned long n = off; + if (n > PAGE_SIZE) + n = PAGE_SIZE; +- if (!dump_write(file, buf, n)) ++ if (!dump_write(file, buf, n)) { ++ free_page((unsigned long)buf); + return 0; ++ } + off -= n; + } + free_page((unsigned long)buf); +@@ -1154,7 +1442,7 @@ static int dump_seek(struct file *file, + * Decide what to dump of a segment, part, all or none. 
+ */ + static unsigned long vma_dump_size(struct vm_area_struct *vma, +- unsigned long mm_flags) ++ unsigned long mm_flags, long signr) + { + #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) + +@@ -1188,7 +1476,7 @@ static unsigned long vma_dump_size(struc + if (vma->vm_file == NULL) + return 0; + +- if (FILTER(MAPPED_PRIVATE)) ++ if (signr == SIGKILL || FILTER(MAPPED_PRIVATE)) + goto whole; + + /* +@@ -1284,8 +1572,11 @@ static int writenote(struct memelfnote * + #undef DUMP_WRITE + + #define DUMP_WRITE(addr, nr) \ ++ do { \ ++ gr_learn_resource(current, RLIMIT_CORE, size + (nr), 1); \ + if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ +- goto end_coredump; ++ goto end_coredump; \ ++ } while (0); + #define DUMP_SEEK(off) \ + if (!dump_seek(file, (off))) \ + goto end_coredump; +@@ -1986,7 +2277,7 @@ static int elf_core_dump(long signr, str + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; +- phdr.p_filesz = vma_dump_size(vma, mm_flags); ++ phdr.p_filesz = vma_dump_size(vma, mm_flags, signr); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = vma->vm_flags & VM_READ ? 
PF_R : 0; +@@ -2018,7 +2309,7 @@ static int elf_core_dump(long signr, str + unsigned long addr; + unsigned long end; + +- end = vma->vm_start + vma_dump_size(vma, mm_flags); ++ end = vma->vm_start + vma_dump_size(vma, mm_flags, signr); + + for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { + struct page *page; +@@ -2038,6 +2329,7 @@ static int elf_core_dump(long signr, str + flush_cache_page(tmp_vma, addr, + page_to_pfn(page)); + kaddr = kmap(page); ++ gr_learn_resource(current, RLIMIT_CORE, size + PAGE_SIZE, 1); + if ((size += PAGE_SIZE) > limit || + !dump_write(file, kaddr, + PAGE_SIZE)) { +@@ -2068,6 +2360,99 @@ out: + + #endif /* USE_ELF_CORE_DUMP */ + ++#ifdef CONFIG_PAX_MPROTECT ++/* PaX: non-PIC ELF libraries need relocations on their executable segments ++ * therefore we'll grant them VM_MAYWRITE once during their life. Similarly ++ * we'll remove VM_MAYWRITE for good on RELRO segments. ++ * ++ * The checks favour ld-linux.so behaviour which operates on a per ELF segment ++ * basis because we want to allow the common case and not the special ones. 
++ */ ++static void elf_handle_mprotect(struct vm_area_struct *vma, unsigned long newflags) ++{ ++ struct elfhdr elf_h; ++ struct elf_phdr elf_p; ++ unsigned long i; ++ unsigned long oldflags; ++ bool is_textrel_rw, is_textrel_rx, is_relro; ++ ++ if (!(vma->vm_mm->pax_flags & MF_PAX_MPROTECT)) ++ return; ++ ++ oldflags = vma->vm_flags & (VM_MAYEXEC | VM_MAYWRITE | VM_MAYREAD | VM_EXEC | VM_WRITE | VM_READ); ++ newflags &= VM_MAYEXEC | VM_MAYWRITE | VM_MAYREAD | VM_EXEC | VM_WRITE | VM_READ; ++ ++#ifdef CONFIG_PAX_NOELFRELOCS ++ is_textrel_rw = false; ++ is_textrel_rx = false; ++#else ++ /* possible TEXTREL */ ++ is_textrel_rw = vma->vm_file && !vma->anon_vma && oldflags == (VM_MAYEXEC | VM_MAYREAD | VM_EXEC | VM_READ) && newflags == (VM_WRITE | VM_READ); ++ is_textrel_rx = vma->vm_file && vma->anon_vma && oldflags == (VM_MAYEXEC | VM_MAYWRITE | VM_MAYREAD | VM_WRITE | VM_READ) && newflags == (VM_EXEC | VM_READ); ++#endif ++ ++ /* possible RELRO */ ++ is_relro = vma->vm_file && vma->anon_vma && oldflags == (VM_MAYWRITE | VM_MAYREAD | VM_READ) && newflags == (VM_MAYWRITE | VM_MAYREAD | VM_READ); ++ ++ if (!is_textrel_rw && !is_textrel_rx && !is_relro) ++ return; ++ ++ if (sizeof(elf_h) != kernel_read(vma->vm_file, 0UL, (char *)&elf_h, sizeof(elf_h)) || ++ memcmp(elf_h.e_ident, ELFMAG, SELFMAG) || ++ ++#ifdef CONFIG_PAX_ETEXECRELOCS ++ ((is_textrel_rw || is_textrel_rx) && (elf_h.e_type != ET_DYN && elf_h.e_type != ET_EXEC)) || ++#else ++ ((is_textrel_rw || is_textrel_rx) && elf_h.e_type != ET_DYN) || ++#endif ++ ++ (is_relro && (elf_h.e_type != ET_DYN && elf_h.e_type != ET_EXEC)) || ++ !elf_check_arch(&elf_h) || ++ elf_h.e_phentsize != sizeof(struct elf_phdr) || ++ elf_h.e_phnum > 65536UL / sizeof(struct elf_phdr)) ++ return; ++ ++ for (i = 0UL; i < elf_h.e_phnum; i++) { ++ if (sizeof(elf_p) != kernel_read(vma->vm_file, elf_h.e_phoff + i*sizeof(elf_p), (char *)&elf_p, sizeof(elf_p))) ++ return; ++ switch (elf_p.p_type) { ++ case PT_DYNAMIC: { ++ elf_addr_t dyn_offset 
= 0UL; ++ elf_dyn dyn; ++ ++ if (!is_textrel_rw && !is_textrel_rx) ++ continue; ++ dyn_offset = elf_p.p_offset; ++ i = 0UL; ++ do { ++ if (sizeof(dyn) != kernel_read(vma->vm_file, dyn_offset + i*sizeof(dyn), (char *)&dyn, sizeof(dyn))) ++ return; ++ if (dyn.d_tag == DT_TEXTREL || (dyn.d_tag == DT_FLAGS && (dyn.d_un.d_val & DF_TEXTREL))) { ++ gr_log_textrel(vma); ++ if (is_textrel_rw) ++ vma->vm_flags |= VM_MAYWRITE; ++ else ++ /* PaX: disallow write access after relocs are done, hopefully noone else needs it... */ ++ vma->vm_flags &= ~VM_MAYWRITE; ++ return; ++ } ++ i++; ++ } while (dyn.d_tag != DT_NULL); ++ return; ++ } ++ ++ case PT_GNU_RELRO: ++ if (!is_relro) ++ continue; ++ if ((elf_p.p_offset >> PAGE_SHIFT) == vma->vm_pgoff && ELF_PAGEALIGN(elf_p.p_memsz) == vma->vm_end - vma->vm_start) { ++ vma->vm_flags &= ~VM_MAYWRITE; ++ } ++ return; ++ } ++ } ++} ++#endif ++ + static int __init init_elf_binfmt(void) + { + return register_binfmt(&elf_format); +diff -urNp a/fs/binfmt_flat.c b/fs/binfmt_flat.c +--- a/fs/binfmt_flat.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/binfmt_flat.c 2009-05-24 18:10:25.192337102 -0700 +@@ -561,7 +561,9 @@ static int load_flat_file(struct linux_b + realdatastart = (unsigned long) -ENOMEM; + printk("Unable to allocate RAM for process data, errno %d\n", + (int)-realdatastart); ++ down_write(&current->mm->mmap_sem); + do_munmap(current->mm, textpos, text_len); ++ up_write(&current->mm->mmap_sem); + ret = realdatastart; + goto err; + } +@@ -583,8 +585,10 @@ static int load_flat_file(struct linux_b + } + if (result >= (unsigned long)-4096) { + printk("Unable to read data+bss, errno %d\n", (int)-result); ++ down_write(&current->mm->mmap_sem); + do_munmap(current->mm, textpos, text_len); + do_munmap(current->mm, realdatastart, data_len + extra); ++ up_write(&current->mm->mmap_sem); + ret = result; + goto err; + } +@@ -657,8 +661,10 @@ static int load_flat_file(struct linux_b + } + if (result >= (unsigned long)-4096) { + printk("Unable to read code+data+bss, 
errno %d\n",(int)-result); ++ down_write(&current->mm->mmap_sem); + do_munmap(current->mm, textpos, text_len + data_len + extra + + MAX_SHARED_LIBS * sizeof(unsigned long)); ++ up_write(&current->mm->mmap_sem); + ret = result; + goto err; + } +diff -urNp a/fs/binfmt_misc.c b/fs/binfmt_misc.c +--- a/fs/binfmt_misc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/binfmt_misc.c 2009-05-24 18:10:25.193002783 -0700 +@@ -696,7 +696,7 @@ static int bm_fill_super(struct super_bl + static struct tree_descr bm_files[] = { + [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, + [3] = {"register", &bm_register_operations, S_IWUSR}, +- /* last one */ {""} ++ /* last one */ {"", NULL, 0} + }; + int err = simple_fill_super(sb, 0x42494e4d, bm_files); + if (!err) +diff -urNp a/fs/bio.c b/fs/bio.c +--- a/fs/bio.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/bio.c 2009-05-24 18:10:25.193002783 -0700 +@@ -554,7 +554,7 @@ static int __bio_copy_iov(struct bio *bi + + while (bv_len && iov_idx < iov_count) { + unsigned int bytes; +- char *iov_addr; ++ char __user *iov_addr; + + bytes = min_t(unsigned int, + iov[iov_idx].iov_len - iov_off, bv_len); +diff -urNp a/fs/buffer.c b/fs/buffer.c +--- a/fs/buffer.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/buffer.c 2009-05-24 18:10:25.193986989 -0700 +@@ -25,6 +25,7 @@ + #include <linux/percpu.h> + #include <linux/slab.h> + #include <linux/capability.h> ++#include <linux/security.h> + #include <linux/blkdev.h> + #include <linux/file.h> + #include <linux/quotaops.h> +@@ -2249,6 +2250,7 @@ int generic_cont_expand_simple(struct in + + err = -EFBIG; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; ++ gr_learn_resource(current, RLIMIT_FSIZE, (unsigned long) size, 1); + if (limit != RLIM_INFINITY && size > (loff_t)limit) { + send_sig(SIGXFSZ, current, 0); + goto out; +diff -urNp a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h +--- a/fs/cifs/cifs_uniupr.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/cifs/cifs_uniupr.h 2009-05-24 18:10:25.195991016 
-0700 +@@ -132,7 +132,7 @@ const struct UniCaseRange CifsUniUpperRa + {0x0490, 0x04cc, UniCaseRangeU0490}, + {0x1e00, 0x1ffc, UniCaseRangeU1e00}, + {0xff40, 0xff5a, UniCaseRangeUff40}, +- {0} ++ {0, 0, NULL} + }; + #endif + +diff -urNp a/fs/cifs/link.c b/fs/cifs/link.c +--- a/fs/cifs/link.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/cifs/link.c 2009-05-24 18:10:25.195991016 -0700 +@@ -318,7 +318,7 @@ cifs_readlink(struct dentry *direntry, c + + void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) + { +- char *p = nd_get_link(nd); ++ const char *p = nd_get_link(nd); + if (!IS_ERR(p)) + kfree(p); + } +diff -urNp a/fs/compat.c b/fs/compat.c +--- a/fs/compat.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/compat.c 2009-05-24 18:10:25.197126138 -0700 +@@ -1331,14 +1331,12 @@ static int compat_copy_strings(int argc, + if (!kmapped_page || kpos != (pos & PAGE_MASK)) { + struct page *page; + +-#ifdef CONFIG_STACK_GROWSUP + ret = expand_stack_downwards(bprm->vma, pos); + if (ret < 0) { + /* We've exceed the stack rlimit. 
*/ + ret = -E2BIG; + goto out; + } +-#endif + ret = get_user_pages(current, bprm->mm, pos, + 1, 1, 1, &page, NULL); + if (ret <= 0) { +@@ -1384,6 +1382,11 @@ int compat_do_execve(char * filename, + compat_uptr_t __user *envp, + struct pt_regs * regs) + { ++#ifdef CONFIG_GRKERNSEC ++ struct file *old_exec_file; ++ struct acl_subject_label *old_acl; ++ struct rlimit old_rlim[RLIM_NLIMITS]; ++#endif + struct linux_binprm *bprm; + struct file *file; + struct files_struct *displaced; +@@ -1409,6 +1412,14 @@ int compat_do_execve(char * filename, + bprm->filename = filename; + bprm->interp = filename; + ++ gr_learn_resource(current, RLIMIT_NPROC, atomic_read(&current->user->processes), 1); ++ retval = -EAGAIN; ++ if (gr_handle_nproc()) ++ goto out_file; ++ retval = -EACCES; ++ if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) ++ goto out_file; ++ + retval = bprm_mm_init(bprm); + if (retval) + goto out_file; +@@ -1442,8 +1453,36 @@ int compat_do_execve(char * filename, + if (retval < 0) + goto out; + ++ if (!gr_tpe_allow(file)) { ++ retval = -EACCES; ++ goto out; ++ } ++ ++ if (gr_check_crash_exec(file)) { ++ retval = -EACCES; ++ goto out; ++ } ++ ++ gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); ++ ++ gr_handle_exec_args(bprm, (char __user * __user *)argv); ++ ++#ifdef CONFIG_GRKERNSEC ++ old_acl = current->acl; ++ memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); ++ old_exec_file = current->exec_file; ++ get_file(file); ++ current->exec_file = file; ++#endif ++ ++ gr_set_proc_label(file->f_dentry, file->f_vfsmnt); ++ + retval = search_binary_handler(bprm, regs); + if (retval >= 0) { ++#ifdef CONFIG_GRKERNSEC ++ if (old_exec_file) ++ fput(old_exec_file); ++#endif + /* execve success */ + security_bprm_free(bprm); + acct_update_integrals(current); +@@ -1453,6 +1492,13 @@ int compat_do_execve(char * filename, + return retval; + } + ++#ifdef CONFIG_GRKERNSEC ++ current->acl = old_acl; ++ memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); ++ 
fput(current->exec_file); ++ current->exec_file = old_exec_file; ++#endif ++ + out: + if (bprm->security) + security_bprm_free(bprm); +diff -urNp a/fs/compat_ioctl.c b/fs/compat_ioctl.c +--- a/fs/compat_ioctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/compat_ioctl.c 2009-05-24 18:10:25.199174023 -0700 +@@ -1832,15 +1832,15 @@ struct ioctl_trans { + }; + + #define HANDLE_IOCTL(cmd,handler) \ +- { (cmd), (ioctl_trans_handler_t)(handler) }, ++ { (cmd), (ioctl_trans_handler_t)(handler), NULL }, + + /* pointer to compatible structure or no argument */ + #define COMPATIBLE_IOCTL(cmd) \ +- { (cmd), do_ioctl32_pointer }, ++ { (cmd), do_ioctl32_pointer, NULL }, + + /* argument is an unsigned long integer, not a pointer */ + #define ULONG_IOCTL(cmd) \ +- { (cmd), (ioctl_trans_handler_t)sys_ioctl }, ++ { (cmd), (ioctl_trans_handler_t)sys_ioctl, NULL }, + + /* ioctl should not be warned about even if it's not implemented. + Valid reasons to use this: +diff -urNp a/fs/debugfs/inode.c b/fs/debugfs/inode.c +--- a/fs/debugfs/inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/debugfs/inode.c 2009-05-24 18:10:25.200269617 -0700 +@@ -120,7 +120,7 @@ static inline int debugfs_positive(struc + + static int debug_fill_super(struct super_block *sb, void *data, int silent) + { +- static struct tree_descr debug_files[] = {{""}}; ++ static struct tree_descr debug_files[] = {{"", NULL, 0}}; + + return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); + } +diff -urNp a/fs/exec.c b/fs/exec.c +--- a/fs/exec.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/exec.c 2009-05-24 18:10:25.202336009 -0700 +@@ -51,6 +51,13 @@ + #include <linux/audit.h> + #include <linux/tracehook.h> + #include <linux/kmod.h> ++#include <linux/random.h> ++#include <linux/seq_file.h> ++ ++#ifdef CONFIG_PAX_REFCOUNT ++#include <linux/kallsyms.h> ++#include <linux/kdebug.h> ++#endif + + #include <asm/uaccess.h> + #include <asm/mmu_context.h> +@@ -61,6 +68,11 @@ + #include <linux/a.out.h> + #endif + ++#ifdef 
CONFIG_PAX_HOOK_ACL_FLAGS ++void (*pax_set_initial_flags_func)(struct linux_binprm *bprm); ++EXPORT_SYMBOL(pax_set_initial_flags_func); ++#endif ++ + int core_uses_pid; + char core_pattern[CORENAME_MAX_SIZE] = "core"; + int suid_dumpable = 0; +@@ -169,18 +181,10 @@ static struct page *get_arg_page(struct + int write) + { + struct page *page; +- int ret; + +-#ifdef CONFIG_STACK_GROWSUP +- if (write) { +- ret = expand_stack_downwards(bprm->vma, pos); +- if (ret < 0) +- return NULL; +- } +-#endif +- ret = get_user_pages(current, bprm->mm, pos, +- 1, write, 1, &page, NULL); +- if (ret <= 0) ++ if (0 > expand_stack_downwards(bprm->vma, pos)) ++ return NULL; ++ if (0 >= get_user_pages(current, bprm->mm, pos, 1, write, 1, &page, NULL)) + return NULL; + + if (write) { +@@ -253,6 +257,11 @@ static int __bprm_mm_init(struct linux_b + vma->vm_start = vma->vm_end - PAGE_SIZE; + + vma->vm_flags = VM_STACK_FLAGS; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ vma->vm_flags &= ~(VM_EXEC | VM_MAYEXEC); ++#endif ++ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + err = insert_vm_struct(mm, vma); + if (err) { +@@ -265,6 +274,11 @@ static int __bprm_mm_init(struct linux_b + + bprm->p = vma->vm_end - sizeof(void *); + ++#ifdef CONFIG_PAX_RANDUSTACK ++ if (randomize_va_space) ++ bprm->p ^= (pax_get_random_long() & ~15) & ~PAGE_MASK; ++#endif ++ + return 0; + + err: +@@ -528,6 +542,10 @@ static int shift_arg_pages(struct vm_are + if (vma != find_vma(mm, new_start)) + return -EFAULT; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ BUG_ON(pax_find_mirror_vma(vma)); ++#endif ++ + /* + * cover the whole range: [new_start, old_end) + */ +@@ -616,6 +634,14 @@ int setup_arg_pages(struct linux_binprm + bprm->exec -= stack_shift; + + down_write(&mm->mmap_sem); ++ ++ /* Move stack pages down in memory. 
*/ ++ if (stack_shift) { ++ ret = shift_arg_pages(vma, stack_shift); ++ if (ret) ++ goto out_unlock; ++ } ++ + vm_flags = VM_STACK_FLAGS; + + /* +@@ -629,21 +655,24 @@ int setup_arg_pages(struct linux_binprm + vm_flags &= ~VM_EXEC; + vm_flags |= mm->def_flags; + ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ vm_flags &= ~VM_EXEC; ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (mm->pax_flags & MF_PAX_MPROTECT) ++ vm_flags &= ~VM_MAYEXEC; ++#endif ++ ++ } ++#endif ++ + ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, + vm_flags); + if (ret) + goto out_unlock; + BUG_ON(prev != vma); + +- /* Move stack pages down in memory. */ +- if (stack_shift) { +- ret = shift_arg_pages(vma, stack_shift); +- if (ret) { +- up_write(&mm->mmap_sem); +- return ret; +- } +- } +- + #ifdef CONFIG_STACK_GROWSUP + stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; + #else +@@ -655,7 +684,7 @@ int setup_arg_pages(struct linux_binprm + + out_unlock: + up_write(&mm->mmap_sem); +- return 0; ++ return ret; + } + EXPORT_SYMBOL(setup_arg_pages); + +@@ -1279,6 +1308,11 @@ int do_execve(char * filename, + char __user *__user *envp, + struct pt_regs * regs) + { ++#ifdef CONFIG_GRKERNSEC ++ struct file *old_exec_file; ++ struct acl_subject_label *old_acl; ++ struct rlimit old_rlim[RLIM_NLIMITS]; ++#endif + struct linux_binprm *bprm; + struct file *file; + struct files_struct *displaced; +@@ -1298,6 +1332,20 @@ int do_execve(char * filename, + if (IS_ERR(file)) + goto out_kfree; + ++ gr_learn_resource(current, RLIMIT_NPROC, atomic_read(¤t->user->processes), 1); ++ ++ if (gr_handle_nproc()) { ++ allow_write_access(file); ++ fput(file); ++ return -EAGAIN; ++ } ++ ++ if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) { ++ allow_write_access(file); ++ fput(file); ++ return -EACCES; ++ } ++ + sched_exec(); + + bprm->file = file; +@@ -1337,9 +1385,39 @@ int do_execve(char * filename, + if (retval < 0) + 
goto out; + ++ if (!gr_tpe_allow(file)) { ++ retval = -EACCES; ++ goto out; ++ } ++ ++ if (gr_check_crash_exec(file)) { ++ retval = -EACCES; ++ goto out; ++ } ++ ++ gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); ++ ++ gr_handle_exec_args(bprm, argv); ++ ++#ifdef CONFIG_GRKERNSEC ++ old_acl = current->acl; ++ memcpy(old_rlim, current->signal->rlim, sizeof(old_rlim)); ++ old_exec_file = current->exec_file; ++ get_file(file); ++ current->exec_file = file; ++#endif ++ ++ retval = gr_set_proc_label(file->f_dentry, file->f_vfsmnt); ++ if (retval < 0) ++ goto out_fail; ++ + current->flags &= ~PF_KTHREAD; + retval = search_binary_handler(bprm,regs); + if (retval >= 0) { ++#ifdef CONFIG_GRKERNSEC ++ if (old_exec_file) ++ fput(old_exec_file); ++#endif + /* execve success */ + security_bprm_free(bprm); + acct_update_integrals(current); +@@ -1349,6 +1427,14 @@ int do_execve(char * filename, + return retval; + } + ++out_fail: ++#ifdef CONFIG_GRKERNSEC ++ current->acl = old_acl; ++ memcpy(current->signal->rlim, old_rlim, sizeof(old_rlim)); ++ fput(current->exec_file); ++ current->exec_file = old_exec_file; ++#endif ++ + out: + if (bprm->security) + security_bprm_free(bprm); +@@ -1511,6 +1597,145 @@ out: + return ispipe; + } + ++int pax_check_flags(unsigned long *flags) ++{ ++ int retval = 0; ++ ++#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_SEGMEXEC) ++ if (*flags & MF_PAX_SEGMEXEC) ++ { ++ *flags &= ~MF_PAX_SEGMEXEC; ++ retval = -EINVAL; ++ } ++#endif ++ ++ if ((*flags & MF_PAX_PAGEEXEC) ++ ++#ifdef CONFIG_PAX_PAGEEXEC ++ && (*flags & MF_PAX_SEGMEXEC) ++#endif ++ ++ ) ++ { ++ *flags &= ~MF_PAX_PAGEEXEC; ++ retval = -EINVAL; ++ } ++ ++ if ((*flags & MF_PAX_MPROTECT) ++ ++#ifdef CONFIG_PAX_MPROTECT ++ && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) ++#endif ++ ++ ) ++ { ++ *flags &= ~MF_PAX_MPROTECT; ++ retval = -EINVAL; ++ } ++ ++ if ((*flags & MF_PAX_EMUTRAMP) ++ ++#ifdef CONFIG_PAX_EMUTRAMP ++ && !(*flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) ++#endif ++ ++ ) ++ 
{ ++ *flags &= ~MF_PAX_EMUTRAMP; ++ retval = -EINVAL; ++ } ++ ++ return retval; ++} ++ ++EXPORT_SYMBOL(pax_check_flags); ++ ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++void pax_report_fault(struct pt_regs *regs, void *pc, void *sp) ++{ ++ struct task_struct *tsk = current; ++ struct mm_struct *mm = current->mm; ++ char *buffer_exec = (char *)__get_free_page(GFP_KERNEL); ++ char *buffer_fault = (char *)__get_free_page(GFP_KERNEL); ++ char *path_exec = NULL; ++ char *path_fault = NULL; ++ unsigned long start = 0UL, end = 0UL, offset = 0UL; ++ ++ if (buffer_exec && buffer_fault) { ++ struct vm_area_struct *vma, *vma_exec = NULL, *vma_fault = NULL; ++ ++ down_read(&mm->mmap_sem); ++ vma = mm->mmap; ++ while (vma && (!vma_exec || !vma_fault)) { ++ if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) ++ vma_exec = vma; ++ if (vma->vm_start <= (unsigned long)pc && (unsigned long)pc < vma->vm_end) ++ vma_fault = vma; ++ vma = vma->vm_next; ++ } ++ if (vma_exec) { ++ path_exec = d_path(&vma_exec->vm_file->f_path, buffer_exec, PAGE_SIZE); ++ if (IS_ERR(path_exec)) ++ path_exec = "<path too long>"; ++ else { ++ path_exec = mangle_path(buffer_exec, path_exec, "\t\n\\"); ++ if (path_exec) { ++ *path_exec = 0; ++ path_exec = buffer_exec; ++ } else ++ path_exec = "<path too long>"; ++ } ++ } ++ if (vma_fault) { ++ start = vma_fault->vm_start; ++ end = vma_fault->vm_end; ++ offset = vma_fault->vm_pgoff << PAGE_SHIFT; ++ if (vma_fault->vm_file) { ++ path_fault = d_path(&vma_fault->vm_file->f_path, buffer_fault, PAGE_SIZE); ++ if (IS_ERR(path_fault)) ++ path_fault = "<path too long>"; ++ else { ++ path_fault = mangle_path(buffer_fault, path_fault, "\t\n\\"); ++ if (path_fault) { ++ *path_fault = 0; ++ path_fault = buffer_fault; ++ } else ++ path_fault = "<path too long>"; ++ } ++ } else ++ path_fault = "<anonymous mapping>"; ++ } ++ up_read(&mm->mmap_sem); ++ } ++ if (tsk->signal->curr_ip) ++ printk(KERN_ERR "PAX: From %u.%u.%u.%u: execution attempt in: %s, 
%08lx-%08lx %08lx\n", NIPQUAD(tsk->signal->curr_ip), path_fault, start, end, offset); ++ else ++ printk(KERN_ERR "PAX: execution attempt in: %s, %08lx-%08lx %08lx\n", path_fault, start, end, offset); ++ printk(KERN_ERR "PAX: terminating task: %s(%s):%d, uid/euid: %u/%u, " ++ "PC: %p, SP: %p\n", path_exec, tsk->comm, task_pid_nr(tsk), ++ tsk->uid, tsk->euid, pc, sp); ++ free_page((unsigned long)buffer_exec); ++ free_page((unsigned long)buffer_fault); ++ pax_report_insns(pc, sp); ++ do_coredump(SIGKILL, SIGKILL, regs); ++} ++#endif ++ ++#ifdef CONFIG_PAX_REFCOUNT ++void pax_report_refcount_overflow(struct pt_regs *regs) ++{ ++ if (current->signal->curr_ip) ++ printk(KERN_ERR "PAX: From %u.%u.%u.%u: refcount overflow detected in: %s:%d, uid/euid: %u/%u\n", ++ NIPQUAD(current->signal->curr_ip), current->comm, task_pid_nr(current), current->uid, current->euid); ++ else ++ printk(KERN_ERR "PAX: refcount overflow detected in: %s:%d, uid/euid: %u/%u\n", ++ current->comm, task_pid_nr(current), current->uid, current->euid); ++ print_symbol(KERN_ERR "PAX: refcount overflow occured at: %s\n", instruction_pointer(regs)); ++ show_registers(regs); ++ force_sig_specific(SIGKILL, current); ++} ++#endif ++ + static int zap_process(struct task_struct *start) + { + struct task_struct *t; +@@ -1757,6 +1982,10 @@ int do_coredump(long signr, int exit_cod + */ + clear_thread_flag(TIF_SIGPENDING); + ++ if (signr == SIGKILL || signr == SIGILL) ++ gr_handle_brute_attach(current); ++ gr_learn_resource(current, RLIMIT_CORE, binfmt->min_coredump, 1); ++ + /* + * lock_kernel() because format_corename() is controlled by sysctl, which + * uses lock_kernel() +@@ -1777,6 +2006,8 @@ int do_coredump(long signr, int exit_cod + + if (ispipe) { + helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); ++ if (!helper_argv) ++ goto fail_unlock; + /* Terminate the string before the first option */ + delimit = strchr(corename, ' '); + if (delimit) +diff -urNp a/fs/ext2/balloc.c b/fs/ext2/balloc.c 
+--- a/fs/ext2/balloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext2/balloc.c 2009-05-24 18:10:25.203066010 -0700 +@@ -1192,7 +1192,7 @@ static int ext2_has_free_blocks(struct e + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); +- if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && ++ if (free_blocks < root_blocks + 1 && !capable_nolog(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + return 0; +diff -urNp a/fs/ext3/balloc.c b/fs/ext3/balloc.c +--- a/fs/ext3/balloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext3/balloc.c 2009-05-24 18:10:25.204053777 -0700 +@@ -1421,7 +1421,7 @@ static int ext3_has_free_blocks(struct e + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); +- if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && ++ if (free_blocks < root_blocks + 1 && !capable_nolog(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + return 0; +diff -urNp a/fs/ext3/namei.c b/fs/ext3/namei.c +--- a/fs/ext3/namei.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext3/namei.c 2009-05-24 18:10:25.205254336 -0700 +@@ -1156,9 +1156,9 @@ static struct ext3_dir_entry_2 *do_split + u32 hash2; + struct dx_map_entry *map; + char *data1 = (*bh)->b_data, *data2; +- unsigned split, move, size, i; ++ unsigned split, move, size; + struct ext3_dir_entry_2 *de = NULL, *de2; +- int err = 0; ++ int i, err = 0; + + bh2 = ext3_append (handle, dir, &newblock, &err); + if (!(bh2)) { +diff -urNp a/fs/ext3/xattr.c b/fs/ext3/xattr.c +--- a/fs/ext3/xattr.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext3/xattr.c 2009-05-24 18:10:25.206086856 -0700 +@@ -89,8 +89,8 @@ + printk("\n"); \ + } while (0) + #else +-# define ea_idebug(f...) +-# define ea_bdebug(f...) 
++# define ea_idebug(f...) do {} while (0) ++# define ea_bdebug(f...) do {} while (0) + #endif + + static void ext3_xattr_cache_insert(struct buffer_head *); +diff -urNp a/fs/ext4/balloc.c b/fs/ext4/balloc.c +--- a/fs/ext4/balloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext4/balloc.c 2009-05-24 18:10:25.206998083 -0700 +@@ -576,7 +576,7 @@ int ext4_has_free_blocks(struct ext4_sb_ + /* Hm, nope. Are (enough) root reserved blocks available? */ + if (sbi->s_resuid == current->fsuid || + ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || +- capable(CAP_SYS_RESOURCE)) { ++ capable_nolog(CAP_SYS_RESOURCE)) { + if (free_blocks >= (nblocks + dirty_blocks)) + return 1; + } +diff -urNp a/fs/ext4/namei.c b/fs/ext4/namei.c +--- a/fs/ext4/namei.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ext4/namei.c 2009-05-24 18:10:25.207965178 -0700 +@@ -1171,9 +1171,9 @@ static struct ext4_dir_entry_2 *do_split + u32 hash2; + struct dx_map_entry *map; + char *data1 = (*bh)->b_data, *data2; +- unsigned split, move, size, i; ++ unsigned split, move, size; + struct ext4_dir_entry_2 *de = NULL, *de2; +- int err = 0; ++ int i, err = 0; + + bh2 = ext4_append (handle, dir, &newblock, &err); + if (!(bh2)) { +diff -urNp a/fs/fcntl.c b/fs/fcntl.c +--- a/fs/fcntl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/fcntl.c 2009-05-24 18:10:25.209335719 -0700 +@@ -266,6 +266,7 @@ static long do_fcntl(int fd, unsigned in + switch (cmd) { + case F_DUPFD: + case F_DUPFD_CLOEXEC: ++ gr_learn_resource(current, RLIMIT_NOFILE, arg, 0); + if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + break; + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? 
O_CLOEXEC : 0); +@@ -411,7 +412,8 @@ static inline int sigio_perm(struct task + return (((fown->euid == 0) || + (fown->euid == p->suid) || (fown->euid == p->uid) || + (fown->uid == p->suid) || (fown->uid == p->uid)) && +- !security_file_send_sigiotask(p, fown, sig)); ++ !security_file_send_sigiotask(p, fown, sig) && ++ !gr_check_protected_task(p) && !gr_pid_is_chrooted(p)); + } + + static void send_sigio_to_task(struct task_struct *p, +diff -urNp a/fs/file.c b/fs/file.c +--- a/fs/file.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/file.c 2009-05-24 18:10:25.210329980 -0700 +@@ -13,6 +13,7 @@ + #include <linux/slab.h> + #include <linux/vmalloc.h> + #include <linux/file.h> ++#include <linux/security.h> + #include <linux/fdtable.h> + #include <linux/bitops.h> + #include <linux/interrupt.h> +@@ -256,6 +257,8 @@ int expand_files(struct files_struct *fi + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ ++ ++ gr_learn_resource(current, RLIMIT_NOFILE, nr, 0); + if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + return -EMFILE; + +diff -urNp a/fs/fuse/control.c b/fs/fuse/control.c +--- a/fs/fuse/control.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/fuse/control.c 2009-05-24 18:10:25.210984349 -0700 +@@ -159,7 +159,7 @@ void fuse_ctl_remove_conn(struct fuse_co + + static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) + { +- struct tree_descr empty_descr = {""}; ++ struct tree_descr empty_descr = {"", NULL, 0}; + struct fuse_conn *fc; + int err; + +diff -urNp a/fs/fuse/dir.c b/fs/fuse/dir.c +--- a/fs/fuse/dir.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/fuse/dir.c 2009-05-24 18:10:25.210984349 -0700 +@@ -1072,7 +1072,7 @@ static char *read_link(struct dentry *de + return link; + } + +-static void free_link(char *link) ++static void free_link(const char *link) + { + if (!IS_ERR(link)) + free_page((unsigned long) link); +diff -urNp a/fs/hfs/inode.c 
b/fs/hfs/inode.c +--- a/fs/hfs/inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/hfs/inode.c 2009-05-24 18:10:25.212337081 -0700 +@@ -419,7 +419,7 @@ int hfs_write_inode(struct inode *inode, + + if (S_ISDIR(main_inode->i_mode)) { + if (fd.entrylength < sizeof(struct hfs_cat_dir)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_dir)); + if (rec.type != HFS_CDR_DIR || +@@ -440,7 +440,7 @@ int hfs_write_inode(struct inode *inode, + sizeof(struct hfs_cat_file)); + } else { + if (fd.entrylength < sizeof(struct hfs_cat_file)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + if (rec.type != HFS_CDR_FIL || +diff -urNp a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c +--- a/fs/hfsplus/inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/hfsplus/inode.c 2009-05-24 18:10:25.213098578 -0700 +@@ -406,7 +406,7 @@ int hfsplus_cat_read_inode(struct inode + struct hfsplus_cat_folder *folder = &entry.folder; + + if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, + sizeof(struct hfsplus_cat_folder)); + hfsplus_get_perms(inode, &folder->permissions, 1); +@@ -423,7 +423,7 @@ int hfsplus_cat_read_inode(struct inode + struct hfsplus_cat_file *file = &entry.file; + + if (fd->entrylength < sizeof(struct hfsplus_cat_file)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, + sizeof(struct hfsplus_cat_file)); + +@@ -479,7 +479,7 @@ int hfsplus_cat_write_inode(struct inode + struct hfsplus_cat_folder *folder = &entry.folder; + + if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + sizeof(struct hfsplus_cat_folder)); + /* simple node checks? 
*/ +@@ -501,7 +501,7 @@ int hfsplus_cat_write_inode(struct inode + struct hfsplus_cat_file *file = &entry.file; + + if (fd.entrylength < sizeof(struct hfsplus_cat_file)) +- /* panic? */; ++ {/* panic? */} + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + sizeof(struct hfsplus_cat_file)); + hfsplus_inode_write_fork(inode, &file->data_fork); +diff -urNp a/fs/jffs2/debug.h b/fs/jffs2/debug.h +--- a/fs/jffs2/debug.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/jffs2/debug.h 2009-05-24 18:10:25.213098578 -0700 +@@ -52,13 +52,13 @@ + #if CONFIG_JFFS2_FS_DEBUG > 0 + #define D1(x) x + #else +-#define D1(x) ++#define D1(x) do {} while (0); + #endif + + #if CONFIG_JFFS2_FS_DEBUG > 1 + #define D2(x) x + #else +-#define D2(x) ++#define D2(x) do {} while (0); + #endif + + /* The prefixes of JFFS2 messages */ +@@ -114,73 +114,73 @@ + #ifdef JFFS2_DBG_READINODE_MESSAGES + #define dbg_readinode(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_readinode(fmt, ...) ++#define dbg_readinode(fmt, ...) do {} while (0) + #endif + #ifdef JFFS2_DBG_READINODE2_MESSAGES + #define dbg_readinode2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_readinode2(fmt, ...) ++#define dbg_readinode2(fmt, ...) do {} while (0) + #endif + + /* Fragtree build debugging messages */ + #ifdef JFFS2_DBG_FRAGTREE_MESSAGES + #define dbg_fragtree(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_fragtree(fmt, ...) ++#define dbg_fragtree(fmt, ...) do {} while (0) + #endif + #ifdef JFFS2_DBG_FRAGTREE2_MESSAGES + #define dbg_fragtree2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_fragtree2(fmt, ...) ++#define dbg_fragtree2(fmt, ...) do {} while (0) + #endif + + /* Directory entry list manilulation debugging messages */ + #ifdef JFFS2_DBG_DENTLIST_MESSAGES + #define dbg_dentlist(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_dentlist(fmt, ...) ++#define dbg_dentlist(fmt, ...) 
do {} while (0) + #endif + + /* Print the messages about manipulating node_refs */ + #ifdef JFFS2_DBG_NODEREF_MESSAGES + #define dbg_noderef(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_noderef(fmt, ...) ++#define dbg_noderef(fmt, ...) do {} while (0) + #endif + + /* Manipulations with the list of inodes (JFFS2 inocache) */ + #ifdef JFFS2_DBG_INOCACHE_MESSAGES + #define dbg_inocache(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_inocache(fmt, ...) ++#define dbg_inocache(fmt, ...) do {} while (0) + #endif + + /* Summary debugging messages */ + #ifdef JFFS2_DBG_SUMMARY_MESSAGES + #define dbg_summary(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_summary(fmt, ...) ++#define dbg_summary(fmt, ...) do {} while (0) + #endif + + /* File system build messages */ + #ifdef JFFS2_DBG_FSBUILD_MESSAGES + #define dbg_fsbuild(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_fsbuild(fmt, ...) ++#define dbg_fsbuild(fmt, ...) do {} while (0) + #endif + + /* Watch the object allocations */ + #ifdef JFFS2_DBG_MEMALLOC_MESSAGES + #define dbg_memalloc(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_memalloc(fmt, ...) ++#define dbg_memalloc(fmt, ...) do {} while (0) + #endif + + /* Watch the XATTR subsystem */ + #ifdef JFFS2_DBG_XATTR_MESSAGES + #define dbg_xattr(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) + #else +-#define dbg_xattr(fmt, ...) ++#define dbg_xattr(fmt, ...) 
do {} while (0) + #endif + + /* "Sanity" checks */ +diff -urNp a/fs/jffs2/erase.c b/fs/jffs2/erase.c +--- a/fs/jffs2/erase.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/jffs2/erase.c 2009-05-24 18:10:25.214336010 -0700 +@@ -431,7 +431,8 @@ static void jffs2_mark_erased_block(stru + struct jffs2_unknown_node marker = { + .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK), + .nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER), +- .totlen = cpu_to_je32(c->cleanmarker_size) ++ .totlen = cpu_to_je32(c->cleanmarker_size), ++ .hdr_crc = cpu_to_je32(0) + }; + + jffs2_prealloc_raw_node_refs(c, jeb, 1); +diff -urNp a/fs/jffs2/summary.h b/fs/jffs2/summary.h +--- a/fs/jffs2/summary.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/jffs2/summary.h 2009-05-24 18:10:25.215130400 -0700 +@@ -194,18 +194,18 @@ int jffs2_sum_scan_sumnode(struct jffs2_ + + #define jffs2_sum_active() (0) + #define jffs2_sum_init(a) (0) +-#define jffs2_sum_exit(a) +-#define jffs2_sum_disable_collecting(a) ++#define jffs2_sum_exit(a) do {} while (0) ++#define jffs2_sum_disable_collecting(a) do {} while (0) + #define jffs2_sum_is_disabled(a) (0) +-#define jffs2_sum_reset_collected(a) ++#define jffs2_sum_reset_collected(a) do {} while (0) + #define jffs2_sum_add_kvec(a,b,c,d) (0) +-#define jffs2_sum_move_collected(a,b) ++#define jffs2_sum_move_collected(a,b) do {} while (0) + #define jffs2_sum_write_sumnode(a) (0) +-#define jffs2_sum_add_padding_mem(a,b) +-#define jffs2_sum_add_inode_mem(a,b,c) +-#define jffs2_sum_add_dirent_mem(a,b,c) +-#define jffs2_sum_add_xattr_mem(a,b,c) +-#define jffs2_sum_add_xref_mem(a,b,c) ++#define jffs2_sum_add_padding_mem(a,b) do {} while (0) ++#define jffs2_sum_add_inode_mem(a,b,c) do {} while (0) ++#define jffs2_sum_add_dirent_mem(a,b,c) do {} while (0) ++#define jffs2_sum_add_xattr_mem(a,b,c) do {} while (0) ++#define jffs2_sum_add_xref_mem(a,b,c) do {} while (0) + #define jffs2_sum_scan_sumnode(a,b,c,d,e) (0) + + #endif /* CONFIG_JFFS2_SUMMARY */ +diff -urNp a/fs/jffs2/wbuf.c 
b/fs/jffs2/wbuf.c +--- a/fs/jffs2/wbuf.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/jffs2/wbuf.c 2009-05-24 18:10:25.215130400 -0700 +@@ -1012,7 +1012,8 @@ static const struct jffs2_unknown_node o + { + .magic = constant_cpu_to_je16(JFFS2_MAGIC_BITMASK), + .nodetype = constant_cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER), +- .totlen = constant_cpu_to_je32(8) ++ .totlen = constant_cpu_to_je32(8), ++ .hdr_crc = constant_cpu_to_je32(0) + }; + + /* +diff -urNp a/fs/locks.c b/fs/locks.c +--- a/fs/locks.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/locks.c 2009-05-24 18:10:25.216336336 -0700 +@@ -2006,16 +2006,16 @@ void locks_remove_flock(struct file *fil + return; + + if (filp->f_op && filp->f_op->flock) { +- struct file_lock fl = { ++ struct file_lock flock = { + .fl_pid = current->tgid, + .fl_file = filp, + .fl_flags = FL_FLOCK, + .fl_type = F_UNLCK, + .fl_end = OFFSET_MAX, + }; +- filp->f_op->flock(filp, F_SETLKW, &fl); +- if (fl.fl_ops && fl.fl_ops->fl_release_private) +- fl.fl_ops->fl_release_private(&fl); ++ filp->f_op->flock(filp, F_SETLKW, &flock); ++ if (flock.fl_ops && flock.fl_ops->fl_release_private) ++ flock.fl_ops->fl_release_private(&flock); + } + + lock_kernel(); +diff -urNp a/fs/namei.c b/fs/namei.c +--- a/fs/namei.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/namei.c 2009-05-24 18:10:25.219088313 -0700 +@@ -633,7 +633,7 @@ static __always_inline int __do_follow_l + cookie = dentry->d_inode->i_op->follow_link(dentry, nd); + error = PTR_ERR(cookie); + if (!IS_ERR(cookie)) { +- char *s = nd_get_link(nd); ++ const char *s = nd_get_link(nd); + error = 0; + if (s) + error = __vfs_follow_link(nd, s); +@@ -664,6 +664,13 @@ static inline int do_follow_link(struct + err = security_inode_follow_link(path->dentry, nd); + if (err) + goto loop; ++ ++ if (gr_handle_follow_link(path->dentry->d_parent->d_inode, ++ path->dentry->d_inode, path->dentry, nd->path.mnt)) { ++ err = -EACCES; ++ goto loop; ++ } ++ + current->link_count++; + current->total_link_count++; + 
nd->depth++; +@@ -1012,11 +1019,18 @@ return_reval: + break; + } + return_base: ++ if (!gr_acl_handle_hidden_file(nd->path.dentry, nd->path.mnt)) { ++ path_put(&nd->path); ++ return -ENOENT; ++ } + return 0; + out_dput: + path_put_conditional(&next, nd); + break; + } ++ if (!gr_acl_handle_hidden_file(nd->path.dentry, nd->path.mnt)) ++ err = -ENOENT; ++ + path_put(&nd->path); + return_err: + return err; +@@ -1582,9 +1596,17 @@ static int __open_namei_create(struct na + int error; + struct dentry *dir = nd->path.dentry; + ++ if (!gr_acl_handle_creat(path->dentry, nd->path.dentry, nd->path.mnt, flag, mode)) { ++ error = -EACCES; ++ goto out_unlock_dput; ++ } ++ + if (!IS_POSIXACL(dir->d_inode)) + mode &= ~current->fs->umask; + error = vfs_create(dir->d_inode, path->dentry, mode, nd); ++ if (!error) ++ gr_handle_create(path->dentry, nd->path.mnt); ++out_unlock_dput: + mutex_unlock(&dir->d_inode->i_mutex); + dput(nd->path.dentry); + nd->path.dentry = path->dentry; +@@ -1665,6 +1687,17 @@ struct file *do_filp_open(int dfd, const + &nd, flag); + if (error) + return ERR_PTR(error); ++ ++ if (gr_handle_rawio(nd.path.dentry->d_inode)) { ++ error = -EPERM; ++ goto exit; ++ } ++ ++ if (!gr_acl_handle_open(nd.path.dentry, nd.path.mnt, flag)) { ++ error = -EACCES; ++ goto exit; ++ } ++ + goto ok; + } + +@@ -1737,6 +1770,20 @@ do_last: + /* + * It already exists. 
+ */ ++ ++ if (gr_handle_rawio(path.dentry->d_inode)) { ++ error = -EPERM; ++ goto exit_mutex_unlock; ++ } ++ if (!gr_acl_handle_open(path.dentry, nd.path.mnt, flag)) { ++ error = -EACCES; ++ goto exit_mutex_unlock; ++ } ++ if (gr_handle_fifo(path.dentry, nd.path.mnt, dir, flag, acc_mode)) { ++ error = -EACCES; ++ goto exit_mutex_unlock; ++ } ++ + mutex_unlock(&dir->d_inode->i_mutex); + audit_inode(pathname, path.dentry); + +@@ -1822,6 +1869,13 @@ do_link: + error = security_inode_follow_link(path.dentry, &nd); + if (error) + goto exit_dput; ++ ++ if (gr_handle_follow_link(path.dentry->d_parent->d_inode, path.dentry->d_inode, ++ path.dentry, nd.path.mnt)) { ++ error = -EACCES; ++ goto exit_dput; ++ } ++ + error = __do_follow_link(&path, &nd); + if (error) { + /* Does someone understand code flow here? Or it is only +@@ -1994,6 +2048,17 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const + error = may_mknod(mode); + if (error) + goto out_dput; ++ ++ if (gr_handle_chroot_mknod(dentry, nd.path.mnt, mode)) { ++ error = -EPERM; ++ goto out_dput; ++ } ++ ++ if (!gr_acl_handle_mknod(dentry, nd.path.dentry, nd.path.mnt, mode)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; +@@ -2010,6 +2075,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const + break; + } + mnt_drop_write(nd.path.mnt); ++ ++ if (!error) ++ gr_handle_create(dentry, nd.path.mnt); + out_dput: + dput(dentry); + out_unlock: +@@ -2063,6 +2131,11 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const + if (IS_ERR(dentry)) + goto out_unlock; + ++ if (!gr_acl_handle_mkdir(dentry, nd.path.dentry, nd.path.mnt)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ + if (!IS_POSIXACL(nd.path.dentry->d_inode)) + mode &= ~current->fs->umask; + error = mnt_want_write(nd.path.mnt); +@@ -2070,6 +2143,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const + goto out_dput; + error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); + mnt_drop_write(nd.path.mnt); ++ ++ if (!error) ++ 
gr_handle_create(dentry, nd.path.mnt); ++ + out_dput: + dput(dentry); + out_unlock: +@@ -2151,6 +2228,8 @@ static long do_rmdir(int dfd, const char + char * name; + struct dentry *dentry; + struct nameidata nd; ++ ino_t saved_ino = 0; ++ dev_t saved_dev = 0; + + error = user_path_parent(dfd, pathname, &nd, &name); + if (error) +@@ -2175,11 +2254,26 @@ static long do_rmdir(int dfd, const char + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto exit2; ++ ++ if (dentry->d_inode != NULL) { ++ if (dentry->d_inode->i_nlink <= 1) { ++ saved_ino = dentry->d_inode->i_ino; ++ saved_dev = dentry->d_inode->i_sb->s_dev; ++ } ++ ++ if (!gr_acl_handle_rmdir(dentry, nd.path.mnt)) { ++ error = -EACCES; ++ goto exit3; ++ } ++ } ++ + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit3; + error = vfs_rmdir(nd.path.dentry->d_inode, dentry); + mnt_drop_write(nd.path.mnt); ++ if (!error && (saved_dev || saved_ino)) ++ gr_handle_delete(saved_ino, saved_dev); + exit3: + dput(dentry); + exit2: +@@ -2239,6 +2333,8 @@ static long do_unlinkat(int dfd, const c + struct dentry *dentry; + struct nameidata nd; + struct inode *inode = NULL; ++ ino_t saved_ino = 0; ++ dev_t saved_dev = 0; + + error = user_path_parent(dfd, pathname, &nd, &name); + if (error) +@@ -2258,12 +2354,25 @@ static long do_unlinkat(int dfd, const c + if (nd.last.name[nd.last.len]) + goto slashes; + inode = dentry->d_inode; +- if (inode) ++ if (inode) { ++ if (inode->i_nlink <= 1) { ++ saved_ino = inode->i_ino; ++ saved_dev = inode->i_sb->s_dev; ++ } ++ + atomic_inc(&inode->i_count); ++ ++ if (!gr_acl_handle_unlink(dentry, nd.path.mnt)) { ++ error = -EACCES; ++ goto exit2; ++ } ++ } + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit2; + error = vfs_unlink(nd.path.dentry->d_inode, dentry); ++ if (!error && (saved_ino || saved_dev)) ++ gr_handle_delete(saved_ino, saved_dev); + mnt_drop_write(nd.path.mnt); + exit2: + dput(dentry); +@@ -2341,10 +2450,17 @@ SYSCALL_DEFINE3(symlinkat, const char __ + if 
(IS_ERR(dentry)) + goto out_unlock; + ++ if (!gr_acl_handle_symlink(dentry, nd.path.dentry, nd.path.mnt, from)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; + error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); ++ if (!error) ++ gr_handle_create(dentry, nd.path.mnt); + mnt_drop_write(nd.path.mnt); + out_dput: + dput(dentry); +@@ -2437,10 +2553,26 @@ SYSCALL_DEFINE5(linkat, int, olddfd, con + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto out_unlock; ++ ++ if (gr_handle_hardlink(old_path.dentry, old_path.mnt, ++ old_path.dentry->d_inode, ++ old_path.dentry->d_inode->i_mode, to)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ ++ if (!gr_acl_handle_link(new_dentry, nd.path.dentry, nd.path.mnt, ++ old_path.dentry, old_path.mnt, to)) { ++ error = -EACCES; ++ goto out_dput; ++ } ++ + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; + error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); ++ if (!error) ++ gr_handle_create(new_dentry, nd.path.mnt); + mnt_drop_write(nd.path.mnt); + out_dput: + dput(new_dentry); +@@ -2673,11 +2805,21 @@ SYSCALL_DEFINE4(renameat, int, olddfd, c + if (new_dentry == trap) + goto exit5; + ++ error = gr_acl_handle_rename(new_dentry, new_dir, newnd.path.mnt, ++ old_dentry, old_dir->d_inode, oldnd.path.mnt, ++ to); ++ if (error) ++ goto exit5; ++ + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit5; + error = vfs_rename(old_dir->d_inode, old_dentry, + new_dir->d_inode, new_dentry); ++ if (!error) ++ gr_handle_rename(old_dir->d_inode, new_dir->d_inode, old_dentry, ++ new_dentry, oldnd.path.mnt, new_dentry->d_inode ? 
1 : 0); ++ + mnt_drop_write(oldnd.path.mnt); + exit5: + dput(new_dentry); +diff -urNp a/fs/namespace.c b/fs/namespace.c +--- a/fs/namespace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/namespace.c 2009-05-24 18:10:25.220336989 -0700 +@@ -1094,6 +1094,8 @@ static int do_umount(struct vfsmount *mn + lock_kernel(); + retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); + unlock_kernel(); ++ ++ gr_log_remount(mnt->mnt_devname, retval); + } + up_write(&sb->s_umount); + return retval; +@@ -1117,6 +1119,9 @@ static int do_umount(struct vfsmount *mn + security_sb_umount_busy(mnt); + up_write(&namespace_sem); + release_mounts(&umount_list); ++ ++ gr_log_unmount(mnt->mnt_devname, retval); ++ + return retval; + } + +@@ -1946,6 +1951,11 @@ long do_mount(char *dev_name, char *dir_ + if (retval) + goto dput_out; + ++ if (gr_handle_chroot_mount(path.dentry, path.mnt, dev_name)) { ++ retval = -EPERM; ++ goto dput_out; ++ } ++ + if (flags & MS_REMOUNT) + retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, + data_page); +@@ -1960,6 +1970,9 @@ long do_mount(char *dev_name, char *dir_ + dev_name, data_page); + dput_out: + path_put(&path); ++ ++ gr_log_mount(dev_name, dir_name, retval); ++ + return retval; + } + +@@ -2071,6 +2084,9 @@ SYSCALL_DEFINE5(mount, char __user *, de + if (retval < 0) + goto out3; + ++ if (gr_handle_chroot_pivot()) ++ return -EPERM; ++ + lock_kernel(); + retval = do_mount((char *)dev_page, dir_page, (char *)type_page, + flags, (void *)data_page); +diff -urNp a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +--- a/fs/nfs/nfs4proc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/nfs/nfs4proc.c 2009-05-24 18:10:25.222087580 -0700 +@@ -653,7 +653,7 @@ static int _nfs4_do_open_reclaim(struct + static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs_server *server = NFS_SERVER(state->inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = 
_nfs4_do_open_reclaim(ctx, state); +@@ -695,7 +695,7 @@ static int _nfs4_open_delegation_recall( + + int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + struct nfs_server *server = NFS_SERVER(state->inode); + int err; + do { +@@ -988,7 +988,7 @@ static int _nfs4_open_expired(struct nfs + static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs_server *server = NFS_SERVER(state->inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -1090,7 +1090,7 @@ out_err: + + static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + struct nfs4_state *res; + int status; + +@@ -1181,7 +1181,7 @@ static int nfs4_do_setattr(struct inode + struct nfs4_state *state) + { + struct nfs_server *server = NFS_SERVER(inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(server, +@@ -1494,7 +1494,7 @@ static int _nfs4_server_capabilities(str + + int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(server, +@@ -1527,7 +1527,7 @@ static int _nfs4_lookup_root(struct nfs_ + static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(server, +@@ -1616,7 +1616,7 @@ static int _nfs4_proc_getattr(struct nfs + + static int 
nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(server, +@@ -1704,7 +1704,7 @@ static int nfs4_proc_lookupfh(struct nfs + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); +@@ -1733,7 +1733,7 @@ static int _nfs4_proc_lookup(struct inod + + static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), +@@ -1797,7 +1797,7 @@ static int _nfs4_proc_access(struct inod + + static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), +@@ -1852,7 +1852,7 @@ static int _nfs4_proc_readlink(struct in + static int nfs4_proc_readlink(struct inode *inode, struct page *page, + unsigned int pgbase, unsigned int pglen) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), +@@ -1949,7 +1949,7 @@ static int _nfs4_proc_remove(struct inod + + static int nfs4_proc_remove(struct inode *dir, struct qstr *name) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), +@@ -2021,7 +2021,7 @@ static int _nfs4_proc_rename(struct inod + static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct 
qstr *new_name) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(old_dir), +@@ -2068,7 +2068,7 @@ static int _nfs4_proc_link(struct inode + + static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), +@@ -2159,7 +2159,7 @@ out: + static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct page *page, unsigned int len, struct iattr *sattr) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), +@@ -2190,7 +2190,7 @@ out: + static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), +@@ -2239,7 +2239,7 @@ static int _nfs4_proc_readdir(struct den + static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), +@@ -2287,7 +2287,7 @@ out: + static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), +@@ -2316,7 +2316,7 @@ static int _nfs4_proc_statfs(struct nfs_ + + static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = 
{0, 0}; + int err; + do { + err = nfs4_handle_exception(server, +@@ -2344,7 +2344,7 @@ static int _nfs4_do_fsinfo(struct nfs_se + + static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -2387,7 +2387,7 @@ static int _nfs4_proc_pathconf(struct nf + static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -2674,7 +2674,7 @@ out_free: + + static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + ssize_t ret; + do { + ret = __nfs4_get_acl_uncached(inode, buf, buflen); +@@ -2731,7 +2731,7 @@ static int __nfs4_proc_set_acl(struct in + + static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), +@@ -3022,7 +3022,7 @@ out: + int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) + { + struct nfs_server *server = NFS_SERVER(inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + do { + err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); +@@ -3097,7 +3097,7 @@ out: + + static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -3447,7 +3447,7 @@ static int _nfs4_do_setlk(struct nfs4_st + static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) + { + struct nfs_server 
*server = NFS_SERVER(state->inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -3465,7 +3465,7 @@ static int nfs4_lock_reclaim(struct nfs4 + static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) + { + struct nfs_server *server = NFS_SERVER(state->inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + err = nfs4_set_lock_state(state, request); +@@ -3526,7 +3526,7 @@ out: + + static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) + { +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + do { +@@ -3576,7 +3576,7 @@ nfs4_proc_lock(struct file *filp, int cm + int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) + { + struct nfs_server *server = NFS_SERVER(state->inode); +- struct nfs4_exception exception = { }; ++ struct nfs4_exception exception = {0, 0}; + int err; + + err = nfs4_set_lock_state(state, fl); +diff -urNp a/fs/nfsd/export.c b/fs/nfsd/export.c +--- a/fs/nfsd/export.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/nfsd/export.c 2009-05-24 18:10:25.223147418 -0700 +@@ -472,7 +472,7 @@ static int secinfo_parse(char **mesg, ch + * probably discover the problem when someone fails to + * authenticate. 
+ */ +- if (f->pseudoflavor < 0) ++ if ((s32)f->pseudoflavor < 0) + return -EINVAL; + err = get_int(mesg, &f->flags); + if (err) +diff -urNp a/fs/nls/nls_base.c b/fs/nls/nls_base.c +--- a/fs/nls/nls_base.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/nls/nls_base.c 2009-05-24 18:10:25.224087836 -0700 +@@ -40,7 +40,7 @@ static const struct utf8_table utf8_tabl + {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */}, + {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */}, + {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */}, +- {0, /* end of table */} ++ {0, 0, 0, 0, 0, /* end of table */} + }; + + int +diff -urNp a/fs/ntfs/file.c b/fs/ntfs/file.c +--- a/fs/ntfs/file.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ntfs/file.c 2009-05-24 18:10:25.226337478 -0700 +@@ -2291,6 +2291,6 @@ const struct inode_operations ntfs_file_ + #endif /* NTFS_RW */ + }; + +-const struct file_operations ntfs_empty_file_ops = {}; ++const struct file_operations ntfs_empty_file_ops; + +-const struct inode_operations ntfs_empty_inode_ops = {}; ++const struct inode_operations ntfs_empty_inode_ops; +diff -urNp a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c +--- a/fs/ocfs2/localalloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ocfs2/localalloc.c 2009-05-24 18:10:25.227335651 -0700 +@@ -1270,7 +1270,7 @@ static int ocfs2_local_alloc_slide_windo + goto bail; + } + +- atomic_inc(&osb->alloc_stats.moves); ++ atomic_inc_unchecked(&osb->alloc_stats.moves); + + status = 0; + bail: +diff -urNp a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c +--- a/fs/ocfs2/suballoc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ocfs2/suballoc.c 2009-05-24 18:10:25.228335709 -0700 +@@ -480,7 +480,7 @@ static int ocfs2_reserve_suballoc_bits(s + mlog_errno(status); + goto bail; + } +- atomic_inc(&osb->alloc_stats.bg_extends); ++ atomic_inc_unchecked(&osb->alloc_stats.bg_extends); + + /* You should never ask for this much metadata */ + BUG_ON(bits_wanted > +@@ -638,7 +638,7 @@ int 
ocfs2_reserve_new_inode(struct ocfs2 + + inode_steal: + status = ocfs2_steal_inode_from_other_nodes(osb, *ac); +- atomic_inc(&osb->s_num_inodes_stolen); ++ atomic_inc_unchecked(&osb->s_num_inodes_stolen); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); +@@ -1514,7 +1514,7 @@ int ocfs2_claim_metadata(struct ocfs2_su + mlog_errno(status); + goto bail; + } +- atomic_inc(&osb->alloc_stats.bg_allocs); ++ atomic_inc_unchecked(&osb->alloc_stats.bg_allocs); + + *blkno_start = bg_blkno + (u64) *suballoc_bit_start; + ac->ac_bits_given += (*num_bits); +@@ -1553,7 +1553,7 @@ int ocfs2_claim_new_inode(struct ocfs2_s + mlog_errno(status); + goto bail; + } +- atomic_inc(&osb->alloc_stats.bg_allocs); ++ atomic_inc_unchecked(&osb->alloc_stats.bg_allocs); + + BUG_ON(num_bits != 1); + +@@ -1654,7 +1654,7 @@ int __ocfs2_claim_clusters(struct ocfs2_ + cluster_start, + num_clusters); + if (!status) +- atomic_inc(&osb->alloc_stats.local_data); ++ atomic_inc_unchecked(&osb->alloc_stats.local_data); + } else { + if (min_clusters > (osb->bitmap_cpg - 1)) { + /* The only paths asking for contiguousness +@@ -1682,7 +1682,7 @@ int __ocfs2_claim_clusters(struct ocfs2_ + ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, + bg_blkno, + bg_bit_off); +- atomic_inc(&osb->alloc_stats.bitmap_data); ++ atomic_inc_unchecked(&osb->alloc_stats.bitmap_data); + } + } + if (status < 0) { +diff -urNp a/fs/open.c b/fs/open.c +--- a/fs/open.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/open.c 2009-05-24 18:10:25.229003905 -0700 +@@ -205,6 +205,9 @@ int do_truncate(struct dentry *dentry, l + if (length < 0) + return -EINVAL; + ++ if (filp && !gr_acl_handle_truncate(dentry, filp->f_path.mnt)) ++ return -EACCES; ++ + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | time_attrs; + if (filp) { +@@ -510,6 +513,9 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con + if (__mnt_is_readonly(path.mnt)) + res = -EROFS; + ++ if (!res && !gr_acl_handle_access(path.dentry, path.mnt, mode)) ++ res = 
-EACCES; ++ + out_path_release: + path_put(&path); + out: +@@ -540,6 +546,8 @@ SYSCALL_DEFINE1(chdir, const char __user + if (error) + goto dput_and_out; + ++ gr_log_chdir(path.dentry, path.mnt); ++ + set_fs_pwd(current->fs, &path); + + dput_and_out: +@@ -566,6 +574,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd + goto out_putf; + + error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); ++ ++ if (!error && !gr_chroot_fchdir(file->f_path.dentry, file->f_path.mnt)) ++ error = -EPERM; ++ ++ if (!error) ++ gr_log_chdir(file->f_path.dentry, file->f_path.mnt); ++ + if (!error) + set_fs_pwd(current->fs, &file->f_path); + out_putf: +@@ -591,7 +606,14 @@ SYSCALL_DEFINE1(chroot, const char __use + if (!capable(CAP_SYS_CHROOT)) + goto dput_and_out; + ++ if (gr_handle_chroot_chroot(path.dentry, path.mnt)) ++ goto dput_and_out; ++ + set_fs_root(current->fs, &path); ++ ++ gr_handle_chroot_caps(current); ++ gr_handle_chroot_chdir(&path); ++ + error = 0; + dput_and_out: + path_put(&path); +@@ -619,13 +641,28 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd + err = mnt_want_write(file->f_path.mnt); + if (err) + goto out_putf; ++ ++ if (!gr_acl_handle_fchmod(dentry, file->f_path.mnt, mode)) { ++ err = -EACCES; ++ goto out_drop_write; ++ } ++ + mutex_lock(&inode->i_mutex); + if (mode == (mode_t) -1) + mode = inode->i_mode; ++ ++ if (gr_handle_chroot_chmod(dentry, file->f_path.mnt, mode)) { ++ err = -EPERM; ++ mutex_unlock(&inode->i_mutex); ++ goto out_drop_write; ++ } ++ + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + err = notify_change(dentry, &newattrs); + mutex_unlock(&inode->i_mutex); ++ ++out_drop_write: + mnt_drop_write(file->f_path.mnt); + out_putf: + fput(file); +@@ -648,13 +685,28 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto dput_and_out; ++ ++ if (!gr_acl_handle_chmod(path.dentry, path.mnt, mode)) { ++ error = -EACCES; ++ goto out_drop_write; ++ } ++ + 
mutex_lock(&inode->i_mutex); + if (mode == (mode_t) -1) + mode = inode->i_mode; ++ ++ if (gr_handle_chroot_chmod(path.dentry, path.mnt, mode)) { ++ error = -EACCES; ++ mutex_unlock(&inode->i_mutex); ++ goto out_drop_write; ++ } ++ + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(path.dentry, &newattrs); + mutex_unlock(&inode->i_mutex); ++ ++out_drop_write: + mnt_drop_write(path.mnt); + dput_and_out: + path_put(&path); +@@ -667,12 +719,15 @@ SYSCALL_DEFINE2(chmod, const char __user + return sys_fchmodat(AT_FDCWD, filename, mode); + } + +-static int chown_common(struct dentry * dentry, uid_t user, gid_t group) ++static int chown_common(struct dentry * dentry, uid_t user, gid_t group, struct vfsmount *mnt) + { + struct inode *inode = dentry->d_inode; + int error; + struct iattr newattrs; + ++ if (!gr_acl_handle_chown(dentry, mnt)) ++ return -EACCES; ++ + newattrs.ia_valid = ATTR_CTIME; + if (user != (uid_t) -1) { + newattrs.ia_valid |= ATTR_UID; +@@ -703,7 +758,7 @@ SYSCALL_DEFINE3(chown, const char __user + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, user, group, path.mnt); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -728,7 +783,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, user, group, path.mnt); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -747,7 +802,7 @@ SYSCALL_DEFINE3(lchown, const char __use + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, user, group, path.mnt); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -770,7 +825,7 @@ 
SYSCALL_DEFINE3(fchown, unsigned int, fd + goto out_fput; + dentry = file->f_path.dentry; + audit_inode(NULL, dentry); +- error = chown_common(dentry, user, group); ++ error = chown_common(dentry, user, group, file->f_path.mnt); + mnt_drop_write(file->f_path.mnt); + out_fput: + fput(file); +diff -urNp a/fs/pipe.c b/fs/pipe.c +--- a/fs/pipe.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/pipe.c 2009-05-24 18:10:25.229995792 -0700 +@@ -848,7 +848,7 @@ void free_pipe_info(struct inode *inode) + inode->i_pipe = NULL; + } + +-static struct vfsmount *pipe_mnt __read_mostly; ++struct vfsmount *pipe_mnt __read_mostly; + static int pipefs_delete_dentry(struct dentry *dentry) + { + /* +diff -urNp a/fs/proc/Kconfig b/fs/proc/Kconfig +--- a/fs/proc/Kconfig 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/Kconfig 2009-05-24 18:10:25.229995792 -0700 +@@ -30,12 +30,12 @@ config PROC_FS + + config PROC_KCORE + bool "/proc/kcore support" if !ARM +- depends on PROC_FS && MMU ++ depends on PROC_FS && MMU && !GRKERNSEC_PROC_ADD + + config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" +- depends on PROC_FS && CRASH_DUMP +- default y ++ depends on PROC_FS && CRASH_DUMP && !GRKERNSEC ++ default n + help + Exports the dump image of crashed kernel in ELF format. + +@@ -59,8 +59,8 @@ config PROC_SYSCTL + limited in memory. 
+ + config PROC_PAGE_MONITOR +- default y +- depends on PROC_FS && MMU ++ default n ++ depends on PROC_FS && MMU && !GRKERNSEC + bool "Enable /proc page monitoring" if EMBEDDED + help + Various /proc files exist to monitor process memory utilization: +diff -urNp a/fs/proc/array.c b/fs/proc/array.c +--- a/fs/proc/array.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/array.c 2009-05-24 18:10:25.231182034 -0700 +@@ -308,6 +308,21 @@ static inline void task_context_switch_c + p->nivcsw); + } + ++#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++static inline void task_pax(struct seq_file *m, struct task_struct *p) ++{ ++ if (p->mm) ++ seq_printf(m, "PaX:\t%c%c%c%c%c\n", ++ p->mm->pax_flags & MF_PAX_PAGEEXEC ? 'P' : 'p', ++ p->mm->pax_flags & MF_PAX_EMUTRAMP ? 'E' : 'e', ++ p->mm->pax_flags & MF_PAX_MPROTECT ? 'M' : 'm', ++ p->mm->pax_flags & MF_PAX_RANDMMAP ? 'R' : 'r', ++ p->mm->pax_flags & MF_PAX_SEGMEXEC ? 'S' : 's'); ++ else ++ seq_printf(m, "PaX:\t-----\n"); ++} ++#endif ++ + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { +@@ -327,9 +342,20 @@ int proc_pid_status(struct seq_file *m, + task_show_regs(m, task); + #endif + task_context_switch_counts(m, task); ++ ++#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++ task_pax(m, task); ++#endif ++ + return 0; + } + ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ ++ (_mm->pax_flags & MF_PAX_RANDMMAP || \ ++ _mm->pax_flags & MF_PAX_SEGMEXEC)) ++#endif ++ + static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task, int whole) + { +@@ -422,6 +448,19 @@ static int do_task_stat(struct seq_file + gtime = task_gtime(task); + } + ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ if (PAX_RAND_FLAGS(mm)) { ++ eip = 0; ++ esp = 0; ++ wchan = 0; ++ } ++#endif ++#ifdef CONFIG_GRKERNSEC_HIDESYM ++ wchan = 0; ++ eip =0; ++ esp =0; 
++#endif ++ + /* scale priority and nice values from timeslices to -20..20 */ + /* to make it look like a "normal" Unix priority/nice value */ + priority = task_prio(task); +@@ -462,9 +501,15 @@ static int do_task_stat(struct seq_file + vsize, + mm ? get_mm_rss(mm) : 0, + rsslim, ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->start_code : 0), ++ PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->end_code : 0), ++ PAX_RAND_FLAGS(mm) ? 0 : (mm ? mm->start_stack : 0), ++#else + mm ? mm->start_code : 0, + mm ? mm->end_code : 0, + mm ? mm->start_stack : 0, ++#endif + esp, + eip, + /* The signal information here is obsolete. +@@ -517,3 +562,10 @@ int proc_pid_statm(struct seq_file *m, s + + return 0; + } ++ ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++int proc_pid_ipaddr(struct task_struct *task, char *buffer) ++{ ++ return sprintf(buffer, "%u.%u.%u.%u\n", NIPQUAD(task->signal->curr_ip)); ++} ++#endif +diff -urNp a/fs/proc/base.c b/fs/proc/base.c +--- a/fs/proc/base.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/base.c 2009-05-24 18:10:25.233336001 -0700 +@@ -214,6 +214,9 @@ static int check_mem_permission(struct t + if (task == current) + return 0; + ++ if (gr_handle_proc_ptrace(task) || gr_acl_handle_procpidmem(task)) ++ return -EPERM; ++ + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. 
+@@ -291,15 +294,29 @@ out: + return res; + } + +-static int proc_pid_auxv(struct task_struct *task, char *buffer) ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ ++ (_mm->pax_flags & MF_PAX_RANDMMAP || \ ++ _mm->pax_flags & MF_PAX_SEGMEXEC)) ++#endif ++ ++static ++int proc_pid_auxv(struct task_struct *task, char *buffer) + { + int res = 0; + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int nwords = 0; +- do ++ ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ if (PAX_RAND_FLAGS(mm)) { ++ mmput(mm); ++ return res; ++ } ++#endif ++ do { + nwords += 2; +- while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ ++ } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + res = nwords * sizeof(mm->saved_auxv[0]); + if (res > PAGE_SIZE) + res = PAGE_SIZE; +@@ -491,7 +508,7 @@ static int proc_pid_limits(struct task_s + return count; + } + +-#ifdef CONFIG_HAVE_ARCH_TRACEHOOK ++#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + static int proc_pid_syscall(struct task_struct *task, char *buffer) + { + long nr; +@@ -1411,7 +1428,11 @@ static struct inode *proc_pid_make_inode + inode->i_gid = 0; + if (task_dumpable(task)) { + inode->i_uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = task->egid; ++#endif + } + security_task_to_inode(task, inode); + +@@ -1427,17 +1448,45 @@ static int pid_getattr(struct vfsmount * + { + struct inode *inode = dentry->d_inode; + struct task_struct *task; ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ struct task_struct *tmp = current; ++#endif ++ + generic_fillattr(inode, stat); + + rcu_read_lock(); + stat->uid = 0; + stat->gid = 0; + task = pid_task(proc_pid(inode), PIDTYPE_PID); +- if (task) { ++ ++ if (task && (gr_pid_is_chrooted(task) || gr_check_hidden_task(task))) { ++ rcu_read_unlock(); ++ return -ENOENT; ++ } ++ ++ 
++ if (task ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ && (!tmp->uid || (tmp->uid == task->uid) ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ || in_group_p(CONFIG_GRKERNSEC_PROC_GID) ++#endif ++ ) ++#endif ++ ) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || ++#endif + task_dumpable(task)) { + stat->uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ stat->gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + stat->gid = task->egid; ++#endif + } + } + rcu_read_unlock(); +@@ -1465,11 +1514,21 @@ static int pid_revalidate(struct dentry + { + struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); ++ + if (task) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IXUSR)) || ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ (inode->i_mode == (S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP)) || ++#endif + task_dumpable(task)) { + inode->i_uid = task->euid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = task->egid; ++#endif + } else { + inode->i_uid = 0; + inode->i_gid = 0; +@@ -1837,12 +1896,22 @@ static const struct file_operations proc + static int proc_fd_permission(struct inode *inode, int mask) + { + int rv; ++ struct task_struct *task; + + rv = generic_permission(inode, mask, NULL); +- if (rv == 0) +- return 0; ++ + if (task_pid(current) == proc_pid(inode)) + rv = 0; ++ ++ task = get_proc_task(inode); ++ if (task == NULL) ++ return rv; ++ ++ if (gr_acl_handle_procpidmem(task)) ++ rv = -EACCES; ++ ++ put_task_struct(task); ++ + return rv; + } + +@@ -1953,6 +2022,9 @@ static struct dentry *proc_pident_lookup + if (!task) + goto out_no_task; + ++ 
if (gr_pid_is_chrooted(task) || gr_check_hidden_task(task)) ++ goto out; ++ + /* + * Yes, it does not scale. And it should not. Don't add + * new entries into /proc/<tgid>/ without very good reasons. +@@ -1997,6 +2069,9 @@ static int proc_pident_readdir(struct fi + if (!task) + goto out_no_task; + ++ if (gr_pid_is_chrooted(task) || gr_check_hidden_task(task)) ++ goto out; ++ + ret = 0; + i = filp->f_pos; + switch (i) { +@@ -2359,6 +2434,9 @@ static struct dentry *proc_base_lookup(s + if (p > last) + goto out; + ++ if (gr_pid_is_chrooted(task) || gr_check_hidden_task(task)) ++ goto out; ++ + error = proc_base_instantiate(dir, dentry, task, p); + + out: +@@ -2445,7 +2523,7 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), + #endif +-#ifdef CONFIG_HAVE_ARCH_TRACEHOOK ++#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + INF("syscall", S_IRUSR, pid_syscall), + #endif + INF("cmdline", S_IRUGO, pid_cmdline), +@@ -2500,6 +2578,9 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tgid_io_accounting), + #endif ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++ INF("ipaddr", S_IRUSR, pid_ipaddr), ++#endif + }; + + static int proc_tgid_base_readdir(struct file * filp, +@@ -2629,7 +2710,14 @@ static struct dentry *proc_pid_instantia + if (!inode) + goto out; + ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ inode->i_mode = S_IFDIR|S_IRUSR|S_IXUSR; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++ inode->i_mode = S_IFDIR|S_IRUSR|S_IRGRP|S_IXUSR|S_IXGRP; ++#else + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; ++#endif + inode->i_op = &proc_tgid_base_inode_operations; + inode->i_fop = &proc_tgid_base_operations; + inode->i_flags|=S_IMMUTABLE; +@@ -2671,7 +2759,11 @@ struct dentry *proc_pid_lookup(struct in + if (!task) + goto out; + ++ if (gr_check_hidden_task(task)) ++ goto out_put_task; ++ + result = 
proc_pid_instantiate(dir, dentry, task, NULL); ++out_put_task: + put_task_struct(task); + out: + return result; +@@ -2736,6 +2828,9 @@ int proc_pid_readdir(struct file * filp, + { + unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; + struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ struct task_struct *tmp = current; ++#endif + struct tgid_iter iter; + struct pid_namespace *ns; + +@@ -2754,6 +2849,17 @@ int proc_pid_readdir(struct file * filp, + for (iter = next_tgid(ns, iter); + iter.task; + iter.tgid += 1, iter = next_tgid(ns, iter)) { ++ if (gr_pid_is_chrooted(iter.task) || gr_check_hidden_task(iter.task) ++#if defined(CONFIG_GRKERNSEC_PROC_USER) || defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ || (tmp->uid && (iter.task->uid != tmp->uid) ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ && !in_group_p(CONFIG_GRKERNSEC_PROC_GID) ++#endif ++ ) ++#endif ++ ) ++ continue; ++ + filp->f_pos = iter.tgid + TGID_OFFSET; + if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + put_task_struct(iter.task); +@@ -2781,7 +2887,7 @@ static const struct pid_entry tid_base_s + #ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), + #endif +-#ifdef CONFIG_HAVE_ARCH_TRACEHOOK ++#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + INF("syscall", S_IRUSR, pid_syscall), + #endif + INF("cmdline", S_IRUGO, pid_cmdline), +diff -urNp a/fs/proc/cmdline.c b/fs/proc/cmdline.c +--- a/fs/proc/cmdline.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/cmdline.c 2009-05-24 18:10:25.234086952 -0700 +@@ -23,7 +23,11 @@ static const struct file_operations cmdl + + static int __init proc_cmdline_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++ proc_create_grsec("cmdline", 0, NULL, &cmdline_proc_fops); ++#else + proc_create("cmdline", 0, NULL, &cmdline_proc_fops); ++#endif + return 0; + } + module_init(proc_cmdline_init); +diff -urNp 
a/fs/proc/devices.c b/fs/proc/devices.c +--- a/fs/proc/devices.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/devices.c 2009-05-24 18:10:25.235276477 -0700 +@@ -64,7 +64,11 @@ static const struct file_operations proc + + static int __init proc_devices_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++ proc_create_grsec("devices", 0, NULL, &proc_devinfo_operations); ++#else + proc_create("devices", 0, NULL, &proc_devinfo_operations); ++#endif + return 0; + } + module_init(proc_devices_init); +diff -urNp a/fs/proc/inode.c b/fs/proc/inode.c +--- a/fs/proc/inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/inode.c 2009-05-24 18:10:25.235276477 -0700 +@@ -466,7 +466,11 @@ struct inode *proc_get_inode(struct supe + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; ++#ifdef CONFIG_GRKERNSEC_PROC_USERGROUP ++ inode->i_gid = CONFIG_GRKERNSEC_PROC_GID; ++#else + inode->i_gid = de->gid; ++#endif + } + if (de->size) + inode->i_size = de->size; +diff -urNp a/fs/proc/internal.h b/fs/proc/internal.h +--- a/fs/proc/internal.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/internal.h 2009-05-24 18:10:25.236303352 -0700 +@@ -53,6 +53,9 @@ extern int proc_pid_status(struct seq_fi + struct pid *pid, struct task_struct *task); + extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); ++#ifdef CONFIG_GRKERNSEC_PROC_IPADDR ++extern int proc_pid_ipaddr(struct task_struct *task, char *buffer); ++#endif + extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); + + extern const struct file_operations proc_maps_operations; +diff -urNp a/fs/proc/kcore.c b/fs/proc/kcore.c +--- a/fs/proc/kcore.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/kcore.c 2009-05-24 18:10:25.236303352 -0700 +@@ -404,10 +404,12 @@ read_kcore(struct file *file, char __use + + static int __init proc_kcore_init(void) + { ++#if !defined(CONFIG_GRKERNSEC_PROC_ADD) + proc_root_kcore = proc_create("kcore", 
S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) + proc_root_kcore->size = + (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; ++#endif + return 0; + } + module_init(proc_kcore_init); +diff -urNp a/fs/proc/proc_net.c b/fs/proc/proc_net.c +--- a/fs/proc/proc_net.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/proc_net.c 2009-05-24 18:10:25.237335605 -0700 +@@ -106,6 +106,14 @@ static struct net *get_proc_task_net(str + struct nsproxy *ns; + struct net *net = NULL; + ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ if (current->fsuid) ++ return net; ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ if (current->fsuid && !in_group_p(CONFIG_GRKERNSEC_PROC_GID)) ++ return net; ++#endif ++ + rcu_read_lock(); + task = pid_task(proc_pid(dir), PIDTYPE_PID); + if (task != NULL) { +diff -urNp a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +--- a/fs/proc/proc_sysctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/proc_sysctl.c 2009-05-24 18:10:25.237335605 -0700 +@@ -7,6 +7,8 @@ + #include <linux/security.h> + #include "internal.h" + ++extern __u32 gr_handle_sysctl(const struct ctl_table *table, const int op); ++ + static struct dentry_operations proc_sys_dentry_operations; + static const struct file_operations proc_sys_file_operations; + static const struct inode_operations proc_sys_inode_operations; +@@ -110,6 +112,9 @@ static struct dentry *proc_sys_lookup(st + if (!p) + goto out; + ++ if (gr_handle_sysctl(p, MAY_EXEC)) ++ goto out; ++ + err = ERR_PTR(-ENOMEM); + inode = proc_sys_make_inode(dir->i_sb, h ? 
h : head, p); + if (h) +@@ -229,6 +234,9 @@ static int scan(struct ctl_table_header + if (*pos < file->f_pos) + continue; + ++ if (gr_handle_sysctl(table, 0)) ++ continue; ++ + res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (res) + return res; +@@ -345,6 +353,9 @@ static int proc_sys_getattr(struct vfsmo + if (IS_ERR(head)) + return PTR_ERR(head); + ++ if (table && gr_handle_sysctl(table, MAY_EXEC)) ++ return -ENOENT; ++ + generic_fillattr(inode, stat); + if (table) + stat->mode = (stat->mode & S_IFMT) | table->mode; +diff -urNp a/fs/proc/root.c b/fs/proc/root.c +--- a/fs/proc/root.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/root.c 2009-05-24 18:10:25.238335943 -0700 +@@ -135,7 +135,15 @@ void __init proc_root_init(void) + #ifdef CONFIG_PROC_DEVICETREE + proc_device_tree_init(); + #endif ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_mkdir_mode("bus", S_IRUSR | S_IXUSR, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_mkdir_mode("bus", S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP, NULL); ++#endif ++#else + proc_mkdir("bus", NULL); ++#endif + proc_sys_init(); + } + +diff -urNp a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +--- a/fs/proc/task_mmu.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/proc/task_mmu.c 2009-05-24 18:10:25.239336350 -0700 +@@ -46,15 +46,26 @@ void task_mem(struct seq_file *m, struct + "VmStk:\t%8lu kB\n" + "VmExe:\t%8lu kB\n" + "VmLib:\t%8lu kB\n" +- "VmPTE:\t%8lu kB\n", +- hiwater_vm << (PAGE_SHIFT-10), ++ "VmPTE:\t%8lu kB\n" ++ ++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++ "CsBase:\t%8lx\nCsLim:\t%8lx\n" ++#endif ++ ++ ,hiwater_vm << (PAGE_SHIFT-10), + (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + mm->locked_vm << (PAGE_SHIFT-10), + hiwater_rss << (PAGE_SHIFT-10), + total_rss << (PAGE_SHIFT-10), + data << (PAGE_SHIFT-10), + mm->stack_vm << (PAGE_SHIFT-10), text, lib, +- (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); ++ (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10 ++ 
++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++ , mm->context.user_cs_base, mm->context.user_cs_limit ++#endif ++ ++ ); + } + + unsigned long task_vsize(struct mm_struct *mm) +@@ -198,6 +209,12 @@ static int do_maps_open(struct inode *in + return ret; + } + ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++#define PAX_RAND_FLAGS(_mm) (_mm != NULL && _mm != current->mm && \ ++ (_mm->pax_flags & MF_PAX_RANDMMAP || \ ++ _mm->pax_flags & MF_PAX_SEGMEXEC)) ++#endif ++ + static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) + { + struct mm_struct *mm = vma->vm_mm; +@@ -214,13 +231,22 @@ static void show_map_vma(struct seq_file + } + + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_start, ++ PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_end, ++#else + vma->vm_start, + vma->vm_end, ++#endif + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? 's' : 'p', ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ PAX_RAND_FLAGS(mm) ? 
0UL : ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, ++#else + ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, ++#endif + MAJOR(dev), MINOR(dev), ino, &len); + + /* +@@ -234,11 +260,11 @@ static void show_map_vma(struct seq_file + const char *name = arch_vma_name(vma); + if (!name) { + if (mm) { +- if (vma->vm_start <= mm->start_brk && +- vma->vm_end >= mm->brk) { ++ if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { + name = "[heap]"; +- } else if (vma->vm_start <= mm->start_stack && +- vma->vm_end >= mm->start_stack) { ++ } else if ((vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP)) || ++ (vma->vm_start <= mm->start_stack && ++ vma->vm_end >= mm->start_stack)) { + name = "[stack]"; + } + } else { +@@ -381,9 +407,16 @@ static int show_smap(struct seq_file *m, + }; + + memset(&mss, 0, sizeof mss); +- mss.vma = vma; +- if (vma->vm_mm && !is_vm_hugetlb_page(vma)) +- walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); ++ ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ if (!PAX_RAND_FLAGS(vma->vm_mm)) { ++#endif ++ mss.vma = vma; ++ if (vma->vm_mm && !is_vm_hugetlb_page(vma)) ++ walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ } ++#endif + + show_map_vma(m, vma); + +@@ -397,7 +430,11 @@ static int show_smap(struct seq_file *m, + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n" + "Swap: %8lu kB\n", ++#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP ++ PAX_RAND_FLAGS(vma->vm_mm) ? 
0UL : (vma->vm_end - vma->vm_start) >> 10, ++#else + (vma->vm_end - vma->vm_start) >> 10, ++#endif + mss.resident >> 10, + (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), + mss.shared_clean >> 10, +diff -urNp a/fs/readdir.c b/fs/readdir.c +--- a/fs/readdir.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/readdir.c 2009-05-24 18:10:25.239336350 -0700 +@@ -16,6 +16,7 @@ + #include <linux/security.h> + #include <linux/syscalls.h> + #include <linux/unistd.h> ++#include <linux/namei.h> + + #include <asm/uaccess.h> + +@@ -67,6 +68,7 @@ struct old_linux_dirent { + + struct readdir_callback { + struct old_linux_dirent __user * dirent; ++ struct file * file; + int result; + }; + +@@ -84,6 +86,10 @@ static int fillonedir(void * __buf, cons + buf->result = -EOVERFLOW; + return -EOVERFLOW; + } ++ ++ if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) ++ return 0; ++ + buf->result++; + dirent = buf->dirent; + if (!access_ok(VERIFY_WRITE, dirent, +@@ -116,6 +122,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned in + + buf.result = 0; + buf.dirent = dirent; ++ buf.file = file; + + error = vfs_readdir(file, fillonedir, &buf); + if (buf.result) +@@ -142,6 +149,7 @@ struct linux_dirent { + struct getdents_callback { + struct linux_dirent __user * current_dir; + struct linux_dirent __user * previous; ++ struct file * file; + int count; + int error; + }; +@@ -162,6 +170,10 @@ static int filldir(void * __buf, const c + buf->error = -EOVERFLOW; + return -EOVERFLOW; + } ++ ++ if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) ++ return 0; ++ + dirent = buf->previous; + if (dirent) { + if (__put_user(offset, &dirent->d_off)) +@@ -209,6 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, + buf.previous = NULL; + buf.count = count; + buf.error = 0; ++ buf.file = file; + + error = vfs_readdir(file, filldir, &buf); + if (error >= 0) +@@ -228,6 +241,7 @@ out: + struct getdents_callback64 { + struct linux_dirent64 __user * current_dir; + struct linux_dirent64 __user * previous; ++ struct file 
*file; + int count; + int error; + }; +@@ -242,6 +256,10 @@ static int filldir64(void * __buf, const + buf->error = -EINVAL; /* only used if we fail.. */ + if (reclen > buf->count) + return -EINVAL; ++ ++ if (!gr_acl_handle_filldir(buf->file, name, namlen, ino)) ++ return 0; ++ + dirent = buf->previous; + if (dirent) { + if (__put_user(offset, &dirent->d_off)) +@@ -289,6 +307,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int + + buf.current_dir = dirent; + buf.previous = NULL; ++ buf.file = file; + buf.count = count; + buf.error = 0; + +diff -urNp a/fs/select.c b/fs/select.c +--- a/fs/select.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/select.c 2009-05-24 18:10:25.240162097 -0700 +@@ -19,6 +19,7 @@ + #include <linux/module.h> + #include <linux/slab.h> + #include <linux/poll.h> ++#include <linux/security.h> + #include <linux/personality.h> /* for STICKY_TIMEOUTS */ + #include <linux/file.h> + #include <linux/fdtable.h> +@@ -733,6 +734,7 @@ int do_sys_poll(struct pollfd __user *uf + struct poll_list *walk = head; + unsigned long todo = nfds; + ++ gr_learn_resource(current, RLIMIT_NOFILE, nfds, 1); + if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + return -EINVAL; + +diff -urNp a/fs/seq_file.c b/fs/seq_file.c +--- a/fs/seq_file.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/seq_file.c 2009-05-24 18:10:25.241335559 -0700 +@@ -386,7 +386,7 @@ int seq_printf(struct seq_file *m, const + } + EXPORT_SYMBOL(seq_printf); + +-static char *mangle_path(char *s, char *p, char *esc) ++char *mangle_path(char *s, char *p, char *esc) + { + while (s <= p) { + char c = *p++; +@@ -405,6 +405,7 @@ static char *mangle_path(char *s, char * + } + return NULL; + } ++EXPORT_SYMBOL(mangle_path); + + /* + * return the absolute path of 'dentry' residing in mount 'mnt'. 
+diff -urNp a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c +--- a/fs/smbfs/symlink.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/smbfs/symlink.c 2009-05-24 18:10:25.242248531 -0700 +@@ -55,7 +55,7 @@ static void *smb_follow_link(struct dent + + static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p) + { +- char *s = nd_get_link(nd); ++ const char *s = nd_get_link(nd); + if (!IS_ERR(s)) + __putname(s); + } +diff -urNp a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c +--- a/fs/sysfs/symlink.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/sysfs/symlink.c 2009-05-24 18:10:25.243335536 -0700 +@@ -200,7 +200,7 @@ static void *sysfs_follow_link(struct de + + static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) + { +- char *page = nd_get_link(nd); ++ const char *page = nd_get_link(nd); + if (!IS_ERR(page)) + free_page((unsigned long)page); + } +diff -urNp a/fs/udf/balloc.c b/fs/udf/balloc.c +--- a/fs/udf/balloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/udf/balloc.c 2009-05-24 18:10:25.243335536 -0700 +@@ -169,9 +169,7 @@ static void udf_bitmap_free_blocks(struc + unsigned long overflow; + + mutex_lock(&sbi->s_alloc_mutex); +- if (bloc.logicalBlockNum < 0 || +- (bloc.logicalBlockNum + count) > +- sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { ++ if (bloc.logicalBlockNum + count > sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { + udf_debug("%d < %d || %d + %d > %d\n", + bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, + sbi->s_partmaps[bloc.partitionReferenceNum]. 
+@@ -239,7 +237,7 @@ static int udf_bitmap_prealloc_blocks(st + + mutex_lock(&sbi->s_alloc_mutex); + part_len = sbi->s_partmaps[partition].s_partition_len; +- if (first_block < 0 || first_block >= part_len) ++ if (first_block >= part_len) + goto out; + + if (first_block + block_count > part_len) +@@ -300,7 +298,7 @@ static int udf_bitmap_new_block(struct s + mutex_lock(&sbi->s_alloc_mutex); + + repeat: +- if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) ++ if (goal >= sbi->s_partmaps[partition].s_partition_len) + goal = 0; + + nr_groups = bitmap->s_nr_groups; +@@ -438,9 +436,7 @@ static void udf_table_free_blocks(struct + struct udf_inode_info *iinfo; + + mutex_lock(&sbi->s_alloc_mutex); +- if (bloc.logicalBlockNum < 0 || +- (bloc.logicalBlockNum + count) > +- sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { ++ if (bloc.logicalBlockNum + count > sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { + udf_debug("%d < %d || %d + %d > %d\n", + bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, + sbi->s_partmaps[bloc.partitionReferenceNum]. +@@ -671,8 +667,7 @@ static int udf_table_prealloc_blocks(str + int8_t etype = -1; + struct udf_inode_info *iinfo; + +- if (first_block < 0 || +- first_block >= sbi->s_partmaps[partition].s_partition_len) ++ if (first_block >= sbi->s_partmaps[partition].s_partition_len) + return 0; + + iinfo = UDF_I(table); +@@ -750,7 +745,7 @@ static int udf_table_new_block(struct su + return newblock; + + mutex_lock(&sbi->s_alloc_mutex); +- if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) ++ if (goal >= sbi->s_partmaps[partition].s_partition_len) + goal = 0; + + /* We search for the closest matching block to goal. 
If we find +diff -urNp a/fs/ufs/inode.c b/fs/ufs/inode.c +--- a/fs/ufs/inode.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/ufs/inode.c 2009-05-24 18:10:25.244335664 -0700 +@@ -56,9 +56,7 @@ static int ufs_block_to_path(struct inod + + + UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); +- if (i_block < 0) { +- ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); +- } else if (i_block < direct_blocks) { ++ if (i_block < direct_blocks) { + offsets[n++] = i_block; + } else if ((i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = UFS_IND_BLOCK; +@@ -440,8 +438,6 @@ int ufs_getfrag_block(struct inode *inod + lock_kernel(); + + UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); +- if (fragment < 0) +- goto abort_negative; + if (fragment > + ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) + << uspi->s_fpbshift)) +@@ -504,10 +500,6 @@ abort: + unlock_kernel(); + return err; + +-abort_negative: +- ufs_warning(sb, "ufs_get_block", "block < 0"); +- goto abort; +- + abort_too_big: + ufs_warning(sb, "ufs_get_block", "block > big"); + goto abort; +diff -urNp a/fs/utimes.c b/fs/utimes.c +--- a/fs/utimes.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/utimes.c 2009-05-24 18:10:25.245336281 -0700 +@@ -1,6 +1,7 @@ + #include <linux/compiler.h> + #include <linux/file.h> + #include <linux/fs.h> ++#include <linux/security.h> + #include <linux/linkage.h> + #include <linux/mount.h> + #include <linux/namei.h> +@@ -101,6 +102,12 @@ static int utimes_common(struct path *pa + goto mnt_drop_write_and_out; + } + } ++ ++ if (!gr_acl_handle_utime(path->dentry, path->mnt)) { ++ error = -EACCES; ++ goto mnt_drop_write_and_out; ++ } ++ + mutex_lock(&inode->i_mutex); + error = notify_change(path->dentry, &newattrs); + mutex_unlock(&inode->i_mutex); +diff -urNp a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c +--- a/fs/xfs/linux-2.6/xfs_iops.c 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/fs/xfs/linux-2.6/xfs_iops.c 2009-05-24 18:10:25.245336281 -0700 +@@ -500,7 +500,7 @@ xfs_vn_put_link( + struct nameidata *nd, + void *p) + { +- char *s = nd_get_link(nd); ++ const char *s = nd_get_link(nd); + + if (!IS_ERR(s)) + kfree(s); +diff -urNp a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c +--- a/fs/xfs/xfs_bmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/fs/xfs/xfs_bmap.c 2009-05-24 18:10:25.248087140 -0700 +@@ -360,7 +360,7 @@ xfs_bmap_validate_ret( + int nmap, + int ret_nmap); + #else +-#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) ++#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do {} while (0) + #endif /* DEBUG */ + + #if defined(XFS_RW_TRACE) +diff -urNp a/grsecurity/Kconfig b/grsecurity/Kconfig +--- a/grsecurity/Kconfig 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/Kconfig 2009-05-24 18:10:25.252084859 -0700 +@@ -0,0 +1,869 @@ ++# ++# grsecurity configuration ++# ++ ++menu "Grsecurity" ++ ++config GRKERNSEC ++ bool "Grsecurity" ++ select CRYPTO ++ select CRYPTO_SHA256 ++ select SECURITY ++ select SECURITY_CAPABILITIES ++ help ++ If you say Y here, you will be able to configure many features ++ that will enhance the security of your system. It is highly ++ recommended that you say Y here and read through the help ++ for each option so that you fully understand the features and ++ can evaluate their usefulness for your machine. 
++ ++choice ++ prompt "Security Level" ++ depends on GRKERNSEC ++ default GRKERNSEC_CUSTOM ++ ++config GRKERNSEC_LOW ++ bool "Low" ++ select GRKERNSEC_LINK ++ select GRKERNSEC_FIFO ++ select GRKERNSEC_EXECVE ++ select GRKERNSEC_RANDNET ++ select GRKERNSEC_DMESG ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_CHDIR ++ select GRKERNSEC_MODSTOP if (MODULES) ++ ++ help ++ If you choose this option, several of the grsecurity options will ++ be enabled that will give you greater protection against a number ++ of attacks, while assuring that none of your software will have any ++ conflicts with the additional security measures. If you run a lot ++ of unusual software, or you are having problems with the higher ++ security levels, you should say Y here. With this option, the ++ following features are enabled: ++ ++ - Linking restrictions ++ - FIFO restrictions ++ - Enforcing RLIMIT_NPROC on execve ++ - Restricted dmesg ++ - Enforced chdir("/") on chroot ++ - Runtime module disabling ++ ++config GRKERNSEC_MEDIUM ++ bool "Medium" ++ select PAX ++ select PAX_EI_PAX ++ select PAX_PT_PAX_FLAGS ++ select PAX_HAVE_ACL_FLAGS ++ select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_SYSCTL ++ select GRKERNSEC_LINK ++ select GRKERNSEC_FIFO ++ select GRKERNSEC_EXECVE ++ select GRKERNSEC_DMESG ++ select GRKERNSEC_RANDNET ++ select GRKERNSEC_FORKFAIL ++ select GRKERNSEC_TIME ++ select GRKERNSEC_SIGNAL ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_UNIX ++ select GRKERNSEC_CHROOT_MOUNT ++ select GRKERNSEC_CHROOT_PIVOT ++ select GRKERNSEC_CHROOT_DOUBLE ++ select GRKERNSEC_CHROOT_CHDIR ++ select GRKERNSEC_CHROOT_MKNOD ++ select GRKERNSEC_PROC ++ select GRKERNSEC_PROC_USERGROUP ++ select GRKERNSEC_MODSTOP if (MODULES) ++ select PAX_RANDUSTACK ++ select PAX_ASLR ++ select PAX_RANDMMAP ++ select PAX_REFCOUNT if (X86) ++ ++ help ++ If you say Y here, several features in addition to those included ++ in the low additional 
security level will be enabled. These ++ features provide even more security to your system, though in rare ++ cases they may be incompatible with very old or poorly written ++ software. If you enable this option, make sure that your auth ++ service (identd) is running as gid 1001. With this option, ++ the following features (in addition to those provided in the ++ low additional security level) will be enabled: ++ ++ - Failed fork logging ++ - Time change logging ++ - Signal logging ++ - Deny mounts in chroot ++ - Deny double chrooting ++ - Deny sysctl writes in chroot ++ - Deny mknod in chroot ++ - Deny access to abstract AF_UNIX sockets out of chroot ++ - Deny pivot_root in chroot ++ - Denied writes of /dev/kmem, /dev/mem, and /dev/port ++ - /proc restrictions with special GID set to 10 (usually wheel) ++ - Address Space Layout Randomization (ASLR) ++ - Prevent exploitation of most refcount overflows ++ ++config GRKERNSEC_HIGH ++ bool "High" ++ select GRKERNSEC_LINK ++ select GRKERNSEC_FIFO ++ select GRKERNSEC_EXECVE ++ select GRKERNSEC_DMESG ++ select GRKERNSEC_FORKFAIL ++ select GRKERNSEC_TIME ++ select GRKERNSEC_SIGNAL ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_SHMAT ++ select GRKERNSEC_CHROOT_UNIX ++ select GRKERNSEC_CHROOT_MOUNT ++ select GRKERNSEC_CHROOT_FCHDIR ++ select GRKERNSEC_CHROOT_PIVOT ++ select GRKERNSEC_CHROOT_DOUBLE ++ select GRKERNSEC_CHROOT_CHDIR ++ select GRKERNSEC_CHROOT_MKNOD ++ select GRKERNSEC_CHROOT_CAPS ++ select GRKERNSEC_CHROOT_SYSCTL ++ select GRKERNSEC_CHROOT_FINDTASK ++ select GRKERNSEC_PROC ++ select GRKERNSEC_PROC_MEMMAP if (PAX_NOEXEC || PAX_ASLR) ++ select GRKERNSEC_HIDESYM ++ select GRKERNSEC_BRUTE ++ select GRKERNSEC_PROC_USERGROUP ++ select GRKERNSEC_KMEM ++ select GRKERNSEC_RESLOG ++ select GRKERNSEC_RANDNET ++ select GRKERNSEC_PROC_ADD ++ select GRKERNSEC_CHROOT_CHMOD ++ select GRKERNSEC_CHROOT_NICE ++ select GRKERNSEC_AUDIT_MOUNT ++ select GRKERNSEC_MODSTOP if (MODULES) ++ select PAX ++ select PAX_RANDUSTACK ++ 
select PAX_ASLR ++ select PAX_RANDMMAP ++ select PAX_NOEXEC ++ select PAX_MPROTECT ++ select PAX_EI_PAX ++ select PAX_PT_PAX_FLAGS ++ select PAX_HAVE_ACL_FLAGS ++ select PAX_KERNEXEC if (X86 && !EFI && !COMPAT_VDSO && !PARAVIRT && (!X86_32 || X86_WP_WORKS_OK)) ++ select PAX_MEMORY_UDEREF if (!X86_64 && !COMPAT_VDSO) ++ select PAX_RANDKSTACK if (X86_TSC && !X86_64) ++ select PAX_SEGMEXEC if (X86 && !X86_64) ++ select PAX_PAGEEXEC if (!X86) ++ select PAX_EMUPLT if (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) ++ select PAX_DLRESOLVE if (SPARC32 || SPARC64) ++ select PAX_SYSCALL if (PPC32) ++ select PAX_EMUTRAMP if (PARISC) ++ select PAX_EMUSIGRT if (PARISC) ++ select PAX_ETEXECRELOCS if (ALPHA || IA64 || PARISC) ++ select PAX_REFCOUNT if (X86) ++ help ++ If you say Y here, many of the features of grsecurity will be ++ enabled, which will protect you against many kinds of attacks ++ against your system. The heightened security comes at a cost ++ of an increased chance of incompatibilities with rare software ++ on your machine. Since this security level enables PaX, you should ++ view <http://pax.grsecurity.net> and read about the PaX ++ project. While you are there, download chpax and run it on ++ binaries that cause problems with PaX. Also remember that ++ since the /proc restrictions are enabled, you must run your ++ identd as gid 1001. 
This security level enables the following ++ features in addition to those listed in the low and medium ++ security levels: ++ ++ - Additional /proc restrictions ++ - Chmod restrictions in chroot ++ - No signals, ptrace, or viewing of processes outside of chroot ++ - Capability restrictions in chroot ++ - Deny fchdir out of chroot ++ - Priority restrictions in chroot ++ - Segmentation-based implementation of PaX ++ - Mprotect restrictions ++ - Removal of addresses from /proc/<pid>/[smaps|maps|stat] ++ - Kernel stack randomization ++ - Mount/unmount/remount logging ++ - Kernel symbol hiding ++ - Prevention of memory exhaustion-based exploits ++config GRKERNSEC_CUSTOM ++ bool "Custom" ++ help ++ If you say Y here, you will be able to configure every grsecurity ++ option, which allows you to enable many more features that aren't ++ covered in the basic security levels. These additional features ++ include TPE, socket restrictions, and the sysctl system for ++ grsecurity. It is advised that you read through the help for ++ each option to determine its usefulness in your situation. ++ ++endchoice ++ ++menu "Address Space Protection" ++depends on GRKERNSEC ++ ++config GRKERNSEC_KMEM ++ bool "Deny writing to /dev/kmem, /dev/mem, and /dev/port" ++ help ++ If you say Y here, /dev/kmem and /dev/mem won't be allowed to ++ be written to via mmap or otherwise to modify the running kernel. ++ /dev/port will also not be allowed to be opened. If you have module ++ support disabled, enabling this will close up four ways that are ++ currently used to insert malicious code into the running kernel. ++ Even with all these features enabled, we still highly recommend that ++ you use the RBAC system, as it is still possible for an attacker to ++ modify the running kernel through privileged I/O granted by ioperm/iopl. ++ If you are not using XFree86, you may be able to stop this additional ++ case by enabling the 'Disable privileged I/O' option. 
Though nothing ++ legitimately writes to /dev/kmem, XFree86 does need to write to /dev/mem, ++ but only to video memory, which is the only writing we allow in this ++ case. If /dev/kmem or /dev/mem are mmaped without PROT_WRITE, they will ++ not be allowed to mprotect it with PROT_WRITE later. ++ It is highly recommended that you say Y here if you meet all the ++ conditions above. ++ ++config GRKERNSEC_IO ++ bool "Disable privileged I/O" ++ depends on X86 ++ select RTC_CLASS ++ select RTC_INTF_DEV ++ select RTC_DRV_CMOS ++ help ++ If you say Y here, all ioperm and iopl calls will return an error. ++ Ioperm and iopl can be used to modify the running kernel. ++ Unfortunately, some programs need this access to operate properly, ++ the most notable of which are XFree86 and hwclock. hwclock can be ++ remedied by having RTC support in the kernel, so real-time ++ clock support is enabled if this option is enabled, to ensure ++ that hwclock operates correctly. XFree86 still will not ++ operate correctly with this option enabled, so DO NOT CHOOSE Y ++ IF YOU USE XFree86. If you use XFree86 and you still want to ++ protect your kernel against modification, use the RBAC system. ++ ++config GRKERNSEC_PROC_MEMMAP ++ bool "Remove addresses from /proc/<pid>/[smaps|maps|stat]" ++ depends on PAX_NOEXEC || PAX_ASLR ++ help ++ If you say Y here, the /proc/<pid>/maps and /proc/<pid>/stat files will ++ give no information about the addresses of its mappings if ++ PaX features that rely on random addresses are enabled on the task. ++ If you use PaX it is greatly recommended that you say Y here as it ++ closes up a hole that makes the full ASLR useless for suid ++ binaries. ++ ++config GRKERNSEC_BRUTE ++ bool "Deter exploit bruteforcing" ++ help ++ If you say Y here, attempts to bruteforce exploits against forking ++ daemons such as apache or sshd will be deterred. 
When a child of a ++ forking daemon is killed by PaX or crashes due to an illegal ++ instruction, the parent process will be delayed 30 seconds upon every ++ subsequent fork until the administrator is able to assess the ++ situation and restart the daemon. It is recommended that you also ++ enable signal logging in the auditing section so that logs are ++ generated when a process performs an illegal instruction. ++ ++config GRKERNSEC_MODSTOP ++ bool "Runtime module disabling" ++ depends on MODULES ++ help ++ If you say Y here, you will be able to disable the ability to (un)load ++ modules at runtime. This feature is useful if you need the ability ++ to load kernel modules at boot time, but do not want to allow an ++ attacker to load a rootkit kernel module into the system, or to remove ++ a loaded kernel module important to system functioning. You should ++ enable the /dev/mem protection feature as well, since rootkits can be ++ inserted into the kernel via other methods than kernel modules. Since ++ an untrusted module could still be loaded by modifying init scripts and ++ rebooting the system, it is also recommended that you enable the RBAC ++ system. If you enable this option, a sysctl option with name ++ "disable_modules" will be created. Setting this option to "1" disables ++ module loading. After this option is set, no further writes to it are ++ allowed until the system is rebooted. ++ ++config GRKERNSEC_HIDESYM ++ bool "Hide kernel symbols" ++ help ++ If you say Y here, getting information on loaded modules, and ++ displaying all kernel symbols through a syscall will be restricted ++ to users with CAP_SYS_MODULE. 
This option is only effective ++ provided the following conditions are met: ++ 1) The kernel using grsecurity is not precompiled by some distribution ++ 2) You are using the RBAC system and hiding other files such as your ++ kernel image and System.map ++ 3) You have the additional /proc restrictions enabled, which removes ++ /proc/kcore ++ If the above conditions are met, this option will aid to provide a ++ useful protection against local and remote kernel exploitation of ++ overflows and arbitrary read/write vulnerabilities. ++ ++endmenu ++menu "Role Based Access Control Options" ++depends on GRKERNSEC ++ ++config GRKERNSEC_ACL_HIDEKERN ++ bool "Hide kernel processes" ++ help ++ If you say Y here, all kernel threads will be hidden to all ++ processes but those whose subject has the "view hidden processes" ++ flag. ++ ++config GRKERNSEC_ACL_MAXTRIES ++ int "Maximum tries before password lockout" ++ default 3 ++ help ++ This option enforces the maximum number of times a user can attempt ++ to authorize themselves with the grsecurity RBAC system before being ++ denied the ability to attempt authorization again for a specified time. ++ The lower the number, the harder it will be to brute-force a password. ++ ++config GRKERNSEC_ACL_TIMEOUT ++ int "Time to wait after max password tries, in seconds" ++ default 30 ++ help ++ This option specifies the time the user must wait after attempting to ++ authorize to the RBAC system with the maximum number of invalid ++ passwords. The higher the number, the harder it will be to brute-force ++ a password. ++ ++endmenu ++menu "Filesystem Protections" ++depends on GRKERNSEC ++ ++config GRKERNSEC_PROC ++ bool "Proc restrictions" ++ help ++ If you say Y here, the permissions of the /proc filesystem ++ will be altered to enhance system security and privacy. You MUST ++ choose either a user only restriction or a user and group restriction. 
++ Depending upon the option you choose, you can either restrict users to ++ see only the processes they themselves run, or choose a group that can ++ view all processes and files normally restricted to root if you choose ++ the "restrict to user only" option. NOTE: If you're running identd as ++ a non-root user, you will have to run it as the group you specify here. ++ ++config GRKERNSEC_PROC_USER ++ bool "Restrict /proc to user only" ++ depends on GRKERNSEC_PROC ++ help ++ If you say Y here, non-root users will only be able to view their own ++ processes, and restricts them from viewing network-related information, ++ and viewing kernel symbol and module information. ++ ++config GRKERNSEC_PROC_USERGROUP ++ bool "Allow special group" ++ depends on GRKERNSEC_PROC && !GRKERNSEC_PROC_USER ++ help ++ If you say Y here, you will be able to select a group that will be ++ able to view all processes, network-related information, and ++ kernel and symbol information. This option is useful if you want ++ to run identd as a non-root user. ++ ++config GRKERNSEC_PROC_GID ++ int "GID for special group" ++ depends on GRKERNSEC_PROC_USERGROUP ++ default 1001 ++ ++config GRKERNSEC_PROC_ADD ++ bool "Additional restrictions" ++ depends on GRKERNSEC_PROC_USER || GRKERNSEC_PROC_USERGROUP ++ help ++ If you say Y here, additional restrictions will be placed on ++ /proc that keep normal users from viewing device information and ++ slabinfo information that could be useful for exploits. ++ ++config GRKERNSEC_LINK ++ bool "Linking restrictions" ++ help ++ If you say Y here, /tmp race exploits will be prevented, since users ++ will no longer be able to follow symlinks owned by other users in ++ world-writable +t directories (i.e. /tmp), unless the owner of the ++ symlink is the owner of the directory. users will also not be ++ able to hardlink to files they do not own. If the sysctl option is ++ enabled, a sysctl option with name "linking_restrictions" is created. 
++ ++config GRKERNSEC_FIFO ++ bool "FIFO restrictions" ++ help ++ If you say Y here, users will not be able to write to FIFOs they don't ++ own in world-writable +t directories (i.e. /tmp), unless the owner of ++ the FIFO is the same owner of the directory it's held in. If the sysctl ++ option is enabled, a sysctl option with name "fifo_restrictions" is ++ created. ++ ++config GRKERNSEC_CHROOT ++ bool "Chroot jail restrictions" ++ help ++ If you say Y here, you will be able to choose several options that will ++ make breaking out of a chrooted jail much more difficult. If you ++ encounter no software incompatibilities with the following options, it ++ is recommended that you enable each one. ++ ++config GRKERNSEC_CHROOT_MOUNT ++ bool "Deny mounts" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to ++ mount or remount filesystems. If the sysctl option is enabled, a ++ sysctl option with name "chroot_deny_mount" is created. ++ ++config GRKERNSEC_CHROOT_DOUBLE ++ bool "Deny double-chroots" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to chroot ++ again outside the chroot. This is a widely used method of breaking ++ out of a chroot jail and should not be allowed. If the sysctl ++ option is enabled, a sysctl option with name ++ "chroot_deny_chroot" is created. ++ ++config GRKERNSEC_CHROOT_PIVOT ++ bool "Deny pivot_root in chroot" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to use ++ a function called pivot_root() that was introduced in Linux 2.3.41. It ++ works similar to chroot in that it changes the root filesystem. This ++ function could be misused in a chrooted process to attempt to break out ++ of the chroot, and therefore should not be allowed. If the sysctl ++ option is enabled, a sysctl option with name "chroot_deny_pivot" is ++ created. 
++ ++config GRKERNSEC_CHROOT_CHDIR ++ bool "Enforce chdir(\"/\") on all chroots" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, the current working directory of all newly-chrooted ++ applications will be set to the root directory of the chroot. ++ The man page on chroot(2) states: ++ Note that this call does not change the current working ++ directory, so that `.' can be outside the tree rooted at ++ `/'. In particular, the super-user can escape from a ++ `chroot jail' by doing `mkdir foo; chroot foo; cd ..'. ++ ++ It is recommended that you say Y here, since it's not known to break ++ any software. If the sysctl option is enabled, a sysctl option with ++ name "chroot_enforce_chdir" is created. ++ ++config GRKERNSEC_CHROOT_CHMOD ++ bool "Deny (f)chmod +s" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to chmod ++ or fchmod files to make them have suid or sgid bits. This protects ++ against another published method of breaking a chroot. If the sysctl ++ option is enabled, a sysctl option with name "chroot_deny_chmod" is ++ created. ++ ++config GRKERNSEC_CHROOT_FCHDIR ++ bool "Deny fchdir out of chroot" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, a well-known method of breaking chroots by fchdir'ing ++ to a file descriptor of the chrooting process that points to a directory ++ outside the filesystem will be stopped. If the sysctl option ++ is enabled, a sysctl option with name "chroot_deny_fchdir" is created. ++ ++config GRKERNSEC_CHROOT_MKNOD ++ bool "Deny mknod" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be allowed to ++ mknod. 
The problem with using mknod inside a chroot is that it ++ would allow an attacker to create a device entry that is the same ++ as one on the physical root of your system, which could range from ++ anything from the console device to a device for your harddrive (which ++ they could then use to wipe the drive or steal data). It is recommended ++ that you say Y here, unless you run into software incompatibilities. ++ If the sysctl option is enabled, a sysctl option with name ++ "chroot_deny_mknod" is created. ++ ++config GRKERNSEC_CHROOT_SHMAT ++ bool "Deny shmat() out of chroot" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to attach ++ to shared memory segments that were created outside of the chroot jail. ++ It is recommended that you say Y here. If the sysctl option is enabled, ++ a sysctl option with name "chroot_deny_shmat" is created. ++ ++config GRKERNSEC_CHROOT_UNIX ++ bool "Deny access to abstract AF_UNIX sockets out of chroot" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to ++ connect to abstract (meaning not belonging to a filesystem) Unix ++ domain sockets that were bound outside of a chroot. It is recommended ++ that you say Y here. If the sysctl option is enabled, a sysctl option ++ with name "chroot_deny_unix" is created. ++ ++config GRKERNSEC_CHROOT_FINDTASK ++ bool "Protect outside processes" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to ++ kill, send signals with fcntl, ptrace, capget, getpgid, getsid, ++ or view any process outside of the chroot. If the sysctl ++ option is enabled, a sysctl option with name "chroot_findtask" is ++ created. 
++ ++config GRKERNSEC_CHROOT_NICE ++ bool "Restrict priority changes" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, processes inside a chroot will not be able to raise ++ the priority of processes in the chroot, or alter the priority of ++ processes outside the chroot. This provides more security than simply ++ removing CAP_SYS_NICE from the process' capability set. If the ++ sysctl option is enabled, a sysctl option with name "chroot_restrict_nice" ++ is created. ++ ++config GRKERNSEC_CHROOT_SYSCTL ++ bool "Deny sysctl writes" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, an attacker in a chroot will not be able to ++ write to sysctl entries, either by sysctl(2) or through a /proc ++ interface. It is strongly recommended that you say Y here. If the ++ sysctl option is enabled, a sysctl option with name ++ "chroot_deny_sysctl" is created. ++ ++config GRKERNSEC_CHROOT_CAPS ++ bool "Capability restrictions" ++ depends on GRKERNSEC_CHROOT ++ help ++ If you say Y here, the capabilities on all root processes within a ++ chroot jail will be lowered to stop module insertion, raw i/o, ++ system and net admin tasks, rebooting the system, modifying immutable ++ files, modifying IPC owned by another, and changing the system time. ++ This is left as an option because it can break some apps. Disable this ++ if your chrooted apps are having problems performing those kinds of ++ tasks. If the sysctl option is enabled, a sysctl option with ++ name "chroot_caps" is created. ++ ++endmenu ++menu "Kernel Auditing" ++depends on GRKERNSEC ++ ++config GRKERNSEC_AUDIT_GROUP ++ bool "Single group for auditing" ++ help ++ If you say Y here, the exec, chdir, (un)mount, and ipc logging features ++ will only operate on a group you specify. This option is recommended ++ if you only want to watch certain users instead of having a large ++ amount of logs from the entire system. If the sysctl option is enabled, ++ a sysctl option with name "audit_group" is created.
++ ++config GRKERNSEC_AUDIT_GID ++ int "GID for auditing" ++ depends on GRKERNSEC_AUDIT_GROUP ++ default 1007 ++ ++config GRKERNSEC_EXECLOG ++ bool "Exec logging" ++ help ++ If you say Y here, all execve() calls will be logged (since the ++ other exec*() calls are frontends to execve(), all execution ++ will be logged). Useful for shell-servers that like to keep track ++ of their users. If the sysctl option is enabled, a sysctl option with ++ name "exec_logging" is created. ++ WARNING: This option when enabled will produce a LOT of logs, especially ++ on an active system. ++ ++config GRKERNSEC_RESLOG ++ bool "Resource logging" ++ help ++ If you say Y here, all attempts to overstep resource limits will ++ be logged with the resource name, the requested size, and the current ++ limit. It is highly recommended that you say Y here. If the sysctl ++ option is enabled, a sysctl option with name "resource_logging" is ++ created. If the RBAC system is enabled, the sysctl value is ignored. ++ ++config GRKERNSEC_CHROOT_EXECLOG ++ bool "Log execs within chroot" ++ help ++ If you say Y here, all executions inside a chroot jail will be logged ++ to syslog. This can cause a large amount of logs if certain ++ applications (eg. djb's daemontools) are installed on the system, and ++ is therefore left as an option. If the sysctl option is enabled, a ++ sysctl option with name "chroot_execlog" is created. ++ ++config GRKERNSEC_AUDIT_CHDIR ++ bool "Chdir logging" ++ help ++ If you say Y here, all chdir() calls will be logged. If the sysctl ++ option is enabled, a sysctl option with name "audit_chdir" is created. ++ ++config GRKERNSEC_AUDIT_MOUNT ++ bool "(Un)Mount logging" ++ help ++ If you say Y here, all mounts and unmounts will be logged. If the ++ sysctl option is enabled, a sysctl option with name "audit_mount" is ++ created. 
++ ++config GRKERNSEC_AUDIT_IPC ++ bool "IPC logging" ++ help ++ If you say Y here, creation and removal of message queues, semaphores, ++ and shared memory will be logged. If the sysctl option is enabled, a ++ sysctl option with name "audit_ipc" is created. ++ ++config GRKERNSEC_SIGNAL ++ bool "Signal logging" ++ help ++ If you say Y here, certain important signals will be logged, such as ++ SIGSEGV, which will as a result inform you of when an error in a program ++ occurred, which in some cases could mean a possible exploit attempt. ++ If the sysctl option is enabled, a sysctl option with name ++ "signal_logging" is created. ++ ++config GRKERNSEC_FORKFAIL ++ bool "Fork failure logging" ++ help ++ If you say Y here, all failed fork() attempts will be logged. ++ This could suggest a fork bomb, or someone attempting to overstep ++ their process limit. If the sysctl option is enabled, a sysctl option ++ with name "forkfail_logging" is created. ++ ++config GRKERNSEC_TIME ++ bool "Time change logging" ++ help ++ If you say Y here, any changes of the system clock will be logged. ++ If the sysctl option is enabled, a sysctl option with name ++ "timechange_logging" is created. ++ ++config GRKERNSEC_PROC_IPADDR ++ bool "/proc/<pid>/ipaddr support" ++ help ++ If you say Y here, a new entry will be added to each /proc/<pid> ++ directory that contains the IP address of the person using the task. ++ The IP is carried across local TCP and AF_UNIX stream sockets. ++ This information can be useful for IDS/IPSes to perform remote response ++ to a local attack. The entry is readable by only the owner of the ++ process (and root if he has CAP_DAC_OVERRIDE, which can be removed via ++ the RBAC system), and thus does not create privacy concerns. ++ ++config GRKERNSEC_AUDIT_TEXTREL ++ bool 'ELF text relocations logging (READ HELP)' ++ depends on PAX_MPROTECT ++ help ++ If you say Y here, text relocations will be logged with the filename ++ of the offending library or binary.
The purpose of the feature is ++ to help Linux distribution developers get rid of libraries and ++ binaries that need text relocations which hinder the future progress ++ of PaX. Only Linux distribution developers should say Y here, and ++ never on a production machine, as this option creates an information ++ leak that could aid an attacker in defeating the randomization of ++ a single memory region. If the sysctl option is enabled, a sysctl ++ option with name "audit_textrel" is created. ++ ++endmenu ++ ++menu "Executable Protections" ++depends on GRKERNSEC ++ ++config GRKERNSEC_EXECVE ++ bool "Enforce RLIMIT_NPROC on execs" ++ help ++ If you say Y here, users with a resource limit on processes will ++ have the value checked during execve() calls. The current system ++ only checks the system limit during fork() calls. If the sysctl option ++ is enabled, a sysctl option with name "execve_limiting" is created. ++ ++config GRKERNSEC_DMESG ++ bool "Dmesg(8) restriction" ++ help ++ If you say Y here, non-root users will not be able to use dmesg(8) ++ to view up to the last 4kb of messages in the kernel's log buffer. ++ If the sysctl option is enabled, a sysctl option with name "dmesg" is ++ created. ++ ++config GRKERNSEC_TPE ++ bool "Trusted Path Execution (TPE)" ++ help ++ If you say Y here, you will be able to choose a gid to add to the ++ supplementary groups of users you want to mark as "untrusted." ++ These users will not be able to execute any files that are not in ++ root-owned directories writable only by root. If the sysctl option ++ is enabled, a sysctl option with name "tpe" is created. 
++ ++config GRKERNSEC_TPE_ALL ++ bool "Partially restrict non-root users" ++ depends on GRKERNSEC_TPE ++ help ++ If you say Y here, all non-root users other than the ones in the ++ group specified in the main TPE option will only be allowed to ++ execute files in directories they own that are not group or ++ world-writable, or in directories owned by root and writable only by ++ root. If the sysctl option is enabled, a sysctl option with name ++ "tpe_restrict_all" is created. ++ ++config GRKERNSEC_TPE_INVERT ++ bool "Invert GID option" ++ depends on GRKERNSEC_TPE ++ help ++ If you say Y here, the group you specify in the TPE configuration will ++ decide what group TPE restrictions will be *disabled* for. This ++ option is useful if you want TPE restrictions to be applied to most ++ users on the system. ++ ++config GRKERNSEC_TPE_GID ++ int "GID for untrusted users" ++ depends on GRKERNSEC_TPE && !GRKERNSEC_TPE_INVERT ++ default 1005 ++ help ++ If you have selected the "Invert GID option" above, setting this ++ GID determines what group TPE restrictions will be *disabled* for. ++ If you have not selected the "Invert GID option" above, setting this ++ GID determines what group TPE restrictions will be *enabled* for. ++ If the sysctl option is enabled, a sysctl option with name "tpe_gid" ++ is created. ++ ++config GRKERNSEC_TPE_GID ++ int "GID for trusted users" ++ depends on GRKERNSEC_TPE && GRKERNSEC_TPE_INVERT ++ default 1005 ++ help ++ If you have selected the "Invert GID option" above, setting this ++ GID determines what group TPE restrictions will be *disabled* for. ++ If you have not selected the "Invert GID option" above, setting this ++ GID determines what group TPE restrictions will be *enabled* for. ++ If the sysctl option is enabled, a sysctl option with name "tpe_gid" ++ is created.
++ ++endmenu ++menu "Network Protections" ++depends on GRKERNSEC ++ ++config GRKERNSEC_RANDNET ++ bool "Larger entropy pools" ++ help ++ If you say Y here, the entropy pools used for many features of Linux ++ and grsecurity will be doubled in size. Since several grsecurity ++ features use additional randomness, it is recommended that you say Y ++ here. Saying Y here has a similar effect to modifying ++ /proc/sys/kernel/random/poolsize. ++ ++config GRKERNSEC_SOCKET ++ bool "Socket restrictions" ++ help ++ If you say Y here, you will be able to choose from several options. ++ If you assign a GID on your system and add it to the supplementary ++ groups of users you want to restrict socket access to, this patch ++ will perform up to three things, based on the option(s) you choose. ++ ++config GRKERNSEC_SOCKET_ALL ++ bool "Deny any sockets to group" ++ depends on GRKERNSEC_SOCKET ++ help ++ If you say Y here, you will be able to choose a GID whose users will ++ be unable to connect to other hosts from your machine or run server ++ applications from your machine. If the sysctl option is enabled, a ++ sysctl option with name "socket_all" is created. ++ ++config GRKERNSEC_SOCKET_ALL_GID ++ int "GID to deny all sockets for" ++ depends on GRKERNSEC_SOCKET_ALL ++ default 1004 ++ help ++ Here you can choose the GID to disable socket access for. Remember to ++ add the users you want socket access disabled for to the GID ++ specified here. If the sysctl option is enabled, a sysctl option ++ with name "socket_all_gid" is created. ++ ++config GRKERNSEC_SOCKET_CLIENT ++ bool "Deny client sockets to group" ++ depends on GRKERNSEC_SOCKET ++ help ++ If you say Y here, you will be able to choose a GID whose users will ++ be unable to connect to other hosts from your machine, but will be ++ able to run servers. If this option is enabled, all users in the group ++ you specify will have to use passive mode when initiating ftp transfers ++ from the shell on your machine.
If the sysctl option is enabled, a ++ sysctl option with name "socket_client" is created. ++ ++config GRKERNSEC_SOCKET_CLIENT_GID ++ int "GID to deny client sockets for" ++ depends on GRKERNSEC_SOCKET_CLIENT ++ default 1003 ++ help ++ Here you can choose the GID to disable client socket access for. ++ Remember to add the users you want client socket access disabled for to ++ the GID specified here. If the sysctl option is enabled, a sysctl ++ option with name "socket_client_gid" is created. ++ ++config GRKERNSEC_SOCKET_SERVER ++ bool "Deny server sockets to group" ++ depends on GRKERNSEC_SOCKET ++ help ++ If you say Y here, you will be able to choose a GID whose users will ++ be unable to run server applications from your machine. If the sysctl ++ option is enabled, a sysctl option with name "socket_server" is created. ++ ++config GRKERNSEC_SOCKET_SERVER_GID ++ int "GID to deny server sockets for" ++ depends on GRKERNSEC_SOCKET_SERVER ++ default 1002 ++ help ++ Here you can choose the GID to disable server socket access for. ++ Remember to add the users you want server socket access disabled for to ++ the GID specified here. If the sysctl option is enabled, a sysctl ++ option with name "socket_server_gid" is created. ++ ++endmenu ++menu "Sysctl support" ++depends on GRKERNSEC && SYSCTL ++ ++config GRKERNSEC_SYSCTL ++ bool "Sysctl support" ++ help ++ If you say Y here, you will be able to change the options that ++ grsecurity runs with at bootup, without having to recompile your ++ kernel. You can echo values to files in /proc/sys/kernel/grsecurity ++ to enable (1) or disable (0) various features. All the sysctl entries ++ are mutable until the "grsec_lock" entry is set to a non-zero value. ++ All features enabled in the kernel configuration are disabled at boot ++ if you do not say Y to the "Turn on features by default" option. ++ All options should be set at startup, and the grsec_lock entry should ++ be set to a non-zero value after all the options are set.
++ *THIS IS EXTREMELY IMPORTANT* ++ ++config GRKERNSEC_SYSCTL_ON ++ bool "Turn on features by default" ++ depends on GRKERNSEC_SYSCTL ++ help ++ If you say Y here, instead of having all features enabled in the ++ kernel configuration disabled at boot time, the features will be ++ enabled at boot time. It is recommended you say Y here unless ++ there is some reason you would want all sysctl-tunable features to ++ be disabled by default. As mentioned elsewhere, it is important ++ to enable the grsec_lock entry once you have finished modifying ++ the sysctl entries. ++ ++endmenu ++menu "Logging Options" ++depends on GRKERNSEC ++ ++config GRKERNSEC_FLOODTIME ++ int "Seconds in between log messages (minimum)" ++ default 10 ++ help ++ This option allows you to enforce the number of seconds between ++ grsecurity log messages. The default should be suitable for most ++ people, however, if you choose to change it, choose a value small enough ++ to allow informative logs to be produced, but large enough to ++ prevent flooding. ++ ++config GRKERNSEC_FLOODBURST ++ int "Number of messages in a burst (maximum)" ++ default 4 ++ help ++ This option allows you to choose the maximum number of messages allowed ++ within the flood time interval you chose in a separate option. The ++ default should be suitable for most people, however if you find that ++ many of your logs are being interpreted as flooding, you may want to ++ raise this value. 
++ ++endmenu ++ ++endmenu +diff -urNp a/grsecurity/Makefile b/grsecurity/Makefile +--- a/grsecurity/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/Makefile 2009-05-24 18:10:25.252084859 -0700 +@@ -0,0 +1,20 @@ ++# grsecurity's ACL system was originally written in 2001 by Michael Dalton ++# during 2001-2005 it has been completely redesigned by Brad Spengler ++# into an RBAC system ++# ++# All code in this directory and various hooks inserted throughout the kernel ++# are copyright Brad Spengler, and released under the GPL v2 or higher ++ ++obj-y = grsec_chdir.o grsec_chroot.o grsec_exec.o grsec_fifo.o grsec_fork.o \ ++ grsec_mount.o grsec_sig.o grsec_sock.o grsec_sysctl.o \ ++ grsec_time.o grsec_tpe.o grsec_ipc.o grsec_link.o grsec_textrel.o ++ ++obj-$(CONFIG_GRKERNSEC) += grsec_init.o grsum.o gracl.o gracl_ip.o gracl_segv.o \ ++ gracl_cap.o gracl_alloc.o gracl_shm.o grsec_mem.o gracl_fs.o \ ++ gracl_learn.o grsec_log.o ++obj-$(CONFIG_GRKERNSEC_RESLOG) += gracl_res.o ++ ++ifndef CONFIG_GRKERNSEC ++obj-y += grsec_disabled.o ++endif ++ +diff -urNp a/grsecurity/gracl.c b/grsecurity/gracl.c +--- a/grsecurity/gracl.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl.c 2009-05-24 18:10:25.255084545 -0700 +@@ -0,0 +1,3722 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/mount.h> ++#include <linux/tty.h> ++#include <linux/proc_fs.h> ++#include <linux/smp_lock.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/types.h> ++#include <linux/sysctl.h> ++#include <linux/netdevice.h> ++#include <linux/ptrace.h> ++#include <linux/gracl.h> ++#include <linux/gralloc.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++#include <linux/pid_namespace.h> ++#include <linux/fdtable.h> ++#include <linux/percpu.h> ++ ++#include <asm/uaccess.h> ++#include <asm/errno.h> 
++#include <asm/mman.h> ++ ++static struct acl_role_db acl_role_set; ++static struct name_db name_set; ++static struct inodev_db inodev_set; ++ ++/* for keeping track of userspace pointers used for subjects, so we ++ can share references in the kernel as well ++*/ ++ ++static struct dentry *real_root; ++static struct vfsmount *real_root_mnt; ++ ++static struct acl_subj_map_db subj_map_set; ++ ++static struct acl_role_label *default_role; ++ ++static u16 acl_sp_role_value; ++ ++extern char *gr_shared_page[4]; ++static DECLARE_MUTEX(gr_dev_sem); ++DEFINE_RWLOCK(gr_inode_lock); ++ ++struct gr_arg *gr_usermode; ++ ++static unsigned int gr_status = GR_STATUS_INIT; ++ ++extern int chkpw(struct gr_arg *entry, unsigned char *salt, unsigned char *sum); ++extern void gr_clear_learn_entries(void); ++ ++#ifdef CONFIG_GRKERNSEC_RESLOG ++extern void gr_log_resource(const struct task_struct *task, ++ const int res, const unsigned long wanted, const int gt); ++#endif ++ ++unsigned char *gr_system_salt; ++unsigned char *gr_system_sum; ++ ++static struct sprole_pw **acl_special_roles = NULL; ++static __u16 num_sprole_pws = 0; ++ ++static struct acl_role_label *kernel_role = NULL; ++ ++static unsigned int gr_auth_attempts = 0; ++static unsigned long gr_auth_expires = 0UL; ++ ++extern struct vfsmount *sock_mnt; ++extern struct vfsmount *pipe_mnt; ++extern struct vfsmount *shm_mnt; ++static struct acl_object_label *fakefs_obj; ++ ++extern int gr_init_uidset(void); ++extern void gr_free_uidset(void); ++extern void gr_remove_uid(uid_t uid); ++extern int gr_find_uid(uid_t uid); ++ /* returns non-zero when the GR_READY bit is set in gr_status */ ++__inline__ int ++gr_acl_is_enabled(void) ++{ ++ return (gr_status & GR_READY); ++} ++ /* maps the role type of current to a one-letter code (D, U, G or S); X when the masked type equals none of those single flags */ ++char gr_roletype_to_char(void) ++{ ++ switch (current->role->roletype & ++ (GR_ROLE_DEFAULT | GR_ROLE_USER | GR_ROLE_GROUP | ++ GR_ROLE_SPECIAL)) { ++ case GR_ROLE_DEFAULT: ++ return 'D'; ++ case GR_ROLE_USER: ++ return 'U'; ++ case GR_ROLE_GROUP: ++ return 'G'; ++ case GR_ROLE_SPECIAL: ++ return 'S'; ++ } ++ ++ return 'X';
++} ++ /* returns 1 when GR_READY is set and the role of current has GR_ROLE_TPE, otherwise 0 */ ++__inline__ int ++gr_acl_tpe_check(void) ++{ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ if (current->role->roletype & GR_ROLE_TPE) ++ return 1; ++ else ++ return 0; ++} ++ /* with CONFIG_GRKERNSEC_CHROOT_CAPS: returns 1 for a block-device inode when grsec_enable_chroot_caps is set and current is chrooted without CAP_SYS_RAWIO; otherwise 0 */ ++int ++gr_handle_rawio(const struct inode *inode) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS ++ if (inode && S_ISBLK(inode->i_mode) && ++ grsec_enable_chroot_caps && proc_is_chrooted(current) && ++ !capable(CAP_SYS_RAWIO)) ++ return 1; ++#endif ++ return 0; ++} ++ /* buffer equality for two lengths: unequal lengths return 0; otherwise compares unsigned long words first, then the leftover bytes; returns 1 on a full match */ ++static int ++gr_streq(const char *a, const char *b, const unsigned int lena, const unsigned int lenb) ++{ ++ int i; ++ unsigned long *l1; ++ unsigned long *l2; ++ unsigned char *c1; ++ unsigned char *c2; ++ int num_longs; ++ ++ if (likely(lena != lenb)) ++ return 0; ++ ++ l1 = (unsigned long *)a; ++ l2 = (unsigned long *)b; ++ ++ num_longs = lena / sizeof(unsigned long); ++ ++ for (i = num_longs; i--; l1++, l2++) { ++ if (unlikely(*l1 != *l2)) ++ return 0; ++ } ++ ++ c1 = (unsigned char *) l1; ++ c2 = (unsigned char *) l2; ++ ++ i = lena - (num_longs * sizeof(unsigned long)); ++ ++ for (; i--; c1++, c2++) { ++ if (unlikely(*c1 != *c2)) ++ return 0; ++ } ++ ++ return 1; ++} ++ /* builds the path of dentry/vfsmnt backwards from the end of buffer, stopping at root/rootmnt or at the global root; ERR_PTR(-ENAMETOOLONG) when buflen is too small */ ++static char * __our_d_path(struct dentry *dentry, struct vfsmount *vfsmnt, ++ struct dentry *root, struct vfsmount *rootmnt, ++ char *buffer, int buflen) ++{ ++ char * end = buffer+buflen; ++ char * retval; ++ int namelen; ++ ++ *--end = '\0'; ++ buflen--; ++ ++ if (buflen < 1) ++ goto Elong; ++ /* Get '/' right */ ++ retval = end-1; ++ *retval = '/'; ++ ++ for (;;) { ++ struct dentry * parent; ++ ++ if (dentry == root && vfsmnt == rootmnt) ++ break; ++ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { ++ /* Global root?
*/ ++ spin_lock(&vfsmount_lock); ++ if (vfsmnt->mnt_parent == vfsmnt) { ++ spin_unlock(&vfsmount_lock); ++ goto global_root; ++ } ++ dentry = vfsmnt->mnt_mountpoint; ++ vfsmnt = vfsmnt->mnt_parent; ++ spin_unlock(&vfsmount_lock); ++ continue; ++ } ++ parent = dentry->d_parent; ++ prefetch(parent); ++ namelen = dentry->d_name.len; ++ buflen -= namelen + 1; ++ if (buflen < 0) ++ goto Elong; ++ end -= namelen; ++ memcpy(end, dentry->d_name.name, namelen); ++ *--end = '/'; ++ retval = end; ++ dentry = parent; ++ } ++ ++ return retval; ++ ++global_root: ++ namelen = dentry->d_name.len; ++ buflen -= namelen; ++ if (buflen < 0) ++ goto Elong; ++ retval -= namelen-1; /* hit the slash */ ++ memcpy(retval, dentry->d_name.name, namelen); ++ return retval; ++Elong: ++ return ERR_PTR(-ENAMETOOLONG); ++} ++ /* wraps __our_d_path(): on error copies a fixed placeholder string into buf; a result whose second character is a slash followed by the terminator is cut to one character */ ++static char * ++gen_full_path(struct dentry *dentry, struct vfsmount *vfsmnt, ++ struct dentry *root, struct vfsmount *rootmnt, char *buf, int buflen) ++{ ++ char *retval; ++ ++ retval = __our_d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); ++ if (unlikely(IS_ERR(retval))) ++ retval = strcpy(buf, "<path too long>"); ++ else if (unlikely(retval[1] == '/' && retval[2] == '\0')) ++ retval[1] = '\0'; ++ ++ return retval; ++} ++ /* full path relative to real_root/real_root_mnt; takes no lock itself */ ++static char * ++__d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, ++ char *buf, int buflen) ++{ ++ char *res; ++ ++ /* we can use real_root, real_root_mnt, because this is only called ++ by the RBAC system */ ++ res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, real_root, real_root_mnt, buf, buflen); ++ ++ return res; ++} ++ /* full path relative to the fs root of the child reaper of the current pid namespace, generated under dcache_lock */ ++static char * ++d_real_path(const struct dentry *dentry, const struct vfsmount *vfsmnt, ++ char *buf, int buflen) ++{ ++ char *res; ++ struct dentry *root; ++ struct vfsmount *rootmnt; ++ struct task_struct *reaper = current->nsproxy->pid_ns->child_reaper; ++ ++ /* we can't use real_root, real_root_mnt, because they belong only to the RBAC system */ ++
read_lock(&reaper->fs->lock); ++ root = dget(reaper->fs->root.dentry); ++ rootmnt = mntget(reaper->fs->root.mnt); ++ read_unlock(&reaper->fs->lock); ++ ++ spin_lock(&dcache_lock); ++ res = gen_full_path((struct dentry *)dentry, (struct vfsmount *)vfsmnt, root, rootmnt, buf, buflen); ++ spin_unlock(&dcache_lock); ++ ++ dput(root); ++ mntput(rootmnt); ++ return res; ++} ++ /* path relative to real_root, written into the gr_shared_page[0] buffer of this CPU; takes dcache_lock around the walk */ ++static char * ++gr_to_filename_rbac(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ char *ret; ++ spin_lock(&dcache_lock); ++ ret = __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), ++ PAGE_SIZE); ++ spin_unlock(&dcache_lock); ++ return ret; ++} ++ /* same buffer and root as gr_to_filename_rbac() but does not take dcache_lock here */ ++char * ++gr_to_filename_nolock(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return __d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0],smp_processor_id()), ++ PAGE_SIZE); ++} ++ /* path via d_real_path() into the gr_shared_page[0] buffer of this CPU */ ++char * ++gr_to_filename(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), ++ PAGE_SIZE); ++} ++ /* as gr_to_filename() but uses gr_shared_page[1] */ ++char * ++gr_to_filename1(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[1], smp_processor_id()), ++ PAGE_SIZE); ++} ++ /* as gr_to_filename() but uses gr_shared_page[2] */ ++char * ++gr_to_filename2(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[2], smp_processor_id()), ++ PAGE_SIZE); ++} ++ /* as gr_to_filename() but uses gr_shared_page[3] */ ++char * ++gr_to_filename3(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[3], smp_processor_id()), ++ PAGE_SIZE); ++} ++ /* derives the audit flag mask that corresponds to a requested-mode mask */ ++__inline__ __u32 ++to_gr_audit(const __u32 reqmode) ++{ ++ /* masks off auditable permission flags, then shifts them to create ++ auditing flags, and adds the special case of append auditing if ++ we're requesting write */ ++ return (((reqmode & ~GR_AUDITS) << 10) | ((reqmode & GR_WRITE) ?
GR_AUDIT_APPEND : 0)); ++} ++ /* returns the kernel subject of the subj_map_set entry whose user pointer equals userp, or NULL when no entry matches */ ++struct acl_subject_label * ++lookup_subject_map(const struct acl_subject_label *userp) ++{ ++ unsigned int index = shash(userp, subj_map_set.s_size); ++ struct subject_map *match; ++ ++ match = subj_map_set.s_hash[index]; ++ ++ while (match && match->user != userp) ++ match = match->next; ++ ++ if (match != NULL) ++ return match->kernel; ++ else ++ return NULL; ++} ++ /* pushes subjmap onto the head of its subj_map_set hash chain */ ++static void ++insert_subj_map_entry(struct subject_map *subjmap) ++{ ++ unsigned int index = shash(subjmap->user, subj_map_set.s_size); ++ struct subject_map **curr; ++ ++ subjmap->prev = NULL; ++ ++ curr = &subj_map_set.s_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = subjmap; ++ ++ subjmap->next = *curr; ++ *curr = subjmap; ++ ++ return; ++} ++ /* picks a role for task: the user role for uid, else the group role for gid, else default_role; a role that lists allowed_ips is only used when curr_ip of the task matches one entry */ ++static struct acl_role_label * ++lookup_acl_role_label(const struct task_struct *task, const uid_t uid, ++ const gid_t gid) ++{ ++ unsigned int index = rhash(uid, GR_ROLE_USER, acl_role_set.r_size); ++ struct acl_role_label *match; ++ struct role_allowed_ip *ipp; ++ unsigned int x; ++ ++ match = acl_role_set.r_hash[index]; ++ ++ while (match) { ++ if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_USER)) == (GR_ROLE_DOMAIN | GR_ROLE_USER)) { ++ for (x = 0; x < match->domain_child_num; x++) { ++ if (match->domain_children[x] == uid) ++ goto found; ++ } ++ } else if (match->uidgid == uid && match->roletype & GR_ROLE_USER) ++ break; ++ match = match->next; ++ } ++found: ++ if (match == NULL) { ++ try_group: ++ index = rhash(gid, GR_ROLE_GROUP, acl_role_set.r_size); ++ match = acl_role_set.r_hash[index]; ++ ++ while (match) { ++ if ((match->roletype & (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) == (GR_ROLE_DOMAIN | GR_ROLE_GROUP)) { ++ for (x = 0; x < match->domain_child_num; x++) { ++ if (match->domain_children[x] == gid) ++ goto found2; ++ } ++ } else if (match->uidgid == gid && match->roletype & GR_ROLE_GROUP) ++ break; ++ match = match->next; ++ } ++found2: ++ if (match == NULL) ++ match = default_role; ++ if (match->allowed_ips
== NULL) ++ return match; ++ else { ++ for (ipp = match->allowed_ips; ipp; ipp = ipp->next) { ++ if (likely ++ ((ntohl(task->signal->curr_ip) & ipp->netmask) == ++ (ntohl(ipp->addr) & ipp->netmask))) ++ return match; ++ } ++ match = default_role; ++ } ++ } else if (match->allowed_ips == NULL) { ++ return match; ++ } else { ++ for (ipp = match->allowed_ips; ipp; ipp = ipp->next) { ++ if (likely ++ ((ntohl(task->signal->curr_ip) & ipp->netmask) == ++ (ntohl(ipp->addr) & ipp->netmask))) ++ return match; ++ } ++ goto try_group; ++ } ++ ++ return match; ++} ++ /* finds the subject for ino/dev in the subject hash of role, skipping entries flagged GR_DELETED; NULL when none */ ++struct acl_subject_label * ++lookup_acl_subj_label(const ino_t ino, const dev_t dev, ++ const struct acl_role_label *role) ++{ ++ unsigned int index = fhash(ino, dev, role->subj_hash_size); ++ struct acl_subject_label *match; ++ ++ match = role->subj_hash[index]; ++ ++ while (match && (match->inode != ino || match->device != dev || ++ (match->mode & GR_DELETED))) { ++ match = match->next; ++ } ++ ++ if (match && !(match->mode & GR_DELETED)) ++ return match; ++ else ++ return NULL; ++} ++ /* finds the object for ino/dev in the object hash of subj, skipping entries flagged GR_DELETED; NULL when none */ ++static struct acl_object_label * ++lookup_acl_obj_label(const ino_t ino, const dev_t dev, ++ const struct acl_subject_label *subj) ++{ ++ unsigned int index = fhash(ino, dev, subj->obj_hash_size); ++ struct acl_object_label *match; ++ ++ match = subj->obj_hash[index]; ++ ++ while (match && (match->inode != ino || match->device != dev || ++ (match->mode & GR_DELETED))) { ++ match = match->next; ++ } ++ ++ if (match && !(match->mode & GR_DELETED)) ++ return match; ++ else ++ return NULL; ++} ++ /* like lookup_acl_obj_label() but first looks for an entry flagged GR_DELETED and only then for a live one */ ++static struct acl_object_label * ++lookup_acl_obj_label_create(const ino_t ino, const dev_t dev, ++ const struct acl_subject_label *subj) ++{ ++ unsigned int index = fhash(ino, dev, subj->obj_hash_size); ++ struct acl_object_label *match; ++ ++ match = subj->obj_hash[index]; ++ ++ while (match && (match->inode != ino || match->device != dev || ++ !(match->mode & GR_DELETED))) { ++ match = match->next; ++ } ++ ++ if (match &&
(match->mode & GR_DELETED)) ++ return match; ++ ++ match = subj->obj_hash[index]; ++ ++ while (match && (match->inode != ino || match->device != dev || ++ (match->mode & GR_DELETED))) { ++ match = match->next; ++ } ++ ++ if (match && !(match->mode & GR_DELETED)) ++ return match; ++ else ++ return NULL; ++} ++ /* finds the name_set entry whose hash key and name both match name, whether or not it is flagged deleted; NULL when absent */ ++static struct name_entry * ++lookup_name_entry(const char *name) ++{ ++ unsigned int len = strlen(name); ++ unsigned int key = full_name_hash(name, len); ++ unsigned int index = key % name_set.n_size; ++ struct name_entry *match; ++ ++ match = name_set.n_hash[index]; ++ ++ while (match && (match->key != key || !gr_streq(match->name, name, match->len, len))) ++ match = match->next; ++ ++ return match; ++} ++ /* finds the entry for name, first among entries flagged deleted and only then among live ones; NULL when absent */ ++static struct name_entry * ++lookup_name_entry_create(const char *name) ++{ ++ unsigned int len = strlen(name); ++ unsigned int key = full_name_hash(name, len); ++ unsigned int index = key % name_set.n_size; ++ struct name_entry *match; ++ ++ match = name_set.n_hash[index]; ++ ++ while (match && (match->key != key || !gr_streq(match->name, name, match->len, len) || ++ !match->deleted)) ++ match = match->next; ++ ++ if (match && match->deleted) ++ return match; ++ ++ match = name_set.n_hash[index]; ++ ++ while (match && (match->key != key || !gr_streq(match->name, name, match->len, len) || ++ match->deleted)) ++ match = match->next; ++ ++ if (match && !match->deleted) ++ return match; ++ else ++ return NULL; ++} ++ /* finds the inodev_set entry whose name entry carries ino/dev; NULL when absent */ ++static struct inodev_entry * ++lookup_inodev_entry(const ino_t ino, const dev_t dev) ++{ ++ unsigned int index = fhash(ino, dev, inodev_set.i_size); ++ struct inodev_entry *match; ++ ++ match = inodev_set.i_hash[index]; ++ ++ while (match && (match->nentry->inode != ino || match->nentry->device != dev)) ++ match = match->next; ++ ++ return match; ++} ++ /* pushes entry onto the head of its inodev_set hash chain */ ++static void ++insert_inodev_entry(struct inodev_entry *entry) ++{ ++ unsigned int index = fhash(entry->nentry->inode, entry->nentry->device, ++ inodev_set.i_size); ++ struct inodev_entry
**curr; ++ ++ entry->prev = NULL; ++ ++ curr = &inodev_set.i_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = entry; ++ ++ entry->next = *curr; ++ *curr = entry; ++ ++ return; ++} ++ /* pushes role onto the head of the acl_role_set chain hashed from uidgid and the user/group type bits of the role */ ++static void ++__insert_acl_role_label(struct acl_role_label *role, uid_t uidgid) ++{ ++ unsigned int index = ++ rhash(uidgid, role->roletype & (GR_ROLE_USER | GR_ROLE_GROUP), acl_role_set.r_size); ++ struct acl_role_label **curr; ++ ++ role->prev = NULL; ++ ++ curr = &acl_role_set.r_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = role; ++ ++ role->next = *curr; ++ *curr = role; ++ ++ return; ++} ++ /* inserts role once per domain child for GR_ROLE_DOMAIN roles, otherwise once under role->uidgid; NOTE(review): a domain role is linked several times through the same next/prev fields - confirm the chains stay consistent */ ++static void ++insert_acl_role_label(struct acl_role_label *role) ++{ ++ int i; ++ ++ if (role->roletype & GR_ROLE_DOMAIN) { ++ for (i = 0; i < role->domain_child_num; i++) ++ __insert_acl_role_label(role, role->domain_children[i]); ++ } else ++ __insert_acl_role_label(role, role->uidgid); ++} ++ /* records name with inode/device in name_set and inodev_set unless an entry with the same name exists; returns 0 only when acl_alloc() fails, 1 otherwise */ ++static int ++insert_name_entry(char *name, const ino_t inode, const dev_t device, __u8 deleted) ++{ ++ struct name_entry **curr, *nentry; ++ struct inodev_entry *ientry; ++ unsigned int len = strlen(name); ++ unsigned int key = full_name_hash(name, len); ++ unsigned int index = key % name_set.n_size; ++ ++ curr = &name_set.n_hash[index]; ++ ++ while (*curr && ((*curr)->key != key || !gr_streq((*curr)->name, name, (*curr)->len, len))) ++ curr = &((*curr)->next); ++ ++ if (*curr != NULL) ++ return 1; ++ ++ nentry = acl_alloc(sizeof (struct name_entry)); ++ if (nentry == NULL) ++ return 0; ++ ientry = acl_alloc(sizeof (struct inodev_entry)); ++ if (ientry == NULL) ++ return 0; ++ ientry->nentry = nentry; ++ ++ nentry->key = key; ++ nentry->name = name; ++ nentry->inode = inode; ++ nentry->device = device; ++ nentry->len = len; ++ nentry->deleted = deleted; ++ ++ nentry->prev = NULL; ++ curr = &name_set.n_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = nentry; ++ nentry->next = *curr; ++ *curr = nentry; ++ ++ /* insert us into the table searchable by inode/dev */ ++
insert_inodev_entry(ientry); ++ ++ return 1; ++} ++ ++/* Link obj at the head of the subject's object hash chain chosen by fhash() of its inode/device. */ ++static void ++insert_acl_obj_label(struct acl_object_label *obj, ++ struct acl_subject_label *subj) ++{ ++ unsigned int index = ++ fhash(obj->inode, obj->device, subj->obj_hash_size); ++ struct acl_object_label **curr; ++ ++ ++ obj->prev = NULL; ++ ++ curr = &subj->obj_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = obj; ++ ++ obj->next = *curr; ++ *curr = obj; ++ ++ return; ++} ++ ++/* Link the subject obj at the head of the role's subject hash chain chosen by fhash() of its inode/device. */ ++static void ++insert_acl_subj_label(struct acl_subject_label *obj, ++ struct acl_role_label *role) ++{ ++ unsigned int index = fhash(obj->inode, obj->device, role->subj_hash_size); ++ struct acl_subject_label **curr; ++ ++ obj->prev = NULL; ++ ++ curr = &role->subj_hash[index]; ++ if (*curr != NULL) ++ (*curr)->prev = obj; ++ ++ obj->next = *curr; ++ *curr = obj; ++ ++ return; ++} ++ ++/* allocating chained hash tables, so optimal size is where lambda ~ 1 */ ++ ++/* Picks the first entry of table_sizes that is strictly greater than *len, allocates that many elementsize-byte slots (kmalloc up to PAGE_SIZE, vmalloc beyond), stores the chosen size back into *len and returns the table. Returns NULL with *len untouched when no listed size exceeds *len or when the byte count would not fit in an unsigned int; returns NULL with *len already updated when the allocation itself fails. The table is not zeroed. */ ++static void * ++create_table(__u32 * len, int elementsize) ++{ ++ unsigned int table_sizes[] = { ++ 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, ++ 32749, 65521, 131071, 262139, 524287, 1048573, 2097143, ++ 4194301, 8388593, 16777213, 33554393, 67108859, 134217689, ++ 268435399, 536870909, 1073741789, 2147483647 ++ }; ++ void *newtable = NULL; ++ unsigned int pwr = 0; ++ ++ while ((pwr < ((sizeof (table_sizes) / sizeof (table_sizes[0])) - 1)) && ++ table_sizes[pwr] <= *len) ++ pwr++; ++ ++ if (table_sizes[pwr] <= *len) ++ return newtable; ++ ++ /* the byte counts below are computed in unsigned int; refuse sizes whose product would wrap, since callers memset the full table */ ++ if (elementsize <= 0 || table_sizes[pwr] > ~0U / (unsigned int) elementsize) ++ return newtable; ++ ++ if ((table_sizes[pwr] * elementsize) <= PAGE_SIZE) ++ newtable = ++ kmalloc(table_sizes[pwr] * elementsize, GFP_KERNEL); ++ else ++ newtable = vmalloc(table_sizes[pwr] * elementsize); ++ ++ *len = table_sizes[pwr]; ++ ++ return newtable; ++} ++ ++/* Size and allocate the global tables from the counts in arg->role_db; returns 0 on success, 1 on any failure. */ ++static int ++init_variables(const struct gr_arg *arg) ++{ ++ struct task_struct *reaper = current->nsproxy->pid_ns->child_reaper; ++ unsigned int stacksize; ++ ++ subj_map_set.s_size = arg->role_db.num_subjects; ++ acl_role_set.r_size = arg->role_db.num_roles + 
arg->role_db.num_domain_children; ++ name_set.n_size = arg->role_db.num_objects; ++ inodev_set.i_size = arg->role_db.num_objects; ++ /* a policy with any of these counts at zero is rejected */ ++ if (!subj_map_set.s_size || !acl_role_set.r_size || ++ !name_set.n_size || !inodev_set.i_size) ++ return 1; ++ ++ if (!gr_init_uidset()) ++ return 1; ++ ++ /* set up the stack that holds allocation info */ ++ ++ stacksize = arg->role_db.num_pointers + 5; ++ ++ if (!acl_alloc_stack_init(stacksize)) ++ return 1; ++ ++ /* grab reference for the real root dentry and vfsmount */ ++ read_lock(&reaper->fs->lock); ++ real_root_mnt = mntget(reaper->fs->root.mnt); ++ real_root = dget(reaper->fs->root.dentry); ++ read_unlock(&reaper->fs->lock); ++ ++ fakefs_obj = acl_alloc(sizeof(struct acl_object_label)); ++ if (fakefs_obj == NULL) ++ return 1; ++ fakefs_obj->mode = GR_FIND | GR_READ | GR_WRITE | GR_EXEC; ++ /* create_table() writes the size it actually chose back through its first argument */ ++ subj_map_set.s_hash = ++ (struct subject_map **) create_table(&subj_map_set.s_size, sizeof(void *)); ++ acl_role_set.r_hash = ++ (struct acl_role_label **) create_table(&acl_role_set.r_size, sizeof(void *)); ++ name_set.n_hash = (struct name_entry **) create_table(&name_set.n_size, sizeof(void *)); ++ inodev_set.i_hash = ++ (struct inodev_entry **) create_table(&inodev_set.i_size, sizeof(void *)); ++ /* NOTE(review): on this return the tables that did get allocated have not been zeroed yet (the memset calls follow) - confirm no cleanup path walks them */ ++ if (!subj_map_set.s_hash || !acl_role_set.r_hash || ++ !name_set.n_hash || !inodev_set.i_hash) ++ return 1; ++ ++ memset(subj_map_set.s_hash, 0, ++ sizeof(struct subject_map *) * subj_map_set.s_size); ++ memset(acl_role_set.r_hash, 0, ++ sizeof (struct acl_role_label *) * acl_role_set.r_size); ++ memset(name_set.n_hash, 0, ++ sizeof (struct name_entry *) * name_set.n_size); ++ memset(inodev_set.i_hash, 0, ++ sizeof (struct inodev_entry *) * inodev_set.i_size); ++ ++ return 0; ++} ++ ++/* free information not needed after startup ++ currently contains user->kernel pointer mappings for subjects ++*/ ++ /* kfree()s every subject_map entry, then releases the table with kfree or vfree depending on whether its byte size is at most PAGE_SIZE; s_hash itself is not reset to NULL here. */ ++static void ++free_init_variables(void) ++{ ++ __u32 i; ++ ++ if (subj_map_set.s_hash) { ++ for (i = 0; i < subj_map_set.s_size; i++) { 
++ if (subj_map_set.s_hash[i]) { ++ kfree(subj_map_set.s_hash[i]); ++ subj_map_set.s_hash[i] = NULL; ++ } ++ } ++ ++ if ((subj_map_set.s_size * sizeof (struct subject_map *)) <= ++ PAGE_SIZE) ++ kfree(subj_map_set.s_hash); ++ else ++ vfree(subj_map_set.s_hash); ++ } ++ ++ return; ++} ++ /* Tear down the loaded policy: clear the role/subject pointers of every task, drop the root dentry and vfsmount references, free the per-subject and per-role hash tables, call acl_free_all(), free the role, name and inodev tables and zero the table descriptors. NOTE(review): the descriptor of the subject map is zeroed as well but its table is not freed here, so free_init_variables() has to run first - confirm every caller does that. */ ++static void ++free_variables(void) ++{ ++ struct acl_subject_label *s; ++ struct acl_role_label *r; ++ struct task_struct *task, *task2; ++ unsigned int i, x; ++ ++ gr_clear_learn_entries(); ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(task2, task) { ++ task->acl_sp_role = 0; ++ task->acl_role_id = 0; ++ task->acl = NULL; ++ task->role = NULL; ++ } while_each_thread(task2, task); ++ read_unlock(&tasklist_lock); ++ ++ /* release the reference to the real root dentry and vfsmount */ ++ if (real_root) ++ dput(real_root); ++ real_root = NULL; ++ if (real_root_mnt) ++ mntput(real_root_mnt); ++ real_root_mnt = NULL; ++ ++ /* free all object hash tables */ ++ /* kfree versus vfree is chosen by comparing the table's byte size with PAGE_SIZE */ ++ FOR_EACH_ROLE_START(r, i) ++ if (r->subj_hash == NULL) ++ break; ++ FOR_EACH_SUBJECT_START(r, s, x) ++ if (s->obj_hash == NULL) ++ break; ++ if ((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) ++ kfree(s->obj_hash); ++ else ++ vfree(s->obj_hash); ++ FOR_EACH_SUBJECT_END(s, x) ++ FOR_EACH_NESTED_SUBJECT_START(r, s) ++ if (s->obj_hash == NULL) ++ break; ++ if ((s->obj_hash_size * sizeof (struct acl_object_label *)) <= PAGE_SIZE) ++ kfree(s->obj_hash); ++ else ++ vfree(s->obj_hash); ++ FOR_EACH_NESTED_SUBJECT_END(s) ++ if ((r->subj_hash_size * sizeof (struct acl_subject_label *)) <= PAGE_SIZE) ++ kfree(r->subj_hash); ++ else ++ vfree(r->subj_hash); ++ r->subj_hash = NULL; ++ FOR_EACH_ROLE_END(r,i) ++ ++ acl_free_all(); ++ ++ if (acl_role_set.r_hash) { ++ if ((acl_role_set.r_size * sizeof (struct acl_role_label *)) <= ++ PAGE_SIZE) ++ kfree(acl_role_set.r_hash); ++ else ++ vfree(acl_role_set.r_hash); ++ } ++ if (name_set.n_hash) { ++ if ((name_set.n_size * sizeof (struct name_entry *)) <= ++ PAGE_SIZE) ++ 
kfree(name_set.n_hash); ++ else ++ vfree(name_set.n_hash); ++ } ++ ++ if (inodev_set.i_hash) { ++ if ((inodev_set.i_size * sizeof (struct inodev_entry *)) <= ++ PAGE_SIZE) ++ kfree(inodev_set.i_hash); ++ else ++ vfree(inodev_set.i_hash); ++ } ++ ++ gr_free_uidset(); ++ ++ memset(&name_set, 0, sizeof (struct name_db)); ++ memset(&inodev_set, 0, sizeof (struct inodev_db)); ++ memset(&acl_role_set, 0, sizeof (struct acl_role_db)); ++ memset(&subj_map_set, 0, sizeof (struct acl_subj_map_db)); ++ ++ default_role = NULL; ++ ++ return; ++} ++ ++/* Count the objects on a userspace list by following prev pointers until NULL or until copy_from_user() fails. */ ++static __u32 ++count_user_objs(struct acl_object_label *userp) ++{ ++ struct acl_object_label o_tmp; ++ __u32 num = 0; ++ ++ while (userp) { ++ if (copy_from_user(&o_tmp, userp, ++ sizeof (struct acl_object_label))) ++ break; ++ ++ userp = o_tmp.prev; ++ num++; ++ } ++ ++ return num; ++} ++ ++static struct acl_subject_label * ++do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role); ++ ++/* Replace each node of obj->globbed (a userspace list linked through next) with a kernel copy, including a kernel copy of its filename. Returns 0 or a negative errno. */ ++static int ++copy_user_glob(struct acl_object_label *obj) ++{ ++ struct acl_object_label *g_tmp, **guser; ++ unsigned int len; ++ char *tmp; ++ ++ if (obj->globbed == NULL) ++ return 0; ++ ++ guser = &obj->globbed; ++ while (*guser) { ++ g_tmp = (struct acl_object_label *) ++ acl_alloc(sizeof (struct acl_object_label)); ++ if (g_tmp == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(g_tmp, *guser, ++ sizeof (struct acl_object_label))) ++ return -EFAULT; ++ ++ len = strnlen_user(g_tmp->filename, PATH_MAX); ++ ++ if (!len || len >= PATH_MAX) ++ return -EINVAL; ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmp, g_tmp->filename, len)) ++ return -EFAULT; ++ ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination so later string walks stay inside the buffer */ ++ tmp[len - 1] = '\0'; ++ g_tmp->filename = tmp; ++ ++ *guser = g_tmp; ++ guser = &(g_tmp->next); ++ } ++ ++ return 0; ++} ++ ++/* Copy every object on the userspace list starting at userp (walked through prev) into subj. Returns 0 or a negative errno. */ ++static int ++copy_user_objs(struct acl_object_label *userp, struct acl_subject_label *subj, ++ struct acl_role_label *role) ++{ ++ struct acl_object_label *o_tmp; ++ unsigned int len; ++ int ret; ++ char *tmp; ++ ++ while 
(userp) { ++ if ((o_tmp = (struct acl_object_label *) ++ acl_alloc(sizeof (struct acl_object_label))) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(o_tmp, userp, ++ sizeof (struct acl_object_label))) ++ return -EFAULT; ++ ++ userp = o_tmp->prev; ++ ++ len = strnlen_user(o_tmp->filename, PATH_MAX); ++ ++ if (!len || len >= PATH_MAX) ++ return -EINVAL; ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmp, o_tmp->filename, len)) ++ return -EFAULT; ++ ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination before insert_name_entry() runs strlen() on it */ ++ tmp[len - 1] = '\0'; ++ o_tmp->filename = tmp; ++ ++ insert_acl_obj_label(o_tmp, subj); ++ if (!insert_name_entry(o_tmp->filename, o_tmp->inode, ++ o_tmp->device, (o_tmp->mode & GR_DELETED) ? 1 : 0)) ++ return -ENOMEM; ++ ++ ret = copy_user_glob(o_tmp); ++ if (ret) ++ return ret; ++ ++ if (o_tmp->nested) { ++ o_tmp->nested = do_copy_user_subj(o_tmp->nested, role); ++ if (IS_ERR(o_tmp->nested)) ++ return PTR_ERR(o_tmp->nested); ++ ++ /* insert into nested subject list */ ++ o_tmp->nested->next = role->hash->first; ++ role->hash->first = o_tmp->nested; ++ } ++ } ++ ++ return 0; ++} ++ ++/* Count the subjects on a userspace list walked through prev, skipping those flagged GR_NESTED; stops early if copy_from_user() fails. */ ++static __u32 ++count_user_subjs(struct acl_subject_label *userp) ++{ ++ struct acl_subject_label s_tmp; ++ __u32 num = 0; ++ ++ while (userp) { ++ if (copy_from_user(&s_tmp, userp, ++ sizeof (struct acl_subject_label))) ++ break; ++ ++ userp = s_tmp.prev; ++ /* do not count nested subjects against this count, since ++ they are not included in the hash table, but are ++ attached to objects. 
We have already counted ++ the subjects in userspace for the allocation ++ stack ++ */ ++ if (!(s_tmp.mode & GR_NESTED)) ++ num++; ++ } ++ ++ return num; ++} ++ ++/* Rebuild rolep->allowed_ips as a kernel list: the userspace list is walked through prev and the copies are chained in that order through next/prev. Returns 0 or a negative errno. */ ++static int ++copy_user_allowedips(struct acl_role_label *rolep) ++{ ++ struct role_allowed_ip *ruserip, *rtmp = NULL, *rlast; ++ ++ ruserip = rolep->allowed_ips; ++ ++ while (ruserip) { ++ rlast = rtmp; ++ ++ if ((rtmp = (struct role_allowed_ip *) ++ acl_alloc(sizeof (struct role_allowed_ip))) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(rtmp, ruserip, ++ sizeof (struct role_allowed_ip))) ++ return -EFAULT; ++ ++ ruserip = rtmp->prev; ++ ++ if (!rlast) { ++ rtmp->prev = NULL; ++ rolep->allowed_ips = rtmp; ++ } else { ++ rlast->next = rtmp; ++ rtmp->prev = rlast; ++ } ++ ++ if (!ruserip) ++ rtmp->next = NULL; ++ } ++ ++ return 0; ++} ++ ++/* Rebuild rolep->transitions as a kernel list in the same way, also copying each target role name into kernel memory. Returns 0 or a negative errno. */ ++static int ++copy_user_transitions(struct acl_role_label *rolep) ++{ ++ struct role_transition *rusertp, *rtmp = NULL, *rlast; ++ ++ unsigned int len; ++ char *tmp; ++ ++ rusertp = rolep->transitions; ++ ++ while (rusertp) { ++ rlast = rtmp; ++ ++ if ((rtmp = (struct role_transition *) ++ acl_alloc(sizeof (struct role_transition))) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(rtmp, rusertp, ++ sizeof (struct role_transition))) ++ return -EFAULT; ++ ++ rusertp = rtmp->prev; ++ ++ len = strnlen_user(rtmp->rolename, GR_SPROLE_LEN); ++ ++ if (!len || len >= GR_SPROLE_LEN) ++ return -EINVAL; ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmp, rtmp->rolename, len)) ++ return -EFAULT; ++ ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination */ ++ tmp[len - 1] = '\0'; ++ rtmp->rolename = tmp; ++ ++ if (!rlast) { ++ rtmp->prev = NULL; ++ rolep->transitions = rtmp; ++ } else { ++ rlast->next = rtmp; ++ rtmp->prev = rlast; ++ } ++ ++ if (!rusertp) ++ rtmp->next = NULL; ++ } ++ ++ return 0; ++} ++ ++/* Copy one subject from userspace and return the kernel copy or an ERR_PTR; a subject already recorded in the subject map is returned as is. */ ++static struct acl_subject_label * ++do_copy_user_subj(struct acl_subject_label *userp, struct acl_role_label *role) ++{ ++ struct acl_subject_label *s_tmp = NULL, *s_tmp2; ++ unsigned int len; ++ char 
*tmp; ++ __u32 num_objs; ++ struct acl_ip_label **i_tmp, *i_utmp2; ++ struct gr_hash_struct ghash; ++ struct subject_map *subjmap; ++ unsigned int i_num; ++ int err; ++ ++ s_tmp = lookup_subject_map(userp); ++ ++ /* we've already copied this subject into the kernel, just return ++ the reference to it, and don't copy it over again ++ */ ++ if (s_tmp) ++ return(s_tmp); ++ ++ if ((s_tmp = (struct acl_subject_label *) ++ acl_alloc(sizeof (struct acl_subject_label))) == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ subjmap = (struct subject_map *)kmalloc(sizeof (struct subject_map), GFP_KERNEL); ++ if (subjmap == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ /* record the user->kernel mapping before copying, so a later reference to userp resolves to s_tmp */ ++ subjmap->user = userp; ++ subjmap->kernel = s_tmp; ++ insert_subj_map_entry(subjmap); ++ ++ if (copy_from_user(s_tmp, userp, ++ sizeof (struct acl_subject_label))) ++ return ERR_PTR(-EFAULT); ++ ++ len = strnlen_user(s_tmp->filename, PATH_MAX); ++ ++ if (!len || len >= PATH_MAX) ++ return ERR_PTR(-EINVAL); ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ if (copy_from_user(tmp, s_tmp->filename, len)) ++ return ERR_PTR(-EFAULT); ++ ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination before strcmp() below reads it */ ++ tmp[len - 1] = '\0'; ++ s_tmp->filename = tmp; ++ ++ if (!strcmp(s_tmp->filename, "/")) ++ role->root_label = s_tmp; ++ ++ if (copy_from_user(&ghash, s_tmp->hash, sizeof(struct gr_hash_struct))) ++ return ERR_PTR(-EFAULT); ++ ++ /* copy user and group transition tables */ ++ ++ if (s_tmp->user_trans_num) { ++ uid_t *uidlist; ++ ++ uidlist = (uid_t *)acl_alloc(s_tmp->user_trans_num * sizeof(uid_t)); ++ if (uidlist == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (copy_from_user(uidlist, s_tmp->user_transitions, s_tmp->user_trans_num * sizeof(uid_t))) ++ return ERR_PTR(-EFAULT); ++ ++ s_tmp->user_transitions = uidlist; ++ } ++ ++ if (s_tmp->group_trans_num) { ++ gid_t *gidlist; ++ ++ gidlist = (gid_t *)acl_alloc(s_tmp->group_trans_num * sizeof(gid_t)); ++ if (gidlist == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (copy_from_user(gidlist, s_tmp->group_transitions, s_tmp->group_trans_num * 
sizeof(gid_t))) ++ return ERR_PTR(-EFAULT); ++ ++ s_tmp->group_transitions = gidlist; ++ } ++ ++ /* set up object hash table */ ++ num_objs = count_user_objs(ghash.first); ++ ++ s_tmp->obj_hash_size = num_objs; ++ s_tmp->obj_hash = ++ (struct acl_object_label **) ++ create_table(&(s_tmp->obj_hash_size), sizeof(void *)); ++ ++ if (!s_tmp->obj_hash) ++ return ERR_PTR(-ENOMEM); ++ ++ memset(s_tmp->obj_hash, 0, ++ s_tmp->obj_hash_size * ++ sizeof (struct acl_object_label *)); ++ ++ /* add in objects */ ++ err = copy_user_objs(ghash.first, s_tmp, role); ++ ++ if (err) ++ return ERR_PTR(err); ++ ++ /* set pointer for parent subject */ ++ if (s_tmp->parent_subject) { ++ s_tmp2 = do_copy_user_subj(s_tmp->parent_subject, role); ++ ++ if (IS_ERR(s_tmp2)) ++ return s_tmp2; ++ ++ s_tmp->parent_subject = s_tmp2; ++ } ++ ++ /* add in ip acls */ ++ ++ if (!s_tmp->ip_num) { ++ s_tmp->ips = NULL; ++ goto insert; ++ } ++ ++ i_tmp = ++ (struct acl_ip_label **) acl_alloc(s_tmp->ip_num * ++ sizeof (struct ++ acl_ip_label *)); ++ ++ if (!i_tmp) ++ return ERR_PTR(-ENOMEM); ++ ++ /* s_tmp->ips still points at the userspace array of pointers; each one is fetched, then the label it points to is copied */ ++ for (i_num = 0; i_num < s_tmp->ip_num; i_num++) { ++ *(i_tmp + i_num) = ++ (struct acl_ip_label *) ++ acl_alloc(sizeof (struct acl_ip_label)); ++ if (!*(i_tmp + i_num)) ++ return ERR_PTR(-ENOMEM); ++ ++ if (copy_from_user ++ (&i_utmp2, s_tmp->ips + i_num, ++ sizeof (struct acl_ip_label *))) ++ return ERR_PTR(-EFAULT); ++ ++ if (copy_from_user ++ (*(i_tmp + i_num), i_utmp2, ++ sizeof (struct acl_ip_label))) ++ return ERR_PTR(-EFAULT); ++ ++ if ((*(i_tmp + i_num))->iface == NULL) ++ continue; ++ ++ len = strnlen_user((*(i_tmp + i_num))->iface, IFNAMSIZ); ++ if (!len || len >= IFNAMSIZ) ++ return ERR_PTR(-EINVAL); ++ tmp = acl_alloc(len); ++ if (tmp == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (copy_from_user(tmp, (*(i_tmp + i_num))->iface, len)) ++ return ERR_PTR(-EFAULT); ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination */ ++ tmp[len - 1] = '\0'; ++ (*(i_tmp + i_num))->iface = tmp; ++ } ++ ++ s_tmp->ips = i_tmp; ++ ++insert: ++ if (!insert_name_entry(s_tmp->filename, s_tmp->inode, ++ 
s_tmp->device, (s_tmp->mode & GR_DELETED) ? 1 : 0)) ++ return ERR_PTR(-ENOMEM); ++ ++ return s_tmp; ++} ++ /* Copy every subject on the userspace list (walked through prev) that is not flagged GR_NESTED and hash it into role. Returns 0 or a negative errno. */ ++static int ++copy_user_subjs(struct acl_subject_label *userp, struct acl_role_label *role) ++{ ++ struct acl_subject_label s_pre; ++ struct acl_subject_label * ret; ++ int err; ++ ++ while (userp) { ++ if (copy_from_user(&s_pre, userp, ++ sizeof (struct acl_subject_label))) ++ return -EFAULT; ++ ++ /* do not add nested subjects here, add ++ while parsing objects ++ */ ++ ++ if (s_pre.mode & GR_NESTED) { ++ userp = s_pre.prev; ++ continue; ++ } ++ ++ ret = do_copy_user_subj(userp, role); ++ ++ err = PTR_ERR(ret); ++ if (IS_ERR(ret)) ++ return err; ++ ++ insert_acl_subj_label(ret, role); ++ ++ userp = s_pre.prev; ++ } ++ ++ return 0; ++} ++ /* Copy the special-role password table and then every role, with its subjects, from the gr_arg supplied by userspace. Returns 0 or a negative errno. */ ++static int ++copy_user_acl(struct gr_arg *arg) ++{ ++ struct acl_role_label *r_tmp = NULL, **r_utmp, *r_utmp2; ++ struct sprole_pw *sptmp; ++ struct gr_hash_struct *ghash; ++ uid_t *domainlist; ++ unsigned int r_num; ++ unsigned int len; ++ char *tmp; ++ int err = 0; ++ __u16 i; ++ __u32 num_subjs; ++ ++ /* we need a default and kernel role */ ++ if (arg->role_db.num_roles < 2) ++ return -EINVAL; ++ ++ /* copy special role authentication info from userspace */ ++ ++ num_sprole_pws = arg->num_sprole_pws; ++ acl_special_roles = (struct sprole_pw **) acl_alloc(num_sprole_pws * sizeof(struct sprole_pw *)); ++ ++ if (!acl_special_roles) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ /* NOTE(review): the loop counter is __u16 - confirm num_sprole_pws can never exceed 65535, otherwise this loop would not terminate */ ++ for (i = 0; i < num_sprole_pws; i++) { ++ sptmp = (struct sprole_pw *) acl_alloc(sizeof(struct sprole_pw)); ++ if (!sptmp) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ if (copy_from_user(sptmp, arg->sprole_pws + i, ++ sizeof (struct sprole_pw))) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ ++ len = ++ strnlen_user(sptmp->rolename, GR_SPROLE_LEN); ++ ++ if (!len || len >= GR_SPROLE_LEN) { ++ err = -EINVAL; ++ goto cleanup; ++ } ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ ++ if (copy_from_user(tmp, 
sptmp->rolename, len)) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ ++ /* userspace may have rewritten the string after strnlen_user() measured it; force termination before it is printed or compared */ ++ tmp[len - 1] = '\0'; ++ ++#ifdef CONFIG_GRKERNSEC_ACL_DEBUG ++ printk(KERN_ALERT "Copying special role %s\n", tmp); ++#endif ++ sptmp->rolename = tmp; ++ acl_special_roles[i] = sptmp; ++ } ++ ++ /* r_table is a userspace array of pointers to userspace role structures */ ++ r_utmp = (struct acl_role_label **) arg->role_db.r_table; ++ ++ for (r_num = 0; r_num < arg->role_db.num_roles; r_num++) { ++ r_tmp = acl_alloc(sizeof (struct acl_role_label)); ++ ++ if (!r_tmp) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ ++ if (copy_from_user(&r_utmp2, r_utmp + r_num, ++ sizeof (struct acl_role_label *))) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ ++ if (copy_from_user(r_tmp, r_utmp2, ++ sizeof (struct acl_role_label))) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ ++ len = strnlen_user(r_tmp->rolename, GR_SPROLE_LEN); ++ ++ /* the scan above is capped at GR_SPROLE_LEN, so a larger result means no terminator was found; the old PATH_MAX comparison let such names through unterminated */ ++ if (!len || len > GR_SPROLE_LEN) { ++ err = -EINVAL; ++ goto cleanup; ++ } ++ ++ if ((tmp = (char *) acl_alloc(len)) == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ if (copy_from_user(tmp, r_tmp->rolename, len)) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ /* force termination before the strcmp() calls below */ ++ tmp[len - 1] = '\0'; ++ r_tmp->rolename = tmp; ++ ++ if (!strcmp(r_tmp->rolename, "default") ++ && (r_tmp->roletype & GR_ROLE_DEFAULT)) { ++ default_role = r_tmp; ++ } else if (!strcmp(r_tmp->rolename, ":::kernel:::")) { ++ kernel_role = r_tmp; ++ } ++ ++ if ((ghash = (struct gr_hash_struct *) acl_alloc(sizeof(struct gr_hash_struct))) == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ if (copy_from_user(ghash, r_tmp->hash, sizeof(struct gr_hash_struct))) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ ++ r_tmp->hash = ghash; ++ ++ num_subjs = count_user_subjs(r_tmp->hash->first); ++ ++ r_tmp->subj_hash_size = num_subjs; ++ r_tmp->subj_hash = ++ (struct acl_subject_label **) ++ create_table(&(r_tmp->subj_hash_size), sizeof(void *)); ++ ++ if (!r_tmp->subj_hash) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ ++ err = copy_user_allowedips(r_tmp); ++ if (err) ++ goto cleanup; ++ ++ /* copy domain info */ ++ if (r_tmp->domain_children != NULL) { ++ domainlist = 
acl_alloc(r_tmp->domain_child_num * sizeof(uid_t)); ++ if (domainlist == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ if (copy_from_user(domainlist, r_tmp->domain_children, r_tmp->domain_child_num * sizeof(uid_t))) { ++ err = -EFAULT; ++ goto cleanup; ++ } ++ r_tmp->domain_children = domainlist; ++ } ++ ++ err = copy_user_transitions(r_tmp); ++ if (err) ++ goto cleanup; ++ ++ memset(r_tmp->subj_hash, 0, ++ r_tmp->subj_hash_size * ++ sizeof (struct acl_subject_label *)); ++ ++ err = copy_user_subjs(r_tmp->hash->first, r_tmp); ++ ++ if (err) ++ goto cleanup; ++ ++ /* set nested subject list to null */ ++ r_tmp->hash->first = NULL; ++ ++ insert_acl_role_label(r_tmp); ++ } ++ ++ goto return_err; ++ cleanup: ++ free_variables(); ++ return_err: ++ return err; ++ ++} ++ ++static int ++gracl_init(struct gr_arg *args) ++{ ++ int error = 0; ++ ++ memcpy(gr_system_salt, args->salt, GR_SALT_LEN); ++ memcpy(gr_system_sum, args->sum, GR_SHA_LEN); ++ ++ if (init_variables(args)) { ++ gr_log_str(GR_DONT_AUDIT_GOOD, GR_INITF_ACL_MSG, GR_VERSION); ++ error = -ENOMEM; ++ free_variables(); ++ goto out; ++ } ++ ++ error = copy_user_acl(args); ++ free_init_variables(); ++ if (error) { ++ free_variables(); ++ goto out; ++ } ++ ++ if ((error = gr_set_acls(0))) { ++ free_variables(); ++ goto out; ++ } ++ ++ gr_status |= GR_READY; ++ out: ++ return error; ++} ++ ++/* derived from glibc fnmatch() 0: match, 1: no match*/ ++ ++static int ++glob_match(const char *p, const char *n) ++{ ++ char c; ++ ++ while ((c = *p++) != '\0') { ++ switch (c) { ++ case '?': ++ if (*n == '\0') ++ return 1; ++ else if (*n == '/') ++ return 1; ++ break; ++ case '\\': ++ if (*n != c) ++ return 1; ++ break; ++ case '*': ++ for (c = *p++; c == '?' 
|| c == '*'; c = *p++) { ++ if (*n == '/') ++ return 1; ++ else if (c == '?') { ++ if (*n == '\0') ++ return 1; ++ else ++ ++n; ++ } ++ } ++ if (c == '\0') { ++ return 0; ++ } else { ++ const char *endp; ++ /* endp marks the end of the current path component: the next '/' or the end of the name */ ++ if ((endp = strchr(n, '/')) == NULL) ++ endp = n + strlen(n); ++ ++ if (c == '[') { ++ for (--p; n < endp; ++n) ++ if (!glob_match(p, n)) ++ return 0; ++ } else if (c == '/') { ++ while (*n != '\0' && *n != '/') ++ ++n; ++ if (*n == '/' && !glob_match(p, n + 1)) ++ return 0; ++ } else { ++ for (--p; n < endp; ++n) ++ if (*n == c && !glob_match(p, n)) ++ return 0; ++ } ++ ++ return 1; ++ } /* both arms above return, so the '*' case never falls through into '[' */ ++ case '[': ++ { ++ int not; ++ char cold; ++ ++ if (*n == '\0' || *n == '/') ++ return 1; ++ ++ not = (*p == '!' || *p == '^'); ++ if (not) ++ ++p; ++ ++ c = *p++; ++ for (;;) { ++ unsigned char fn = (unsigned char)*n; ++ ++ if (c == '\0') ++ return 1; ++ else { ++ if (c == fn) ++ goto matched; ++ cold = c; ++ c = *p++; ++ ++ if (c == '-' && *p != ']') { ++ unsigned char cend = *p++; ++ ++ if (cend == '\0') ++ return 1; ++ ++ if (cold <= fn && fn <= cend) ++ goto matched; ++ ++ c = *p++; ++ } ++ } ++ ++ if (c == ']') ++ break; ++ } ++ if (!not) ++ return 1; ++ break; ++ matched: ++ while (c != ']') { ++ if (c == '\0') ++ return 1; ++ ++ c = *p++; ++ } ++ if (not) ++ return 1; ++ } ++ break; ++ default: ++ if (c != *n) ++ return 1; ++ } ++ ++ ++n; ++ } ++ /* pattern consumed: it is a match if the name is consumed too or continues with '/' */ ++ if (*n == '\0') ++ return 0; ++ ++ if (*n == '/') ++ return 0; ++ ++ return 1; ++} ++ /* Return the first label on the globbed list (linked through next) whose filename pattern matches the path, or NULL. The path is obtained from gr_to_filename_nolock() only if *path is still NULL and is kept in *path afterwards. */ ++static struct acl_object_label * ++chk_glob_label(struct acl_object_label *globbed, ++ struct dentry *dentry, struct vfsmount *mnt, char **path) ++{ ++ struct acl_object_label *tmp; ++ ++ if (*path == NULL) ++ *path = gr_to_filename_nolock(dentry, mnt); ++ ++ tmp = globbed; ++ ++ while (tmp) { ++ if (!glob_match(tmp->filename, *path)) ++ return tmp; ++ tmp = tmp->next; ++ } ++ ++ return NULL; ++} ++ /* Look up the object label for an inode/device pair in subj and, failing that, in each parent subject in turn. */ ++static struct acl_object_label * ++__full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, ++ const ino_t 
curr_ino, const dev_t curr_dev, ++ const struct acl_subject_label *subj, char **path) ++{ ++ struct acl_subject_label *tmpsubj; ++ struct acl_object_label *retval; ++ struct acl_object_label *retval2; ++ ++ tmpsubj = (struct acl_subject_label *) subj; ++ read_lock(&gr_inode_lock); /* the first subject in the parent chain with a label wins; a matching entry of that label's glob list replaces it */ ++ do { ++ retval = lookup_acl_obj_label(curr_ino, curr_dev, tmpsubj); ++ if (retval) { ++ if (retval->globbed) { ++ retval2 = chk_glob_label(retval->globbed, (struct dentry *)orig_dentry, ++ (struct vfsmount *)orig_mnt, path); ++ if (retval2) ++ retval = retval2; ++ } ++ break; ++ } ++ } while ((tmpsubj = tmpsubj->parent_subject)); ++ read_unlock(&gr_inode_lock); ++ ++ return retval; ++} ++ /* Wrapper around __full_lookup() that passes curr_dentry's inode number and superblock device. */ ++static __inline__ struct acl_object_label * ++full_lookup(const struct dentry *orig_dentry, const struct vfsmount *orig_mnt, ++ const struct dentry *curr_dentry, ++ const struct acl_subject_label *subj, char **path) ++{ ++ return __full_lookup(orig_dentry, orig_mnt, ++ curr_dentry->d_inode->i_ino, ++ curr_dentry->d_inode->i_sb->s_dev, subj, path); ++} ++ /* Find the object label that governs l_dentry for subj: under dcache_lock, try the dentry itself, then each parent, crossing mount points, until a lookup succeeds; the last attempt uses real_root. Dentries on the shm, pipe or socket mounts, or with a private inode, get fakefs_obj. */ ++static struct acl_object_label * ++__chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, ++ const struct acl_subject_label *subj, char *path) ++{ ++ struct dentry *dentry = (struct dentry *) l_dentry; ++ struct vfsmount *mnt = (struct vfsmount *) l_mnt; ++ struct acl_object_label *retval; ++ ++ spin_lock(&dcache_lock); ++ ++ if (unlikely(mnt == shm_mnt || mnt == pipe_mnt || mnt == sock_mnt || ++ /* ignore Eric Biederman */ ++ IS_PRIVATE(l_dentry->d_inode))) { ++ retval = fakefs_obj; ++ goto out; ++ } ++ ++ for (;;) { ++ if (dentry == real_root && mnt == real_root_mnt) ++ break; ++ /* at the root of a mount: stop if this is the topmost mount, otherwise continue from the mount point in the parent mount */ ++ if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { ++ if (mnt->mnt_parent == mnt) ++ break; ++ ++ retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); ++ if (retval != NULL) ++ goto out; ++ ++ dentry = mnt->mnt_mountpoint; ++ mnt = mnt->mnt_parent; ++ continue; ++ } ++ ++ retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); ++ if 
(retval != NULL) ++ goto out; ++ ++ dentry = dentry->d_parent; ++ } ++ /* the walk stopped at a root: try that dentry, then fall back to real_root */ ++ retval = full_lookup(l_dentry, l_mnt, dentry, subj, &path); ++ ++ if (retval == NULL) ++ retval = full_lookup(l_dentry, l_mnt, real_root, subj, &path); ++out: ++ spin_unlock(&dcache_lock); ++ return retval; ++} ++ /* Object label lookup with no pathname supplied; path starts out NULL. */ ++static __inline__ struct acl_object_label * ++chk_obj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, ++ const struct acl_subject_label *subj) ++{ ++ char *path = NULL; ++ return __chk_obj_label(l_dentry, l_mnt, subj, path); ++} ++ /* Same lookup with a pathname supplied by the caller. */ ++static __inline__ struct acl_object_label * ++chk_obj_create_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, ++ const struct acl_subject_label *subj, char *path) ++{ ++ return __chk_obj_label(l_dentry, l_mnt, subj, path); ++} ++ /* Find the subject label of role that governs l_dentry: under dcache_lock, try the dentry itself, then each parent, crossing mount points, until lookup_acl_subj_label() succeeds; the last attempt uses real_root. Each lookup takes gr_inode_lock for reading. */ ++static struct acl_subject_label * ++chk_subj_label(const struct dentry *l_dentry, const struct vfsmount *l_mnt, ++ const struct acl_role_label *role) ++{ ++ struct dentry *dentry = (struct dentry *) l_dentry; ++ struct vfsmount *mnt = (struct vfsmount *) l_mnt; ++ struct acl_subject_label *retval; ++ ++ spin_lock(&dcache_lock); ++ ++ for (;;) { ++ if (dentry == real_root && mnt == real_root_mnt) ++ break; ++ if (dentry == mnt->mnt_root || IS_ROOT(dentry)) { ++ if (mnt->mnt_parent == mnt) ++ break; ++ ++ read_lock(&gr_inode_lock); ++ retval = ++ lookup_acl_subj_label(dentry->d_inode->i_ino, ++ dentry->d_inode->i_sb->s_dev, role); ++ read_unlock(&gr_inode_lock); ++ if (retval != NULL) ++ goto out; ++ ++ dentry = mnt->mnt_mountpoint; ++ mnt = mnt->mnt_parent; ++ continue; ++ } ++ ++ read_lock(&gr_inode_lock); ++ retval = lookup_acl_subj_label(dentry->d_inode->i_ino, ++ dentry->d_inode->i_sb->s_dev, role); ++ read_unlock(&gr_inode_lock); ++ if (retval != NULL) ++ goto out; ++ ++ dentry = dentry->d_parent; ++ } ++ ++ read_lock(&gr_inode_lock); ++ retval = lookup_acl_subj_label(dentry->d_inode->i_ino, ++ dentry->d_inode->i_sb->s_dev, role); ++ read_unlock(&gr_inode_lock); ++ ++ if 
(unlikely(retval == NULL)) { ++ read_lock(&gr_inode_lock); ++ retval = lookup_acl_subj_label(real_root->d_inode->i_ino, ++ real_root->d_inode->i_sb->s_dev, role); ++ read_unlock(&gr_inode_lock); ++ } ++out: ++ spin_unlock(&dcache_lock); ++ ++ return retval; ++} ++ /* Emit a GR_LEARN_AUDIT_MSG record through security_learn() for an access to dentry/mnt with the given mode; the executable is named from task->exec_file when set, otherwise from the subject's filename. */ ++static void ++gr_log_learn(const struct task_struct *task, const struct dentry *dentry, const struct vfsmount *mnt, const __u32 mode) ++{ ++ security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, task->role->roletype, ++ task->uid, task->gid, task->exec_file ? gr_to_filename1(task->exec_file->f_path.dentry, ++ task->exec_file->f_path.mnt) : task->acl->filename, task->acl->filename, ++ 1, 1, gr_to_filename(dentry, mnt), (unsigned long) mode, NIPQUAD(task->signal->curr_ip)); ++ ++ return; ++} ++ /* Same record as gr_log_learn(), with a caller-supplied path string in place of a dentry. */ ++static void ++gr_log_learn_sysctl(const struct task_struct *task, const char *path, const __u32 mode) ++{ ++ security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, task->role->roletype, ++ task->uid, task->gid, task->exec_file ? gr_to_filename1(task->exec_file->f_path.dentry, ++ task->exec_file->f_path.mnt) : task->acl->filename, task->acl->filename, ++ 1, 1, path, (unsigned long) mode, NIPQUAD(task->signal->curr_ip)); ++ ++ return; ++} ++ /* Emit a GR_ID_LEARN_MSG record carrying the change type and the real, effective and fs ids. */ ++static void ++gr_log_learn_id_change(const struct task_struct *task, const char type, const unsigned int real, ++ const unsigned int effective, const unsigned int fs) ++{ ++ security_learn(GR_ID_LEARN_MSG, task->role->rolename, task->role->roletype, ++ task->uid, task->gid, task->exec_file ? 
gr_to_filename1(task->exec_file->f_path.dentry, ++ task->exec_file->f_path.mnt) : task->acl->filename, task->acl->filename, ++ type, real, effective, fs, NIPQUAD(task->signal->curr_ip)); ++ ++ return; ++} ++ /* Decide whether current may link old_dentry as new_dentry. Returns GR_CREATE | GR_LINK when the ACL system is not ready or the subject is learning, newmode when it contains both GR_CREATE and GR_LINK and the checks below pass, otherwise GR_SUPPRESS or 0. NOTE(review): GR_FIND appears twice in the needmode mask below - harmless, but confirm no other flag was intended. */ ++__u32 ++gr_check_link(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, ++ const struct dentry * old_dentry, const struct vfsmount * old_mnt) ++{ ++ struct acl_object_label *obj; ++ __u32 oldmode, newmode; ++ __u32 needmode; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return (GR_CREATE | GR_LINK); ++ ++ obj = chk_obj_label(old_dentry, old_mnt, current->acl); ++ oldmode = obj->mode; ++ ++ if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) ++ oldmode |= (GR_CREATE | GR_LINK); ++ ++ needmode = GR_CREATE | GR_AUDIT_CREATE | GR_SUPPRESS; ++ if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) ++ needmode |= GR_SETID | GR_AUDIT_SETID; ++ ++ newmode = ++ gr_check_create(new_dentry, parent_dentry, parent_mnt, ++ oldmode | needmode); ++ /* every access bit the new name would grant must already be granted on the old name */ ++ needmode = newmode & (GR_FIND | GR_APPEND | GR_WRITE | GR_EXEC | ++ GR_SETID | GR_READ | GR_FIND | GR_DELETE | ++ GR_INHERIT | GR_AUDIT_INHERIT); ++ ++ if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID) && !(newmode & GR_SETID)) ++ goto bad; ++ ++ if ((oldmode & needmode) != needmode) ++ goto bad; ++ /* and these bits of the old name must be present on the new name as well */ ++ needmode = oldmode & (GR_NOPTRACE | GR_PTRACERD | GR_INHERIT | GR_AUDITS); ++ if ((newmode & needmode) != needmode) ++ goto bad; ++ ++ if ((newmode & (GR_CREATE | GR_LINK)) == (GR_CREATE | GR_LINK)) ++ return newmode; ++bad: ++ needmode = oldmode; ++ if (old_dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) ++ needmode |= GR_SETID; ++ ++ if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) { ++ gr_log_learn(current, old_dentry, old_mnt, needmode); ++ return (GR_CREATE | GR_LINK); ++ } else if (newmode & GR_SUPPRESS) ++ return GR_SUPPRESS; ++ else ++ return 0; ++} ++ /* Return the bits of mode granted to current by the object label governing dentry; mode minus the audit bits when the ACL system is not ready. */ ++__u32 ++gr_search_file(const struct dentry * dentry, const __u32 mode, ++ const struct 
vfsmount * mnt) ++{ ++ __u32 retval = mode; ++ struct acl_subject_label *curracl; ++ struct acl_object_label *currobj; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return (mode & ~GR_AUDITS); ++ ++ curracl = current->acl; ++ ++ currobj = chk_obj_label(dentry, mnt, curracl); ++ retval = currobj->mode & mode; ++ /* learning subjects get the full request (minus audit and suppress bits) whenever the label falls short, and the access is logged unless GR_NOLEARN was passed */ ++ if (unlikely ++ ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) && !(mode & GR_NOPTRACE) ++ && (retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))))) { ++ __u32 new_mode = mode; ++ ++ new_mode &= ~(GR_AUDITS | GR_SUPPRESS); ++ ++ retval = new_mode; ++ ++ if (new_mode & GR_EXEC && curracl->mode & GR_INHERITLEARN) ++ new_mode |= GR_INHERIT; ++ ++ if (!(mode & GR_NOLEARN)) ++ gr_log_learn(current, dentry, mnt, new_mode); ++ } ++ ++ return retval; ++} ++ /* Access check for a name that is about to be created: if the pathname has a name entry and the current subject has a label for that entry's inode/device, that label decides; otherwise the label governing the parent directory does. Preemption stays disabled from the pathname lookup until return. */ ++__u32 ++gr_check_create(const struct dentry * new_dentry, const struct dentry * parent, ++ const struct vfsmount * mnt, const __u32 mode) ++{ ++ struct name_entry *match; ++ struct acl_object_label *matchpo; ++ struct acl_subject_label *curracl; ++ char *path; ++ __u32 retval; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return (mode & ~GR_AUDITS); ++ ++ preempt_disable(); ++ path = gr_to_filename_rbac(new_dentry, mnt); ++ match = lookup_name_entry_create(path); ++ ++ if (!match) ++ goto check_parent; ++ ++ curracl = current->acl; ++ ++ read_lock(&gr_inode_lock); ++ matchpo = lookup_acl_obj_label_create(match->inode, match->device, curracl); ++ read_unlock(&gr_inode_lock); ++ ++ if (matchpo) { ++ if ((matchpo->mode & mode) != ++ (mode & ~(GR_AUDITS | GR_SUPPRESS)) ++ && curracl->mode & (GR_LEARN | GR_INHERITLEARN)) { ++ __u32 new_mode = mode; ++ ++ new_mode &= ~(GR_AUDITS | GR_SUPPRESS); ++ ++ gr_log_learn(current, new_dentry, mnt, new_mode); ++ ++ preempt_enable(); ++ return new_mode; ++ } ++ preempt_enable(); ++ return (matchpo->mode & mode); ++ } ++ ++ check_parent: ++ curracl = current->acl; ++ ++ matchpo = chk_obj_create_label(parent, mnt, curracl, path); ++ retval = matchpo->mode & mode; ++ ++ if 
((retval != (mode & ~(GR_AUDITS | GR_SUPPRESS))) ++ && (curracl->mode & (GR_LEARN | GR_INHERITLEARN))) { ++ __u32 new_mode = mode; ++ ++ new_mode &= ~(GR_AUDITS | GR_SUPPRESS); ++ ++ gr_log_learn(current, new_dentry, mnt, new_mode); ++ preempt_enable(); ++ return new_mode; ++ } ++ ++ preempt_enable(); ++ return retval; ++} ++ ++int ++gr_check_hidden_task(const struct task_struct *task) ++{ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ if (!(task->acl->mode & GR_PROCFIND) && !(current->acl->mode & GR_VIEW)) ++ return 1; ++ ++ return 0; ++} ++ ++int ++gr_check_protected_task(const struct task_struct *task) ++{ ++ if (unlikely(!(gr_status & GR_READY) || !task)) ++ return 0; ++ ++ if ((task->acl->mode & GR_PROTECTED) && !(current->acl->mode & GR_KILL) && ++ task->acl != current->acl) ++ return 1; ++ ++ return 0; ++} ++ ++void ++gr_copy_label(struct task_struct *tsk) ++{ ++ tsk->signal->used_accept = 0; ++ tsk->acl_sp_role = 0; ++ tsk->acl_role_id = current->acl_role_id; ++ tsk->acl = current->acl; ++ tsk->role = current->role; ++ tsk->signal->curr_ip = current->signal->curr_ip; ++ if (current->exec_file) ++ get_file(current->exec_file); ++ tsk->exec_file = current->exec_file; ++ tsk->is_writable = current->is_writable; ++ if (unlikely(current->signal->used_accept)) ++ current->signal->curr_ip = 0; ++ ++ return; ++} ++ ++static void ++gr_set_proc_res(struct task_struct *task) ++{ ++ struct acl_subject_label *proc; ++ unsigned short i; ++ ++ proc = task->acl; ++ ++ if (proc->mode & (GR_LEARN | GR_INHERITLEARN)) ++ return; ++ ++ for (i = 0; i < (GR_NLIMITS - 1); i++) { ++ if (!(proc->resmask & (1 << i))) ++ continue; ++ ++ task->signal->rlim[i].rlim_cur = proc->res[i].rlim_cur; ++ task->signal->rlim[i].rlim_max = proc->res[i].rlim_max; ++ } ++ ++ return; ++} ++ ++int ++gr_check_user_change(int real, int effective, int fs) ++{ ++ unsigned int i; ++ __u16 num; ++ uid_t *uidlist; ++ int curuid; ++ int realok = 0; ++ int effectiveok = 0; ++ int fsok = 0; ++ ++ 
if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) ++ gr_log_learn_id_change(current, 'u', real, effective, fs); ++ ++ num = current->acl->user_trans_num; ++ uidlist = current->acl->user_transitions; ++ ++ if (uidlist == NULL) ++ return 0; ++ ++ if (real == -1) ++ realok = 1; ++ if (effective == -1) ++ effectiveok = 1; ++ if (fs == -1) ++ fsok = 1; ++ ++ if (current->acl->user_trans_type & GR_ID_ALLOW) { ++ for (i = 0; i < num; i++) { ++ curuid = (int)uidlist[i]; ++ if (real == curuid) ++ realok = 1; ++ if (effective == curuid) ++ effectiveok = 1; ++ if (fs == curuid) ++ fsok = 1; ++ } ++ } else if (current->acl->user_trans_type & GR_ID_DENY) { ++ for (i = 0; i < num; i++) { ++ curuid = (int)uidlist[i]; ++ if (real == curuid) ++ break; ++ if (effective == curuid) ++ break; ++ if (fs == curuid) ++ break; ++ } ++ /* not in deny list */ ++ if (i == num) { ++ realok = 1; ++ effectiveok = 1; ++ fsok = 1; ++ } ++ } ++ ++ if (realok && effectiveok && fsok) ++ return 0; ++ else { ++ gr_log_int(GR_DONT_AUDIT, GR_USRCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 
0 : fs) : effective) : real); ++ return 1; ++ } ++} ++ ++int ++gr_check_group_change(int real, int effective, int fs) ++{ ++ unsigned int i; ++ __u16 num; ++ gid_t *gidlist; ++ int curgid; ++ int realok = 0; ++ int effectiveok = 0; ++ int fsok = 0; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ if (current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) ++ gr_log_learn_id_change(current, 'g', real, effective, fs); ++ ++ num = current->acl->group_trans_num; ++ gidlist = current->acl->group_transitions; ++ ++ if (gidlist == NULL) ++ return 0; ++ ++ if (real == -1) ++ realok = 1; ++ if (effective == -1) ++ effectiveok = 1; ++ if (fs == -1) ++ fsok = 1; ++ ++ if (current->acl->group_trans_type & GR_ID_ALLOW) { ++ for (i = 0; i < num; i++) { ++ curgid = (int)gidlist[i]; ++ if (real == curgid) ++ realok = 1; ++ if (effective == curgid) ++ effectiveok = 1; ++ if (fs == curgid) ++ fsok = 1; ++ } ++ } else if (current->acl->group_trans_type & GR_ID_DENY) { ++ for (i = 0; i < num; i++) { ++ curgid = (int)gidlist[i]; ++ if (real == curgid) ++ break; ++ if (effective == curgid) ++ break; ++ if (fs == curgid) ++ break; ++ } ++ /* not in deny list */ ++ if (i == num) { ++ realok = 1; ++ effectiveok = 1; ++ fsok = 1; ++ } ++ } ++ ++ if (realok && effectiveok && fsok) ++ return 0; ++ else { ++ gr_log_int(GR_DONT_AUDIT, GR_GRPCHANGE_ACL_MSG, realok ? (effectiveok ? (fsok ? 
0 : fs) : effective) : real); ++ return 1; ++ } ++} ++ ++void ++gr_set_role_label(struct task_struct *task, const uid_t uid, const uid_t gid) ++{ ++ struct acl_role_label *role = task->role; ++ struct acl_subject_label *subj = NULL; ++ struct acl_object_label *obj; ++ struct file *filp; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return; ++ ++ filp = task->exec_file; ++ ++ /* kernel process, we'll give them the kernel role */ ++ if (unlikely(!filp)) { ++ task->role = kernel_role; ++ task->acl = kernel_role->root_label; ++ return; ++ } else if (!task->role || !(task->role->roletype & GR_ROLE_SPECIAL)) ++ role = lookup_acl_role_label(task, uid, gid); ++ ++ /* perform subject lookup in possibly new role ++ we can use this result below in the case where role == task->role ++ */ ++ subj = chk_subj_label(filp->f_path.dentry, filp->f_path.mnt, role); ++ ++ /* if we changed uid/gid, but result in the same role ++ and are using inheritance, don't lose the inherited subject ++ if current subject is other than what normal lookup ++ would result in, we arrived via inheritance, don't ++ lose subject ++ */ ++ if (role != task->role || (!(task->acl->mode & GR_INHERITLEARN) && ++ (subj == task->acl))) ++ task->acl = subj; ++ ++ task->role = role; ++ ++ task->is_writable = 0; ++ ++ /* ignore additional mmap checks for processes that are writable ++ by the default ACL */ ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, default_role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, task->role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ ++#ifdef CONFIG_GRKERNSEC_ACL_DEBUG ++ printk(KERN_ALERT "Set role label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); ++#endif ++ ++ gr_set_proc_res(task); ++ ++ return; ++} ++ ++int ++gr_set_proc_label(const struct dentry *dentry, const struct 
vfsmount *mnt) ++{ ++ struct task_struct *task = current; ++ struct acl_subject_label *newacl; ++ struct acl_object_label *obj; ++ __u32 retmode; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ newacl = chk_subj_label(dentry, mnt, task->role); ++ ++ task_lock(task); ++ if (((task->ptrace & PT_PTRACED) && !(task->acl->mode & ++ GR_POVERRIDE) && (task->acl != newacl) && ++ !(task->role->roletype & GR_ROLE_GOD) && ++ !gr_search_file(dentry, GR_PTRACERD, mnt) && ++ !(task->acl->mode & (GR_LEARN | GR_INHERITLEARN))) || ++ (atomic_read(&task->fs->count) > 1 || ++ atomic_read(&task->files->count) > 1 || ++ atomic_read(&task->sighand->count) > 1)) { ++ task_unlock(task); ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_PTRACE_EXEC_ACL_MSG, dentry, mnt); ++ return -EACCES; ++ } ++ task_unlock(task); ++ ++ obj = chk_obj_label(dentry, mnt, task->acl); ++ retmode = obj->mode & (GR_INHERIT | GR_AUDIT_INHERIT); ++ ++ if (!(task->acl->mode & GR_INHERITLEARN) && ++ ((newacl->mode & GR_LEARN) || !(retmode & GR_INHERIT))) { ++ if (obj->nested) ++ task->acl = obj->nested; ++ else ++ task->acl = newacl; ++ } else if (retmode & GR_INHERIT && retmode & GR_AUDIT_INHERIT) ++ gr_log_str_fs(GR_DO_AUDIT, GR_INHERIT_ACL_MSG, task->acl->filename, dentry, mnt); ++ ++ task->is_writable = 0; ++ ++ /* ignore additional mmap checks for processes that are writable ++ by the default ACL */ ++ obj = chk_obj_label(dentry, mnt, default_role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ obj = chk_obj_label(dentry, mnt, task->role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ ++ gr_set_proc_res(task); ++ ++#ifdef CONFIG_GRKERNSEC_ACL_DEBUG ++ printk(KERN_ALERT "Set subject label for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); ++#endif ++ return 0; ++} ++ ++/* always called with valid inodev ptr */ ++static void ++do_handle_delete(struct inodev_entry *inodev, const ino_t 
ino, const dev_t dev) ++{ ++ struct acl_object_label *matchpo; ++ struct acl_subject_label *matchps; ++ struct acl_subject_label *subj; ++ struct acl_role_label *role; ++ unsigned int i, x; ++ ++ FOR_EACH_ROLE_START(role, i) ++ FOR_EACH_SUBJECT_START(role, subj, x) ++ if ((matchpo = lookup_acl_obj_label(ino, dev, subj)) != NULL) ++ matchpo->mode |= GR_DELETED; ++ FOR_EACH_SUBJECT_END(subj,x) ++ FOR_EACH_NESTED_SUBJECT_START(role, subj) ++ if (subj->inode == ino && subj->device == dev) ++ subj->mode |= GR_DELETED; ++ FOR_EACH_NESTED_SUBJECT_END(subj) ++ if ((matchps = lookup_acl_subj_label(ino, dev, role)) != NULL) ++ matchps->mode |= GR_DELETED; ++ FOR_EACH_ROLE_END(role,i) ++ ++ inodev->nentry->deleted = 1; ++ ++ return; ++} ++ ++void ++gr_handle_delete(const ino_t ino, const dev_t dev) ++{ ++ struct inodev_entry *inodev; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return; ++ ++ write_lock(&gr_inode_lock); ++ inodev = lookup_inodev_entry(ino, dev); ++ if (inodev != NULL) ++ do_handle_delete(inodev, ino, dev); ++ write_unlock(&gr_inode_lock); ++ ++ return; ++} ++ ++static void ++update_acl_obj_label(const ino_t oldinode, const dev_t olddevice, ++ const ino_t newinode, const dev_t newdevice, ++ struct acl_subject_label *subj) ++{ ++ unsigned int index = fhash(oldinode, olddevice, subj->obj_hash_size); ++ struct acl_object_label *match; ++ ++ match = subj->obj_hash[index]; ++ ++ while (match && (match->inode != oldinode || ++ match->device != olddevice || ++ !(match->mode & GR_DELETED))) ++ match = match->next; ++ ++ if (match && (match->inode == oldinode) ++ && (match->device == olddevice) ++ && (match->mode & GR_DELETED)) { ++ if (match->prev == NULL) { ++ subj->obj_hash[index] = match->next; ++ if (match->next != NULL) ++ match->next->prev = NULL; ++ } else { ++ match->prev->next = match->next; ++ if (match->next != NULL) ++ match->next->prev = match->prev; ++ } ++ match->prev = NULL; ++ match->next = NULL; ++ match->inode = newinode; ++ match->device = 
newdevice; ++ match->mode &= ~GR_DELETED; ++ ++ insert_acl_obj_label(match, subj); ++ } ++ ++ return; ++} ++ ++static void ++update_acl_subj_label(const ino_t oldinode, const dev_t olddevice, ++ const ino_t newinode, const dev_t newdevice, ++ struct acl_role_label *role) ++{ ++ unsigned int index = fhash(oldinode, olddevice, role->subj_hash_size); ++ struct acl_subject_label *match; ++ ++ match = role->subj_hash[index]; ++ ++ while (match && (match->inode != oldinode || ++ match->device != olddevice || ++ !(match->mode & GR_DELETED))) ++ match = match->next; ++ ++ if (match && (match->inode == oldinode) ++ && (match->device == olddevice) ++ && (match->mode & GR_DELETED)) { ++ if (match->prev == NULL) { ++ role->subj_hash[index] = match->next; ++ if (match->next != NULL) ++ match->next->prev = NULL; ++ } else { ++ match->prev->next = match->next; ++ if (match->next != NULL) ++ match->next->prev = match->prev; ++ } ++ match->prev = NULL; ++ match->next = NULL; ++ match->inode = newinode; ++ match->device = newdevice; ++ match->mode &= ~GR_DELETED; ++ ++ insert_acl_subj_label(match, role); ++ } ++ ++ return; ++} ++ ++static void ++update_inodev_entry(const ino_t oldinode, const dev_t olddevice, ++ const ino_t newinode, const dev_t newdevice) ++{ ++ unsigned int index = fhash(oldinode, olddevice, inodev_set.i_size); ++ struct inodev_entry *match; ++ ++ match = inodev_set.i_hash[index]; ++ ++ while (match && (match->nentry->inode != oldinode || ++ match->nentry->device != olddevice || !match->nentry->deleted)) ++ match = match->next; ++ ++ if (match && (match->nentry->inode == oldinode) ++ && (match->nentry->device == olddevice) && ++ match->nentry->deleted) { ++ if (match->prev == NULL) { ++ inodev_set.i_hash[index] = match->next; ++ if (match->next != NULL) ++ match->next->prev = NULL; ++ } else { ++ match->prev->next = match->next; ++ if (match->next != NULL) ++ match->next->prev = match->prev; ++ } ++ match->prev = NULL; ++ match->next = NULL; ++ 
match->nentry->inode = newinode; ++ match->nentry->device = newdevice; ++ match->nentry->deleted = 0; ++ ++ insert_inodev_entry(match); ++ } ++ ++ return; ++} ++ ++static void ++do_handle_create(const struct name_entry *matchn, const struct dentry *dentry, ++ const struct vfsmount *mnt) ++{ ++ struct acl_subject_label *subj; ++ struct acl_role_label *role; ++ unsigned int i, x; ++ ++ FOR_EACH_ROLE_START(role, i) ++ update_acl_subj_label(matchn->inode, matchn->device, ++ dentry->d_inode->i_ino, ++ dentry->d_inode->i_sb->s_dev, role); ++ ++ FOR_EACH_NESTED_SUBJECT_START(role, subj) ++ if ((subj->inode == dentry->d_inode->i_ino) && ++ (subj->device == dentry->d_inode->i_sb->s_dev)) { ++ subj->inode = dentry->d_inode->i_ino; ++ subj->device = dentry->d_inode->i_sb->s_dev; ++ } ++ FOR_EACH_NESTED_SUBJECT_END(subj) ++ FOR_EACH_SUBJECT_START(role, subj, x) ++ update_acl_obj_label(matchn->inode, matchn->device, ++ dentry->d_inode->i_ino, ++ dentry->d_inode->i_sb->s_dev, subj); ++ FOR_EACH_SUBJECT_END(subj,x) ++ FOR_EACH_ROLE_END(role,i) ++ ++ update_inodev_entry(matchn->inode, matchn->device, ++ dentry->d_inode->i_ino, dentry->d_inode->i_sb->s_dev); ++ ++ return; ++} ++ ++void ++gr_handle_create(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ struct name_entry *matchn; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return; ++ ++ preempt_disable(); ++ matchn = lookup_name_entry(gr_to_filename_rbac(dentry, mnt)); ++ ++ if (unlikely((unsigned long)matchn)) { ++ write_lock(&gr_inode_lock); ++ do_handle_create(matchn, dentry, mnt); ++ write_unlock(&gr_inode_lock); ++ } ++ preempt_enable(); ++ ++ return; ++} ++ ++void ++gr_handle_rename(struct inode *old_dir, struct inode *new_dir, ++ struct dentry *old_dentry, ++ struct dentry *new_dentry, ++ struct vfsmount *mnt, const __u8 replace) ++{ ++ struct name_entry *matchn; ++ struct inodev_entry *inodev; ++ ++ /* vfs_rename swaps the name and parent link for old_dentry and ++ new_dentry ++ at this point, old_dentry 
has the new name, parent link, and inode ++ for the renamed file ++ if a file is being replaced by a rename, new_dentry has the inode ++ and name for the replaced file ++ */ ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return; ++ ++ preempt_disable(); ++ matchn = lookup_name_entry(gr_to_filename_rbac(old_dentry, mnt)); ++ ++ /* we wouldn't have to check d_inode if it weren't for ++ NFS silly-renaming ++ */ ++ ++ write_lock(&gr_inode_lock); ++ if (unlikely(replace && new_dentry->d_inode)) { ++ inodev = lookup_inodev_entry(new_dentry->d_inode->i_ino, ++ new_dentry->d_inode->i_sb->s_dev); ++ if (inodev != NULL && (new_dentry->d_inode->i_nlink <= 1)) ++ do_handle_delete(inodev, new_dentry->d_inode->i_ino, ++ new_dentry->d_inode->i_sb->s_dev); ++ } ++ ++ inodev = lookup_inodev_entry(old_dentry->d_inode->i_ino, ++ old_dentry->d_inode->i_sb->s_dev); ++ if (inodev != NULL && (old_dentry->d_inode->i_nlink <= 1)) ++ do_handle_delete(inodev, old_dentry->d_inode->i_ino, ++ old_dentry->d_inode->i_sb->s_dev); ++ ++ if (unlikely((unsigned long)matchn)) ++ do_handle_create(matchn, old_dentry, mnt); ++ ++ write_unlock(&gr_inode_lock); ++ preempt_enable(); ++ ++ return; ++} ++ ++static int ++lookup_special_role_auth(__u16 mode, const char *rolename, unsigned char **salt, ++ unsigned char **sum) ++{ ++ struct acl_role_label *r; ++ struct role_allowed_ip *ipp; ++ struct role_transition *trans; ++ unsigned int i; ++ int found = 0; ++ ++ /* check transition table */ ++ ++ for (trans = current->role->transitions; trans; trans = trans->next) { ++ if (!strcmp(rolename, trans->rolename)) { ++ found = 1; ++ break; ++ } ++ } ++ ++ if (!found) ++ return 0; ++ ++ /* handle special roles that do not require authentication ++ and check ip */ ++ ++ FOR_EACH_ROLE_START(r, i) ++ if (!strcmp(rolename, r->rolename) && ++ (r->roletype & GR_ROLE_SPECIAL)) { ++ found = 0; ++ if (r->allowed_ips != NULL) { ++ for (ipp = r->allowed_ips; ipp; ipp = ipp->next) { ++ if ((ntohl(current->signal->curr_ip) & 
ipp->netmask) == ++ (ntohl(ipp->addr) & ipp->netmask)) ++ found = 1; ++ } ++ } else ++ found = 2; ++ if (!found) ++ return 0; ++ ++ if (((mode == GR_SPROLE) && (r->roletype & GR_ROLE_NOPW)) || ++ ((mode == GR_SPROLEPAM) && (r->roletype & GR_ROLE_PAM))) { ++ *salt = NULL; ++ *sum = NULL; ++ return 1; ++ } ++ } ++ FOR_EACH_ROLE_END(r,i) ++ ++ for (i = 0; i < num_sprole_pws; i++) { ++ if (!strcmp(rolename, acl_special_roles[i]->rolename)) { ++ *salt = acl_special_roles[i]->salt; ++ *sum = acl_special_roles[i]->sum; ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static void ++assign_special_role(char *rolename) ++{ ++ struct acl_object_label *obj; ++ struct acl_role_label *r; ++ struct acl_role_label *assigned = NULL; ++ struct task_struct *tsk; ++ struct file *filp; ++ unsigned int i; ++ ++ FOR_EACH_ROLE_START(r, i) ++ if (!strcmp(rolename, r->rolename) && ++ (r->roletype & GR_ROLE_SPECIAL)) ++ assigned = r; ++ FOR_EACH_ROLE_END(r,i) ++ ++ if (!assigned) ++ return; ++ ++ read_lock(&tasklist_lock); ++ read_lock(&grsec_exec_file_lock); ++ ++ tsk = current->parent; ++ if (tsk == NULL) ++ goto out_unlock; ++ ++ filp = tsk->exec_file; ++ if (filp == NULL) ++ goto out_unlock; ++ ++ tsk->is_writable = 0; ++ ++ tsk->acl_sp_role = 1; ++ tsk->acl_role_id = ++acl_sp_role_value; ++ tsk->role = assigned; ++ tsk->acl = chk_subj_label(filp->f_path.dentry, filp->f_path.mnt, tsk->role); ++ ++ /* ignore additional mmap checks for processes that are writable ++ by the default ACL */ ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, default_role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ tsk->is_writable = 1; ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, tsk->role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ tsk->is_writable = 1; ++ ++#ifdef CONFIG_GRKERNSEC_ACL_DEBUG ++ printk(KERN_ALERT "Assigning special role:%s subject:%s to process (%s:%d)\n", tsk->role->rolename, tsk->acl->filename, tsk->comm, tsk->pid); ++#endif ++ ++out_unlock: 
++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++ return; ++} ++ ++int gr_check_secure_terminal(struct task_struct *task) ++{ ++ struct task_struct *p, *p2, *p3; ++ struct files_struct *files; ++ struct fdtable *fdt; ++ struct file *our_file = NULL, *file; ++ int i; ++ ++ if (task->signal->tty == NULL) ++ return 1; ++ ++ files = get_files_struct(task); ++ if (files != NULL) { ++ rcu_read_lock(); ++ fdt = files_fdtable(files); ++ for (i=0; i < fdt->max_fds; i++) { ++ file = fcheck_files(files, i); ++ if (file && (our_file == NULL) && (file->private_data == task->signal->tty)) { ++ get_file(file); ++ our_file = file; ++ } ++ } ++ rcu_read_unlock(); ++ put_files_struct(files); ++ } ++ ++ if (our_file == NULL) ++ return 1; ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(p2, p) { ++ files = get_files_struct(p); ++ if (files == NULL || ++ (p->signal && p->signal->tty == task->signal->tty)) { ++ if (files != NULL) ++ put_files_struct(files); ++ continue; ++ } ++ rcu_read_lock(); ++ fdt = files_fdtable(files); ++ for (i=0; i < fdt->max_fds; i++) { ++ file = fcheck_files(files, i); ++ if (file && S_ISCHR(file->f_path.dentry->d_inode->i_mode) && ++ file->f_path.dentry->d_inode->i_rdev == our_file->f_path.dentry->d_inode->i_rdev) { ++ p3 = task; ++ while (p3->pid > 0) { ++ if (p3 == p) ++ break; ++ p3 = p3->parent; ++ } ++ if (p3 == p) ++ break; ++ gr_log_ttysniff(GR_DONT_AUDIT_GOOD, GR_TTYSNIFF_ACL_MSG, p); ++ gr_handle_alertkill(p); ++ rcu_read_unlock(); ++ put_files_struct(files); ++ read_unlock(&tasklist_lock); ++ fput(our_file); ++ return 0; ++ } ++ } ++ rcu_read_unlock(); ++ put_files_struct(files); ++ } while_each_thread(p2, p); ++ read_unlock(&tasklist_lock); ++ ++ fput(our_file); ++ return 1; ++} ++ ++ssize_t ++write_grsec_handler(struct file *file, const char * buf, size_t count, loff_t *ppos) ++{ ++ struct gr_arg_wrapper uwrap; ++ unsigned char *sprole_salt; ++ unsigned char *sprole_sum; ++ int error = sizeof (struct gr_arg_wrapper); ++ 
int error2 = 0; ++ ++ down(&gr_dev_sem); ++ ++ if ((gr_status & GR_READY) && !(current->acl->mode & GR_KERNELAUTH)) { ++ error = -EPERM; ++ goto out; ++ } ++ ++ if (count != sizeof (struct gr_arg_wrapper)) { ++ gr_log_int_int(GR_DONT_AUDIT_GOOD, GR_DEV_ACL_MSG, (int)count, (int)sizeof(struct gr_arg_wrapper)); ++ error = -EINVAL; ++ goto out; ++ } ++ ++ ++ if (gr_auth_expires && time_after_eq(get_seconds(), gr_auth_expires)) { ++ gr_auth_expires = 0; ++ gr_auth_attempts = 0; ++ } ++ ++ if (copy_from_user(&uwrap, buf, sizeof (struct gr_arg_wrapper))) { ++ error = -EFAULT; ++ goto out; ++ } ++ ++ if ((uwrap.version != GRSECURITY_VERSION) || (uwrap.size != sizeof(struct gr_arg))) { ++ error = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_from_user(gr_usermode, uwrap.arg, sizeof (struct gr_arg))) { ++ error = -EFAULT; ++ goto out; ++ } ++ ++ if (gr_usermode->mode != GR_SPROLE && gr_usermode->mode != GR_SPROLEPAM && ++ gr_auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && ++ time_after(gr_auth_expires, get_seconds())) { ++ error = -EBUSY; ++ goto out; ++ } ++ ++ /* if non-root trying to do anything other than use a special role, ++ do not attempt authentication, do not count towards authentication ++ locking ++ */ ++ ++ if (gr_usermode->mode != GR_SPROLE && gr_usermode->mode != GR_STATUS && ++ gr_usermode->mode != GR_UNSPROLE && gr_usermode->mode != GR_SPROLEPAM && ++ current->uid) { ++ error = -EPERM; ++ goto out; ++ } ++ ++ /* ensure pw and special role name are null terminated */ ++ ++ gr_usermode->pw[GR_PW_LEN - 1] = '\0'; ++ gr_usermode->sp_role[GR_SPROLE_LEN - 1] = '\0'; ++ ++ /* Okay. ++ * We have our enough of the argument structure..(we have yet ++ * to copy_from_user the tables themselves) . Copy the tables ++ * only if we need them, i.e. for loading operations. 
*/ ++ ++ switch (gr_usermode->mode) { ++ case GR_STATUS: ++ if (gr_status & GR_READY) { ++ error = 1; ++ if (!gr_check_secure_terminal(current)) ++ error = 3; ++ } else ++ error = 2; ++ goto out; ++ case GR_SHUTDOWN: ++ if ((gr_status & GR_READY) ++ && !(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { ++ gr_status &= ~GR_READY; ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTS_ACL_MSG); ++ free_variables(); ++ memset(gr_usermode, 0, sizeof (struct gr_arg)); ++ memset(gr_system_salt, 0, GR_SALT_LEN); ++ memset(gr_system_sum, 0, GR_SHA_LEN); ++ } else if (gr_status & GR_READY) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_SHUTF_ACL_MSG); ++ error = -EPERM; ++ } else { ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SHUTI_ACL_MSG); ++ error = -EAGAIN; ++ } ++ break; ++ case GR_ENABLE: ++ if (!(gr_status & GR_READY) && !(error2 = gracl_init(gr_usermode))) ++ gr_log_str(GR_DONT_AUDIT_GOOD, GR_ENABLE_ACL_MSG, GR_VERSION); ++ else { ++ if (gr_status & GR_READY) ++ error = -EAGAIN; ++ else ++ error = error2; ++ gr_log_str(GR_DONT_AUDIT, GR_ENABLEF_ACL_MSG, GR_VERSION); ++ } ++ break; ++ case GR_RELOAD: ++ if (!(gr_status & GR_READY)) { ++ gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOADI_ACL_MSG, GR_VERSION); ++ error = -EAGAIN; ++ } else if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { ++ lock_kernel(); ++ gr_status &= ~GR_READY; ++ free_variables(); ++ if (!(error2 = gracl_init(gr_usermode))) { ++ unlock_kernel(); ++ gr_log_str(GR_DONT_AUDIT_GOOD, GR_RELOAD_ACL_MSG, GR_VERSION); ++ } else { ++ unlock_kernel(); ++ error = error2; ++ gr_log_str(GR_DONT_AUDIT, GR_RELOADF_ACL_MSG, GR_VERSION); ++ } ++ } else { ++ gr_log_str(GR_DONT_AUDIT, GR_RELOADF_ACL_MSG, GR_VERSION); ++ error = -EPERM; ++ } ++ break; ++ case GR_SEGVMOD: ++ if (unlikely(!(gr_status & GR_READY))) { ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODI_ACL_MSG); ++ error = -EAGAIN; ++ break; ++ } ++ ++ if (!(chkpw(gr_usermode, gr_system_salt, gr_system_sum))) { ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SEGVMODS_ACL_MSG); ++ 
if (gr_usermode->segv_device && gr_usermode->segv_inode) { ++ struct acl_subject_label *segvacl; ++ segvacl = ++ lookup_acl_subj_label(gr_usermode->segv_inode, ++ gr_usermode->segv_device, ++ current->role); ++ if (segvacl) { ++ segvacl->crashes = 0; ++ segvacl->expires = 0; ++ } ++ } else if (gr_find_uid(gr_usermode->segv_uid) >= 0) { ++ gr_remove_uid(gr_usermode->segv_uid); ++ } ++ } else { ++ gr_log_noargs(GR_DONT_AUDIT, GR_SEGVMODF_ACL_MSG); ++ error = -EPERM; ++ } ++ break; ++ case GR_SPROLE: ++ case GR_SPROLEPAM: ++ if (unlikely(!(gr_status & GR_READY))) { ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_SPROLEI_ACL_MSG); ++ error = -EAGAIN; ++ break; ++ } ++ ++ if (current->role->expires && time_after_eq(get_seconds(), current->role->expires)) { ++ current->role->expires = 0; ++ current->role->auth_attempts = 0; ++ } ++ ++ if (current->role->auth_attempts >= CONFIG_GRKERNSEC_ACL_MAXTRIES && ++ time_after(current->role->expires, get_seconds())) { ++ error = -EBUSY; ++ goto out; ++ } ++ ++ if (lookup_special_role_auth ++ (gr_usermode->mode, gr_usermode->sp_role, &sprole_salt, &sprole_sum) ++ && ((!sprole_salt && !sprole_sum) ++ || !(chkpw(gr_usermode, sprole_salt, sprole_sum)))) { ++ char *p = ""; ++ assign_special_role(gr_usermode->sp_role); ++ read_lock(&tasklist_lock); ++ if (current->parent) ++ p = current->parent->role->rolename; ++ read_unlock(&tasklist_lock); ++ gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLES_ACL_MSG, ++ p, acl_sp_role_value); ++ } else { ++ gr_log_str(GR_DONT_AUDIT, GR_SPROLEF_ACL_MSG, gr_usermode->sp_role); ++ error = -EPERM; ++ if(!(current->role->auth_attempts++)) ++ current->role->expires = get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; ++ ++ goto out; ++ } ++ break; ++ case GR_UNSPROLE: ++ if (unlikely(!(gr_status & GR_READY))) { ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_UNSPROLEI_ACL_MSG); ++ error = -EAGAIN; ++ break; ++ } ++ ++ if (current->role->roletype & GR_ROLE_SPECIAL) { ++ char *p = ""; ++ int i = 0; ++ ++ read_lock(&tasklist_lock); ++ 
if (current->parent) { ++ p = current->parent->role->rolename; ++ i = current->parent->acl_role_id; ++ } ++ read_unlock(&tasklist_lock); ++ ++ gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_UNSPROLES_ACL_MSG, p, i); ++ gr_set_acls(1); ++ } else { ++ gr_log_str(GR_DONT_AUDIT, GR_UNSPROLEF_ACL_MSG, current->role->rolename); ++ error = -EPERM; ++ goto out; ++ } ++ break; ++ default: ++ gr_log_int(GR_DONT_AUDIT, GR_INVMODE_ACL_MSG, gr_usermode->mode); ++ error = -EINVAL; ++ break; ++ } ++ ++ if (error != -EPERM) ++ goto out; ++ ++ if(!(gr_auth_attempts++)) ++ gr_auth_expires = get_seconds() + CONFIG_GRKERNSEC_ACL_TIMEOUT; ++ ++ out: ++ up(&gr_dev_sem); ++ return error; ++} ++ ++int ++gr_set_acls(const int type) ++{ ++ struct acl_object_label *obj; ++ struct task_struct *task, *task2; ++ struct file *filp; ++ struct acl_role_label *role = current->role; ++ __u16 acl_role_id = current->acl_role_id; ++ ++ read_lock(&tasklist_lock); ++ read_lock(&grsec_exec_file_lock); ++ do_each_thread(task2, task) { ++ /* check to see if we're called from the exit handler, ++ if so, only replace ACLs that have inherited the admin ++ ACL */ ++ ++ if (type && (task->role != role || ++ task->acl_role_id != acl_role_id)) ++ continue; ++ ++ task->acl_role_id = 0; ++ task->acl_sp_role = 0; ++ ++ if ((filp = task->exec_file)) { ++ task->role = lookup_acl_role_label(task, task->uid, task->gid); ++ ++ task->acl = ++ chk_subj_label(filp->f_path.dentry, filp->f_path.mnt, ++ task->role); ++ if (task->acl) { ++ struct acl_subject_label *curr; ++ curr = task->acl; ++ ++ task->is_writable = 0; ++ /* ignore additional mmap checks for processes that are writable ++ by the default ACL */ ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, default_role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, task->role->root_label); ++ if (unlikely(obj->mode & GR_WRITE)) ++ task->is_writable = 1; ++ ++ 
gr_set_proc_res(task); ++ ++#ifdef CONFIG_GRKERNSEC_ACL_DEBUG ++ printk(KERN_ALERT "gr_set_acls for (%s:%d): role:%s, subject:%s\n", task->comm, task->pid, task->role->rolename, task->acl->filename); ++#endif ++ } else { ++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++ gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_DEFACL_MSG, task->comm, task->pid); ++ return 1; ++ } ++ } else { ++ // it's a kernel process ++ task->role = kernel_role; ++ task->acl = kernel_role->root_label; ++#ifdef CONFIG_GRKERNSEC_ACL_HIDEKERN ++ task->acl->mode &= ~GR_PROCFIND; ++#endif ++ } ++ } while_each_thread(task2, task); ++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++ return 0; ++} ++ ++void ++gr_learn_resource(const struct task_struct *task, ++ const int res, const unsigned long wanted, const int gt) ++{ ++ struct acl_subject_label *acl; ++ ++ if (unlikely((gr_status & GR_READY) && ++ task->acl && (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)))) ++ goto skip_reslog; ++ ++#ifdef CONFIG_GRKERNSEC_RESLOG ++ gr_log_resource(task, res, wanted, gt); ++#endif ++ skip_reslog: ++ ++ if (unlikely(!(gr_status & GR_READY) || !wanted)) ++ return; ++ ++ acl = task->acl; ++ ++ if (likely(!acl || !(acl->mode & (GR_LEARN | GR_INHERITLEARN)) || ++ !(acl->resmask & (1 << (unsigned short) res)))) ++ return; ++ ++ if (wanted >= acl->res[res].rlim_cur) { ++ unsigned long res_add; ++ ++ res_add = wanted; ++ switch (res) { ++ case RLIMIT_CPU: ++ res_add += GR_RLIM_CPU_BUMP; ++ break; ++ case RLIMIT_FSIZE: ++ res_add += GR_RLIM_FSIZE_BUMP; ++ break; ++ case RLIMIT_DATA: ++ res_add += GR_RLIM_DATA_BUMP; ++ break; ++ case RLIMIT_STACK: ++ res_add += GR_RLIM_STACK_BUMP; ++ break; ++ case RLIMIT_CORE: ++ res_add += GR_RLIM_CORE_BUMP; ++ break; ++ case RLIMIT_RSS: ++ res_add += GR_RLIM_RSS_BUMP; ++ break; ++ case RLIMIT_NPROC: ++ res_add += GR_RLIM_NPROC_BUMP; ++ break; ++ case RLIMIT_NOFILE: ++ res_add += GR_RLIM_NOFILE_BUMP; ++ break; ++ case RLIMIT_MEMLOCK: ++ res_add 
+= GR_RLIM_MEMLOCK_BUMP; ++ break; ++ case RLIMIT_AS: ++ res_add += GR_RLIM_AS_BUMP; ++ break; ++ case RLIMIT_LOCKS: ++ res_add += GR_RLIM_LOCKS_BUMP; ++ break; ++ } ++ ++ acl->res[res].rlim_cur = res_add; ++ ++ if (wanted > acl->res[res].rlim_max) ++ acl->res[res].rlim_max = res_add; ++ ++ security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, ++ task->role->roletype, acl->filename, ++ acl->res[res].rlim_cur, acl->res[res].rlim_max, ++ "", (unsigned long) res); ++ } ++ ++ return; ++} ++ ++#ifdef CONFIG_PAX_HAVE_ACL_FLAGS ++void ++pax_set_initial_flags(struct linux_binprm *bprm) ++{ ++ struct task_struct *task = current; ++ struct acl_subject_label *proc; ++ unsigned long flags; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return; ++ ++ flags = pax_get_flags(task); ++ ++ proc = task->acl; ++ ++ if (proc->pax_flags & GR_PAX_DISABLE_PAGEEXEC) ++ flags &= ~MF_PAX_PAGEEXEC; ++ if (proc->pax_flags & GR_PAX_DISABLE_SEGMEXEC) ++ flags &= ~MF_PAX_SEGMEXEC; ++ if (proc->pax_flags & GR_PAX_DISABLE_RANDMMAP) ++ flags &= ~MF_PAX_RANDMMAP; ++ if (proc->pax_flags & GR_PAX_DISABLE_EMUTRAMP) ++ flags &= ~MF_PAX_EMUTRAMP; ++ if (proc->pax_flags & GR_PAX_DISABLE_MPROTECT) ++ flags &= ~MF_PAX_MPROTECT; ++ ++ if (proc->pax_flags & GR_PAX_ENABLE_PAGEEXEC) ++ flags |= MF_PAX_PAGEEXEC; ++ if (proc->pax_flags & GR_PAX_ENABLE_SEGMEXEC) ++ flags |= MF_PAX_SEGMEXEC; ++ if (proc->pax_flags & GR_PAX_ENABLE_RANDMMAP) ++ flags |= MF_PAX_RANDMMAP; ++ if (proc->pax_flags & GR_PAX_ENABLE_EMUTRAMP) ++ flags |= MF_PAX_EMUTRAMP; ++ if (proc->pax_flags & GR_PAX_ENABLE_MPROTECT) ++ flags |= MF_PAX_MPROTECT; ++ ++ pax_set_flags(task, flags); ++ ++ return; ++} ++#endif ++ ++#ifdef CONFIG_SYSCTL ++/* Eric Biederman likes breaking userland ABI and every inode-based security ++ system to save 35kb of memory */ ++ ++/* we modify the passed in filename, but adjust it back before returning */ ++static struct acl_object_label *gr_lookup_by_name(char *name, unsigned int len) ++{ ++ struct name_entry *nmatch; 
++ char *p, *lastp = NULL; ++ struct acl_object_label *obj = NULL, *tmp; ++ struct acl_subject_label *tmpsubj; ++ char c = '\0'; ++ ++ read_lock(&gr_inode_lock); ++ ++ p = name + len - 1; ++ do { ++ nmatch = lookup_name_entry(name); ++ if (lastp != NULL) ++ *lastp = c; ++ ++ if (nmatch == NULL) ++ goto next_component; ++ tmpsubj = current->acl; ++ do { ++ obj = lookup_acl_obj_label(nmatch->inode, nmatch->device, tmpsubj); ++ if (obj != NULL) { ++ tmp = obj->globbed; ++ while (tmp) { ++ if (!glob_match(tmp->filename, name)) { ++ obj = tmp; ++ goto found_obj; ++ } ++ tmp = tmp->next; ++ } ++ goto found_obj; ++ } ++ } while ((tmpsubj = tmpsubj->parent_subject)); ++next_component: ++ /* end case */ ++ if (p == name) ++ break; ++ ++ while (*p != '/') ++ p--; ++ if (p == name) ++ lastp = p + 1; ++ else { ++ lastp = p; ++ p--; ++ } ++ c = *lastp; ++ *lastp = '\0'; ++ } while (1); ++found_obj: ++ read_unlock(&gr_inode_lock); ++ /* obj returned will always be non-null */ ++ return obj; ++} ++ ++/* returns 0 when allowing, non-zero on error ++ op of 0 is used for readdir, so we don't log the names of hidden files ++*/ ++__u32 ++gr_handle_sysctl(const struct ctl_table *table, const int op) ++{ ++ ctl_table *tmp; ++ const char *proc_sys = "/proc/sys"; ++ char *path; ++ struct acl_object_label *obj; ++ unsigned short len = 0, pos = 0, depth = 0, i; ++ __u32 err = 0; ++ __u32 mode = 0; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ /* for now, ignore operations on non-sysctl entries if it's not a ++ readdir*/ ++ if (table->child != NULL && op != 0) ++ return 0; ++ ++ mode |= GR_FIND; ++ /* it's only a read if it's an entry, read on dirs is for readdir */ ++ if (op & MAY_READ) ++ mode |= GR_READ; ++ if (op & MAY_WRITE) ++ mode |= GR_WRITE; ++ ++ preempt_disable(); ++ ++ path = per_cpu_ptr(gr_shared_page[0], smp_processor_id()); ++ ++ /* it's only a read/write if it's an actual entry, not a dir ++ (which are opened for readdir) ++ */ ++ ++ /* convert the requested 
sysctl entry into a pathname */ ++ ++ for (tmp = (ctl_table *)table; tmp != NULL; tmp = tmp->parent) { ++ len += strlen(tmp->procname); ++ len++; ++ depth++; ++ } ++ ++ if ((len + depth + strlen(proc_sys) + 1) > PAGE_SIZE) { ++ /* deny */ ++ goto out; ++ } ++ ++ memset(path, 0, PAGE_SIZE); ++ ++ memcpy(path, proc_sys, strlen(proc_sys)); ++ ++ pos += strlen(proc_sys); ++ ++ for (; depth > 0; depth--) { ++ path[pos] = '/'; ++ pos++; ++ for (i = 1, tmp = (ctl_table *)table; tmp != NULL; tmp = tmp->parent) { ++ if (depth == i) { ++ memcpy(path + pos, tmp->procname, ++ strlen(tmp->procname)); ++ pos += strlen(tmp->procname); ++ } ++ i++; ++ } ++ } ++ ++ obj = gr_lookup_by_name(path, pos); ++ err = obj->mode & (mode | to_gr_audit(mode) | GR_SUPPRESS); ++ ++ if (unlikely((current->acl->mode & (GR_LEARN | GR_INHERITLEARN)) && ++ ((err & mode) != mode))) { ++ __u32 new_mode = mode; ++ ++ new_mode &= ~(GR_AUDITS | GR_SUPPRESS); ++ ++ err = 0; ++ gr_log_learn_sysctl(current, path, new_mode); ++ } else if (!(err & GR_FIND) && !(err & GR_SUPPRESS) && op != 0) { ++ gr_log_hidden_sysctl(GR_DONT_AUDIT, GR_HIDDEN_ACL_MSG, path); ++ err = -ENOENT; ++ } else if (!(err & GR_FIND)) { ++ err = -ENOENT; ++ } else if (((err & mode) & ~GR_FIND) != (mode & ~GR_FIND) && !(err & GR_SUPPRESS)) { ++ gr_log_str4(GR_DONT_AUDIT, GR_SYSCTL_ACL_MSG, "denied", ++ path, (mode & GR_READ) ? " reading" : "", ++ (mode & GR_WRITE) ? " writing" : ""); ++ err = -EACCES; ++ } else if ((err & mode) != mode) { ++ err = -EACCES; ++ } else if ((((err & mode) & ~GR_FIND) == (mode & ~GR_FIND)) && (err & GR_AUDITS)) { ++ gr_log_str4(GR_DO_AUDIT, GR_SYSCTL_ACL_MSG, "successful", ++ path, (mode & GR_READ) ? " reading" : "", ++ (mode & GR_WRITE) ? 
" writing" : ""); ++ err = 0; ++ } else ++ err = 0; ++ ++ out: ++ preempt_enable(); ++ ++ return err; ++} ++#endif ++ ++int ++gr_handle_proc_ptrace(struct task_struct *task) ++{ ++ struct file *filp; ++ struct task_struct *tmp = task; ++ struct task_struct *curtemp = current; ++ __u32 retmode; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ read_lock(&tasklist_lock); ++ read_lock(&grsec_exec_file_lock); ++ filp = task->exec_file; ++ ++ while (tmp->pid > 0) { ++ if (tmp == curtemp) ++ break; ++ tmp = tmp->parent; ++ } ++ ++ if (!filp || (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE))) { ++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++ return 1; ++ } ++ ++ retmode = gr_search_file(filp->f_path.dentry, GR_NOPTRACE, filp->f_path.mnt); ++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++ ++ if (retmode & GR_NOPTRACE) ++ return 1; ++ ++ if (!(current->acl->mode & GR_POVERRIDE) && !(current->role->roletype & GR_ROLE_GOD) ++ && (current->acl != task->acl || (current->acl != current->role->root_label ++ && current->pid != task->pid))) ++ return 1; ++ ++ return 0; ++} ++ ++int ++gr_handle_ptrace(struct task_struct *task, const long request) ++{ ++ struct task_struct *tmp = task; ++ struct task_struct *curtemp = current; ++ __u32 retmode; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 0; ++ ++ read_lock(&tasklist_lock); ++ while (tmp->pid > 0) { ++ if (tmp == curtemp) ++ break; ++ tmp = tmp->parent; ++ } ++ ++ if (tmp->pid == 0 && !(current->acl->mode & GR_RELAXPTRACE)) { ++ read_unlock(&tasklist_lock); ++ gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); ++ return 1; ++ } ++ read_unlock(&tasklist_lock); ++ ++ read_lock(&grsec_exec_file_lock); ++ if (unlikely(!task->exec_file)) { ++ read_unlock(&grsec_exec_file_lock); ++ return 0; ++ } ++ ++ retmode = gr_search_file(task->exec_file->f_path.dentry, GR_PTRACERD | GR_NOPTRACE, task->exec_file->f_path.mnt); ++ read_unlock(&grsec_exec_file_lock); 
++ ++ if (retmode & GR_NOPTRACE) { ++ gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); ++ return 1; ++ } ++ ++ if (retmode & GR_PTRACERD) { ++ switch (request) { ++ case PTRACE_POKETEXT: ++ case PTRACE_POKEDATA: ++ case PTRACE_POKEUSR: ++#if !defined(CONFIG_PPC32) && !defined(CONFIG_PPC64) && !defined(CONFIG_PARISC) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64) ++ case PTRACE_SETREGS: ++ case PTRACE_SETFPREGS: ++#endif ++#ifdef CONFIG_X86 ++ case PTRACE_SETFPXREGS: ++#endif ++#ifdef CONFIG_ALTIVEC ++ case PTRACE_SETVRREGS: ++#endif ++ return 1; ++ default: ++ return 0; ++ } ++ } else if (!(current->acl->mode & GR_POVERRIDE) && ++ !(current->role->roletype & GR_ROLE_GOD) && ++ (current->acl != task->acl)) { ++ gr_log_ptrace(GR_DONT_AUDIT, GR_PTRACE_ACL_MSG, task); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int is_writable_mmap(const struct file *filp) ++{ ++ struct task_struct *task = current; ++ struct acl_object_label *obj, *obj2; ++ ++ if (gr_status & GR_READY && !(task->acl->mode & GR_OVERRIDE) && ++ !task->is_writable && S_ISREG(filp->f_path.dentry->d_inode->i_mode)) { ++ obj = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, default_role->root_label); ++ obj2 = chk_obj_label(filp->f_path.dentry, filp->f_path.mnt, ++ task->role->root_label); ++ if (unlikely((obj->mode & GR_WRITE) || (obj2->mode & GR_WRITE))) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_WRITLIB_ACL_MSG, filp->f_path.dentry, filp->f_path.mnt); ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++int ++gr_acl_handle_mmap(const struct file *file, const unsigned long prot) ++{ ++ __u32 mode; ++ ++ if (unlikely(!file || !(prot & PROT_EXEC))) ++ return 1; ++ ++ if (is_writable_mmap(file)) ++ return 0; ++ ++ mode = ++ gr_search_file(file->f_path.dentry, ++ GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, ++ file->f_path.mnt); ++ ++ if (!gr_tpe_allow(file)) ++ return 0; ++ ++ if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MMAP_ACL_MSG, 
file->f_path.dentry, file->f_path.mnt); ++ return 0; ++ } else if (unlikely(!(mode & GR_EXEC))) { ++ return 0; ++ } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { ++ gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MMAP_ACL_MSG, file->f_path.dentry, file->f_path.mnt); ++ return 1; ++ } ++ ++ return 1; ++} ++ ++int ++gr_acl_handle_mprotect(const struct file *file, const unsigned long prot) ++{ ++ __u32 mode; ++ ++ if (unlikely(!file || !(prot & PROT_EXEC))) ++ return 1; ++ ++ if (is_writable_mmap(file)) ++ return 0; ++ ++ mode = ++ gr_search_file(file->f_path.dentry, ++ GR_EXEC | GR_AUDIT_EXEC | GR_SUPPRESS, ++ file->f_path.mnt); ++ ++ if (!gr_tpe_allow(file)) ++ return 0; ++ ++ if (unlikely(!(mode & GR_EXEC) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_MPROTECT_ACL_MSG, file->f_path.dentry, file->f_path.mnt); ++ return 0; ++ } else if (unlikely(!(mode & GR_EXEC))) { ++ return 0; ++ } else if (unlikely(mode & GR_EXEC && mode & GR_AUDIT_EXEC)) { ++ gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_MPROTECT_ACL_MSG, file->f_path.dentry, file->f_path.mnt); ++ return 1; ++ } ++ ++ return 1; ++} ++ ++void ++gr_acl_handle_psacct(struct task_struct *task, const long code) ++{ ++ unsigned long runtime; ++ unsigned long cputime; ++ unsigned int wday, cday; ++ __u8 whr, chr; ++ __u8 wmin, cmin; ++ __u8 wsec, csec; ++ struct timespec timeval; ++ ++ if (unlikely(!(gr_status & GR_READY) || !task->acl || ++ !(task->acl->mode & GR_PROCACCT))) ++ return; ++ ++ do_posix_clock_monotonic_gettime(&timeval); ++ runtime = timeval.tv_sec - task->start_time.tv_sec; ++ wday = runtime / (3600 * 24); ++ runtime -= wday * (3600 * 24); ++ whr = runtime / 3600; ++ runtime -= whr * 3600; ++ wmin = runtime / 60; ++ runtime -= wmin * 60; ++ wsec = runtime; ++ ++ cputime = (task->utime + task->stime) / HZ; ++ cday = cputime / (3600 * 24); ++ cputime -= cday * (3600 * 24); ++ chr = cputime / 3600; ++ cputime -= chr * 3600; ++ cmin = cputime / 60; ++ cputime -= cmin * 60; ++ csec 
= cputime; ++ ++ gr_log_procacct(GR_DO_AUDIT, GR_ACL_PROCACCT_MSG, task, wday, whr, wmin, wsec, cday, chr, cmin, csec, code); ++ ++ return; ++} ++ ++void gr_set_kernel_label(struct task_struct *task) ++{ ++ if (gr_status & GR_READY) { ++ task->role = kernel_role; ++ task->acl = kernel_role->root_label; ++ } ++ return; ++} ++ ++int gr_acl_handle_filldir(const struct file *file, const char *name, const unsigned int namelen, const ino_t ino) ++{ ++ struct task_struct *task = current; ++ struct dentry *dentry = file->f_path.dentry; ++ struct vfsmount *mnt = file->f_path.mnt; ++ struct acl_object_label *obj, *tmp; ++ struct acl_subject_label *subj; ++ unsigned int bufsize; ++ int is_not_root; ++ char *path; ++ ++ if (unlikely(!(gr_status & GR_READY))) ++ return 1; ++ ++ if (task->acl->mode & (GR_LEARN | GR_INHERITLEARN)) ++ return 1; ++ ++ /* ignore Eric Biederman */ ++ if (IS_PRIVATE(dentry->d_inode)) ++ return 1; ++ ++ subj = task->acl; ++ do { ++ obj = lookup_acl_obj_label(ino, dentry->d_inode->i_sb->s_dev, subj); ++ if (obj != NULL) ++ return (obj->mode & GR_FIND) ? 1 : 0; ++ } while ((subj = subj->parent_subject)); ++ ++ obj = chk_obj_label(dentry, mnt, task->acl); ++ if (obj->globbed == NULL) ++ return (obj->mode & GR_FIND) ? 1 : 0; ++ ++ is_not_root = ((obj->filename[0] == '/') && ++ (obj->filename[1] == '\0')) ? 
0 : 1; ++ bufsize = PAGE_SIZE - namelen - is_not_root; ++ ++ /* check bufsize > PAGE_SIZE || bufsize == 0 */ ++ if (unlikely((bufsize - 1) > (PAGE_SIZE - 1))) ++ return 1; ++ ++ preempt_disable(); ++ path = d_real_path(dentry, mnt, per_cpu_ptr(gr_shared_page[0], smp_processor_id()), ++ bufsize); ++ ++ bufsize = strlen(path); ++ ++ /* if base is "/", don't append an additional slash */ ++ if (is_not_root) ++ *(path + bufsize) = '/'; ++ memcpy(path + bufsize + is_not_root, name, namelen); ++ *(path + bufsize + namelen + is_not_root) = '\0'; ++ ++ tmp = obj->globbed; ++ while (tmp) { ++ if (!glob_match(tmp->filename, path)) { ++ preempt_enable(); ++ return (tmp->mode & GR_FIND) ? 1 : 0; ++ } ++ tmp = tmp->next; ++ } ++ preempt_enable(); ++ return (obj->mode & GR_FIND) ? 1 : 0; ++} ++ ++EXPORT_SYMBOL(gr_learn_resource); ++EXPORT_SYMBOL(gr_set_kernel_label); ++#ifdef CONFIG_SECURITY ++EXPORT_SYMBOL(gr_check_user_change); ++EXPORT_SYMBOL(gr_check_group_change); ++#endif ++ +diff -urNp a/grsecurity/gracl_alloc.c b/grsecurity/gracl_alloc.c +--- a/grsecurity/gracl_alloc.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_alloc.c 2009-05-24 18:10:25.256085162 -0700 +@@ -0,0 +1,91 @@ ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/gracl.h> ++#include <linux/grsecurity.h> ++ ++static unsigned long alloc_stack_next = 1; ++static unsigned long alloc_stack_size = 1; ++static void **alloc_stack; ++ ++static __inline__ int ++alloc_pop(void) ++{ ++ if (alloc_stack_next == 1) ++ return 0; ++ ++ kfree(alloc_stack[alloc_stack_next - 2]); ++ ++ alloc_stack_next--; ++ ++ return 1; ++} ++ ++static __inline__ void ++alloc_push(void *buf) ++{ ++ if (alloc_stack_next >= alloc_stack_size) ++ BUG(); ++ ++ alloc_stack[alloc_stack_next - 1] = buf; ++ ++ alloc_stack_next++; ++ ++ return; ++} ++ ++void * ++acl_alloc(unsigned long len) ++{ ++ void *ret; ++ ++ if (len > PAGE_SIZE) ++ BUG(); ++ ++ ret = 
kmalloc(len, GFP_KERNEL); ++ ++ if (ret) ++ alloc_push(ret); ++ ++ return ret; ++} ++ ++void ++acl_free_all(void) ++{ ++ if (gr_acl_is_enabled() || !alloc_stack) ++ return; ++ ++ while (alloc_pop()) ; ++ ++ if (alloc_stack) { ++ if ((alloc_stack_size * sizeof (void *)) <= PAGE_SIZE) ++ kfree(alloc_stack); ++ else ++ vfree(alloc_stack); ++ } ++ ++ alloc_stack = NULL; ++ alloc_stack_size = 1; ++ alloc_stack_next = 1; ++ ++ return; ++} ++ ++int ++acl_alloc_stack_init(unsigned long size) ++{ ++ if ((size * sizeof (void *)) <= PAGE_SIZE) ++ alloc_stack = ++ (void **) kmalloc(size * sizeof (void *), GFP_KERNEL); ++ else ++ alloc_stack = (void **) vmalloc(size * sizeof (void *)); ++ ++ alloc_stack_size = size; ++ ++ if (!alloc_stack) ++ return 0; ++ else ++ return 1; ++} +diff -urNp a/grsecurity/gracl_cap.c b/grsecurity/gracl_cap.c +--- a/grsecurity/gracl_cap.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_cap.c 2009-05-24 18:10:25.256085162 -0700 +@@ -0,0 +1,129 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/gracl.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++static const char *captab_log[] = { ++ "CAP_CHOWN", ++ "CAP_DAC_OVERRIDE", ++ "CAP_DAC_READ_SEARCH", ++ "CAP_FOWNER", ++ "CAP_FSETID", ++ "CAP_KILL", ++ "CAP_SETGID", ++ "CAP_SETUID", ++ "CAP_SETPCAP", ++ "CAP_LINUX_IMMUTABLE", ++ "CAP_NET_BIND_SERVICE", ++ "CAP_NET_BROADCAST", ++ "CAP_NET_ADMIN", ++ "CAP_NET_RAW", ++ "CAP_IPC_LOCK", ++ "CAP_IPC_OWNER", ++ "CAP_SYS_MODULE", ++ "CAP_SYS_RAWIO", ++ "CAP_SYS_CHROOT", ++ "CAP_SYS_PTRACE", ++ "CAP_SYS_PACCT", ++ "CAP_SYS_ADMIN", ++ "CAP_SYS_BOOT", ++ "CAP_SYS_NICE", ++ "CAP_SYS_RESOURCE", ++ "CAP_SYS_TIME", ++ "CAP_SYS_TTY_CONFIG", ++ "CAP_MKNOD", ++ "CAP_LEASE", ++ "CAP_AUDIT_WRITE", ++ "CAP_AUDIT_CONTROL", ++ "CAP_SETFCAP", ++ "CAP_MAC_OVERRIDE", ++ "CAP_MAC_ADMIN" ++}; ++ ++EXPORT_SYMBOL(gr_task_is_capable); ++EXPORT_SYMBOL(gr_is_capable_nolog); ++ ++int 
++gr_task_is_capable(struct task_struct *task, const int cap) ++{ ++ struct acl_subject_label *curracl; ++ kernel_cap_t cap_drop = __cap_empty_set, cap_mask = __cap_empty_set; ++ ++ if (!gr_acl_is_enabled()) ++ return 1; ++ ++ curracl = task->acl; ++ ++ cap_drop = curracl->cap_lower; ++ cap_mask = curracl->cap_mask; ++ ++ while ((curracl = curracl->parent_subject)) { ++ /* if the cap isn't specified in the current computed mask but is specified in the ++ current level subject, and is lowered in the current level subject, then add ++ it to the set of dropped capabilities ++ otherwise, add the current level subject's mask to the current computed mask ++ */ ++ if (!cap_raised(cap_mask, cap) && cap_raised(curracl->cap_mask, cap)) { ++ cap_raise(cap_mask, cap); ++ if (cap_raised(curracl->cap_lower, cap)) ++ cap_raise(cap_drop, cap); ++ } ++ } ++ ++ if (!cap_raised(cap_drop, cap)) ++ return 1; ++ ++ curracl = task->acl; ++ ++ if ((curracl->mode & (GR_LEARN | GR_INHERITLEARN)) ++ && cap_raised(task->cap_effective, cap)) { ++ security_learn(GR_LEARN_AUDIT_MSG, task->role->rolename, ++ task->role->roletype, task->uid, ++ task->gid, task->exec_file ? 
++ gr_to_filename(task->exec_file->f_path.dentry, ++ task->exec_file->f_path.mnt) : curracl->filename, ++ curracl->filename, 0UL, ++ 0UL, "", (unsigned long) cap, NIPQUAD(task->signal->curr_ip)); ++ return 1; ++ } ++ ++ if ((cap >= 0) && (cap < (sizeof(captab_log)/sizeof(captab_log[0]))) && cap_raised(task->cap_effective, cap)) ++ gr_log_cap(GR_DONT_AUDIT, GR_CAP_ACL_MSG, task, captab_log[cap]); ++ return 0; ++} ++ ++int ++gr_is_capable_nolog(const int cap) ++{ ++ struct acl_subject_label *curracl; ++ kernel_cap_t cap_drop = __cap_empty_set, cap_mask = __cap_empty_set; ++ ++ if (!gr_acl_is_enabled()) ++ return 1; ++ ++ curracl = current->acl; ++ ++ cap_drop = curracl->cap_lower; ++ cap_mask = curracl->cap_mask; ++ ++ while ((curracl = curracl->parent_subject)) { ++ /* if the cap isn't specified in the current computed mask but is specified in the ++ current level subject, and is lowered in the current level subject, then add ++ it to the set of dropped capabilities ++ otherwise, add the current level subject's mask to the current computed mask ++ */ ++ if (!cap_raised(cap_mask, cap) && cap_raised(curracl->cap_mask, cap)) { ++ cap_raise(cap_mask, cap); ++ if (cap_raised(curracl->cap_lower, cap)) ++ cap_raise(cap_drop, cap); ++ } ++ } ++ ++ if (!cap_raised(cap_drop, cap)) ++ return 1; ++ ++ return 0; ++} ++ +diff -urNp a/grsecurity/gracl_fs.c b/grsecurity/gracl_fs.c +--- a/grsecurity/gracl_fs.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_fs.c 2009-05-24 18:10:25.257085221 -0700 +@@ -0,0 +1,423 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/types.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/stat.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++#include <linux/gracl.h> ++ ++__u32 ++gr_acl_handle_hidden_file(const struct dentry * dentry, ++ const struct vfsmount * mnt) ++{ ++ __u32 mode; ++ ++ if (unlikely(!dentry->d_inode)) ++ return GR_FIND; ++ ++ mode = ++ 
gr_search_file(dentry, GR_FIND | GR_AUDIT_FIND | GR_SUPPRESS, mnt); ++ ++ if (unlikely(mode & GR_FIND && mode & GR_AUDIT_FIND)) { ++ gr_log_fs_rbac_generic(GR_DO_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); ++ return mode; ++ } else if (unlikely(!(mode & GR_FIND) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_generic(GR_DONT_AUDIT, GR_HIDDEN_ACL_MSG, dentry, mnt); ++ return 0; ++ } else if (unlikely(!(mode & GR_FIND))) ++ return 0; ++ ++ return GR_FIND; ++} ++ ++__u32 ++gr_acl_handle_open(const struct dentry * dentry, const struct vfsmount * mnt, ++ const int fmode) ++{ ++ __u32 reqmode = GR_FIND; ++ __u32 mode; ++ ++ if (unlikely(!dentry->d_inode)) ++ return reqmode; ++ ++ if (unlikely(fmode & O_APPEND)) ++ reqmode |= GR_APPEND; ++ else if (unlikely(fmode & FMODE_WRITE)) ++ reqmode |= GR_WRITE; ++ if (likely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) ++ reqmode |= GR_READ; ++ ++ mode = ++ gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, ++ mnt); ++ ++ if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { ++ gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, ++ reqmode & GR_READ ? " reading" : "", ++ reqmode & GR_WRITE ? " writing" : reqmode & ++ GR_APPEND ? " appending" : ""); ++ return reqmode; ++ } else ++ if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) ++ { ++ gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_OPEN_ACL_MSG, dentry, mnt, ++ reqmode & GR_READ ? " reading" : "", ++ reqmode & GR_WRITE ? " writing" : reqmode & ++ GR_APPEND ? 
" appending" : ""); ++ return 0; ++ } else if (unlikely((mode & reqmode) != reqmode)) ++ return 0; ++ ++ return reqmode; ++} ++ ++__u32 ++gr_acl_handle_creat(const struct dentry * dentry, ++ const struct dentry * p_dentry, ++ const struct vfsmount * p_mnt, const int fmode, ++ const int imode) ++{ ++ __u32 reqmode = GR_WRITE | GR_CREATE; ++ __u32 mode; ++ ++ if (unlikely(fmode & O_APPEND)) ++ reqmode |= GR_APPEND; ++ if (unlikely((fmode & FMODE_READ) && !(fmode & O_DIRECTORY))) ++ reqmode |= GR_READ; ++ if (unlikely((fmode & O_CREAT) && (imode & (S_ISUID | S_ISGID)))) ++ reqmode |= GR_SETID; ++ ++ mode = ++ gr_check_create(dentry, p_dentry, p_mnt, ++ reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); ++ ++ if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { ++ gr_log_fs_rbac_mode2(GR_DO_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, ++ reqmode & GR_READ ? " reading" : "", ++ reqmode & GR_WRITE ? " writing" : reqmode & ++ GR_APPEND ? " appending" : ""); ++ return reqmode; ++ } else ++ if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) ++ { ++ gr_log_fs_rbac_mode2(GR_DONT_AUDIT, GR_CREATE_ACL_MSG, dentry, p_mnt, ++ reqmode & GR_READ ? " reading" : "", ++ reqmode & GR_WRITE ? " writing" : reqmode & ++ GR_APPEND ? " appending" : ""); ++ return 0; ++ } else if (unlikely((mode & reqmode) != reqmode)) ++ return 0; ++ ++ return reqmode; ++} ++ ++__u32 ++gr_acl_handle_access(const struct dentry * dentry, const struct vfsmount * mnt, ++ const int fmode) ++{ ++ __u32 mode, reqmode = GR_FIND; ++ ++ if ((fmode & S_IXOTH) && !S_ISDIR(dentry->d_inode->i_mode)) ++ reqmode |= GR_EXEC; ++ if (fmode & S_IWOTH) ++ reqmode |= GR_WRITE; ++ if (fmode & S_IROTH) ++ reqmode |= GR_READ; ++ ++ mode = ++ gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, ++ mnt); ++ ++ if (unlikely(((mode & reqmode) == reqmode) && mode & GR_AUDITS)) { ++ gr_log_fs_rbac_mode3(GR_DO_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, ++ reqmode & GR_READ ? 
" reading" : "", ++ reqmode & GR_WRITE ? " writing" : "", ++ reqmode & GR_EXEC ? " executing" : ""); ++ return reqmode; ++ } else ++ if (unlikely((mode & reqmode) != reqmode && !(mode & GR_SUPPRESS))) ++ { ++ gr_log_fs_rbac_mode3(GR_DONT_AUDIT, GR_ACCESS_ACL_MSG, dentry, mnt, ++ reqmode & GR_READ ? " reading" : "", ++ reqmode & GR_WRITE ? " writing" : "", ++ reqmode & GR_EXEC ? " executing" : ""); ++ return 0; ++ } else if (unlikely((mode & reqmode) != reqmode)) ++ return 0; ++ ++ return reqmode; ++} ++ ++static __u32 generic_fs_handler(const struct dentry *dentry, const struct vfsmount *mnt, __u32 reqmode, const char *fmt) ++{ ++ __u32 mode; ++ ++ mode = gr_search_file(dentry, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS, mnt); ++ ++ if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { ++ gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, dentry, mnt); ++ return mode; ++ } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, dentry, mnt); ++ return 0; ++ } else if (unlikely((mode & (reqmode)) != (reqmode))) ++ return 0; ++ ++ return (reqmode); ++} ++ ++__u32 ++gr_acl_handle_rmdir(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_RMDIR_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_unlink(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_WRITE | GR_DELETE , GR_UNLINK_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_truncate(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_WRITE, GR_TRUNCATE_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_utime(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_WRITE, GR_ATIME_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_fchmod(const struct dentry *dentry, const struct vfsmount *mnt, ++ mode_t mode) ++{ ++ if (unlikely(dentry->d_inode 
&& S_ISSOCK(dentry->d_inode->i_mode))) ++ return 1; ++ ++ if (unlikely((mode != (mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { ++ return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, ++ GR_FCHMOD_ACL_MSG); ++ } else { ++ return generic_fs_handler(dentry, mnt, GR_WRITE, GR_FCHMOD_ACL_MSG); ++ } ++} ++ ++__u32 ++gr_acl_handle_chmod(const struct dentry *dentry, const struct vfsmount *mnt, ++ mode_t mode) ++{ ++ if (unlikely((mode != (mode_t)-1) && (mode & (S_ISUID | S_ISGID)))) { ++ return generic_fs_handler(dentry, mnt, GR_WRITE | GR_SETID, ++ GR_CHMOD_ACL_MSG); ++ } else { ++ return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHMOD_ACL_MSG); ++ } ++} ++ ++__u32 ++gr_acl_handle_chown(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_WRITE, GR_CHOWN_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_execve(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_EXEC, GR_EXEC_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_unix(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return generic_fs_handler(dentry, mnt, GR_READ | GR_WRITE, ++ GR_UNIXCONNECT_ACL_MSG); ++} ++ ++/* hardlinks require at minimum create permission, ++ any additional privilege required is based on the ++ privilege of the file being linked to ++*/ ++__u32 ++gr_acl_handle_link(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, ++ const struct dentry * old_dentry, ++ const struct vfsmount * old_mnt, const char *to) ++{ ++ __u32 mode; ++ __u32 needmode = GR_CREATE | GR_LINK; ++ __u32 needaudit = GR_AUDIT_CREATE | GR_AUDIT_LINK; ++ ++ mode = ++ gr_check_link(new_dentry, parent_dentry, parent_mnt, old_dentry, ++ old_mnt); ++ ++ if (unlikely(((mode & needmode) == needmode) && (mode & needaudit))) { ++ gr_log_fs_rbac_str(GR_DO_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); ++ return mode; ++ } else if (unlikely(((mode & needmode) 
!= needmode) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_LINK_ACL_MSG, old_dentry, old_mnt, to); ++ return 0; ++ } else if (unlikely((mode & needmode) != needmode)) ++ return 0; ++ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_symlink(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, const char *from) ++{ ++ __u32 needmode = GR_WRITE | GR_CREATE; ++ __u32 mode; ++ ++ mode = ++ gr_check_create(new_dentry, parent_dentry, parent_mnt, ++ GR_CREATE | GR_AUDIT_CREATE | ++ GR_WRITE | GR_AUDIT_WRITE | GR_SUPPRESS); ++ ++ if (unlikely(mode & GR_WRITE && mode & GR_AUDITS)) { ++ gr_log_fs_str_rbac(GR_DO_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); ++ return mode; ++ } else if (unlikely(((mode & needmode) != needmode) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_str_rbac(GR_DONT_AUDIT, GR_SYMLINK_ACL_MSG, from, new_dentry, parent_mnt); ++ return 0; ++ } else if (unlikely((mode & needmode) != needmode)) ++ return 0; ++ ++ return (GR_WRITE | GR_CREATE); ++} ++ ++static __u32 generic_fs_create_handler(const struct dentry *new_dentry, const struct dentry *parent_dentry, const struct vfsmount *parent_mnt, __u32 reqmode, const char *fmt) ++{ ++ __u32 mode; ++ ++ mode = gr_check_create(new_dentry, parent_dentry, parent_mnt, reqmode | to_gr_audit(reqmode) | GR_SUPPRESS); ++ ++ if (unlikely(((mode & (reqmode)) == (reqmode)) && mode & GR_AUDITS)) { ++ gr_log_fs_rbac_generic(GR_DO_AUDIT, fmt, new_dentry, parent_mnt); ++ return mode; ++ } else if (unlikely((mode & (reqmode)) != (reqmode) && !(mode & GR_SUPPRESS))) { ++ gr_log_fs_rbac_generic(GR_DONT_AUDIT, fmt, new_dentry, parent_mnt); ++ return 0; ++ } else if (unlikely((mode & (reqmode)) != (reqmode))) ++ return 0; ++ ++ return (reqmode); ++} ++ ++__u32 ++gr_acl_handle_mknod(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, ++ const int mode) ++{ ++ __u32 reqmode = GR_WRITE | 
GR_CREATE; ++ if (unlikely(mode & (S_ISUID | S_ISGID))) ++ reqmode |= GR_SETID; ++ ++ return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, ++ reqmode, GR_MKNOD_ACL_MSG); ++} ++ ++__u32 ++gr_acl_handle_mkdir(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt) ++{ ++ return generic_fs_create_handler(new_dentry, parent_dentry, parent_mnt, ++ GR_WRITE | GR_CREATE, GR_MKDIR_ACL_MSG); ++} ++ ++#define RENAME_CHECK_SUCCESS(old, new) \ ++ (((old & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ)) && \ ++ ((new & (GR_WRITE | GR_READ)) == (GR_WRITE | GR_READ))) ++ ++int ++gr_acl_handle_rename(struct dentry *new_dentry, ++ struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ struct dentry *old_dentry, ++ struct inode *old_parent_inode, ++ struct vfsmount *old_mnt, const char *newname) ++{ ++ __u32 comp1, comp2; ++ int error = 0; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ return 0; ++ ++ if (!new_dentry->d_inode) { ++ comp1 = gr_check_create(new_dentry, parent_dentry, parent_mnt, ++ GR_READ | GR_WRITE | GR_CREATE | GR_AUDIT_READ | ++ GR_AUDIT_WRITE | GR_AUDIT_CREATE | GR_SUPPRESS); ++ comp2 = gr_search_file(old_dentry, GR_READ | GR_WRITE | ++ GR_DELETE | GR_AUDIT_DELETE | ++ GR_AUDIT_READ | GR_AUDIT_WRITE | ++ GR_SUPPRESS, old_mnt); ++ } else { ++ comp1 = gr_search_file(new_dentry, GR_READ | GR_WRITE | ++ GR_CREATE | GR_DELETE | ++ GR_AUDIT_CREATE | GR_AUDIT_DELETE | ++ GR_AUDIT_READ | GR_AUDIT_WRITE | ++ GR_SUPPRESS, parent_mnt); ++ comp2 = ++ gr_search_file(old_dentry, ++ GR_READ | GR_WRITE | GR_AUDIT_READ | ++ GR_DELETE | GR_AUDIT_DELETE | ++ GR_AUDIT_WRITE | GR_SUPPRESS, old_mnt); ++ } ++ ++ if (RENAME_CHECK_SUCCESS(comp1, comp2) && ++ ((comp1 & GR_AUDITS) || (comp2 & GR_AUDITS))) ++ gr_log_fs_rbac_str(GR_DO_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); ++ else if (!RENAME_CHECK_SUCCESS(comp1, comp2) && !(comp1 & GR_SUPPRESS) ++ && !(comp2 & GR_SUPPRESS)) { ++ 
gr_log_fs_rbac_str(GR_DONT_AUDIT, GR_RENAME_ACL_MSG, old_dentry, old_mnt, newname); ++ error = -EACCES; ++ } else if (unlikely(!RENAME_CHECK_SUCCESS(comp1, comp2))) ++ error = -EACCES; ++ ++ return error; ++} ++ ++void ++gr_acl_handle_exit(void) ++{ ++ u16 id; ++ char *rolename; ++ struct file *exec_file; ++ ++ if (unlikely(current->acl_sp_role && gr_acl_is_enabled())) { ++ id = current->acl_role_id; ++ rolename = current->role->rolename; ++ gr_set_acls(1); ++ gr_log_str_int(GR_DONT_AUDIT_GOOD, GR_SPROLEL_ACL_MSG, rolename, id); ++ } ++ ++ write_lock(&grsec_exec_file_lock); ++ exec_file = current->exec_file; ++ current->exec_file = NULL; ++ write_unlock(&grsec_exec_file_lock); ++ ++ if (exec_file) ++ fput(exec_file); ++} ++ ++int ++gr_acl_handle_procpidmem(const struct task_struct *task) ++{ ++ if (unlikely(!gr_acl_is_enabled())) ++ return 0; ++ ++ if (task != current && task->acl->mode & GR_PROTPROCFD) ++ return -EACCES; ++ ++ return 0; ++} +diff -urNp a/grsecurity/gracl_ip.c b/grsecurity/gracl_ip.c +--- a/grsecurity/gracl_ip.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_ip.c 2009-05-24 18:10:25.257967116 -0700 +@@ -0,0 +1,338 @@ ++#include <linux/kernel.h> ++#include <asm/uaccess.h> ++#include <asm/errno.h> ++#include <net/sock.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/net.h> ++#include <linux/in.h> ++#include <linux/skbuff.h> ++#include <linux/ip.h> ++#include <linux/udp.h> ++#include <linux/smp_lock.h> ++#include <linux/types.h> ++#include <linux/sched.h> ++#include <linux/netdevice.h> ++#include <linux/inetdevice.h> ++#include <linux/gracl.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++#define GR_BIND 0x01 ++#define GR_CONNECT 0x02 ++#define GR_INVERT 0x04 ++#define GR_BINDOVERRIDE 0x08 ++#define GR_CONNECTOVERRIDE 0x10 ++ ++static const char * gr_protocols[256] = { ++ "ip", "icmp", "igmp", "ggp", "ipencap", "st", "tcp", "cbt", ++ "egp", "igp", "bbn-rcc", "nvp", "pup", "argus", "emcon", 
"xnet", ++ "chaos", "udp", "mux", "dcn", "hmp", "prm", "xns-idp", "trunk-1", ++ "trunk-2", "leaf-1", "leaf-2", "rdp", "irtp", "iso-tp4", "netblt", "mfe-nsp", ++ "merit-inp", "sep", "3pc", "idpr", "xtp", "ddp", "idpr-cmtp", "tp++", ++ "il", "ipv6", "sdrp", "ipv6-route", "ipv6-frag", "idrp", "rsvp", "gre", ++ "mhrp", "bna", "ipv6-crypt", "ipv6-auth", "i-nlsp", "swipe", "narp", "mobile", ++ "tlsp", "skip", "ipv6-icmp", "ipv6-nonxt", "ipv6-opts", "unknown:61", "cftp", "unknown:63", ++ "sat-expak", "kryptolan", "rvd", "ippc", "unknown:68", "sat-mon", "visa", "ipcv", ++ "cpnx", "cphb", "wsn", "pvp", "br-sat-mon", "sun-nd", "wb-mon", "wb-expak", ++ "iso-ip", "vmtp", "secure-vmtp", "vines", "ttp", "nfsnet-igp", "dgp", "tcf", ++ "eigrp", "ospf", "sprite-rpc", "larp", "mtp", "ax.25", "ipip", "micp", ++ "scc-sp", "etherip", "encap", "unknown:99", "gmtp", "ifmp", "pnni", "pim", ++ "aris", "scps", "qnx", "a/n", "ipcomp", "snp", "compaq-peer", "ipx-in-ip", ++ "vrrp", "pgm", "unknown:114", "l2tp", "ddx", "iatp", "stp", "srp", ++ "uti", "smp", "sm", "ptp", "isis", "fire", "crtp", "crdup", ++ "sscopmce", "iplt", "sps", "pipe", "sctp", "fc", "unkown:134", "unknown:135", ++ "unknown:136", "unknown:137", "unknown:138", "unknown:139", "unknown:140", "unknown:141", "unknown:142", "unknown:143", ++ "unknown:144", "unknown:145", "unknown:146", "unknown:147", "unknown:148", "unknown:149", "unknown:150", "unknown:151", ++ "unknown:152", "unknown:153", "unknown:154", "unknown:155", "unknown:156", "unknown:157", "unknown:158", "unknown:159", ++ "unknown:160", "unknown:161", "unknown:162", "unknown:163", "unknown:164", "unknown:165", "unknown:166", "unknown:167", ++ "unknown:168", "unknown:169", "unknown:170", "unknown:171", "unknown:172", "unknown:173", "unknown:174", "unknown:175", ++ "unknown:176", "unknown:177", "unknown:178", "unknown:179", "unknown:180", "unknown:181", "unknown:182", "unknown:183", ++ "unknown:184", "unknown:185", "unknown:186", "unknown:187", "unknown:188", 
"unknown:189", "unknown:190", "unknown:191", ++ "unknown:192", "unknown:193", "unknown:194", "unknown:195", "unknown:196", "unknown:197", "unknown:198", "unknown:199", ++ "unknown:200", "unknown:201", "unknown:202", "unknown:203", "unknown:204", "unknown:205", "unknown:206", "unknown:207", ++ "unknown:208", "unknown:209", "unknown:210", "unknown:211", "unknown:212", "unknown:213", "unknown:214", "unknown:215", ++ "unknown:216", "unknown:217", "unknown:218", "unknown:219", "unknown:220", "unknown:221", "unknown:222", "unknown:223", ++ "unknown:224", "unknown:225", "unknown:226", "unknown:227", "unknown:228", "unknown:229", "unknown:230", "unknown:231", ++ "unknown:232", "unknown:233", "unknown:234", "unknown:235", "unknown:236", "unknown:237", "unknown:238", "unknown:239", ++ "unknown:240", "unknown:241", "unknown:242", "unknown:243", "unknown:244", "unknown:245", "unknown:246", "unknown:247", ++ "unknown:248", "unknown:249", "unknown:250", "unknown:251", "unknown:252", "unknown:253", "unknown:254", "unknown:255", ++ }; ++ ++static const char * gr_socktypes[11] = { ++ "unknown:0", "stream", "dgram", "raw", "rdm", "seqpacket", "unknown:6", ++ "unknown:7", "unknown:8", "unknown:9", "packet" ++ }; ++ ++const char * ++gr_proto_to_name(unsigned char proto) ++{ ++ return gr_protocols[proto]; ++} ++ ++const char * ++gr_socktype_to_name(unsigned char type) ++{ ++ return gr_socktypes[type]; ++} ++ ++int ++gr_search_socket(const int domain, const int type, const int protocol) ++{ ++ struct acl_subject_label *curr; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ goto exit; ++ ++ if ((domain < 0) || (type < 0) || (protocol < 0) || (domain != PF_INET) ++ || (domain >= NPROTO) || (type >= SOCK_MAX) || (protocol > 255)) ++ goto exit; // let the kernel handle it ++ ++ curr = current->acl; ++ ++ if (!curr->ips) ++ goto exit; ++ ++ if ((curr->ip_type & (1 << type)) && ++ (curr->ip_proto[protocol / 32] & (1 << (protocol % 32)))) ++ goto exit; ++ ++ if (curr->mode & (GR_LEARN | 
GR_INHERITLEARN)) { ++ /* we don't place acls on raw sockets , and sometimes ++ dgram/ip sockets are opened for ioctl and not ++ bind/connect, so we'll fake a bind learn log */ ++ if (type == SOCK_RAW || type == SOCK_PACKET) { ++ __u32 fakeip = 0; ++ security_learn(GR_IP_LEARN_MSG, current->role->rolename, ++ current->role->roletype, current->uid, ++ current->gid, current->exec_file ? ++ gr_to_filename(current->exec_file->f_path.dentry, ++ current->exec_file->f_path.mnt) : ++ curr->filename, curr->filename, ++ NIPQUAD(fakeip), 0, type, ++ protocol, GR_CONNECT, ++NIPQUAD(current->signal->curr_ip)); ++ } else if ((type == SOCK_DGRAM) && (protocol == IPPROTO_IP)) { ++ __u32 fakeip = 0; ++ security_learn(GR_IP_LEARN_MSG, current->role->rolename, ++ current->role->roletype, current->uid, ++ current->gid, current->exec_file ? ++ gr_to_filename(current->exec_file->f_path.dentry, ++ current->exec_file->f_path.mnt) : ++ curr->filename, curr->filename, ++ NIPQUAD(fakeip), 0, type, ++ protocol, GR_BIND, NIPQUAD(current->signal->curr_ip)); ++ } ++ /* we'll log when they use connect or bind */ ++ goto exit; ++ } ++ ++ gr_log_str3(GR_DONT_AUDIT, GR_SOCK_MSG, "inet", ++ gr_socktype_to_name(type), gr_proto_to_name(protocol)); ++ ++ return 0; ++ exit: ++ return 1; ++} ++ ++int check_ip_policy(struct acl_ip_label *ip, __u32 ip_addr, __u16 ip_port, __u8 protocol, const int mode, const int type, __u32 our_addr, __u32 our_netmask) ++{ ++ if ((ip->mode & mode) && ++ (ip_port >= ip->low) && ++ (ip_port <= ip->high) && ++ ((ntohl(ip_addr) & our_netmask) == ++ (ntohl(our_addr) & our_netmask)) ++ && (ip->proto[protocol / 32] & (1 << (protocol % 32))) ++ && (ip->type & (1 << type))) { ++ if (ip->mode & GR_INVERT) ++ return 2; // specifically denied ++ else ++ return 1; // allowed ++ } ++ ++ return 0; // not specifically allowed, may continue parsing ++} ++ ++static int ++gr_search_connectbind(const int full_mode, struct sock *sk, ++ struct sockaddr_in *addr, const int type) ++{ ++ char 
iface[IFNAMSIZ] = {0}; ++ struct acl_subject_label *curr; ++ struct acl_ip_label *ip; ++ struct inet_sock *isk; ++ struct net_device *dev; ++ struct in_device *idev; ++ unsigned long i; ++ int ret; ++ int mode = full_mode & (GR_BIND | GR_CONNECT); ++ __u32 ip_addr = 0; ++ __u32 our_addr; ++ __u32 our_netmask; ++ char *p; ++ __u16 ip_port = 0; ++ ++ if (unlikely(!gr_acl_is_enabled() || sk->sk_family != PF_INET)) ++ return 0; ++ ++ curr = current->acl; ++ isk = inet_sk(sk); ++ ++ /* INADDR_ANY overriding for binds, inaddr_any_override is already in network order */ ++ if ((full_mode & GR_BINDOVERRIDE) && addr->sin_addr.s_addr == htonl(INADDR_ANY) && curr->inaddr_any_override != 0) ++ addr->sin_addr.s_addr = curr->inaddr_any_override; ++ if ((full_mode & GR_CONNECT) && isk->saddr == htonl(INADDR_ANY) && curr->inaddr_any_override != 0) { ++ struct sockaddr_in saddr; ++ int err; ++ ++ saddr.sin_family = AF_INET; ++ saddr.sin_addr.s_addr = curr->inaddr_any_override; ++ saddr.sin_port = isk->sport; ++ ++ err = security_socket_bind(sk->sk_socket, (struct sockaddr *)&saddr, sizeof(struct sockaddr_in)); ++ if (err) ++ return err; ++ ++ err = sk->sk_socket->ops->bind(sk->sk_socket, (struct sockaddr *)&saddr, sizeof(struct sockaddr_in)); ++ if (err) ++ return err; ++ } ++ ++ if (!curr->ips) ++ return 0; ++ ++ ip_addr = addr->sin_addr.s_addr; ++ ip_port = ntohs(addr->sin_port); ++ ++ if (curr->mode & (GR_LEARN | GR_INHERITLEARN)) { ++ security_learn(GR_IP_LEARN_MSG, current->role->rolename, ++ current->role->roletype, current->uid, ++ current->gid, current->exec_file ? 
++ gr_to_filename(current->exec_file->f_path.dentry, ++ current->exec_file->f_path.mnt) : ++ curr->filename, curr->filename, ++ NIPQUAD(ip_addr), ip_port, type, ++ sk->sk_protocol, mode, NIPQUAD(current->signal->curr_ip)); ++ return 0; ++ } ++ ++ for (i = 0; i < curr->ip_num; i++) { ++ ip = *(curr->ips + i); ++ if (ip->iface != NULL) { ++ strncpy(iface, ip->iface, IFNAMSIZ - 1); ++ p = strchr(iface, ':'); ++ if (p != NULL) ++ *p = '\0'; ++ dev = dev_get_by_name(sock_net(sk), iface); ++ if (dev == NULL) ++ continue; ++ idev = in_dev_get(dev); ++ if (idev == NULL) { ++ dev_put(dev); ++ continue; ++ } ++ rcu_read_lock(); ++ for_ifa(idev) { ++ if (!strcmp(ip->iface, ifa->ifa_label)) { ++ our_addr = ifa->ifa_address; ++ our_netmask = 0xffffffff; ++ ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); ++ if (ret == 1) { ++ rcu_read_unlock(); ++ in_dev_put(idev); ++ dev_put(dev); ++ return 0; ++ } else if (ret == 2) { ++ rcu_read_unlock(); ++ in_dev_put(idev); ++ dev_put(dev); ++ goto denied; ++ } ++ } ++ } endfor_ifa(idev); ++ rcu_read_unlock(); ++ in_dev_put(idev); ++ dev_put(dev); ++ } else { ++ our_addr = ip->addr; ++ our_netmask = ip->netmask; ++ ret = check_ip_policy(ip, ip_addr, ip_port, sk->sk_protocol, mode, type, our_addr, our_netmask); ++ if (ret == 1) ++ return 0; ++ else if (ret == 2) ++ goto denied; ++ } ++ } ++ ++denied: ++ if (mode == GR_BIND) ++ gr_log_int5_str2(GR_DONT_AUDIT, GR_BIND_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); ++ else if (mode == GR_CONNECT) ++ gr_log_int5_str2(GR_DONT_AUDIT, GR_CONNECT_ACL_MSG, NIPQUAD(ip_addr), ip_port, gr_socktype_to_name(type), gr_proto_to_name(sk->sk_protocol)); ++ ++ return -EACCES; ++} ++ ++int ++gr_search_connect(struct socket *sock, struct sockaddr_in *addr) ++{ ++ return gr_search_connectbind(GR_CONNECT | GR_CONNECTOVERRIDE, sock->sk, addr, sock->type); ++} ++ ++int ++gr_search_bind(struct socket *sock, struct 
sockaddr_in *addr) ++{ ++ return gr_search_connectbind(GR_BIND | GR_BINDOVERRIDE, sock->sk, addr, sock->type); ++} ++ ++int gr_search_listen(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct sockaddr_in addr; ++ ++ addr.sin_addr.s_addr = inet_sk(sk)->saddr; ++ addr.sin_port = inet_sk(sk)->sport; ++ ++ return gr_search_connectbind(GR_BIND | GR_CONNECTOVERRIDE, sock->sk, &addr, sock->type); ++} ++ ++int gr_search_accept(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct sockaddr_in addr; ++ ++ addr.sin_addr.s_addr = inet_sk(sk)->saddr; ++ addr.sin_port = inet_sk(sk)->sport; ++ ++ return gr_search_connectbind(GR_BIND | GR_CONNECTOVERRIDE, sock->sk, &addr, sock->type); ++} ++ ++int ++gr_search_udp_sendmsg(struct sock *sk, struct sockaddr_in *addr) ++{ ++ if (addr) ++ return gr_search_connectbind(GR_CONNECT, sk, addr, SOCK_DGRAM); ++ else { ++ struct sockaddr_in sin; ++ const struct inet_sock *inet = inet_sk(sk); ++ ++ sin.sin_addr.s_addr = inet->daddr; ++ sin.sin_port = inet->dport; ++ ++ return gr_search_connectbind(GR_CONNECT | GR_CONNECTOVERRIDE, sk, &sin, SOCK_DGRAM); ++ } ++} ++ ++int ++gr_search_udp_recvmsg(struct sock *sk, const struct sk_buff *skb) ++{ ++ struct sockaddr_in sin; ++ ++ if (unlikely(skb->len < sizeof (struct udphdr))) ++ return 0; // skip this packet ++ ++ sin.sin_addr.s_addr = ip_hdr(skb)->saddr; ++ sin.sin_port = udp_hdr(skb)->source; ++ ++ return gr_search_connectbind(GR_CONNECT | GR_CONNECTOVERRIDE, sk, &sin, SOCK_DGRAM); ++} +diff -urNp a/grsecurity/gracl_learn.c b/grsecurity/gracl_learn.c +--- a/grsecurity/gracl_learn.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_learn.c 2009-05-24 18:10:25.257967116 -0700 +@@ -0,0 +1,211 @@ ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <linux/sched.h> ++#include <linux/poll.h> ++#include <linux/smp_lock.h> ++#include <linux/string.h> ++#include <linux/file.h> ++#include <linux/types.h> ++#include <linux/vmalloc.h> ++#include 
<linux/grinternal.h> ++ ++extern ssize_t write_grsec_handler(struct file * file, const char __user * buf, ++ size_t count, loff_t *ppos); ++extern int gr_acl_is_enabled(void); ++ ++static DECLARE_WAIT_QUEUE_HEAD(learn_wait); ++static int gr_learn_attached; ++ ++/* use a 512k buffer */ ++#define LEARN_BUFFER_SIZE (512 * 1024) ++ ++static DEFINE_SPINLOCK(gr_learn_lock); ++static DECLARE_MUTEX(gr_learn_user_sem); ++ ++/* we need to maintain two buffers, so that the kernel context of grlearn ++ uses a semaphore around the userspace copying, and the other kernel contexts ++ use a spinlock when copying into the buffer, since they cannot sleep ++*/ ++static char *learn_buffer; ++static char *learn_buffer_user; ++static int learn_buffer_len; ++static int learn_buffer_user_len; ++ ++static ssize_t ++read_learn(struct file *file, char __user * buf, size_t count, loff_t * ppos) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ssize_t retval = 0; ++ ++ add_wait_queue(&learn_wait, &wait); ++ set_current_state(TASK_INTERRUPTIBLE); ++ do { ++ down(&gr_learn_user_sem); ++ spin_lock(&gr_learn_lock); ++ if (learn_buffer_len) ++ break; ++ spin_unlock(&gr_learn_lock); ++ up(&gr_learn_user_sem); ++ if (file->f_flags & O_NONBLOCK) { ++ retval = -EAGAIN; ++ goto out; ++ } ++ if (signal_pending(current)) { ++ retval = -ERESTARTSYS; ++ goto out; ++ } ++ ++ schedule(); ++ } while (1); ++ ++ memcpy(learn_buffer_user, learn_buffer, learn_buffer_len); ++ learn_buffer_user_len = learn_buffer_len; ++ retval = learn_buffer_len; ++ learn_buffer_len = 0; ++ ++ spin_unlock(&gr_learn_lock); ++ ++ if (copy_to_user(buf, learn_buffer_user, learn_buffer_user_len)) ++ retval = -EFAULT; ++ ++ up(&gr_learn_user_sem); ++out: ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&learn_wait, &wait); ++ return retval; ++} ++ ++static unsigned int ++poll_learn(struct file * file, poll_table * wait) ++{ ++ poll_wait(file, &learn_wait, wait); ++ ++ if (learn_buffer_len) ++ return (POLLIN | POLLRDNORM); ++ ++ return 
0; ++} ++ ++void ++gr_clear_learn_entries(void) ++{ ++ char *tmp; /* holds the detached learn buffer until it is freed */ ++ ++ down(&gr_learn_user_sem); ++ if (learn_buffer != NULL) { ++ spin_lock(&gr_learn_lock); ++ tmp = learn_buffer; ++ learn_buffer = NULL; ++ spin_unlock(&gr_learn_lock); ++ vfree(tmp); /* learn_buffer is already NULL here; free the saved pointer */ ++ } ++ if (learn_buffer_user != NULL) { ++ vfree(learn_buffer_user); ++ learn_buffer_user = NULL; ++ } ++ learn_buffer_len = 0; ++ up(&gr_learn_user_sem); ++ ++ return; ++} ++ ++void ++gr_add_learn_entry(const char *fmt, ...) ++{ ++ va_list args; ++ unsigned int len; ++ ++ if (!gr_learn_attached) ++ return; ++ ++ spin_lock(&gr_learn_lock); ++ ++ /* leave a gap at the end so we know when it's "full" but don't have to ++ compute the exact length of the string we're trying to append ++ */ ++ if (learn_buffer_len > LEARN_BUFFER_SIZE - 16384) { ++ spin_unlock(&gr_learn_lock); ++ wake_up_interruptible(&learn_wait); ++ return; ++ } ++ if (learn_buffer == NULL) { ++ spin_unlock(&gr_learn_lock); ++ return; ++ } ++ ++ va_start(args, fmt); ++ len = vsnprintf(learn_buffer + learn_buffer_len, LEARN_BUFFER_SIZE - learn_buffer_len, fmt, args); ++ va_end(args); ++ ++ learn_buffer_len += len + 1; ++ ++ spin_unlock(&gr_learn_lock); ++ wake_up_interruptible(&learn_wait); ++ ++ return; ++} ++ ++static int ++open_learn(struct inode *inode, struct file *file) ++{ ++ if (file->f_mode & FMODE_READ && gr_learn_attached) ++ return -EBUSY; ++ if (file->f_mode & FMODE_READ) { ++ int retval = 0; ++ down(&gr_learn_user_sem); ++ if (learn_buffer == NULL) ++ learn_buffer = vmalloc(LEARN_BUFFER_SIZE); ++ if (learn_buffer_user == NULL) ++ learn_buffer_user = vmalloc(LEARN_BUFFER_SIZE); ++ if (learn_buffer == NULL) { ++ retval = -ENOMEM; ++ goto out_error; ++ } ++ if (learn_buffer_user == NULL) { ++ retval = -ENOMEM; ++ goto out_error; ++ } ++ learn_buffer_len = 0; ++ learn_buffer_user_len = 0; ++ gr_learn_attached = 1; ++out_error: ++ up(&gr_learn_user_sem); ++ return retval; ++ } ++ return 0; ++} ++ ++static int ++close_learn(struct inode
*inode, struct file *file) ++{ ++ char *tmp; ++ ++ if (file->f_mode & FMODE_READ) { ++ down(&gr_learn_user_sem); ++ if (learn_buffer != NULL) { ++ spin_lock(&gr_learn_lock); ++ tmp = learn_buffer; ++ learn_buffer = NULL; ++ spin_unlock(&gr_learn_lock); ++ vfree(tmp); ++ } ++ if (learn_buffer_user != NULL) { ++ vfree(learn_buffer_user); ++ learn_buffer_user = NULL; ++ } ++ learn_buffer_len = 0; ++ learn_buffer_user_len = 0; ++ gr_learn_attached = 0; ++ up(&gr_learn_user_sem); ++ } ++ ++ return 0; ++} ++ ++struct file_operations grsec_fops = { ++ .read = read_learn, ++ .write = write_grsec_handler, ++ .open = open_learn, ++ .release = close_learn, ++ .poll = poll_learn, ++}; +diff -urNp a/grsecurity/gracl_res.c b/grsecurity/gracl_res.c +--- a/grsecurity/gracl_res.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_res.c 2009-05-24 18:10:25.259085826 -0700 +@@ -0,0 +1,45 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/gracl.h> ++#include <linux/grinternal.h> ++ ++static const char *restab_log[] = { ++ [RLIMIT_CPU] = "RLIMIT_CPU", ++ [RLIMIT_FSIZE] = "RLIMIT_FSIZE", ++ [RLIMIT_DATA] = "RLIMIT_DATA", ++ [RLIMIT_STACK] = "RLIMIT_STACK", ++ [RLIMIT_CORE] = "RLIMIT_CORE", ++ [RLIMIT_RSS] = "RLIMIT_RSS", ++ [RLIMIT_NPROC] = "RLIMIT_NPROC", ++ [RLIMIT_NOFILE] = "RLIMIT_NOFILE", ++ [RLIMIT_MEMLOCK] = "RLIMIT_MEMLOCK", ++ [RLIMIT_AS] = "RLIMIT_AS", ++ [RLIMIT_LOCKS] = "RLIMIT_LOCKS", ++ [RLIMIT_LOCKS + 1] = "RLIMIT_CRASH" ++}; ++ ++void ++gr_log_resource(const struct task_struct *task, ++ const int res, const unsigned long wanted, const int gt) ++{ ++ if (res == RLIMIT_NPROC && ++ (cap_raised(task->cap_effective, CAP_SYS_ADMIN) || ++ cap_raised(task->cap_effective, CAP_SYS_RESOURCE))) ++ return; ++ else if (res == RLIMIT_MEMLOCK && ++ cap_raised(task->cap_effective, CAP_IPC_LOCK)) ++ return; ++ ++ if (!gr_acl_is_enabled() && !grsec_resource_logging) ++ return; ++ ++ preempt_disable(); ++ ++ if (unlikely(((gt && wanted > 
task->signal->rlim[res].rlim_cur) || ++ (!gt && wanted >= task->signal->rlim[res].rlim_cur)) && ++ task->signal->rlim[res].rlim_cur != RLIM_INFINITY)) ++ gr_log_res_ulong2_str(GR_DONT_AUDIT, GR_RESOURCE_MSG, task, wanted, restab_log[res], task->signal->rlim[res].rlim_cur); ++ preempt_enable_no_resched(); ++ ++ return; ++} +diff -urNp a/grsecurity/gracl_segv.c b/grsecurity/gracl_segv.c +--- a/grsecurity/gracl_segv.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_segv.c 2009-05-24 18:10:25.259085826 -0700 +@@ -0,0 +1,304 @@ ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <asm/uaccess.h> ++#include <asm/errno.h> ++#include <asm/mman.h> ++#include <net/sock.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/net.h> ++#include <linux/in.h> ++#include <linux/smp_lock.h> ++#include <linux/slab.h> ++#include <linux/types.h> ++#include <linux/sched.h> ++#include <linux/timer.h> ++#include <linux/gracl.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++static struct crash_uid *uid_set; ++static unsigned short uid_used; ++static DEFINE_SPINLOCK(gr_uid_lock); ++extern rwlock_t gr_inode_lock; ++extern struct acl_subject_label * ++ lookup_acl_subj_label(const ino_t inode, const dev_t dev, ++ struct acl_role_label *role); ++extern int specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t); ++ ++int ++gr_init_uidset(void) ++{ ++ uid_set = ++ kmalloc(GR_UIDTABLE_MAX * sizeof (struct crash_uid), GFP_KERNEL); ++ uid_used = 0; ++ ++ return uid_set ? 
1 : 0; ++} ++ ++void ++gr_free_uidset(void) ++{ ++ if (uid_set) ++ kfree(uid_set); ++ ++ return; ++} ++ ++int ++gr_find_uid(const uid_t uid) ++{ ++ struct crash_uid *tmp = uid_set; ++ uid_t buid; ++ int low = 0, high = uid_used - 1, mid; ++ ++ while (high >= low) { ++ mid = (low + high) >> 1; ++ buid = tmp[mid].uid; ++ if (buid == uid) ++ return mid; ++ if (buid > uid) ++ high = mid - 1; ++ if (buid < uid) ++ low = mid + 1; ++ } ++ ++ return -1; ++} ++ ++static __inline__ void ++gr_insertsort(void) ++{ ++ unsigned short i, j; ++ struct crash_uid index; ++ ++ for (i = 1; i < uid_used; i++) { ++ index = uid_set[i]; ++ j = i; ++ while ((j > 0) && uid_set[j - 1].uid > index.uid) { ++ uid_set[j] = uid_set[j - 1]; ++ j--; ++ } ++ uid_set[j] = index; ++ } ++ ++ return; ++} ++ ++static __inline__ void ++gr_insert_uid(const uid_t uid, const unsigned long expires) ++{ ++ int loc; ++ ++ if (uid_used == GR_UIDTABLE_MAX) ++ return; ++ ++ loc = gr_find_uid(uid); ++ ++ if (loc >= 0) { ++ uid_set[loc].expires = expires; ++ return; ++ } ++ ++ uid_set[uid_used].uid = uid; ++ uid_set[uid_used].expires = expires; ++ uid_used++; ++ ++ gr_insertsort(); ++ ++ return; ++} ++ ++void ++gr_remove_uid(const unsigned short loc) ++{ ++ unsigned short i; ++ ++ for (i = loc + 1; i < uid_used; i++) ++ uid_set[i - 1] = uid_set[i]; ++ ++ uid_used--; ++ ++ return; ++} ++ ++int ++gr_check_crash_uid(const uid_t uid) ++{ ++ int loc; ++ int ret = 0; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ return 0; ++ ++ spin_lock(&gr_uid_lock); ++ loc = gr_find_uid(uid); ++ ++ if (loc < 0) ++ goto out_unlock; ++ ++ if (time_before_eq(uid_set[loc].expires, get_seconds())) ++ gr_remove_uid(loc); ++ else ++ ret = 1; ++ ++out_unlock: ++ spin_unlock(&gr_uid_lock); ++ return ret; ++} ++ ++static __inline__ int ++proc_is_setxid(const struct task_struct *task) ++{ ++ if (task->uid != task->euid || task->uid != task->suid || ++ task->uid != task->fsuid) ++ return 1; ++ if (task->gid != task->egid || task->gid != task->sgid 
|| ++ task->gid != task->fsgid) ++ return 1; ++ ++ return 0; ++} ++static __inline__ int ++gr_fake_force_sig(int sig, struct task_struct *t) ++{ ++ unsigned long int flags; ++ int ret, blocked, ignored; ++ struct k_sigaction *action; ++ ++ spin_lock_irqsave(&t->sighand->siglock, flags); ++ action = &t->sighand->action[sig-1]; ++ ignored = action->sa.sa_handler == SIG_IGN; ++ blocked = sigismember(&t->blocked, sig); ++ if (blocked || ignored) { ++ action->sa.sa_handler = SIG_DFL; ++ if (blocked) { ++ sigdelset(&t->blocked, sig); ++ recalc_sigpending_and_wake(t); ++ } ++ } ++ if (action->sa.sa_handler == SIG_DFL) ++ t->signal->flags &= ~SIGNAL_UNKILLABLE; ++ ret = specific_send_sig_info(sig, SEND_SIG_PRIV, t); ++ ++ spin_unlock_irqrestore(&t->sighand->siglock, flags); ++ ++ return ret; ++} ++ ++void ++gr_handle_crash(struct task_struct *task, const int sig) ++{ ++ struct acl_subject_label *curr; ++ struct acl_subject_label *curr2; ++ struct task_struct *tsk, *tsk2; ++ ++ if (sig != SIGSEGV && sig != SIGKILL && sig != SIGBUS && sig != SIGILL) ++ return; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ return; ++ ++ curr = task->acl; ++ ++ if (!(curr->resmask & (1 << GR_CRASH_RES))) ++ return; ++ ++ if (time_before_eq(curr->expires, get_seconds())) { ++ curr->expires = 0; ++ curr->crashes = 0; ++ } ++ ++ curr->crashes++; ++ ++ if (!curr->expires) ++ curr->expires = get_seconds() + curr->res[GR_CRASH_RES].rlim_max; ++ ++ if ((curr->crashes >= curr->res[GR_CRASH_RES].rlim_cur) && ++ time_after(curr->expires, get_seconds())) { ++ if (task->uid && proc_is_setxid(task)) { ++ gr_log_crash1(GR_DONT_AUDIT, GR_SEGVSTART_ACL_MSG, task, curr->res[GR_CRASH_RES].rlim_max); ++ spin_lock(&gr_uid_lock); ++ gr_insert_uid(task->uid, curr->expires); ++ spin_unlock(&gr_uid_lock); ++ curr->expires = 0; ++ curr->crashes = 0; ++ read_lock(&tasklist_lock); ++ do_each_thread(tsk2, tsk) { ++ if (tsk != task && tsk->uid == task->uid) ++ gr_fake_force_sig(SIGKILL, tsk); ++ } while_each_thread(tsk2, 
tsk); ++ read_unlock(&tasklist_lock); ++ } else { ++ gr_log_crash2(GR_DONT_AUDIT, GR_SEGVNOSUID_ACL_MSG, task, curr->res[GR_CRASH_RES].rlim_max); ++ read_lock(&tasklist_lock); ++ do_each_thread(tsk2, tsk) { ++ if (likely(tsk != task)) { ++ curr2 = tsk->acl; ++ ++ if (curr2->device == curr->device && ++ curr2->inode == curr->inode) ++ gr_fake_force_sig(SIGKILL, tsk); ++ } ++ } while_each_thread(tsk2, tsk); ++ read_unlock(&tasklist_lock); ++ } ++ } ++ ++ return; ++} ++ ++int ++gr_check_crash_exec(const struct file *filp) ++{ ++ struct acl_subject_label *curr; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ return 0; ++ ++ read_lock(&gr_inode_lock); ++ curr = lookup_acl_subj_label(filp->f_path.dentry->d_inode->i_ino, ++ filp->f_path.dentry->d_inode->i_sb->s_dev, ++ current->role); ++ read_unlock(&gr_inode_lock); ++ ++ if (!curr || !(curr->resmask & (1 << GR_CRASH_RES)) || ++ (!curr->crashes && !curr->expires)) ++ return 0; ++ ++ if ((curr->crashes >= curr->res[GR_CRASH_RES].rlim_cur) && ++ time_after(curr->expires, get_seconds())) ++ return 1; ++ else if (time_before_eq(curr->expires, get_seconds())) { ++ curr->crashes = 0; ++ curr->expires = 0; ++ } ++ ++ return 0; ++} ++ ++void ++gr_handle_alertkill(struct task_struct *task) ++{ ++ struct acl_subject_label *curracl; ++ __u32 curr_ip; ++ struct task_struct *p, *p2; ++ ++ if (unlikely(!gr_acl_is_enabled())) ++ return; ++ ++ curracl = task->acl; ++ curr_ip = task->signal->curr_ip; ++ ++ if ((curracl->mode & GR_KILLIPPROC) && curr_ip) { ++ read_lock(&tasklist_lock); ++ do_each_thread(p2, p) { ++ if (p->signal->curr_ip == curr_ip) ++ gr_fake_force_sig(SIGKILL, p); ++ } while_each_thread(p2, p); ++ read_unlock(&tasklist_lock); ++ } else if (curracl->mode & GR_KILLPROC) ++ gr_fake_force_sig(SIGKILL, task); ++ ++ return; ++} +diff -urNp a/grsecurity/gracl_shm.c b/grsecurity/gracl_shm.c +--- a/grsecurity/gracl_shm.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/gracl_shm.c 2009-05-24 18:10:25.260087141 -0700 +@@ -0,0 
+1,33 @@ ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/ipc.h> ++#include <linux/gracl.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++int ++gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, ++ const time_t shm_createtime, const uid_t cuid, const int shmid) ++{ ++ struct task_struct *task; ++ ++ if (!gr_acl_is_enabled()) ++ return 1; ++ ++ task = find_task_by_vpid(shm_cprid); ++ ++ if (unlikely(!task)) ++ task = find_task_by_vpid(shm_lapid); ++ ++ if (unlikely(task && (time_before_eq((unsigned long)task->start_time.tv_sec, (unsigned long)shm_createtime) || ++ (task->pid == shm_lapid)) && ++ (task->acl->mode & GR_PROTSHM) && ++ (task->acl != current->acl))) { ++ gr_log_int3(GR_DONT_AUDIT, GR_SHMAT_ACL_MSG, cuid, shm_cprid, shmid); ++ return 0; ++ } ++ ++ return 1; ++} +diff -urNp a/grsecurity/grsec_chdir.c b/grsecurity/grsec_chdir.c +--- a/grsecurity/grsec_chdir.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_chdir.c 2009-05-24 18:10:25.260087141 -0700 +@@ -0,0 +1,19 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++void ++gr_log_chdir(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR ++ if ((grsec_enable_chdir && grsec_enable_group && ++ in_group_p(grsec_audit_gid)) || (grsec_enable_chdir && ++ !grsec_enable_group)) { ++ gr_log_fs_generic(GR_DO_AUDIT, GR_CHDIR_AUDIT_MSG, dentry, mnt); ++ } ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_chroot.c b/grsecurity/grsec_chroot.c +--- a/grsecurity/grsec_chroot.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_chroot.c 2009-05-24 18:10:25.261084895 -0700 +@@ -0,0 +1,336 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/fs.h> 
++#include <linux/mount.h> ++#include <linux/types.h> ++#include <linux/pid_namespace.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++int ++gr_handle_chroot_unix(const pid_t pid) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX ++ struct pid *spid = NULL; ++ struct task_struct *p; ++ ++ if (unlikely(!grsec_enable_chroot_unix)) ++ return 1; ++ ++ if (likely(!proc_is_chrooted(current))) ++ return 1; ++ ++ read_lock(&tasklist_lock); ++ ++ spid = find_vpid(pid); ++ p = spid ? pid_task(spid, PIDTYPE_PID) : NULL; /* pid_task() is NULL when no task is attached as PIDTYPE_PID */ ++ if (p) { ++ task_lock(p); ++ if (unlikely(!have_same_root(current, p))) { ++ task_unlock(p); ++ read_unlock(&tasklist_lock); ++ gr_log_noargs(GR_DONT_AUDIT, GR_UNIX_CHROOT_MSG); ++ return 0; ++ } ++ task_unlock(p); ++ } ++ read_unlock(&tasklist_lock); ++#endif ++ return 1; ++} ++ ++int ++gr_handle_chroot_nice(void) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_NICE ++ if (grsec_enable_chroot_nice && proc_is_chrooted(current)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_NICE_CHROOT_MSG); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_chroot_setpriority(struct task_struct *p, const int niceval) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_NICE ++ if (grsec_enable_chroot_nice && (niceval < task_nice(p)) ++ && proc_is_chrooted(current)) { ++ gr_log_str_int(GR_DONT_AUDIT, GR_PRIORITY_CHROOT_MSG, p->comm, p->pid); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_chroot_rawio(const struct inode *inode) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS ++ if (grsec_enable_chroot_caps && proc_is_chrooted(current) && ++ inode && S_ISBLK(inode->i_mode) && !capable(CAP_SYS_RAWIO)) ++ return 1; ++#endif ++ return 0; ++} ++ ++int ++gr_pid_is_chrooted(struct task_struct *p) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK ++ if (!grsec_enable_chroot_findtask || !proc_is_chrooted(current) || p == NULL) ++ return 0; ++ ++ task_lock(p); ++ if ((p->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)) || ++ !have_same_root(current, p)) { ++ task_unlock(p); ++ return 1; ++ }
++ task_unlock(p); ++#endif ++ return 0; ++} ++ ++EXPORT_SYMBOL(gr_pid_is_chrooted); ++ ++#if defined(CONFIG_GRKERNSEC_CHROOT_DOUBLE) || defined(CONFIG_GRKERNSEC_CHROOT_FCHDIR) ++int gr_is_outside_chroot(const struct dentry *u_dentry, const struct vfsmount *u_mnt) ++{ ++ struct dentry *dentry = (struct dentry *)u_dentry; ++ struct vfsmount *mnt = (struct vfsmount *)u_mnt; ++ struct dentry *realroot; ++ struct vfsmount *realrootmnt; ++ struct dentry *currentroot; ++ struct vfsmount *currentmnt; ++ struct task_struct *reaper = current->nsproxy->pid_ns->child_reaper; ++ int ret = 1; /* stays 1 unless the walk below ends at the reaper's root */ ++ ++ read_lock(&reaper->fs->lock); ++ realrootmnt = mntget(reaper->fs->root.mnt); ++ realroot = dget(reaper->fs->root.dentry); ++ read_unlock(&reaper->fs->lock); ++ ++ read_lock(&current->fs->lock); ++ currentmnt = mntget(current->fs->root.mnt); ++ currentroot = dget(current->fs->root.dentry); ++ read_unlock(&current->fs->lock); ++ ++ spin_lock(&dcache_lock); ++ for (;;) { /* climb parents and mountpoints until one of the two roots (or the top mount) is reached */ ++ if (unlikely((dentry == realroot && mnt == realrootmnt) ++ || (dentry == currentroot && mnt == currentmnt))) ++ break; ++ if (unlikely(dentry == mnt->mnt_root || IS_ROOT(dentry))) { ++ if (mnt->mnt_parent == mnt) ++ break; ++ dentry = mnt->mnt_mountpoint; ++ mnt = mnt->mnt_parent; ++ continue; ++ } ++ dentry = dentry->d_parent; ++ } ++ spin_unlock(&dcache_lock); ++ ++ dput(currentroot); ++ mntput(currentmnt); ++ ++ /* access is outside of chroot */ ++ if (dentry == realroot && mnt == realrootmnt) ++ ret = 0; ++ ++ dput(realroot); ++ mntput(realrootmnt); ++ return ret; ++} ++#endif ++ ++int ++gr_chroot_fchdir(struct dentry *u_dentry, struct vfsmount *u_mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR ++ if (!grsec_enable_chroot_fchdir) ++ return 1; ++ ++ if (!proc_is_chrooted(current)) ++ return 1; ++ else if (!gr_is_outside_chroot(u_dentry, u_mnt)) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_CHROOT_FCHDIR_MSG, u_dentry, u_mnt); ++ return 0; ++ } ++#endif ++ return 1; ++} ++ ++int ++gr_chroot_shmat(const pid_t shm_cprid, const pid_t
shm_lapid, ++ const time_t shm_createtime) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT ++ struct pid *pid = NULL; ++ struct task_struct *p; ++ time_t starttime; ++ ++ if (unlikely(!grsec_enable_chroot_shmat)) ++ return 1; ++ ++ if (likely(!proc_is_chrooted(current))) ++ return 1; ++ ++ read_lock(&tasklist_lock); ++ ++ /* pid_task() is NULL when no task is attached as PIDTYPE_PID; then fall back to the last-attach pid */ ++ pid = find_vpid(shm_cprid); ++ p = pid ? pid_task(pid, PIDTYPE_PID) : NULL; ++ if (p) { ++ task_lock(p); ++ starttime = p->start_time.tv_sec; ++ if (unlikely(!have_same_root(current, p) && ++ time_before_eq((unsigned long)starttime, (unsigned long)shm_createtime))) { ++ task_unlock(p); ++ read_unlock(&tasklist_lock); ++ gr_log_noargs(GR_DONT_AUDIT, GR_SHMAT_CHROOT_MSG); ++ return 0; ++ } ++ task_unlock(p); ++ } else { ++ pid = find_vpid(shm_lapid); ++ p = pid ? pid_task(pid, PIDTYPE_PID) : NULL; ++ if (p) { ++ task_lock(p); ++ if (unlikely(!have_same_root(current, p))) { ++ task_unlock(p); ++ read_unlock(&tasklist_lock); ++ gr_log_noargs(GR_DONT_AUDIT, GR_SHMAT_CHROOT_MSG); ++ return 0; ++ } ++ task_unlock(p); ++ } ++ } ++ ++ read_unlock(&tasklist_lock); ++#endif ++ return 1; ++} ++ ++void ++gr_log_chroot_exec(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG ++ if (grsec_enable_chroot_execlog && proc_is_chrooted(current)) ++ gr_log_fs_generic(GR_DO_AUDIT, GR_EXEC_CHROOT_MSG, dentry, mnt); ++#endif ++ return; ++} ++ ++int ++gr_handle_chroot_mknod(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int mode) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD ++ if (grsec_enable_chroot_mknod && !S_ISFIFO(mode) && !S_ISREG(mode) && ++ proc_is_chrooted(current)) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_MKNOD_CHROOT_MSG, dentry, mnt); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_chroot_mount(const struct dentry *dentry, ++ const struct vfsmount *mnt, const char *dev_name) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT ++ if (grsec_enable_chroot_mount &&
proc_is_chrooted(current)) { ++ gr_log_str_fs(GR_DONT_AUDIT, GR_MOUNT_CHROOT_MSG, dev_name, dentry, mnt); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_chroot_pivot(void) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT ++ if (grsec_enable_chroot_pivot && proc_is_chrooted(current)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_PIVOT_CHROOT_MSG); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_chroot_chroot(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE ++ if (grsec_enable_chroot_double && proc_is_chrooted(current) && ++ !gr_is_outside_chroot(dentry, mnt)) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_CHROOT_CHROOT_MSG, dentry, mnt); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++void ++gr_handle_chroot_caps(struct task_struct *task) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS ++ if (grsec_enable_chroot_caps && proc_is_chrooted(task)) { ++ kernel_cap_t chroot_caps = GR_CHROOT_CAPS; ++ task->cap_permitted = ++ cap_drop(task->cap_permitted, chroot_caps); ++ task->cap_inheritable = ++ cap_drop(task->cap_inheritable, chroot_caps); ++ task->cap_effective = ++ cap_drop(task->cap_effective, chroot_caps); ++ } ++#endif ++ return; ++} ++ ++int ++gr_handle_chroot_sysctl(const int op) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL ++ if (grsec_enable_chroot_sysctl && proc_is_chrooted(current) ++ && (op & MAY_WRITE)) ++ return -EACCES; ++#endif ++ return 0; ++} ++ ++void ++gr_handle_chroot_chdir(struct path *path) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR ++ if (grsec_enable_chroot_chdir) ++ set_fs_pwd(current->fs, path); ++#endif ++ return; ++} ++ ++int ++gr_handle_chroot_chmod(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int mode) ++{ ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD ++ if (grsec_enable_chroot_chmod && ++ ((mode & S_ISUID) || ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && ++ proc_is_chrooted(current)) { ++ gr_log_fs_generic(GR_DONT_AUDIT, 
GR_CHMOD_CHROOT_MSG, dentry, mnt); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_SECURITY ++EXPORT_SYMBOL(gr_handle_chroot_caps); ++#endif +diff -urNp a/grsecurity/grsec_disabled.c b/grsecurity/grsec_disabled.c +--- a/grsecurity/grsec_disabled.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_disabled.c 2009-05-24 18:10:25.261084895 -0700 +@@ -0,0 +1,418 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/kdev_t.h> ++#include <linux/net.h> ++#include <linux/in.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/sysctl.h> ++ ++#ifdef CONFIG_PAX_HAVE_ACL_FLAGS ++void ++pax_set_initial_flags(struct linux_binprm *bprm) ++{ ++ return; ++} ++#endif ++ ++#ifdef CONFIG_SYSCTL ++__u32 ++gr_handle_sysctl(const struct ctl_table * table, const int op) ++{ ++ return 0; ++} ++#endif ++ ++int ++gr_acl_is_enabled(void) ++{ ++ return 0; ++} ++ ++int ++gr_handle_rawio(const struct inode *inode) ++{ ++ return 0; ++} ++ ++void ++gr_acl_handle_psacct(struct task_struct *task, const long code) ++{ ++ return; ++} ++ ++int ++gr_handle_ptrace(struct task_struct *task, const long request) ++{ ++ return 0; ++} ++ ++int ++gr_handle_proc_ptrace(struct task_struct *task) ++{ ++ return 0; ++} ++ ++void ++gr_learn_resource(const struct task_struct *task, ++ const int res, const unsigned long wanted, const int gt) ++{ ++ return; ++} ++ ++int ++gr_set_acls(const int type) ++{ ++ return 0; ++} ++ ++int ++gr_check_hidden_task(const struct task_struct *tsk) ++{ ++ return 0; ++} ++ ++int ++gr_check_protected_task(const struct task_struct *task) ++{ ++ return 0; ++} ++ ++void ++gr_copy_label(struct task_struct *tsk) ++{ ++ return; ++} ++ ++void ++gr_set_pax_flags(struct task_struct *task) ++{ ++ return; ++} ++ ++int ++gr_set_proc_label(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return 0; ++} ++ ++void ++gr_handle_delete(const 
ino_t ino, const dev_t dev) ++{ ++ return; ++} ++ ++void ++gr_handle_create(const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++ return; ++} ++ ++void ++gr_handle_crash(struct task_struct *task, const int sig) ++{ ++ return; ++} ++ ++int ++gr_check_crash_exec(const struct file *filp) ++{ ++ return 0; ++} ++ ++int ++gr_check_crash_uid(const uid_t uid) ++{ ++ return 0; ++} ++ ++void ++gr_handle_rename(struct inode *old_dir, struct inode *new_dir, ++ struct dentry *old_dentry, ++ struct dentry *new_dentry, ++ struct vfsmount *mnt, const __u8 replace) ++{ ++ return; ++} ++ ++int ++gr_search_socket(const int family, const int type, const int protocol) ++{ ++ return 1; ++} ++ ++int ++gr_search_connectbind(const int mode, const struct socket *sock, ++ const struct sockaddr_in *addr) ++{ ++ return 0; ++} ++ ++int ++gr_task_is_capable(struct task_struct *task, const int cap) ++{ ++ return 1; ++} ++ ++int ++gr_is_capable_nolog(const int cap) ++{ ++ return 1; ++} ++ ++void ++gr_handle_alertkill(struct task_struct *task) ++{ ++ return; ++} ++ ++__u32 ++gr_acl_handle_execve(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_hidden_file(const struct dentry * dentry, ++ const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_open(const struct dentry * dentry, const struct vfsmount * mnt, ++ const int fmode) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_rmdir(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_unlink(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++int ++gr_acl_handle_mmap(const struct file *file, const unsigned long prot, ++ unsigned int *vm_flags) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_truncate(const struct dentry * dentry, ++ const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_utime(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ 
return 1; ++} ++ ++__u32 ++gr_acl_handle_access(const struct dentry * dentry, ++ const struct vfsmount * mnt, const int fmode) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_fchmod(const struct dentry * dentry, const struct vfsmount * mnt, ++ mode_t mode) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_chmod(const struct dentry * dentry, const struct vfsmount * mnt, ++ mode_t mode) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_chown(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++void ++grsecurity_init(void) ++{ ++ return; ++} ++ ++__u32 ++gr_acl_handle_mknod(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, ++ const int mode) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_mkdir(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_symlink(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, const char *from) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_link(const struct dentry * new_dentry, ++ const struct dentry * parent_dentry, ++ const struct vfsmount * parent_mnt, ++ const struct dentry * old_dentry, ++ const struct vfsmount * old_mnt, const char *to) ++{ ++ return 1; ++} ++ ++int ++gr_acl_handle_rename(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ const struct dentry *old_dentry, ++ const struct inode *old_parent_inode, ++ const struct vfsmount *old_mnt, const char *newname) ++{ ++ return 0; ++} ++ ++int ++gr_acl_handle_filldir(const struct file *file, const char *name, ++ const int namelen, const ino_t ino) ++{ ++ return 1; ++} ++ ++int ++gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, ++ const time_t shm_createtime, const uid_t cuid, const int shmid) ++{ ++ return 1; ++} ++ ++int ++gr_search_bind(const struct 
socket *sock, const struct sockaddr_in *addr) ++{ ++ return 0; ++} ++ ++int ++gr_search_accept(const struct socket *sock) ++{ ++ return 0; ++} ++ ++int ++gr_search_listen(const struct socket *sock) ++{ ++ return 0; ++} ++ ++int ++gr_search_connect(const struct socket *sock, const struct sockaddr_in *addr) ++{ ++ return 0; ++} ++ ++__u32 ++gr_acl_handle_unix(const struct dentry * dentry, const struct vfsmount * mnt) ++{ ++ return 1; ++} ++ ++__u32 ++gr_acl_handle_creat(const struct dentry * dentry, ++ const struct dentry * p_dentry, ++ const struct vfsmount * p_mnt, const int fmode, ++ const int imode) ++{ ++ return 1; ++} ++ ++void ++gr_acl_handle_exit(void) ++{ ++ return; ++} ++ ++int ++gr_acl_handle_mprotect(const struct file *file, const unsigned long prot) ++{ ++ return 1; ++} ++ ++void ++gr_set_role_label(const uid_t uid, const gid_t gid) ++{ ++ return; ++} ++ ++int ++gr_acl_handle_procpidmem(const struct task_struct *task) ++{ ++ return 0; ++} ++ ++int ++gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb) ++{ ++ return 0; ++} ++ ++int ++gr_search_udp_sendmsg(const struct sock *sk, const struct sockaddr_in *addr) ++{ ++ return 0; ++} ++ ++void ++gr_set_kernel_label(struct task_struct *task) ++{ ++ return; ++} ++ ++int ++gr_check_user_change(int real, int effective, int fs) ++{ ++ return 0; ++} ++ ++int ++gr_check_group_change(int real, int effective, int fs) ++{ ++ return 0; ++} ++ ++ ++EXPORT_SYMBOL(gr_task_is_capable); ++EXPORT_SYMBOL(gr_is_capable_nolog); ++EXPORT_SYMBOL(gr_learn_resource); ++EXPORT_SYMBOL(gr_set_kernel_label); ++#ifdef CONFIG_SECURITY ++EXPORT_SYMBOL(gr_check_user_change); ++EXPORT_SYMBOL(gr_check_group_change); ++#endif +diff -urNp a/grsecurity/grsec_exec.c b/grsecurity/grsec_exec.c +--- a/grsecurity/grsec_exec.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_exec.c 2009-05-24 18:10:25.261084895 -0700 +@@ -0,0 +1,88 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/file.h> 
++#include <linux/binfmts.h> ++#include <linux/smp_lock.h> ++#include <linux/fs.h> ++#include <linux/types.h> ++#include <linux/grdefs.h> ++#include <linux/grinternal.h> ++#include <linux/capability.h> ++ ++#include <asm/uaccess.h> ++ ++#ifdef CONFIG_GRKERNSEC_EXECLOG ++static char gr_exec_arg_buf[132]; ++static DECLARE_MUTEX(gr_exec_arg_sem); ++#endif ++ ++int ++gr_handle_nproc(void) ++{ ++#ifdef CONFIG_GRKERNSEC_EXECVE ++ if (grsec_enable_execve && current->user && ++ (atomic_read(&current->user->processes) > ++ current->signal->rlim[RLIMIT_NPROC].rlim_cur) && ++ !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_NPROC_MSG); ++ return -EAGAIN; ++ } ++#endif ++ return 0; ++} ++ ++void ++gr_handle_exec_args(struct linux_binprm *bprm, const char __user *__user *argv) ++{ ++#ifdef CONFIG_GRKERNSEC_EXECLOG ++ char *grarg = gr_exec_arg_buf; ++ unsigned int i, x, execlen = 0; ++ char c; ++ ++ if (!((grsec_enable_execlog && grsec_enable_group && ++ in_group_p(grsec_audit_gid)) ++ || (grsec_enable_execlog && !grsec_enable_group))) ++ return; ++ ++ down(&gr_exec_arg_sem); ++ memset(grarg, 0, sizeof(gr_exec_arg_buf)); ++ ++ if (unlikely(argv == NULL)) ++ goto log; ++ ++ for (i = 0; i < bprm->argc && execlen < 128; i++) { ++ const char __user *p; ++ unsigned int len; ++ ++ if (copy_from_user(&p, argv + i, sizeof(p))) ++ goto log; ++ if (!p) ++ goto log; ++ len = strnlen_user(p, 128 - execlen); ++ if (len > 128 - execlen) ++ len = 128 - execlen; ++ else if (len > 0) ++ len--; ++ if (copy_from_user(grarg + execlen, p, len)) ++ goto log; ++ ++ /* rewrite unprintable characters */ ++ for (x = 0; x < len; x++) { ++ c = *(grarg + execlen + x); ++ if (c < 32 || c > 126) ++ *(grarg + execlen + x) = ' '; ++ } ++ ++ execlen += len; ++ *(grarg + execlen) = ' '; ++ *(grarg + execlen + 1) = '\0'; ++ execlen++; ++ } ++ ++ log: ++ gr_log_fs_str(GR_DO_AUDIT, GR_EXEC_AUDIT_MSG, bprm->file->f_path.dentry, ++ bprm->file->f_path.mnt, grarg); ++ 
up(&gr_exec_arg_sem); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_fifo.c b/grsecurity/grsec_fifo.c +--- a/grsecurity/grsec_fifo.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_fifo.c 2009-05-24 18:10:25.262088236 -0700 +@@ -0,0 +1,22 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/grinternal.h> ++ ++int ++gr_handle_fifo(const struct dentry *dentry, const struct vfsmount *mnt, ++ const struct dentry *dir, const int flag, const int acc_mode) ++{ ++#ifdef CONFIG_GRKERNSEC_FIFO ++ if (grsec_enable_fifo && S_ISFIFO(dentry->d_inode->i_mode) && ++ !(flag & O_EXCL) && (dir->d_inode->i_mode & S_ISVTX) && ++ (dentry->d_inode->i_uid != dir->d_inode->i_uid) && ++ (current->fsuid != dentry->d_inode->i_uid)) { ++ if (!generic_permission(dentry->d_inode, acc_mode, NULL)) ++ gr_log_fs_int2(GR_DONT_AUDIT, GR_FIFO_MSG, dentry, mnt, dentry->d_inode->i_uid, dentry->d_inode->i_gid); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} +diff -urNp a/grsecurity/grsec_fork.c b/grsecurity/grsec_fork.c +--- a/grsecurity/grsec_fork.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_fork.c 2009-05-24 18:10:25.262088236 -0700 +@@ -0,0 +1,15 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++#include <linux/errno.h> ++ ++void ++gr_log_forkfail(const int retval) ++{ ++#ifdef CONFIG_GRKERNSEC_FORKFAIL ++ if (grsec_enable_forkfail && retval != -ERESTARTNOINTR) ++ gr_log_int(GR_DONT_AUDIT, GR_FAILFORK_MSG, retval); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_init.c b/grsecurity/grsec_init.c +--- a/grsecurity/grsec_init.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_init.c 2009-05-24 18:10:25.263084663 -0700 +@@ -0,0 +1,230 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/smp_lock.h> ++#include <linux/gracl.h> ++#include <linux/slab.h> 
++#include <linux/vmalloc.h> ++#include <linux/percpu.h> ++ ++int grsec_enable_link; ++int grsec_enable_dmesg; ++int grsec_enable_fifo; ++int grsec_enable_execve; ++int grsec_enable_execlog; ++int grsec_enable_signal; ++int grsec_enable_forkfail; ++int grsec_enable_time; ++int grsec_enable_audit_textrel; ++int grsec_enable_group; ++int grsec_audit_gid; ++int grsec_enable_chdir; ++int grsec_enable_audit_ipc; ++int grsec_enable_mount; ++int grsec_enable_chroot_findtask; ++int grsec_enable_chroot_mount; ++int grsec_enable_chroot_shmat; ++int grsec_enable_chroot_fchdir; ++int grsec_enable_chroot_double; ++int grsec_enable_chroot_pivot; ++int grsec_enable_chroot_chdir; ++int grsec_enable_chroot_chmod; ++int grsec_enable_chroot_mknod; ++int grsec_enable_chroot_nice; ++int grsec_enable_chroot_execlog; ++int grsec_enable_chroot_caps; ++int grsec_enable_chroot_sysctl; ++int grsec_enable_chroot_unix; ++int grsec_enable_tpe; ++int grsec_tpe_gid; ++int grsec_enable_tpe_all; ++int grsec_enable_socket_all; ++int grsec_socket_all_gid; ++int grsec_enable_socket_client; ++int grsec_socket_client_gid; ++int grsec_enable_socket_server; ++int grsec_socket_server_gid; ++int grsec_resource_logging; ++int grsec_lock; ++ ++DEFINE_SPINLOCK(grsec_alert_lock); ++unsigned long grsec_alert_wtime = 0; ++unsigned long grsec_alert_fyet = 0; ++ ++DEFINE_SPINLOCK(grsec_audit_lock); ++ ++DEFINE_RWLOCK(grsec_exec_file_lock); ++ ++char *gr_shared_page[4]; ++ ++char *gr_alert_log_fmt; ++char *gr_audit_log_fmt; ++char *gr_alert_log_buf; ++char *gr_audit_log_buf; ++ ++extern struct gr_arg *gr_usermode; ++extern unsigned char *gr_system_salt; ++extern unsigned char *gr_system_sum; ++ ++void ++grsecurity_init(void) ++{ ++ int j; ++ /* create the per-cpu shared pages */ ++ ++#ifdef CONFIG_X86 ++ memset((char *)(0x41a + PAGE_OFFSET), 0, 36); ++#endif ++ ++ for (j = 0; j < 4; j++) { ++ gr_shared_page[j] = (char *)__alloc_percpu(PAGE_SIZE); ++ if (gr_shared_page[j] == NULL) { ++ panic("Unable to allocate 
grsecurity shared page"); ++ return; ++ } ++ } ++ ++ /* allocate log buffers */ ++ gr_alert_log_fmt = kmalloc(512, GFP_KERNEL); ++ if (!gr_alert_log_fmt) { ++ panic("Unable to allocate grsecurity alert log format buffer"); ++ return; ++ } ++ gr_audit_log_fmt = kmalloc(512, GFP_KERNEL); ++ if (!gr_audit_log_fmt) { ++ panic("Unable to allocate grsecurity audit log format buffer"); ++ return; ++ } ++ gr_alert_log_buf = (char *) get_zeroed_page(GFP_KERNEL); ++ if (!gr_alert_log_buf) { ++ panic("Unable to allocate grsecurity alert log buffer"); ++ return; ++ } ++ gr_audit_log_buf = (char *) get_zeroed_page(GFP_KERNEL); ++ if (!gr_audit_log_buf) { ++ panic("Unable to allocate grsecurity audit log buffer"); ++ return; ++ } ++ ++ /* allocate memory for authentication structure */ ++ gr_usermode = kmalloc(sizeof(struct gr_arg), GFP_KERNEL); ++ gr_system_salt = kmalloc(GR_SALT_LEN, GFP_KERNEL); ++ gr_system_sum = kmalloc(GR_SHA_LEN, GFP_KERNEL); ++ ++ if (!gr_usermode || !gr_system_salt || !gr_system_sum) { ++ panic("Unable to allocate grsecurity authentication structure"); ++ return; ++ } ++ ++#if !defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_SYSCTL_ON) ++#ifndef CONFIG_GRKERNSEC_SYSCTL ++ grsec_lock = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL ++ grsec_enable_audit_textrel = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_GROUP ++ grsec_enable_group = 1; ++ grsec_audit_gid = CONFIG_GRKERNSEC_AUDIT_GID; ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR ++ grsec_enable_chdir = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ grsec_enable_audit_ipc = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT ++ grsec_enable_mount = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_LINK ++ grsec_enable_link = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_DMESG ++ grsec_enable_dmesg = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_FIFO ++ grsec_enable_fifo = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_EXECVE ++ grsec_enable_execve = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_EXECLOG ++ grsec_enable_execlog 
= 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_SIGNAL ++ grsec_enable_signal = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_FORKFAIL ++ grsec_enable_forkfail = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_TIME ++ grsec_enable_time = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_RESLOG ++ grsec_resource_logging = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK ++ grsec_enable_chroot_findtask = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX ++ grsec_enable_chroot_unix = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT ++ grsec_enable_chroot_mount = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR ++ grsec_enable_chroot_fchdir = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT ++ grsec_enable_chroot_shmat = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE ++ grsec_enable_chroot_double = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT ++ grsec_enable_chroot_pivot = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR ++ grsec_enable_chroot_chdir = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD ++ grsec_enable_chroot_chmod = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD ++ grsec_enable_chroot_mknod = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_NICE ++ grsec_enable_chroot_nice = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG ++ grsec_enable_chroot_execlog = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS ++ grsec_enable_chroot_caps = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL ++ grsec_enable_chroot_sysctl = 1; ++#endif ++#ifdef CONFIG_GRKERNSEC_TPE ++ grsec_enable_tpe = 1; ++ grsec_tpe_gid = CONFIG_GRKERNSEC_TPE_GID; ++#ifdef CONFIG_GRKERNSEC_TPE_ALL ++ grsec_enable_tpe_all = 1; ++#endif ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_ALL ++ grsec_enable_socket_all = 1; ++ grsec_socket_all_gid = CONFIG_GRKERNSEC_SOCKET_ALL_GID; ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT ++ grsec_enable_socket_client = 1; ++ grsec_socket_client_gid = CONFIG_GRKERNSEC_SOCKET_CLIENT_GID; ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER ++ grsec_enable_socket_server = 1; ++ 
grsec_socket_server_gid = CONFIG_GRKERNSEC_SOCKET_SERVER_GID; ++#endif ++#endif ++ ++ return; ++} +diff -urNp a/grsecurity/grsec_ipc.c b/grsecurity/grsec_ipc.c +--- a/grsecurity/grsec_ipc.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_ipc.c 2009-05-24 18:10:25.263084663 -0700 +@@ -0,0 +1,81 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/types.h> ++#include <linux/ipc.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++void ++gr_log_msgget(const int ret, const int msgflg) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if (((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && ++ !grsec_enable_group)) && (ret >= 0) ++ && (msgflg & IPC_CREAT)) ++ gr_log_noargs(GR_DO_AUDIT, GR_MSGQ_AUDIT_MSG); ++#endif ++ return; ++} ++ ++void ++gr_log_msgrm(const uid_t uid, const uid_t cuid) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if ((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || ++ (grsec_enable_audit_ipc && !grsec_enable_group)) ++ gr_log_int_int(GR_DO_AUDIT, GR_MSGQR_AUDIT_MSG, uid, cuid); ++#endif ++ return; ++} ++ ++void ++gr_log_semget(const int err, const int semflg) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if (((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && ++ !grsec_enable_group)) && (err >= 0) ++ && (semflg & IPC_CREAT)) ++ gr_log_noargs(GR_DO_AUDIT, GR_SEM_AUDIT_MSG); ++#endif ++ return; ++} ++ ++void ++gr_log_semrm(const uid_t uid, const uid_t cuid) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if ((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || ++ (grsec_enable_audit_ipc && !grsec_enable_group)) ++ gr_log_int_int(GR_DO_AUDIT, GR_SEMR_AUDIT_MSG, uid, cuid); ++#endif ++ return; ++} ++ ++void ++gr_log_shmget(const int err, const int shmflg, const size_t size) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if 
(((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || (grsec_enable_audit_ipc && ++ !grsec_enable_group)) && (err >= 0) ++ && (shmflg & IPC_CREAT)) ++ gr_log_int(GR_DO_AUDIT, GR_SHM_AUDIT_MSG, size); ++#endif ++ return; ++} ++ ++void ++gr_log_shmrm(const uid_t uid, const uid_t cuid) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ if ((grsec_enable_group && in_group_p(grsec_audit_gid) && ++ grsec_enable_audit_ipc) || ++ (grsec_enable_audit_ipc && !grsec_enable_group)) ++ gr_log_int_int(GR_DO_AUDIT, GR_SHMR_AUDIT_MSG, uid, cuid); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_link.c b/grsecurity/grsec_link.c +--- a/grsecurity/grsec_link.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_link.c 2009-05-24 18:10:25.263084663 -0700 +@@ -0,0 +1,39 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/grinternal.h> ++ ++int ++gr_handle_follow_link(const struct inode *parent, ++ const struct inode *inode, ++ const struct dentry *dentry, const struct vfsmount *mnt) ++{ ++#ifdef CONFIG_GRKERNSEC_LINK ++ if (grsec_enable_link && S_ISLNK(inode->i_mode) && ++ (parent->i_mode & S_ISVTX) && (parent->i_uid != inode->i_uid) && ++ (parent->i_mode & S_IWOTH) && (current->fsuid != inode->i_uid)) { ++ gr_log_fs_int2(GR_DONT_AUDIT, GR_SYMLINK_MSG, dentry, mnt, inode->i_uid, inode->i_gid); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_hardlink(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ struct inode *inode, const int mode, const char *to) ++{ ++#ifdef CONFIG_GRKERNSEC_LINK ++ if (grsec_enable_link && current->fsuid != inode->i_uid && ++ (!S_ISREG(mode) || (mode & S_ISUID) || ++ ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) || ++ (generic_permission(inode, MAY_READ | MAY_WRITE, NULL))) && ++ !capable(CAP_FOWNER) && current->uid) { ++ gr_log_fs_int2_str(GR_DONT_AUDIT, GR_HARDLINK_MSG, dentry, mnt, inode->i_uid, 
inode->i_gid, to); ++ return -EPERM; ++ } ++#endif ++ return 0; ++} +diff -urNp a/grsecurity/grsec_log.c b/grsecurity/grsec_log.c +--- a/grsecurity/grsec_log.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_log.c 2009-05-24 18:10:25.264085908 -0700 +@@ -0,0 +1,269 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/tty.h> ++#include <linux/fs.h> ++#include <linux/grinternal.h> ++ ++#define BEGIN_LOCKS(x) \ ++ read_lock(&tasklist_lock); \ ++ read_lock(&grsec_exec_file_lock); \ ++ if (x != GR_DO_AUDIT) \ ++ spin_lock(&grsec_alert_lock); \ ++ else \ ++ spin_lock(&grsec_audit_lock) ++ ++#define END_LOCKS(x) \ ++ if (x != GR_DO_AUDIT) \ ++ spin_unlock(&grsec_alert_lock); \ ++ else \ ++ spin_unlock(&grsec_audit_lock); \ ++ read_unlock(&grsec_exec_file_lock); \ ++ read_unlock(&tasklist_lock); \ ++ if (x == GR_DONT_AUDIT) \ ++ gr_handle_alertkill(current) ++ ++enum { ++ FLOODING, ++ NO_FLOODING ++}; ++ ++extern char *gr_alert_log_fmt; ++extern char *gr_audit_log_fmt; ++extern char *gr_alert_log_buf; ++extern char *gr_audit_log_buf; ++ ++static int gr_log_start(int audit) ++{ ++ char *loglevel = (audit == GR_DO_AUDIT) ? KERN_INFO : KERN_ALERT; ++ char *fmt = (audit == GR_DO_AUDIT) ? gr_audit_log_fmt : gr_alert_log_fmt; ++ char *buf = (audit == GR_DO_AUDIT) ? 
gr_audit_log_buf : gr_alert_log_buf; ++ ++ if (audit == GR_DO_AUDIT) ++ goto set_fmt; ++ ++ if (!grsec_alert_wtime || jiffies - grsec_alert_wtime > CONFIG_GRKERNSEC_FLOODTIME * HZ) { ++ grsec_alert_wtime = jiffies; ++ grsec_alert_fyet = 0; ++ } else if ((jiffies - grsec_alert_wtime < CONFIG_GRKERNSEC_FLOODTIME * HZ) && (grsec_alert_fyet < CONFIG_GRKERNSEC_FLOODBURST)) { ++ grsec_alert_fyet++; ++ } else if (grsec_alert_fyet == CONFIG_GRKERNSEC_FLOODBURST) { ++ grsec_alert_wtime = jiffies; ++ grsec_alert_fyet++; ++ printk(KERN_ALERT "grsec: more alerts, logging disabled for %d seconds\n", CONFIG_GRKERNSEC_FLOODTIME); ++ return FLOODING; ++ } else return FLOODING; ++ ++set_fmt: ++ memset(buf, 0, PAGE_SIZE); ++ if (current->signal->curr_ip && gr_acl_is_enabled()) { ++ sprintf(fmt, "%s%s", loglevel, "grsec: From %u.%u.%u.%u: (%.64s:%c:%.950s) "); ++ snprintf(buf, PAGE_SIZE - 1, fmt, NIPQUAD(current->signal->curr_ip), current->role->rolename, gr_roletype_to_char(), current->acl->filename); ++ } else if (current->signal->curr_ip) { ++ sprintf(fmt, "%s%s", loglevel, "grsec: From %u.%u.%u.%u: "); ++ snprintf(buf, PAGE_SIZE - 1, fmt, NIPQUAD(current->signal->curr_ip)); ++ } else if (gr_acl_is_enabled()) { ++ sprintf(fmt, "%s%s", loglevel, "grsec: (%.64s:%c:%.950s) "); ++ snprintf(buf, PAGE_SIZE - 1, fmt, current->role->rolename, gr_roletype_to_char(), current->acl->filename); ++ } else { ++ sprintf(fmt, "%s%s", loglevel, "grsec: "); ++ strcpy(buf, fmt); ++ } ++ ++ return NO_FLOODING; ++} ++ ++static void gr_log_middle(int audit, const char *msg, va_list ap) ++{ ++ char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf; ++ unsigned int len = strlen(buf); ++ ++ vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap); ++ ++ return; ++} ++ ++static void gr_log_middle_varargs(int audit, const char *msg, ...) ++{ ++ char *buf = (audit == GR_DO_AUDIT) ? 
gr_audit_log_buf : gr_alert_log_buf; ++ unsigned int len = strlen(buf); ++ va_list ap; ++ ++ va_start(ap, msg); ++ vsnprintf(buf + len, PAGE_SIZE - len - 1, msg, ap); ++ va_end(ap); ++ ++ return; ++} ++ ++static void gr_log_end(int audit) ++{ ++ char *buf = (audit == GR_DO_AUDIT) ? gr_audit_log_buf : gr_alert_log_buf; ++ unsigned int len = strlen(buf); ++ ++ snprintf(buf + len, PAGE_SIZE - len - 1, DEFAULTSECMSG, DEFAULTSECARGS(current)); ++ printk("%s\n", buf); ++ ++ return; ++} ++ ++void gr_log_varargs(int audit, const char *msg, int argtypes, ...) ++{ ++ int logtype; ++ char *result = (audit == GR_DO_AUDIT) ? "successful" : "denied"; ++ char *str1, *str2, *str3; ++ int num1, num2; ++ unsigned long ulong1, ulong2; ++ struct dentry *dentry; ++ struct vfsmount *mnt; ++ struct file *file; ++ struct task_struct *task; ++ va_list ap; ++ ++ BEGIN_LOCKS(audit); ++ logtype = gr_log_start(audit); ++ if (logtype == FLOODING) { ++ END_LOCKS(audit); ++ return; ++ } ++ va_start(ap, argtypes); ++ switch (argtypes) { ++ case GR_TTYSNIFF: ++ task = va_arg(ap, struct task_struct *); ++ gr_log_middle_varargs(audit, msg, NIPQUAD(task->signal->curr_ip), gr_task_fullpath0(task), task->comm, task->pid, gr_parent_task_fullpath0(task), task->parent->comm, task->parent->pid); ++ break; ++ case GR_SYSCTL_HIDDEN: ++ str1 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, result, str1); ++ break; ++ case GR_RBAC: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt)); ++ break; ++ case GR_RBAC_STR: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ str1 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1); ++ break; ++ case GR_STR_RBAC: ++ str1 = va_arg(ap, char *); ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ gr_log_middle_varargs(audit, msg, result, str1, 
gr_to_filename(dentry, mnt)); ++ break; ++ case GR_RBAC_MODE2: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ str1 = va_arg(ap, char *); ++ str2 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1, str2); ++ break; ++ case GR_RBAC_MODE3: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ str1 = va_arg(ap, char *); ++ str2 = va_arg(ap, char *); ++ str3 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, result, gr_to_filename(dentry, mnt), str1, str2, str3); ++ break; ++ case GR_FILENAME: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt)); ++ break; ++ case GR_STR_FILENAME: ++ str1 = va_arg(ap, char *); ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ gr_log_middle_varargs(audit, msg, str1, gr_to_filename(dentry, mnt)); ++ break; ++ case GR_FILENAME_STR: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ str1 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), str1); ++ break; ++ case GR_FILENAME_TWO_INT: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ num1 = va_arg(ap, int); ++ num2 = va_arg(ap, int); ++ gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), num1, num2); ++ break; ++ case GR_FILENAME_TWO_INT_STR: ++ dentry = va_arg(ap, struct dentry *); ++ mnt = va_arg(ap, struct vfsmount *); ++ num1 = va_arg(ap, int); ++ num2 = va_arg(ap, int); ++ str1 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, gr_to_filename(dentry, mnt), num1, num2, str1); ++ break; ++ case GR_TEXTREL: ++ file = va_arg(ap, struct file *); ++ ulong1 = va_arg(ap, unsigned long); ++ ulong2 = va_arg(ap, unsigned long); ++ gr_log_middle_varargs(audit, msg, file ? 
gr_to_filename(file->f_path.dentry, file->f_path.mnt) : "<anonymous mapping>", ulong1, ulong2); ++ break; ++ case GR_PTRACE: ++ task = va_arg(ap, struct task_struct *); ++ gr_log_middle_varargs(audit, msg, task->exec_file ? gr_to_filename(task->exec_file->f_path.dentry, task->exec_file->f_path.mnt) : "(none)", task->comm, task->pid); ++ break; ++ case GR_RESOURCE: ++ task = va_arg(ap, struct task_struct *); ++ ulong1 = va_arg(ap, unsigned long); ++ str1 = va_arg(ap, char *); ++ ulong2 = va_arg(ap, unsigned long); ++ gr_log_middle_varargs(audit, msg, ulong1, str1, ulong2, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); ++ break; ++ case GR_CAP: ++ task = va_arg(ap, struct task_struct *); ++ str1 = va_arg(ap, char *); ++ gr_log_middle_varargs(audit, msg, str1, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); ++ break; ++ case GR_SIG: ++ task = va_arg(ap, struct task_struct *); ++ num1 = va_arg(ap, int); ++ gr_log_middle_varargs(audit, msg, num1, gr_task_fullpath0(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath0(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); ++ break; ++ case GR_CRASH1: ++ task = va_arg(ap, struct task_struct *); ++ ulong1 = va_arg(ap, unsigned long); ++ gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid, task->uid, ulong1); 
++ break; ++ case GR_CRASH2: ++ task = va_arg(ap, struct task_struct *); ++ ulong1 = va_arg(ap, unsigned long); ++ gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, task->uid, task->euid, task->gid, task->egid, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid, ulong1); ++ break; ++ case GR_PSACCT: ++ { ++ unsigned int wday, cday; ++ __u8 whr, chr; ++ __u8 wmin, cmin; ++ __u8 wsec, csec; ++ char cur_tty[64] = { 0 }; ++ char parent_tty[64] = { 0 }; ++ ++ task = va_arg(ap, struct task_struct *); ++ wday = va_arg(ap, unsigned int); ++ cday = va_arg(ap, unsigned int); ++ whr = va_arg(ap, int); ++ chr = va_arg(ap, int); ++ wmin = va_arg(ap, int); ++ cmin = va_arg(ap, int); ++ wsec = va_arg(ap, int); ++ csec = va_arg(ap, int); ++ ulong1 = va_arg(ap, unsigned long); ++ ++ gr_log_middle_varargs(audit, msg, gr_task_fullpath(task), task->comm, task->pid, NIPQUAD(task->signal->curr_ip), tty_name(task->signal->tty, cur_tty), task->uid, task->euid, task->gid, task->egid, wday, whr, wmin, wsec, cday, chr, cmin, csec, (task->flags & PF_SIGNALED) ? 
"killed by signal" : "exited", ulong1, gr_parent_task_fullpath(task), task->parent->comm, task->parent->pid, NIPQUAD(task->parent->signal->curr_ip), tty_name(task->parent->signal->tty, parent_tty), task->parent->uid, task->parent->euid, task->parent->gid, task->parent->egid); ++ } ++ break; ++ default: ++ gr_log_middle(audit, msg, ap); ++ } ++ va_end(ap); ++ gr_log_end(audit); ++ END_LOCKS(audit); ++} +diff -urNp a/grsecurity/grsec_mem.c b/grsecurity/grsec_mem.c +--- a/grsecurity/grsec_mem.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_mem.c 2009-05-24 18:10:25.264085908 -0700 +@@ -0,0 +1,71 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/mman.h> ++#include <linux/grinternal.h> ++ ++void ++gr_handle_ioperm(void) ++{ ++ gr_log_noargs(GR_DONT_AUDIT, GR_IOPERM_MSG); ++ return; ++} ++ ++void ++gr_handle_iopl(void) ++{ ++ gr_log_noargs(GR_DONT_AUDIT, GR_IOPL_MSG); ++ return; ++} ++ ++void ++gr_handle_mem_write(void) ++{ ++ gr_log_noargs(GR_DONT_AUDIT, GR_MEM_WRITE_MSG); ++ return; ++} ++ ++void ++gr_handle_kmem_write(void) ++{ ++ gr_log_noargs(GR_DONT_AUDIT, GR_KMEM_MSG); ++ return; ++} ++ ++void ++gr_handle_open_port(void) ++{ ++ gr_log_noargs(GR_DONT_AUDIT, GR_PORT_OPEN_MSG); ++ return; ++} ++ ++int ++gr_handle_mem_mmap(const unsigned long offset, struct vm_area_struct *vma) ++{ ++ unsigned long start, end; ++ ++ start = offset; ++ end = start + vma->vm_end - vma->vm_start; ++ ++ if (start > end) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_MEM_MMAP_MSG); ++ return -EPERM; ++ } ++ ++ /* allowed ranges : ISA I/O BIOS */ ++ if ((start >= __pa(high_memory)) ++#ifdef CONFIG_X86 ++ || (start >= 0x000a0000 && end <= 0x00100000) ++ || (start >= 0x00000000 && end <= 0x00001000) ++#endif ++ ) ++ return 0; ++ ++ if (vma->vm_flags & VM_WRITE) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_MEM_MMAP_MSG); ++ return -EPERM; ++ } else ++ vma->vm_flags &= ~VM_MAYWRITE; ++ ++ return 0; ++} +diff -urNp a/grsecurity/grsec_mount.c 
b/grsecurity/grsec_mount.c +--- a/grsecurity/grsec_mount.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_mount.c 2009-05-24 18:10:25.265085058 -0700 +@@ -0,0 +1,34 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++void ++gr_log_remount(const char *devname, const int retval) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT ++ if (grsec_enable_mount && (retval >= 0)) ++ gr_log_str(GR_DO_AUDIT, GR_REMOUNT_AUDIT_MSG, devname ? devname : "none"); ++#endif ++ return; ++} ++ ++void ++gr_log_unmount(const char *devname, const int retval) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT ++ if (grsec_enable_mount && (retval >= 0)) ++ gr_log_str(GR_DO_AUDIT, GR_UNMOUNT_AUDIT_MSG, devname ? devname : "none"); ++#endif ++ return; ++} ++ ++void ++gr_log_mount(const char *from, const char *to, const int retval) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT ++ if (grsec_enable_mount && (retval >= 0)) ++ gr_log_str_str(GR_DO_AUDIT, GR_MOUNT_AUDIT_MSG, from, to); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_sig.c b/grsecurity/grsec_sig.c +--- a/grsecurity/grsec_sig.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_sig.c 2009-05-24 18:10:25.265085058 -0700 +@@ -0,0 +1,58 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/delay.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++void ++gr_log_signal(const int sig, const struct task_struct *t) ++{ ++#ifdef CONFIG_GRKERNSEC_SIGNAL ++ if (grsec_enable_signal && ((sig == SIGSEGV) || (sig == SIGILL) || ++ (sig == SIGABRT) || (sig == SIGBUS))) { ++ if (t->pid == current->pid) { ++ gr_log_int(GR_DONT_AUDIT_GOOD, GR_UNISIGLOG_MSG, sig); ++ } else { ++ gr_log_sig(GR_DONT_AUDIT_GOOD, GR_DUALSIGLOG_MSG, t, sig); ++ } ++ } ++#endif ++ return; ++} ++ ++int ++gr_handle_signal(const struct task_struct *p, const int sig) ++{ ++#ifdef CONFIG_GRKERNSEC ++ if (current->pid > 1 && gr_check_protected_task(p)) { ++ 
gr_log_sig(GR_DONT_AUDIT, GR_SIG_ACL_MSG, p, sig); ++ return -EPERM; ++ } else if (gr_pid_is_chrooted((struct task_struct *)p)) { ++ return -EPERM; ++ } ++#endif ++ return 0; ++} ++ ++void gr_handle_brute_attach(struct task_struct *p) ++{ ++#ifdef CONFIG_GRKERNSEC_BRUTE ++ read_lock(&tasklist_lock); ++ read_lock(&grsec_exec_file_lock); ++ if (p->parent && p->parent->exec_file == p->exec_file) ++ p->parent->brute = 1; ++ read_unlock(&grsec_exec_file_lock); ++ read_unlock(&tasklist_lock); ++#endif ++ return; ++} ++ ++void gr_handle_brute_check(void) ++{ ++#ifdef CONFIG_GRKERNSEC_BRUTE ++ if (current->brute) ++ msleep(30 * 1000); ++#endif ++ return; ++} ++ +diff -urNp a/grsecurity/grsec_sock.c b/grsecurity/grsec_sock.c +--- a/grsecurity/grsec_sock.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_sock.c 2009-05-24 18:10:25.266085466 -0700 +@@ -0,0 +1,274 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/net.h> ++#include <linux/in.h> ++#include <linux/ip.h> ++#include <net/sock.h> ++#include <net/inet_sock.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++#include <linux/gracl.h> ++ ++#if defined(CONFIG_IP_NF_MATCH_STEALTH_MODULE) ++extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); ++EXPORT_SYMBOL(udp_v4_lookup); ++#endif ++ ++kernel_cap_t gr_cap_rtnetlink(struct sock *sock); ++EXPORT_SYMBOL(gr_cap_rtnetlink); ++ ++extern int gr_search_udp_recvmsg(const struct sock *sk, const struct sk_buff *skb); ++extern int gr_search_udp_sendmsg(const struct sock *sk, const struct sockaddr_in *addr); ++ ++EXPORT_SYMBOL(gr_search_udp_recvmsg); ++EXPORT_SYMBOL(gr_search_udp_sendmsg); ++ ++#ifdef CONFIG_UNIX_MODULE ++EXPORT_SYMBOL(gr_acl_handle_unix); ++EXPORT_SYMBOL(gr_acl_handle_mknod); ++EXPORT_SYMBOL(gr_handle_chroot_unix); ++EXPORT_SYMBOL(gr_handle_create); ++#endif ++ ++#ifdef CONFIG_GRKERNSEC ++#define gr_conn_table_size 32749 
++struct conn_table_entry { ++ struct conn_table_entry *next; ++ struct signal_struct *sig; ++}; ++ ++struct conn_table_entry *gr_conn_table[gr_conn_table_size]; ++DEFINE_SPINLOCK(gr_conn_table_lock); ++ ++extern const char * gr_socktype_to_name(unsigned char type); ++extern const char * gr_proto_to_name(unsigned char proto); ++ ++static __inline__ int ++conn_hash(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport, unsigned int size) ++{ ++ return ((daddr + saddr + (sport << 8) + (dport << 16)) % size); ++} ++ ++static __inline__ int ++conn_match(const struct signal_struct *sig, __u32 saddr, __u32 daddr, ++ __u16 sport, __u16 dport) ++{ ++ if (unlikely(sig->gr_saddr == saddr && sig->gr_daddr == daddr && ++ sig->gr_sport == sport && sig->gr_dport == dport)) ++ return 1; ++ else ++ return 0; ++} ++ ++static void gr_add_to_task_ip_table_nolock(struct signal_struct *sig, struct conn_table_entry *newent) ++{ ++ struct conn_table_entry **match; ++ unsigned int index; ++ ++ index = conn_hash(sig->gr_saddr, sig->gr_daddr, ++ sig->gr_sport, sig->gr_dport, ++ gr_conn_table_size); ++ ++ newent->sig = sig; ++ ++ match = &gr_conn_table[index]; ++ newent->next = *match; ++ *match = newent; ++ ++ return; ++} ++ ++static void gr_del_task_from_ip_table_nolock(struct signal_struct *sig) ++{ ++ struct conn_table_entry *match, *last = NULL; ++ unsigned int index; ++ ++ index = conn_hash(sig->gr_saddr, sig->gr_daddr, ++ sig->gr_sport, sig->gr_dport, ++ gr_conn_table_size); ++ ++ match = gr_conn_table[index]; ++ while (match && !conn_match(match->sig, ++ sig->gr_saddr, sig->gr_daddr, sig->gr_sport, ++ sig->gr_dport)) { ++ last = match; ++ match = match->next; ++ } ++ ++ if (match) { ++ if (last) ++ last->next = match->next; ++ else ++ gr_conn_table[index] = NULL; ++ kfree(match); ++ } ++ ++ return; ++} ++ ++static struct signal_struct * gr_lookup_task_ip_table(__u32 saddr, __u32 daddr, ++ __u16 sport, __u16 dport) ++{ ++ struct conn_table_entry *match; ++ unsigned int index; ++ ++ index 
= conn_hash(saddr, daddr, sport, dport, gr_conn_table_size); ++ ++ match = gr_conn_table[index]; ++ while (match && !conn_match(match->sig, saddr, daddr, sport, dport)) ++ match = match->next; ++ ++ if (match) ++ return match->sig; ++ else ++ return NULL; ++} ++ ++#endif ++ ++void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet) ++{ ++#ifdef CONFIG_GRKERNSEC ++ struct signal_struct *sig = task->signal; ++ struct conn_table_entry *newent; ++ ++ newent = kmalloc(sizeof(struct conn_table_entry), GFP_ATOMIC); ++ if (newent == NULL) ++ return; ++ /* no bh lock needed since we are called with bh disabled */ ++ spin_lock(&gr_conn_table_lock); ++ gr_del_task_from_ip_table_nolock(sig); ++ sig->gr_saddr = inet->rcv_saddr; ++ sig->gr_daddr = inet->daddr; ++ sig->gr_sport = inet->sport; ++ sig->gr_dport = inet->dport; ++ gr_add_to_task_ip_table_nolock(sig, newent); ++ spin_unlock(&gr_conn_table_lock); ++#endif ++ return; ++} ++ ++void gr_del_task_from_ip_table(struct task_struct *task) ++{ ++#ifdef CONFIG_GRKERNSEC ++ spin_lock_bh(&gr_conn_table_lock); ++ gr_del_task_from_ip_table_nolock(task->signal); ++ spin_unlock_bh(&gr_conn_table_lock); ++#endif ++ return; ++} ++ ++void ++gr_attach_curr_ip(const struct sock *sk) ++{ ++#ifdef CONFIG_GRKERNSEC ++ struct signal_struct *p, *set; ++ const struct inet_sock *inet = inet_sk(sk); ++ ++ if (unlikely(sk->sk_protocol != IPPROTO_TCP)) ++ return; ++ ++ set = current->signal; ++ ++ spin_lock_bh(&gr_conn_table_lock); ++ p = gr_lookup_task_ip_table(inet->daddr, inet->rcv_saddr, ++ inet->dport, inet->sport); ++ if (unlikely(p != NULL)) { ++ set->curr_ip = p->curr_ip; ++ set->used_accept = 1; ++ gr_del_task_from_ip_table_nolock(p); ++ spin_unlock_bh(&gr_conn_table_lock); ++ return; ++ } ++ spin_unlock_bh(&gr_conn_table_lock); ++ ++ set->curr_ip = inet->daddr; ++ set->used_accept = 1; ++#endif ++ return; ++} ++ ++int ++gr_handle_sock_all(const int family, const int type, const int protocol) ++{ ++#ifdef 
CONFIG_GRKERNSEC_SOCKET_ALL ++ if (grsec_enable_socket_all && in_group_p(grsec_socket_all_gid) && ++ (family != AF_UNIX) && (family != AF_LOCAL)) { ++ gr_log_int_str2(GR_DONT_AUDIT, GR_SOCK2_MSG, family, gr_socktype_to_name(type), gr_proto_to_name(protocol)); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_sock_server(const struct sockaddr *sck) ++{ ++#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER ++ if (grsec_enable_socket_server && ++ in_group_p(grsec_socket_server_gid) && ++ sck && (sck->sa_family != AF_UNIX) && ++ (sck->sa_family != AF_LOCAL)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_BIND_MSG); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_sock_server_other(const struct sock *sck) ++{ ++#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER ++ if (grsec_enable_socket_server && ++ in_group_p(grsec_socket_server_gid) && ++ sck && (sck->sk_family != AF_UNIX) && ++ (sck->sk_family != AF_LOCAL)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_BIND_MSG); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++int ++gr_handle_sock_client(const struct sockaddr *sck) ++{ ++#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT ++ if (grsec_enable_socket_client && in_group_p(grsec_socket_client_gid) && ++ sck && (sck->sa_family != AF_UNIX) && ++ (sck->sa_family != AF_LOCAL)) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_CONNECT_MSG); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++kernel_cap_t ++gr_cap_rtnetlink(struct sock *sock) ++{ ++#ifdef CONFIG_GRKERNSEC ++ if (!gr_acl_is_enabled()) ++ return current->cap_effective; ++ else if (sock->sk_protocol == NETLINK_ISCSI && ++ cap_raised(current->cap_effective, CAP_SYS_ADMIN) && ++ gr_task_is_capable(current, CAP_SYS_ADMIN)) ++ return current->cap_effective; ++ else if (sock->sk_protocol == NETLINK_AUDIT && ++ cap_raised(current->cap_effective, CAP_AUDIT_WRITE) && ++ gr_task_is_capable(current, CAP_AUDIT_WRITE) && ++ cap_raised(current->cap_effective, CAP_AUDIT_CONTROL) && ++ gr_task_is_capable(current, CAP_AUDIT_CONTROL)) ++ return 
current->cap_effective; ++ else if (cap_raised(current->cap_effective, CAP_NET_ADMIN) && ++ gr_task_is_capable(current, CAP_NET_ADMIN)) ++ return current->cap_effective; ++ else ++ return __cap_empty_set; ++#else ++ return current->cap_effective; ++#endif ++} +diff -urNp a/grsecurity/grsec_sysctl.c b/grsecurity/grsec_sysctl.c +--- a/grsecurity/grsec_sysctl.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_sysctl.c 2009-05-24 18:10:25.266085466 -0700 +@@ -0,0 +1,435 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/sysctl.h> ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++#ifdef CONFIG_GRKERNSEC_MODSTOP ++int grsec_modstop; ++#endif ++ ++int ++gr_handle_sysctl_mod(const char *dirname, const char *name, const int op) ++{ ++#ifdef CONFIG_GRKERNSEC_SYSCTL ++ if (!strcmp(dirname, "grsecurity") && grsec_lock && (op & MAY_WRITE)) { ++ gr_log_str(GR_DONT_AUDIT, GR_SYSCTL_MSG, name); ++ return -EACCES; ++ } ++#endif ++#ifdef CONFIG_GRKERNSEC_MODSTOP ++ if (!strcmp(dirname, "grsecurity") && !strcmp(name, "disable_modules") && ++ grsec_modstop && (op & MAY_WRITE)) { ++ gr_log_str(GR_DONT_AUDIT, GR_SYSCTL_MSG, name); ++ return -EACCES; ++ } ++#endif ++ return 0; ++} ++ ++#if defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_MODSTOP) ++ctl_table grsecurity_table[] = { ++#ifdef CONFIG_GRKERNSEC_SYSCTL ++#ifdef CONFIG_GRKERNSEC_LINK ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "linking_restrictions", ++ .data = &grsec_enable_link, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_FIFO ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "fifo_restrictions", ++ .data = &grsec_enable_fifo, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_EXECVE ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "execve_limiting", ++ .data = &grsec_enable_execve, ++ .maxlen = sizeof(int), 
++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_EXECLOG ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "exec_logging", ++ .data = &grsec_enable_execlog, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_SIGNAL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "signal_logging", ++ .data = &grsec_enable_signal, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_FORKFAIL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "forkfail_logging", ++ .data = &grsec_enable_forkfail, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_TIME ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "timechange_logging", ++ .data = &grsec_enable_time, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_SHMAT ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_shmat", ++ .data = &grsec_enable_chroot_shmat, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_unix", ++ .data = &grsec_enable_chroot_unix, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_MOUNT ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_mount", ++ .data = &grsec_enable_chroot_mount, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_FCHDIR ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_fchdir", ++ .data = &grsec_enable_chroot_fchdir, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_DOUBLE ++ 
{ ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_chroot", ++ .data = &grsec_enable_chroot_double, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_PIVOT ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_pivot", ++ .data = &grsec_enable_chroot_pivot, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHDIR ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_enforce_chdir", ++ .data = &grsec_enable_chroot_chdir, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CHMOD ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_chmod", ++ .data = &grsec_enable_chroot_chmod, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_MKNOD ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_deny_mknod", ++ .data = &grsec_enable_chroot_mknod, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_NICE ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_restrict_nice", ++ .data = &grsec_enable_chroot_nice, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_EXECLOG ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_execlog", ++ .data = &grsec_enable_chroot_execlog, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_CAPS ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_caps", ++ .data = &grsec_enable_chroot_caps, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_SYSCTL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = 
"chroot_deny_sysctl", ++ .data = &grsec_enable_chroot_sysctl, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_TPE ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "tpe", ++ .data = &grsec_enable_tpe, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "tpe_gid", ++ .data = &grsec_tpe_gid, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_TPE_ALL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "tpe_restrict_all", ++ .data = &grsec_enable_tpe_all, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_ALL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_all", ++ .data = &grsec_enable_socket_all, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_all_gid", ++ .data = &grsec_socket_all_gid, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_CLIENT ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_client", ++ .data = &grsec_enable_socket_client, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_client_gid", ++ .data = &grsec_socket_client_gid, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_SOCKET_SERVER ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_server", ++ .data = &grsec_enable_socket_server, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "socket_server_gid", ++ .data = &grsec_socket_server_gid, ++ .maxlen = sizeof(int), ++ .mode 
= 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_GROUP ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_group", ++ .data = &grsec_enable_group, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_gid", ++ .data = &grsec_audit_gid, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_CHDIR ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_chdir", ++ .data = &grsec_enable_chdir, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_MOUNT ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_mount", ++ .data = &grsec_enable_mount, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_IPC ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_ipc", ++ .data = &grsec_enable_audit_ipc, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "audit_textrel", ++ .data = &grsec_enable_audit_textrel, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_DMESG ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "dmesg", ++ .data = &grsec_enable_dmesg, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_CHROOT_FINDTASK ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "chroot_findtask", ++ .data = &grsec_enable_chroot_findtask, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_RESLOG ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "resource_logging", ++ .data = &grsec_resource_logging, ++ 
.maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "grsec_lock", ++ .data = &grsec_lock, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++#ifdef CONFIG_GRKERNSEC_MODSTOP ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "disable_modules", ++ .data = &grsec_modstop, ++ .maxlen = sizeof(int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif ++ { .ctl_name = 0 } ++}; ++#endif ++ ++int gr_check_modstop(void) ++{ ++#ifdef CONFIG_GRKERNSEC_MODSTOP ++ if (grsec_modstop == 1) { ++ gr_log_noargs(GR_DONT_AUDIT, GR_STOPMOD_MSG); ++ return 1; ++ } ++#endif ++ return 0; ++} +diff -urNp a/grsecurity/grsec_textrel.c b/grsecurity/grsec_textrel.c +--- a/grsecurity/grsec_textrel.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_textrel.c 2009-05-24 18:10:25.267085804 -0700 +@@ -0,0 +1,16 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/file.h> ++#include <linux/grinternal.h> ++#include <linux/grsecurity.h> ++ ++void ++gr_log_textrel(struct vm_area_struct * vma) ++{ ++#ifdef CONFIG_GRKERNSEC_AUDIT_TEXTREL ++ if (grsec_enable_audit_textrel) ++ gr_log_textrel_ulong_ulong(GR_DO_AUDIT, GR_TEXTREL_AUDIT_MSG, vma->vm_file, vma->vm_start, vma->vm_pgoff); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_time.c b/grsecurity/grsec_time.c +--- a/grsecurity/grsec_time.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsec_time.c 2009-05-24 18:10:25.267085804 -0700 +@@ -0,0 +1,13 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/grinternal.h> ++ ++void ++gr_log_timechange(void) ++{ ++#ifdef CONFIG_GRKERNSEC_TIME ++ if (grsec_enable_time) ++ gr_log_noargs(GR_DONT_AUDIT_GOOD, GR_TIME_MSG); ++#endif ++ return; ++} +diff -urNp a/grsecurity/grsec_tpe.c b/grsecurity/grsec_tpe.c +--- a/grsecurity/grsec_tpe.c 1969-12-31 16:00:00.000000000 -0800 ++++ 
b/grsecurity/grsec_tpe.c 2009-05-24 18:10:25.267085804 -0700 +@@ -0,0 +1,37 @@ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/grinternal.h> ++ ++extern int gr_acl_tpe_check(void); ++ ++int ++gr_tpe_allow(const struct file *file) ++{ ++#ifdef CONFIG_GRKERNSEC ++ struct inode *inode = file->f_path.dentry->d_parent->d_inode; ++ ++ if (current->uid && ((grsec_enable_tpe && ++#ifdef CONFIG_GRKERNSEC_TPE_INVERT ++ !in_group_p(grsec_tpe_gid) ++#else ++ in_group_p(grsec_tpe_gid) ++#endif ++ ) || gr_acl_tpe_check()) && ++ (inode->i_uid || (!inode->i_uid && ((inode->i_mode & S_IWGRP) || ++ (inode->i_mode & S_IWOTH))))) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_path.dentry, file->f_path.mnt); ++ return 0; ++ } ++#ifdef CONFIG_GRKERNSEC_TPE_ALL ++ if (current->uid && grsec_enable_tpe && grsec_enable_tpe_all && ++ ((inode->i_uid && (inode->i_uid != current->uid)) || ++ (inode->i_mode & S_IWGRP) || (inode->i_mode & S_IWOTH))) { ++ gr_log_fs_generic(GR_DONT_AUDIT, GR_EXEC_TPE_MSG, file->f_path.dentry, file->f_path.mnt); ++ return 0; ++ } ++#endif ++#endif ++ return 1; ++} +diff -urNp a/grsecurity/grsum.c b/grsecurity/grsum.c +--- a/grsecurity/grsum.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/grsecurity/grsum.c 2009-05-24 18:10:25.268087887 -0700 +@@ -0,0 +1,59 @@ ++#include <linux/err.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/mm.h> ++#include <linux/scatterlist.h> ++#include <linux/crypto.h> ++#include <linux/gracl.h> ++ ++ ++#if !defined(CONFIG_CRYPTO) || defined(CONFIG_CRYPTO_MODULE) || !defined(CONFIG_CRYPTO_SHA256) || defined(CONFIG_CRYPTO_SHA256_MODULE) ++#error "crypto and sha256 must be built into the kernel" ++#endif ++ ++int ++chkpw(struct gr_arg *entry, unsigned char *salt, unsigned char *sum) ++{ ++ char *p; ++ struct crypto_hash *tfm; ++ struct hash_desc desc; ++ struct scatterlist sg; ++ unsigned char temp_sum[GR_SHA_LEN]; ++ volatile 
int retval = 0; ++ volatile int dummy = 0; ++ unsigned int i; ++ ++ tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); ++ if (IS_ERR(tfm)) { ++ /* should never happen, since sha256 should be built in */ ++ return 1; ++ } ++ ++ desc.tfm = tfm; ++ desc.flags = 0; ++ ++ crypto_hash_init(&desc); ++ ++ p = salt; ++ sg_set_buf(&sg, p, GR_SALT_LEN); ++ crypto_hash_update(&desc, &sg, sg.length); ++ ++ p = entry->pw; ++ sg_set_buf(&sg, p, strlen(p)); ++ ++ crypto_hash_update(&desc, &sg, sg.length); ++ ++ crypto_hash_final(&desc, temp_sum); ++ ++ memset(entry->pw, 0, GR_PW_LEN); ++ ++ for (i = 0; i < GR_SHA_LEN; i++) ++ if (sum[i] != temp_sum[i]) ++ retval = 1; ++ else ++ dummy = 1; // waste a cycle ++ ++ crypto_free_hash(tfm); ++ ++ return retval; ++} +diff -urNp a/include/asm-frv/kmap_types.h b/include/asm-frv/kmap_types.h +--- a/include/asm-frv/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-frv/kmap_types.h 2009-05-24 18:10:25.268087887 -0700 +@@ -23,6 +23,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/include/asm-generic/futex.h b/include/asm-generic/futex.h +--- a/include/asm-generic/futex.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-generic/futex.h 2009-05-24 18:10:25.268087887 -0700 +@@ -6,7 +6,7 @@ + #include <asm/errno.h> + + static inline int +-futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ++futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) + { + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; +@@ -48,7 +48,7 @@ futex_atomic_op_inuser (int encoded_op, + } + + static inline int +-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) ++futex_atomic_cmpxchg_inatomic(u32 __user *uaddr, int oldval, int newval) + { + return -ENOSYS; + } +diff -urNp a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +--- a/include/asm-generic/vmlinux.lds.h 2009-05-02 11:54:43.000000000 -0700 ++++ 
b/include/asm-generic/vmlinux.lds.h 2009-05-24 18:10:25.269084733 -0700 +@@ -69,6 +69,7 @@ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ + *(.rodata) *(.rodata.*) \ ++ *(.data.read_only) \ + *(__vermagic) /* Kernel version magic */ \ + *(__markers_strings) /* Markers: strings */ \ + *(__tracepoints_strings)/* Tracepoints: strings */ \ +diff -urNp a/include/asm-m32r/kmap_types.h b/include/asm-m32r/kmap_types.h +--- a/include/asm-m32r/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-m32r/kmap_types.h 2009-05-24 18:10:25.269084733 -0700 +@@ -21,7 +21,8 @@ D(9) KM_IRQ0, + D(10) KM_IRQ1, + D(11) KM_SOFTIRQ0, + D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR ++D(13) KM_CLEARPAGE, ++D(14) KM_TYPE_NR + }; + + #undef D +diff -urNp a/include/asm-m68k/kmap_types.h b/include/asm-m68k/kmap_types.h +--- a/include/asm-m68k/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-m68k/kmap_types.h 2009-05-24 18:10:25.270085629 -0700 +@@ -15,6 +15,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/include/asm-mn10300/kmap_types.h b/include/asm-mn10300/kmap_types.h +--- a/include/asm-mn10300/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-mn10300/kmap_types.h 2009-05-24 18:10:25.270085629 -0700 +@@ -25,6 +25,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/include/asm-xtensa/kmap_types.h b/include/asm-xtensa/kmap_types.h +--- a/include/asm-xtensa/kmap_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/asm-xtensa/kmap_types.h 2009-05-24 18:10:25.270085629 -0700 +@@ -25,6 +25,7 @@ enum km_type { + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, ++ KM_CLEARPAGE, + KM_TYPE_NR + }; + +diff -urNp a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h +--- a/include/drm/drm_pciids.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/drm/drm_pciids.h 2009-05-24 18:10:25.271024930 -0700 +@@ 
-243,7 +243,7 @@ + {0x1002, 0x796d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS740|RADEON_IS_IGP|RADEON_NEW_MEMMAP|RADEON_IS_IGPGART}, \ + {0x1002, 0x796e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS740|RADEON_IS_IGP|RADEON_NEW_MEMMAP|RADEON_IS_IGPGART}, \ + {0x1002, 0x796f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS740|RADEON_IS_IGP|RADEON_NEW_MEMMAP|RADEON_IS_IGPGART}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define r128_PCI_IDS \ + {0x1002, 0x4c45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +@@ -283,14 +283,14 @@ + {0x1002, 0x5446, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1002, 0x544C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1002, 0x5452, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define mga_PCI_IDS \ + {0x102b, 0x0520, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G200}, \ + {0x102b, 0x0521, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G200}, \ + {0x102b, 0x0525, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G400}, \ + {0x102b, 0x2527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G550}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define mach64_PCI_IDS \ + {0x1002, 0x4749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +@@ -313,7 +313,7 @@ + {0x1002, 0x4c53, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1002, 0x4c4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1002, 0x4c4e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define sisdrv_PCI_IDS \ + {0x1039, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +@@ -324,7 +324,7 @@ + {0x1039, 0x7300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x18CA, 0x0040, PCI_ANY_ID, PCI_ANY_ID, 0, 0, SIS_CHIP_315}, \ + {0x18CA, 0x0042, PCI_ANY_ID, PCI_ANY_ID, 0, 0, SIS_CHIP_315}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define tdfx_PCI_IDS \ + {0x121a, 0x0003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +@@ -333,7 +333,7 @@ + {0x121a, 0x0007, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x121a, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x121a, 0x000b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define 
viadrv_PCI_IDS \ + {0x1106, 0x3022, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +@@ -345,25 +345,25 @@ + {0x1106, 0x3343, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x1106, 0x3230, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VIA_DX9_0}, \ + {0x1106, 0x3157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VIA_PRO_GROUP_A}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define i810_PCI_IDS \ + {0x8086, 0x7121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x7123, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x7125, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x1132, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define i830_PCI_IDS \ + {0x8086, 0x3577, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define gamma_PCI_IDS \ + {0x3d3d, 0x0008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define savage_PCI_IDS \ + {0x5333, 0x8a20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \ +@@ -389,10 +389,10 @@ + {0x5333, 0x8d02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_TWISTER}, \ + {0x5333, 0x8d03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \ + {0x5333, 0x8d04, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define ffb_PCI_IDS \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} + + #define i915_PCI_IDS \ + {0x8086, 0x3577, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ +@@ -418,4 +418,4 @@ + {0x8086, 0x2e02, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ +- {0, 0, 0} ++ {0, 0, 0, 0, 0, 0} +diff -urNp a/include/linux/a.out.h b/include/linux/a.out.h +--- a/include/linux/a.out.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/a.out.h 
2009-05-24 18:10:25.271024930 -0700 +@@ -39,6 +39,14 @@ enum machine_type { + M_MIPS2 = 152 /* MIPS R6000/R4000 binary */ + }; + ++/* Constants for the N_FLAGS field */ ++#define F_PAX_PAGEEXEC 1 /* Paging based non-executable pages */ ++#define F_PAX_EMUTRAMP 2 /* Emulate trampolines */ ++#define F_PAX_MPROTECT 4 /* Restrict mprotect() */ ++#define F_PAX_RANDMMAP 8 /* Randomize mmap() base */ ++/*#define F_PAX_RANDEXEC 16*/ /* Randomize ET_EXEC base */ ++#define F_PAX_SEGMEXEC 32 /* Segmentation based non-executable pages */ ++ + #if !defined (N_MAGIC) + #define N_MAGIC(exec) ((exec).a_info & 0xffff) + #endif +diff -urNp a/include/linux/binfmts.h b/include/linux/binfmts.h +--- a/include/linux/binfmts.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/binfmts.h 2009-05-24 18:10:25.272086305 -0700 +@@ -74,6 +74,7 @@ struct linux_binfmt { + int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); + int (*load_shlib)(struct file *); + int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); ++ void (*handle_mprotect)(struct vm_area_struct *vma, unsigned long newflags); + unsigned long min_coredump; /* minimal dump size */ + int hasvdso; + }; +diff -urNp a/include/linux/cache.h b/include/linux/cache.h +--- a/include/linux/cache.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/cache.h 2009-05-24 18:10:25.272086305 -0700 +@@ -16,6 +16,10 @@ + #define __read_mostly + #endif + ++#ifndef __read_only ++#define __read_only __read_mostly ++#endif ++ + #ifndef ____cacheline_aligned + #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) + #endif +diff -urNp a/include/linux/capability.h b/include/linux/capability.h +--- a/include/linux/capability.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/capability.h 2009-05-24 18:10:25.273003746 -0700 +@@ -533,6 +533,7 @@ kernel_cap_t cap_set_effective(const ker + #define has_capability(t, cap) (security_capable((t), (cap)) == 0) + + extern int 
capable(int cap); ++int capable_nolog(int cap); + + #endif /* __KERNEL__ */ + +diff -urNp a/include/linux/cpumask.h b/include/linux/cpumask.h +--- a/include/linux/cpumask.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/cpumask.h 2009-05-24 18:10:25.273003746 -0700 +@@ -142,7 +142,6 @@ + #include <linux/bitmap.h> + + typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +-extern cpumask_t _unused_cpumask_arg_; + + #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) + static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +diff -urNp a/include/linux/elf.h b/include/linux/elf.h +--- a/include/linux/elf.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/elf.h 2009-05-24 18:10:25.274959238 -0700 +@@ -49,6 +49,17 @@ typedef __s64 Elf64_Sxword; + #define PT_GNU_EH_FRAME 0x6474e550 + + #define PT_GNU_STACK (PT_LOOS + 0x474e551) ++#define PT_GNU_RELRO (PT_LOOS + 0x474e552) ++ ++#define PT_PAX_FLAGS (PT_LOOS + 0x5041580) ++ ++/* Constants for the e_flags field */ ++#define EF_PAX_PAGEEXEC 1 /* Paging based non-executable pages */ ++#define EF_PAX_EMUTRAMP 2 /* Emulate trampolines */ ++#define EF_PAX_MPROTECT 4 /* Restrict mprotect() */ ++#define EF_PAX_RANDMMAP 8 /* Randomize mmap() base */ ++/*#define EF_PAX_RANDEXEC 16*/ /* Randomize ET_EXEC base */ ++#define EF_PAX_SEGMEXEC 32 /* Segmentation based non-executable pages */ + + /* These constants define the different elf file types */ + #define ET_NONE 0 +@@ -84,6 +95,8 @@ typedef __s64 Elf64_Sxword; + #define DT_DEBUG 21 + #define DT_TEXTREL 22 + #define DT_JMPREL 23 ++#define DT_FLAGS 30 ++ #define DF_TEXTREL 0x00000004 + #define DT_ENCODING 32 + #define OLD_DT_LOOS 0x60000000 + #define DT_LOOS 0x6000000d +@@ -230,6 +243,19 @@ typedef struct elf64_hdr { + #define PF_W 0x2 + #define PF_X 0x1 + ++#define PF_PAGEEXEC (1U << 4) /* Enable PAGEEXEC */ ++#define PF_NOPAGEEXEC (1U << 5) /* Disable PAGEEXEC */ ++#define PF_SEGMEXEC (1U << 6) /* Enable SEGMEXEC */ ++#define PF_NOSEGMEXEC 
(1U << 7) /* Disable SEGMEXEC */ ++#define PF_MPROTECT (1U << 8) /* Enable MPROTECT */ ++#define PF_NOMPROTECT (1U << 9) /* Disable MPROTECT */ ++/*#define PF_RANDEXEC (1U << 10)*/ /* Enable RANDEXEC */ ++/*#define PF_NORANDEXEC (1U << 11)*/ /* Disable RANDEXEC */ ++#define PF_EMUTRAMP (1U << 12) /* Enable EMUTRAMP */ ++#define PF_NOEMUTRAMP (1U << 13) /* Disable EMUTRAMP */ ++#define PF_RANDMMAP (1U << 14) /* Enable RANDMMAP */ ++#define PF_NORANDMMAP (1U << 15) /* Disable RANDMMAP */ ++ + typedef struct elf32_phdr{ + Elf32_Word p_type; + Elf32_Off p_offset; +@@ -322,6 +348,8 @@ typedef struct elf64_shdr { + #define EI_OSABI 7 + #define EI_PAD 8 + ++#define EI_PAX 14 ++ + #define ELFMAG0 0x7f /* EI_MAG */ + #define ELFMAG1 'E' + #define ELFMAG2 'L' +@@ -384,6 +412,7 @@ extern Elf32_Dyn _DYNAMIC []; + #define elf_phdr elf32_phdr + #define elf_note elf32_note + #define elf_addr_t Elf32_Off ++#define elf_dyn Elf32_Dyn + + #else + +@@ -392,6 +421,7 @@ extern Elf64_Dyn _DYNAMIC []; + #define elf_phdr elf64_phdr + #define elf_note elf64_note + #define elf_addr_t Elf64_Off ++#define elf_dyn Elf64_Dyn + + #endif + +diff -urNp a/include/linux/gracl.h b/include/linux/gracl.h +--- a/include/linux/gracl.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/gracl.h 2009-05-24 18:10:25.274959238 -0700 +@@ -0,0 +1,318 @@ ++#ifndef GR_ACL_H ++#define GR_ACL_H ++ ++#include <linux/grdefs.h> ++#include <linux/resource.h> ++#include <linux/capability.h> ++#include <linux/dcache.h> ++#include <asm/resource.h> ++ ++/* Major status information */ ++ ++#define GR_VERSION "grsecurity 2.1.13" ++#define GRSECURITY_VERSION 0x2113 ++ ++enum { ++ GR_SHUTDOWN = 0, ++ GR_ENABLE = 1, ++ GR_SPROLE = 2, ++ GR_RELOAD = 3, ++ GR_SEGVMOD = 4, ++ GR_STATUS = 5, ++ GR_UNSPROLE = 6, ++ GR_PASSSET = 7, ++ GR_SPROLEPAM = 8, ++}; ++ ++/* Password setup definitions ++ * kernel/grhash.c */ ++enum { ++ GR_PW_LEN = 128, ++ GR_SALT_LEN = 16, ++ GR_SHA_LEN = 32, ++}; ++ ++enum { ++ GR_SPROLE_LEN = 64, ++}; 
++ ++#define GR_NLIMITS (RLIMIT_LOCKS + 2) ++ ++/* Begin Data Structures */ ++ ++struct sprole_pw { ++ unsigned char *rolename; ++ unsigned char salt[GR_SALT_LEN]; ++ unsigned char sum[GR_SHA_LEN]; /* 256-bit SHA hash of the password */ ++}; ++ ++struct name_entry { ++ __u32 key; ++ ino_t inode; ++ dev_t device; ++ char *name; ++ __u16 len; ++ __u8 deleted; ++ struct name_entry *prev; ++ struct name_entry *next; ++}; ++ ++struct inodev_entry { ++ struct name_entry *nentry; ++ struct inodev_entry *prev; ++ struct inodev_entry *next; ++}; ++ ++struct acl_role_db { ++ struct acl_role_label **r_hash; ++ __u32 r_size; ++}; ++ ++struct inodev_db { ++ struct inodev_entry **i_hash; ++ __u32 i_size; ++}; ++ ++struct name_db { ++ struct name_entry **n_hash; ++ __u32 n_size; ++}; ++ ++struct crash_uid { ++ uid_t uid; ++ unsigned long expires; ++}; ++ ++struct gr_hash_struct { ++ void **table; ++ void **nametable; ++ void *first; ++ __u32 table_size; ++ __u32 used_size; ++ int type; ++}; ++ ++/* Userspace Grsecurity ACL data structures */ ++ ++struct acl_subject_label { ++ char *filename; ++ ino_t inode; ++ dev_t device; ++ __u32 mode; ++ kernel_cap_t cap_mask; ++ kernel_cap_t cap_lower; ++ ++ struct rlimit res[GR_NLIMITS]; ++ __u16 resmask; ++ ++ __u8 user_trans_type; ++ __u8 group_trans_type; ++ uid_t *user_transitions; ++ gid_t *group_transitions; ++ __u16 user_trans_num; ++ __u16 group_trans_num; ++ ++ __u32 ip_proto[8]; ++ __u32 ip_type; ++ struct acl_ip_label **ips; ++ __u32 ip_num; ++ __u32 inaddr_any_override; ++ ++ __u32 crashes; ++ unsigned long expires; ++ ++ struct acl_subject_label *parent_subject; ++ struct gr_hash_struct *hash; ++ struct acl_subject_label *prev; ++ struct acl_subject_label *next; ++ ++ struct acl_object_label **obj_hash; ++ __u32 obj_hash_size; ++ __u16 pax_flags; ++}; ++ ++struct role_allowed_ip { ++ __u32 addr; ++ __u32 netmask; ++ ++ struct role_allowed_ip *prev; ++ struct role_allowed_ip *next; ++}; ++ ++struct role_transition { ++ char 
*rolename; ++ ++ struct role_transition *prev; ++ struct role_transition *next; ++}; ++ ++struct acl_role_label { ++ char *rolename; ++ uid_t uidgid; ++ __u16 roletype; ++ ++ __u16 auth_attempts; ++ unsigned long expires; ++ ++ struct acl_subject_label *root_label; ++ struct gr_hash_struct *hash; ++ ++ struct acl_role_label *prev; ++ struct acl_role_label *next; ++ ++ struct role_transition *transitions; ++ struct role_allowed_ip *allowed_ips; ++ uid_t *domain_children; ++ __u16 domain_child_num; ++ ++ struct acl_subject_label **subj_hash; ++ __u32 subj_hash_size; ++}; ++ ++struct user_acl_role_db { ++ struct acl_role_label **r_table; ++ __u32 num_pointers; /* Number of allocations to track */ ++ __u32 num_roles; /* Number of roles */ ++ __u32 num_domain_children; /* Number of domain children */ ++ __u32 num_subjects; /* Number of subjects */ ++ __u32 num_objects; /* Number of objects */ ++}; ++ ++struct acl_object_label { ++ char *filename; ++ ino_t inode; ++ dev_t device; ++ __u32 mode; ++ ++ struct acl_subject_label *nested; ++ struct acl_object_label *globbed; ++ ++ /* next two structures not used */ ++ ++ struct acl_object_label *prev; ++ struct acl_object_label *next; ++}; ++ ++struct acl_ip_label { ++ char *iface; ++ __u32 addr; ++ __u32 netmask; ++ __u16 low, high; ++ __u8 mode; ++ __u32 type; ++ __u32 proto[8]; ++ ++ /* next two structures not used */ ++ ++ struct acl_ip_label *prev; ++ struct acl_ip_label *next; ++}; ++ ++struct gr_arg { ++ struct user_acl_role_db role_db; ++ unsigned char pw[GR_PW_LEN]; ++ unsigned char salt[GR_SALT_LEN]; ++ unsigned char sum[GR_SHA_LEN]; ++ unsigned char sp_role[GR_SPROLE_LEN]; ++ struct sprole_pw *sprole_pws; ++ dev_t segv_device; ++ ino_t segv_inode; ++ uid_t segv_uid; ++ __u16 num_sprole_pws; ++ __u16 mode; ++}; ++ ++struct gr_arg_wrapper { ++ struct gr_arg *arg; ++ __u32 version; ++ __u32 size; ++}; ++ ++struct subject_map { ++ struct acl_subject_label *user; ++ struct acl_subject_label *kernel; ++ struct 
subject_map *prev; ++ struct subject_map *next; ++}; ++ ++struct acl_subj_map_db { ++ struct subject_map **s_hash; ++ __u32 s_size; ++}; ++ ++/* End Data Structures Section */ ++ ++/* Hash functions generated by empirical testing by Brad Spengler ++ Makes good use of the low bits of the inode. Generally 0-1 times ++ in loop for successful match. 0-3 for unsuccessful match. ++ Shift/add algorithm with modulus of table size and an XOR*/ ++ ++static __inline__ unsigned int ++rhash(const uid_t uid, const __u16 type, const unsigned int sz) ++{ ++ return (((uid << type) + (uid ^ type)) % sz); ++} ++ ++ static __inline__ unsigned int ++shash(const struct acl_subject_label *userp, const unsigned int sz) ++{ ++ return ((const unsigned long)userp % sz); ++} ++ ++static __inline__ unsigned int ++fhash(const ino_t ino, const dev_t dev, const unsigned int sz) ++{ ++ return (((ino + dev) ^ ((ino << 13) + (ino << 23) + (dev << 9))) % sz); ++} ++ ++static __inline__ unsigned int ++nhash(const char *name, const __u16 len, const unsigned int sz) ++{ ++ return full_name_hash(name, len) % sz; ++} ++ ++#define FOR_EACH_ROLE_START(role,iter) \ ++ role = NULL; \ ++ iter = 0; \ ++ while (iter < acl_role_set.r_size) { \ ++ if (role == NULL) \ ++ role = acl_role_set.r_hash[iter]; \ ++ if (role == NULL) { \ ++ iter++; \ ++ continue; \ ++ } ++ ++#define FOR_EACH_ROLE_END(role,iter) \ ++ role = role->next; \ ++ if (role == NULL) \ ++ iter++; \ ++ } ++ ++#define FOR_EACH_SUBJECT_START(role,subj,iter) \ ++ subj = NULL; \ ++ iter = 0; \ ++ while (iter < role->subj_hash_size) { \ ++ if (subj == NULL) \ ++ subj = role->subj_hash[iter]; \ ++ if (subj == NULL) { \ ++ iter++; \ ++ continue; \ ++ } ++ ++#define FOR_EACH_SUBJECT_END(subj,iter) \ ++ subj = subj->next; \ ++ if (subj == NULL) \ ++ iter++; \ ++ } ++ ++ ++#define FOR_EACH_NESTED_SUBJECT_START(role,subj) \ ++ subj = role->hash->first; \ ++ while (subj != NULL) { ++ ++#define FOR_EACH_NESTED_SUBJECT_END(subj) \ ++ subj = subj->next; \ ++ } ++ 
++#endif ++ +diff -urNp a/include/linux/gralloc.h b/include/linux/gralloc.h +--- a/include/linux/gralloc.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/gralloc.h 2009-05-24 18:10:25.275960273 -0700 +@@ -0,0 +1,8 @@ ++#ifndef __GRALLOC_H ++#define __GRALLOC_H ++ ++void acl_free_all(void); ++int acl_alloc_stack_init(unsigned long size); ++void *acl_alloc(unsigned long len); ++ ++#endif +diff -urNp a/include/linux/grdefs.h b/include/linux/grdefs.h +--- a/include/linux/grdefs.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/grdefs.h 2009-05-24 18:10:25.275960273 -0700 +@@ -0,0 +1,131 @@ ++#ifndef GRDEFS_H ++#define GRDEFS_H ++ ++/* Begin grsecurity status declarations */ ++ ++enum { ++ GR_READY = 0x01, ++ GR_STATUS_INIT = 0x00 // disabled state ++}; ++ ++/* Begin ACL declarations */ ++ ++/* Role flags */ ++ ++enum { ++ GR_ROLE_USER = 0x0001, ++ GR_ROLE_GROUP = 0x0002, ++ GR_ROLE_DEFAULT = 0x0004, ++ GR_ROLE_SPECIAL = 0x0008, ++ GR_ROLE_AUTH = 0x0010, ++ GR_ROLE_NOPW = 0x0020, ++ GR_ROLE_GOD = 0x0040, ++ GR_ROLE_LEARN = 0x0080, ++ GR_ROLE_TPE = 0x0100, ++ GR_ROLE_DOMAIN = 0x0200, ++ GR_ROLE_PAM = 0x0400 ++}; ++ ++/* ACL Subject and Object mode flags */ ++enum { ++ GR_DELETED = 0x80000000 ++}; ++ ++/* ACL Object-only mode flags */ ++enum { ++ GR_READ = 0x00000001, ++ GR_APPEND = 0x00000002, ++ GR_WRITE = 0x00000004, ++ GR_EXEC = 0x00000008, ++ GR_FIND = 0x00000010, ++ GR_INHERIT = 0x00000020, ++ GR_SETID = 0x00000040, ++ GR_CREATE = 0x00000080, ++ GR_DELETE = 0x00000100, ++ GR_LINK = 0x00000200, ++ GR_AUDIT_READ = 0x00000400, ++ GR_AUDIT_APPEND = 0x00000800, ++ GR_AUDIT_WRITE = 0x00001000, ++ GR_AUDIT_EXEC = 0x00002000, ++ GR_AUDIT_FIND = 0x00004000, ++ GR_AUDIT_INHERIT= 0x00008000, ++ GR_AUDIT_SETID = 0x00010000, ++ GR_AUDIT_CREATE = 0x00020000, ++ GR_AUDIT_DELETE = 0x00040000, ++ GR_AUDIT_LINK = 0x00080000, ++ GR_PTRACERD = 0x00100000, ++ GR_NOPTRACE = 0x00200000, ++ GR_SUPPRESS = 0x00400000, ++ GR_NOLEARN = 0x00800000 ++}; ++ ++#define GR_AUDITS 
(GR_AUDIT_READ | GR_AUDIT_WRITE | GR_AUDIT_APPEND | GR_AUDIT_EXEC | \ ++ GR_AUDIT_FIND | GR_AUDIT_INHERIT | GR_AUDIT_SETID | \ ++ GR_AUDIT_CREATE | GR_AUDIT_DELETE | GR_AUDIT_LINK) ++ ++/* ACL subject-only mode flags */ ++enum { ++ GR_KILL = 0x00000001, ++ GR_VIEW = 0x00000002, ++ GR_PROTECTED = 0x00000004, ++ GR_LEARN = 0x00000008, ++ GR_OVERRIDE = 0x00000010, ++ /* just a placeholder, this mode is only used in userspace */ ++ GR_DUMMY = 0x00000020, ++ GR_PROTSHM = 0x00000040, ++ GR_KILLPROC = 0x00000080, ++ GR_KILLIPPROC = 0x00000100, ++ /* just a placeholder, this mode is only used in userspace */ ++ GR_NOTROJAN = 0x00000200, ++ GR_PROTPROCFD = 0x00000400, ++ GR_PROCACCT = 0x00000800, ++ GR_RELAXPTRACE = 0x00001000, ++ GR_NESTED = 0x00002000, ++ GR_INHERITLEARN = 0x00004000, ++ GR_PROCFIND = 0x00008000, ++ GR_POVERRIDE = 0x00010000, ++ GR_KERNELAUTH = 0x00020000, ++}; ++ ++enum { ++ GR_PAX_ENABLE_SEGMEXEC = 0x0001, ++ GR_PAX_ENABLE_PAGEEXEC = 0x0002, ++ GR_PAX_ENABLE_MPROTECT = 0x0004, ++ GR_PAX_ENABLE_RANDMMAP = 0x0008, ++ GR_PAX_ENABLE_EMUTRAMP = 0x0010, ++ GR_PAX_DISABLE_SEGMEXEC = 0x0100, ++ GR_PAX_DISABLE_PAGEEXEC = 0x0200, ++ GR_PAX_DISABLE_MPROTECT = 0x0400, ++ GR_PAX_DISABLE_RANDMMAP = 0x0800, ++ GR_PAX_DISABLE_EMUTRAMP = 0x1000, ++}; ++ ++enum { ++ GR_ID_USER = 0x01, ++ GR_ID_GROUP = 0x02, ++}; ++ ++enum { ++ GR_ID_ALLOW = 0x01, ++ GR_ID_DENY = 0x02, ++}; ++ ++#define GR_CRASH_RES 11 ++#define GR_UIDTABLE_MAX 500 ++ ++/* begin resource learning section */ ++enum { ++ GR_RLIM_CPU_BUMP = 60, ++ GR_RLIM_FSIZE_BUMP = 50000, ++ GR_RLIM_DATA_BUMP = 10000, ++ GR_RLIM_STACK_BUMP = 1000, ++ GR_RLIM_CORE_BUMP = 10000, ++ GR_RLIM_RSS_BUMP = 500000, ++ GR_RLIM_NPROC_BUMP = 1, ++ GR_RLIM_NOFILE_BUMP = 5, ++ GR_RLIM_MEMLOCK_BUMP = 50000, ++ GR_RLIM_AS_BUMP = 500000, ++ GR_RLIM_LOCKS_BUMP = 2 ++}; ++ ++#endif +diff -urNp a/include/linux/grinternal.h b/include/linux/grinternal.h +--- a/include/linux/grinternal.h 1969-12-31 16:00:00.000000000 -0800 ++++ 
b/include/linux/grinternal.h 2009-05-24 18:10:25.276959494 -0700 +@@ -0,0 +1,210 @@ ++#ifndef __GRINTERNAL_H ++#define __GRINTERNAL_H ++ ++#ifdef CONFIG_GRKERNSEC ++ ++#include <linux/fs.h> ++#include <linux/gracl.h> ++#include <linux/grdefs.h> ++#include <linux/grmsg.h> ++ ++void gr_add_learn_entry(const char *fmt, ...); ++__u32 gr_search_file(const struct dentry *dentry, const __u32 mode, ++ const struct vfsmount *mnt); ++__u32 gr_check_create(const struct dentry *new_dentry, ++ const struct dentry *parent, ++ const struct vfsmount *mnt, const __u32 mode); ++int gr_check_protected_task(const struct task_struct *task); ++__u32 to_gr_audit(const __u32 reqmode); ++int gr_set_acls(const int type); ++ ++int gr_acl_is_enabled(void); ++char gr_roletype_to_char(void); ++ ++void gr_handle_alertkill(struct task_struct *task); ++char *gr_to_filename(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++char *gr_to_filename1(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++char *gr_to_filename2(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++char *gr_to_filename3(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++ ++extern int grsec_enable_link; ++extern int grsec_enable_fifo; ++extern int grsec_enable_execve; ++extern int grsec_enable_shm; ++extern int grsec_enable_execlog; ++extern int grsec_enable_signal; ++extern int grsec_enable_forkfail; ++extern int grsec_enable_time; ++extern int grsec_enable_chroot_shmat; ++extern int grsec_enable_chroot_findtask; ++extern int grsec_enable_chroot_mount; ++extern int grsec_enable_chroot_double; ++extern int grsec_enable_chroot_pivot; ++extern int grsec_enable_chroot_chdir; ++extern int grsec_enable_chroot_chmod; ++extern int grsec_enable_chroot_mknod; ++extern int grsec_enable_chroot_fchdir; ++extern int grsec_enable_chroot_nice; ++extern int grsec_enable_chroot_execlog; ++extern int grsec_enable_chroot_caps; ++extern int grsec_enable_chroot_sysctl; ++extern int 
grsec_enable_chroot_unix; ++extern int grsec_enable_tpe; ++extern int grsec_tpe_gid; ++extern int grsec_enable_tpe_all; ++extern int grsec_enable_sidcaps; ++extern int grsec_enable_socket_all; ++extern int grsec_socket_all_gid; ++extern int grsec_enable_socket_client; ++extern int grsec_socket_client_gid; ++extern int grsec_enable_socket_server; ++extern int grsec_socket_server_gid; ++extern int grsec_audit_gid; ++extern int grsec_enable_group; ++extern int grsec_enable_audit_ipc; ++extern int grsec_enable_audit_textrel; ++extern int grsec_enable_mount; ++extern int grsec_enable_chdir; ++extern int grsec_resource_logging; ++extern int grsec_lock; ++ ++extern spinlock_t grsec_alert_lock; ++extern unsigned long grsec_alert_wtime; ++extern unsigned long grsec_alert_fyet; ++ ++extern spinlock_t grsec_audit_lock; ++ ++extern rwlock_t grsec_exec_file_lock; ++ ++#define gr_task_fullpath(tsk) (tsk->exec_file ? \ ++ gr_to_filename2(tsk->exec_file->f_path.dentry, \ ++ tsk->exec_file->f_vfsmnt) : "/") ++ ++#define gr_parent_task_fullpath(tsk) (tsk->parent->exec_file ? \ ++ gr_to_filename3(tsk->parent->exec_file->f_path.dentry, \ ++ tsk->parent->exec_file->f_vfsmnt) : "/") ++ ++#define gr_task_fullpath0(tsk) (tsk->exec_file ? \ ++ gr_to_filename(tsk->exec_file->f_path.dentry, \ ++ tsk->exec_file->f_vfsmnt) : "/") ++ ++#define gr_parent_task_fullpath0(tsk) (tsk->parent->exec_file ? 
\ ++ gr_to_filename1(tsk->parent->exec_file->f_path.dentry, \ ++ tsk->parent->exec_file->f_vfsmnt) : "/") ++ ++#define proc_is_chrooted(tsk_a) ((tsk_a->pid > 1) && (tsk_a->fs != NULL) && \ ++ ((tsk_a->fs->root.dentry->d_inode->i_sb->s_dev != \ ++ tsk_a->nsproxy->pid_ns->child_reaper->fs->root.dentry->d_inode->i_sb->s_dev) || \ ++ (tsk_a->fs->root.dentry->d_inode->i_ino != \ ++ tsk_a->nsproxy->pid_ns->child_reaper->fs->root.dentry->d_inode->i_ino))) ++ ++#define have_same_root(tsk_a,tsk_b) ((tsk_a->fs != NULL) && (tsk_b->fs != NULL) && \ ++ (tsk_a->fs->root.dentry->d_inode->i_sb->s_dev == \ ++ tsk_b->fs->root.dentry->d_inode->i_sb->s_dev) && \ ++ (tsk_a->fs->root.dentry->d_inode->i_ino == \ ++ tsk_b->fs->root.dentry->d_inode->i_ino)) ++ ++#define DEFAULTSECARGS(task) gr_task_fullpath(task), task->comm, \ ++ task->pid, task->uid, \ ++ task->euid, task->gid, task->egid, \ ++ gr_parent_task_fullpath(task), \ ++ task->parent->comm, task->parent->pid, \ ++ task->parent->uid, task->parent->euid, \ ++ task->parent->gid, task->parent->egid ++ ++#define GR_CHROOT_CAPS {{ \ ++ CAP_TO_MASK(CAP_LINUX_IMMUTABLE) | CAP_TO_MASK(CAP_NET_ADMIN) | \ ++ CAP_TO_MASK(CAP_SYS_MODULE) | CAP_TO_MASK(CAP_SYS_RAWIO) | \ ++ CAP_TO_MASK(CAP_SYS_PACCT) | CAP_TO_MASK(CAP_SYS_ADMIN) | \ ++ CAP_TO_MASK(CAP_SYS_BOOT) | CAP_TO_MASK(CAP_SYS_TIME) | \ ++ CAP_TO_MASK(CAP_NET_RAW) | CAP_TO_MASK(CAP_SYS_TTY_CONFIG) | \ ++ CAP_TO_MASK(CAP_IPC_OWNER) , 0 }} ++ ++#define security_learn(normal_msg,args...) 
\ ++({ \ ++ read_lock(&grsec_exec_file_lock); \ ++ gr_add_learn_entry(normal_msg "\n", ## args); \ ++ read_unlock(&grsec_exec_file_lock); \ ++}) ++ ++enum { ++ GR_DO_AUDIT, ++ GR_DONT_AUDIT, ++ GR_DONT_AUDIT_GOOD ++}; ++ ++enum { ++ GR_TTYSNIFF, ++ GR_RBAC, ++ GR_RBAC_STR, ++ GR_STR_RBAC, ++ GR_RBAC_MODE2, ++ GR_RBAC_MODE3, ++ GR_FILENAME, ++ GR_SYSCTL_HIDDEN, ++ GR_NOARGS, ++ GR_ONE_INT, ++ GR_ONE_INT_TWO_STR, ++ GR_ONE_STR, ++ GR_STR_INT, ++ GR_TWO_INT, ++ GR_THREE_INT, ++ GR_FIVE_INT_TWO_STR, ++ GR_TWO_STR, ++ GR_THREE_STR, ++ GR_FOUR_STR, ++ GR_STR_FILENAME, ++ GR_FILENAME_STR, ++ GR_FILENAME_TWO_INT, ++ GR_FILENAME_TWO_INT_STR, ++ GR_TEXTREL, ++ GR_PTRACE, ++ GR_RESOURCE, ++ GR_CAP, ++ GR_SIG, ++ GR_CRASH1, ++ GR_CRASH2, ++ GR_PSACCT ++}; ++ ++#define gr_log_hidden_sysctl(audit, msg, str) gr_log_varargs(audit, msg, GR_SYSCTL_HIDDEN, str) ++#define gr_log_ttysniff(audit, msg, task) gr_log_varargs(audit, msg, GR_TTYSNIFF, task) ++#define gr_log_fs_rbac_generic(audit, msg, dentry, mnt) gr_log_varargs(audit, msg, GR_RBAC, dentry, mnt) ++#define gr_log_fs_rbac_str(audit, msg, dentry, mnt, str) gr_log_varargs(audit, msg, GR_RBAC_STR, dentry, mnt, str) ++#define gr_log_fs_str_rbac(audit, msg, str, dentry, mnt) gr_log_varargs(audit, msg, GR_STR_RBAC, str, dentry, mnt) ++#define gr_log_fs_rbac_mode2(audit, msg, dentry, mnt, str1, str2) gr_log_varargs(audit, msg, GR_RBAC_MODE2, dentry, mnt, str1, str2) ++#define gr_log_fs_rbac_mode3(audit, msg, dentry, mnt, str1, str2, str3) gr_log_varargs(audit, msg, GR_RBAC_MODE3, dentry, mnt, str1, str2, str3) ++#define gr_log_fs_generic(audit, msg, dentry, mnt) gr_log_varargs(audit, msg, GR_FILENAME, dentry, mnt) ++#define gr_log_noargs(audit, msg) gr_log_varargs(audit, msg, GR_NOARGS) ++#define gr_log_int(audit, msg, num) gr_log_varargs(audit, msg, GR_ONE_INT, num) ++#define gr_log_int_str2(audit, msg, num, str1, str2) gr_log_varargs(audit, msg, GR_ONE_INT_TWO_STR, num, str1, str2) ++#define gr_log_str(audit, msg, str) 
gr_log_varargs(audit, msg, GR_ONE_STR, str) ++#define gr_log_str_int(audit, msg, str, num) gr_log_varargs(audit, msg, GR_STR_INT, str, num) ++#define gr_log_int_int(audit, msg, num1, num2) gr_log_varargs(audit, msg, GR_TWO_INT, num1, num2) ++#define gr_log_int3(audit, msg, num1, num2, num3) gr_log_varargs(audit, msg, GR_THREE_INT, num1, num2, num3) ++#define gr_log_int5_str2(audit, msg, num1, num2, str1, str2) gr_log_varargs(audit, msg, GR_FIVE_INT_TWO_STR, num1, num2, str1, str2) ++#define gr_log_str_str(audit, msg, str1, str2) gr_log_varargs(audit, msg, GR_TWO_STR, str1, str2) ++#define gr_log_str3(audit, msg, str1, str2, str3) gr_log_varargs(audit, msg, GR_THREE_STR, str1, str2, str3) ++#define gr_log_str4(audit, msg, str1, str2, str3, str4) gr_log_varargs(audit, msg, GR_FOUR_STR, str1, str2, str3, str4) ++#define gr_log_str_fs(audit, msg, str, dentry, mnt) gr_log_varargs(audit, msg, GR_STR_FILENAME, str, dentry, mnt) ++#define gr_log_fs_str(audit, msg, dentry, mnt, str) gr_log_varargs(audit, msg, GR_FILENAME_STR, dentry, mnt, str) ++#define gr_log_fs_int2(audit, msg, dentry, mnt, num1, num2) gr_log_varargs(audit, msg, GR_FILENAME_TWO_INT, dentry, mnt, num1, num2) ++#define gr_log_fs_int2_str(audit, msg, dentry, mnt, num1, num2, str) gr_log_varargs(audit, msg, GR_FILENAME_TWO_INT_STR, dentry, mnt, num1, num2, str) ++#define gr_log_textrel_ulong_ulong(audit, msg, file, ulong1, ulong2) gr_log_varargs(audit, msg, GR_TEXTREL, file, ulong1, ulong2) ++#define gr_log_ptrace(audit, msg, task) gr_log_varargs(audit, msg, GR_PTRACE, task) ++#define gr_log_res_ulong2_str(audit, msg, task, ulong1, str, ulong2) gr_log_varargs(audit, msg, GR_RESOURCE, task, ulong1, str, ulong2) ++#define gr_log_cap(audit, msg, task, str) gr_log_varargs(audit, msg, GR_CAP, task, str) ++#define gr_log_sig(audit, msg, task, num) gr_log_varargs(audit, msg, GR_SIG, task, num) ++#define gr_log_crash1(audit, msg, task, ulong) gr_log_varargs(audit, msg, GR_CRASH1, task, ulong) ++#define 
gr_log_crash2(audit, msg, task, ulong1) gr_log_varargs(audit, msg, GR_CRASH2, task, ulong1) ++#define gr_log_procacct(audit, msg, task, num1, num2, num3, num4, num5, num6, num7, num8, num9) gr_log_varargs(audit, msg, GR_PSACCT, task, num1, num2, num3, num4, num5, num6, num7, num8, num9) ++ ++void gr_log_varargs(int audit, const char *msg, int argtypes, ...); ++ ++#endif ++ ++#endif +diff -urNp a/include/linux/grmsg.h b/include/linux/grmsg.h +--- a/include/linux/grmsg.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/grmsg.h 2009-05-24 18:10:25.276959494 -0700 +@@ -0,0 +1,108 @@ ++#define DEFAULTSECMSG "%.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u, parent %.256s[%.16s:%d] uid/euid:%u/%u gid/egid:%u/%u" ++#define GR_ACL_PROCACCT_MSG "%.256s[%.16s:%d] IP:%u.%u.%u.%u TTY:%.64s uid/euid:%u/%u gid/egid:%u/%u run time:[%ud %uh %um %us] cpu time:[%ud %uh %um %us] %s with exit code %ld, parent %.256s[%.16s:%d] IP:%u.%u.%u.%u TTY:%.64s uid/euid:%u/%u gid/egid:%u/%u" ++#define GR_PTRACE_ACL_MSG "denied ptrace of %.950s(%.16s:%d) by " ++#define GR_STOPMOD_MSG "denied modification of module state by " ++#define GR_IOPERM_MSG "denied use of ioperm() by " ++#define GR_IOPL_MSG "denied use of iopl() by " ++#define GR_SHMAT_ACL_MSG "denied attach of shared memory of UID %u, PID %d, ID %u by " ++#define GR_UNIX_CHROOT_MSG "denied connect() to abstract AF_UNIX socket outside of chroot by " ++#define GR_SHMAT_CHROOT_MSG "denied attach of shared memory outside of chroot by " ++#define GR_KMEM_MSG "denied write of /dev/kmem by " ++#define GR_PORT_OPEN_MSG "denied open of /dev/port by " ++#define GR_MEM_WRITE_MSG "denied write of /dev/mem by " ++#define GR_MEM_MMAP_MSG "denied mmap write of /dev/[k]mem by " ++#define GR_SYMLINK_MSG "not following symlink %.950s owned by %d.%d by " ++#define GR_LEARN_AUDIT_MSG "%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%lu\t%lu\t%.4095s\t%lu\t%u.%u.%u.%u" ++#define GR_ID_LEARN_MSG "%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%c\t%d\t%d\t%d\t%u.%u.%u.%u" 
++#define GR_HIDDEN_ACL_MSG "%s access to hidden file %.950s by " ++#define GR_OPEN_ACL_MSG "%s open of %.950s for%s%s by " ++#define GR_CREATE_ACL_MSG "%s create of %.950s for%s%s by " ++#define GR_FIFO_MSG "denied writing FIFO %.950s of %d.%d by " ++#define GR_MKNOD_CHROOT_MSG "denied mknod of %.950s from chroot by " ++#define GR_MKNOD_ACL_MSG "%s mknod of %.950s by " ++#define GR_UNIXCONNECT_ACL_MSG "%s connect() to the unix domain socket %.950s by " ++#define GR_TTYSNIFF_ACL_MSG "terminal being sniffed by IP:%u.%u.%u.%u %.480s[%.16s:%d], parent %.480s[%.16s:%d] against " ++#define GR_MKDIR_ACL_MSG "%s mkdir of %.950s by " ++#define GR_RMDIR_ACL_MSG "%s rmdir of %.950s by " ++#define GR_UNLINK_ACL_MSG "%s unlink of %.950s by " ++#define GR_SYMLINK_ACL_MSG "%s symlink from %.480s to %.480s by " ++#define GR_HARDLINK_MSG "denied hardlink of %.930s (owned by %d.%d) to %.30s for " ++#define GR_LINK_ACL_MSG "%s link of %.480s to %.480s by " ++#define GR_INHERIT_ACL_MSG "successful inherit of %.480s's ACL for %.480s by " ++#define GR_RENAME_ACL_MSG "%s rename of %.480s to %.480s by " ++#define GR_PTRACE_EXEC_ACL_MSG "denied ptrace of %.950s by " ++#define GR_NPROC_MSG "denied overstep of process limit by " ++#define GR_EXEC_ACL_MSG "%s execution of %.950s by " ++#define GR_EXEC_TPE_MSG "denied untrusted exec of %.950s by " ++#define GR_SEGVSTART_ACL_MSG "possible exploit bruteforcing on " DEFAULTSECMSG " banning uid %u from login for %lu seconds" ++#define GR_SEGVNOSUID_ACL_MSG "possible exploit bruteforcing on " DEFAULTSECMSG " banning execution for %lu seconds" ++#define GR_MOUNT_CHROOT_MSG "denied mount of %.256s as %.930s from chroot by " ++#define GR_PIVOT_CHROOT_MSG "denied pivot_root from chroot by " ++#define GR_TRUNCATE_ACL_MSG "%s truncate of %.950s by " ++#define GR_ATIME_ACL_MSG "%s access time change of %.950s by " ++#define GR_ACCESS_ACL_MSG "%s access of %.950s for%s%s%s by " ++#define GR_CHROOT_CHROOT_MSG "denied double chroot to %.950s by " ++#define 
GR_FCHMOD_ACL_MSG "%s fchmod of %.950s by " ++#define GR_CHMOD_CHROOT_MSG "denied chmod +s of %.950s by " ++#define GR_CHMOD_ACL_MSG "%s chmod of %.950s by " ++#define GR_CHROOT_FCHDIR_MSG "denied fchdir outside of chroot to %.950s by " ++#define GR_CHOWN_ACL_MSG "%s chown of %.950s by " ++#define GR_WRITLIB_ACL_MSG "denied load of writable library %.950s by " ++#define GR_INITF_ACL_MSG "init_variables() failed %s by " ++#define GR_DISABLED_ACL_MSG "Error loading %s, trying to run kernel with acls disabled. To disable acls at startup use <kernel image name> gracl=off from your boot loader" ++#define GR_DEV_ACL_MSG "/dev/grsec: %d bytes sent %d required, being fed garbaged by " ++#define GR_SHUTS_ACL_MSG "shutdown auth success for " ++#define GR_SHUTF_ACL_MSG "shutdown auth failure for " ++#define GR_SHUTI_ACL_MSG "ignoring shutdown for disabled RBAC system for " ++#define GR_SEGVMODS_ACL_MSG "segvmod auth success for " ++#define GR_SEGVMODF_ACL_MSG "segvmod auth failure for " ++#define GR_SEGVMODI_ACL_MSG "ignoring segvmod for disabled RBAC system for " ++#define GR_ENABLE_ACL_MSG "%s RBAC system loaded by " ++#define GR_ENABLEF_ACL_MSG "unable to load %s for " ++#define GR_RELOADI_ACL_MSG "ignoring reload request for disabled RBAC system" ++#define GR_RELOAD_ACL_MSG "%s RBAC system reloaded by " ++#define GR_RELOADF_ACL_MSG "failed reload of %s for " ++#define GR_SPROLEI_ACL_MSG "ignoring change to special role for disabled RBAC system for " ++#define GR_SPROLES_ACL_MSG "successful change to special role %s (id %d) by " ++#define GR_SPROLEL_ACL_MSG "special role %s (id %d) exited by " ++#define GR_SPROLEF_ACL_MSG "special role %s failure for " ++#define GR_UNSPROLEI_ACL_MSG "ignoring unauth of special role for disabled RBAC system for " ++#define GR_UNSPROLES_ACL_MSG "successful unauth of special role %s (id %d) by " ++#define GR_UNSPROLEF_ACL_MSG "special role unauth of %s failure for " ++#define GR_INVMODE_ACL_MSG "invalid mode %d by " ++#define 
GR_PRIORITY_CHROOT_MSG "denied priority change of process (%.16s:%d) by " ++#define GR_FAILFORK_MSG "failed fork with errno %d by " ++#define GR_NICE_CHROOT_MSG "denied priority change by " ++#define GR_UNISIGLOG_MSG "signal %d sent to " ++#define GR_DUALSIGLOG_MSG "signal %d sent to " DEFAULTSECMSG " by " ++#define GR_SIG_ACL_MSG "denied send of signal %d to protected task " DEFAULTSECMSG " by " ++#define GR_SYSCTL_MSG "denied modification of grsecurity sysctl value : %.32s by " ++#define GR_SYSCTL_ACL_MSG "%s sysctl of %.950s for%s%s by " ++#define GR_TIME_MSG "time set by " ++#define GR_DEFACL_MSG "fatal: unable to find subject for (%.16s:%d), loaded by " ++#define GR_MMAP_ACL_MSG "%s executable mmap of %.950s by " ++#define GR_MPROTECT_ACL_MSG "%s executable mprotect of %.950s by " ++#define GR_SOCK_MSG "denied socket(%.16s,%.16s,%.16s) by " ++#define GR_SOCK2_MSG "denied socket(%d,%.16s,%.16s) by " ++#define GR_BIND_MSG "denied bind() by " ++#define GR_CONNECT_MSG "denied connect() by " ++#define GR_BIND_ACL_MSG "denied bind() to %u.%u.%u.%u port %u sock type %.16s protocol %.16s by " ++#define GR_CONNECT_ACL_MSG "denied connect() to %u.%u.%u.%u port %u sock type %.16s protocol %.16s by " ++#define GR_IP_LEARN_MSG "%s\t%u\t%u\t%u\t%.4095s\t%.4095s\t%u.%u.%u.%u\t%u\t%u\t%u\t%u\t%u.%u.%u.%u" ++#define GR_EXEC_CHROOT_MSG "exec of %.980s within chroot by process " ++#define GR_CAP_ACL_MSG "use of %s denied for " ++#define GR_USRCHANGE_ACL_MSG "change to uid %u denied for " ++#define GR_GRPCHANGE_ACL_MSG "change to gid %u denied for " ++#define GR_REMOUNT_AUDIT_MSG "remount of %.256s by " ++#define GR_UNMOUNT_AUDIT_MSG "unmount of %.256s by " ++#define GR_MOUNT_AUDIT_MSG "mount of %.256s to %.256s by " ++#define GR_CHDIR_AUDIT_MSG "chdir to %.980s by " ++#define GR_EXEC_AUDIT_MSG "exec of %.930s (%.128s) by " ++#define GR_MSGQ_AUDIT_MSG "message queue created by " ++#define GR_MSGQR_AUDIT_MSG "message queue of uid:%u euid:%u removed by " ++#define GR_SEM_AUDIT_MSG 
"semaphore created by " ++#define GR_SEMR_AUDIT_MSG "semaphore of uid:%u euid:%u removed by " ++#define GR_SHM_AUDIT_MSG "shared memory of size %d created by " ++#define GR_SHMR_AUDIT_MSG "shared memory of uid:%u euid:%u removed by " ++#define GR_RESOURCE_MSG "denied resource overstep by requesting %lu for %.16s against limit %lu for " ++#define GR_TEXTREL_AUDIT_MSG "text relocation in %s, VMA:0x%08lx 0x%08lx by " +diff -urNp a/include/linux/grsecurity.h b/include/linux/grsecurity.h +--- a/include/linux/grsecurity.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/grsecurity.h 2009-05-24 18:10:25.277960320 -0700 +@@ -0,0 +1,200 @@ ++#ifndef GR_SECURITY_H ++#define GR_SECURITY_H ++#include <linux/fs.h> ++#include <linux/binfmts.h> ++#include <linux/gracl.h> ++ ++/* notify of brain-dead configs */ ++#if defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_PAGEEXEC) && !defined(CONFIG_PAX_SEGMEXEC) && !defined(CONFIG_PAX_KERNEXEC) ++#error "CONFIG_PAX_NOEXEC enabled, but PAGEEXEC, SEGMEXEC, and KERNEXEC are disabled." ++#endif ++#if defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_EI_PAX) && !defined(CONFIG_PAX_PT_PAX_FLAGS) ++#error "CONFIG_PAX_NOEXEC enabled, but neither CONFIG_PAX_EI_PAX nor CONFIG_PAX_PT_PAX_FLAGS are enabled." ++#endif ++#if defined(CONFIG_PAX_ASLR) && (defined(CONFIG_PAX_RANDMMAP) || defined(CONFIG_PAX_RANDUSTACK)) && !defined(CONFIG_PAX_EI_PAX) && !defined(CONFIG_PAX_PT_PAX_FLAGS) ++#error "CONFIG_PAX_ASLR enabled, but neither CONFIG_PAX_EI_PAX nor CONFIG_PAX_PT_PAX_FLAGS are enabled." ++#endif ++#if defined(CONFIG_PAX_ASLR) && !defined(CONFIG_PAX_RANDKSTACK) && !defined(CONFIG_PAX_RANDUSTACK) && !defined(CONFIG_PAX_RANDMMAP) ++#error "CONFIG_PAX_ASLR enabled, but RANDKSTACK, RANDUSTACK, and RANDMMAP are disabled." ++#endif ++#if defined(CONFIG_PAX) && !defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_ASLR) ++#error "CONFIG_PAX enabled, but no PaX options are enabled." 
++#endif ++ ++void gr_handle_brute_attach(struct task_struct *p); ++void gr_handle_brute_check(void); ++ ++char gr_roletype_to_char(void); ++ ++int gr_check_user_change(int real, int effective, int fs); ++int gr_check_group_change(int real, int effective, int fs); ++ ++void gr_del_task_from_ip_table(struct task_struct *p); ++ ++int gr_pid_is_chrooted(struct task_struct *p); ++int gr_handle_chroot_nice(void); ++int gr_handle_chroot_sysctl(const int op); ++int gr_handle_chroot_setpriority(struct task_struct *p, ++ const int niceval); ++int gr_chroot_fchdir(struct dentry *u_dentry, struct vfsmount *u_mnt); ++int gr_handle_chroot_chroot(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++void gr_handle_chroot_caps(struct task_struct *task); ++void gr_handle_chroot_chdir(struct path *path); ++int gr_handle_chroot_chmod(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int mode); ++int gr_handle_chroot_mknod(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int mode); ++int gr_handle_chroot_mount(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ const char *dev_name); ++int gr_handle_chroot_pivot(void); ++int gr_handle_chroot_unix(const pid_t pid); ++ ++int gr_handle_rawio(const struct inode *inode); ++int gr_handle_nproc(void); ++ ++void gr_handle_ioperm(void); ++void gr_handle_iopl(void); ++ ++int gr_tpe_allow(const struct file *file); ++ ++int gr_random_pid(void); ++ ++void gr_log_forkfail(const int retval); ++void gr_log_timechange(void); ++void gr_log_signal(const int sig, const struct task_struct *t); ++void gr_log_chdir(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++void gr_log_chroot_exec(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++void gr_handle_exec_args(struct linux_binprm *bprm, char **argv); ++void gr_log_remount(const char *devname, const int retval); ++void gr_log_unmount(const char *devname, const int retval); ++void gr_log_mount(const char *from, const char *to, 
const int retval); ++void gr_log_msgget(const int ret, const int msgflg); ++void gr_log_msgrm(const uid_t uid, const uid_t cuid); ++void gr_log_semget(const int err, const int semflg); ++void gr_log_semrm(const uid_t uid, const uid_t cuid); ++void gr_log_shmget(const int err, const int shmflg, const size_t size); ++void gr_log_shmrm(const uid_t uid, const uid_t cuid); ++void gr_log_textrel(struct vm_area_struct *vma); ++ ++int gr_handle_follow_link(const struct inode *parent, ++ const struct inode *inode, ++ const struct dentry *dentry, ++ const struct vfsmount *mnt); ++int gr_handle_fifo(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ const struct dentry *dir, const int flag, ++ const int acc_mode); ++int gr_handle_hardlink(const struct dentry *dentry, ++ const struct vfsmount *mnt, ++ struct inode *inode, ++ const int mode, const char *to); ++ ++int gr_task_is_capable(struct task_struct *task, const int cap); ++int gr_is_capable_nolog(const int cap); ++void gr_learn_resource(const struct task_struct *task, const int limit, ++ const unsigned long wanted, const int gt); ++void gr_copy_label(struct task_struct *tsk); ++void gr_handle_crash(struct task_struct *task, const int sig); ++int gr_handle_signal(const struct task_struct *p, const int sig); ++int gr_check_crash_uid(const uid_t uid); ++int gr_check_protected_task(const struct task_struct *task); ++int gr_acl_handle_mmap(const struct file *file, ++ const unsigned long prot); ++int gr_acl_handle_mprotect(const struct file *file, ++ const unsigned long prot); ++int gr_check_hidden_task(const struct task_struct *tsk); ++__u32 gr_acl_handle_truncate(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_utime(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_access(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int fmode); ++__u32 gr_acl_handle_fchmod(const struct dentry *dentry, ++ const struct vfsmount *mnt, mode_t 
mode); ++__u32 gr_acl_handle_chmod(const struct dentry *dentry, ++ const struct vfsmount *mnt, mode_t mode); ++__u32 gr_acl_handle_chown(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++int gr_handle_ptrace(struct task_struct *task, const long request); ++int gr_handle_proc_ptrace(struct task_struct *task); ++__u32 gr_acl_handle_execve(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++int gr_check_crash_exec(const struct file *filp); ++int gr_acl_is_enabled(void); ++void gr_set_kernel_label(struct task_struct *task); ++void gr_set_role_label(struct task_struct *task, const uid_t uid, ++ const gid_t gid); ++int gr_set_proc_label(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_hidden_file(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_open(const struct dentry *dentry, ++ const struct vfsmount *mnt, const int fmode); ++__u32 gr_acl_handle_creat(const struct dentry *dentry, ++ const struct dentry *p_dentry, ++ const struct vfsmount *p_mnt, const int fmode, ++ const int imode); ++void gr_handle_create(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_mknod(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ const int mode); ++__u32 gr_acl_handle_mkdir(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt); ++__u32 gr_acl_handle_rmdir(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++void gr_handle_delete(const ino_t ino, const dev_t dev); ++__u32 gr_acl_handle_unlink(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++__u32 gr_acl_handle_symlink(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ const char *from); ++__u32 gr_acl_handle_link(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount 
*parent_mnt, ++ const struct dentry *old_dentry, ++ const struct vfsmount *old_mnt, const char *to); ++int gr_acl_handle_rename(struct dentry *new_dentry, ++ struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ struct dentry *old_dentry, ++ struct inode *old_parent_inode, ++ struct vfsmount *old_mnt, const char *newname); ++void gr_handle_rename(struct inode *old_dir, struct inode *new_dir, ++ struct dentry *old_dentry, ++ struct dentry *new_dentry, ++ struct vfsmount *mnt, const __u8 replace); ++__u32 gr_check_link(const struct dentry *new_dentry, ++ const struct dentry *parent_dentry, ++ const struct vfsmount *parent_mnt, ++ const struct dentry *old_dentry, ++ const struct vfsmount *old_mnt); ++int gr_acl_handle_filldir(const struct file *file, const char *name, ++ const unsigned int namelen, const ino_t ino); ++ ++__u32 gr_acl_handle_unix(const struct dentry *dentry, ++ const struct vfsmount *mnt); ++void gr_acl_handle_exit(void); ++void gr_acl_handle_psacct(struct task_struct *task, const long code); ++int gr_acl_handle_procpidmem(const struct task_struct *task); ++ ++#ifdef CONFIG_GRKERNSEC ++void gr_handle_mem_write(void); ++void gr_handle_kmem_write(void); ++void gr_handle_open_port(void); ++int gr_handle_mem_mmap(const unsigned long offset, ++ struct vm_area_struct *vma); ++ ++extern int grsec_enable_dmesg; ++extern int grsec_enable_randsrc; ++extern int grsec_enable_shm; ++#endif ++ ++#endif +diff -urNp a/include/linux/highmem.h b/include/linux/highmem.h +--- a/include/linux/highmem.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/highmem.h 2009-05-24 18:10:25.278960379 -0700 +@@ -124,6 +124,18 @@ static inline void clear_highpage(struct + kunmap_atomic(kaddr, KM_USER0); + } + ++static inline void sanitize_highpage(struct page *page) ++{ ++ void *kaddr; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ kaddr = kmap_atomic(page, KM_CLEARPAGE); ++ clear_page(kaddr); ++ kunmap_atomic(kaddr, KM_CLEARPAGE); ++ 
local_irq_restore(flags); ++} ++ + static inline void zero_user_segments(struct page *page, + unsigned start1, unsigned end1, + unsigned start2, unsigned end2) +diff -urNp a/include/linux/jbd.h b/include/linux/jbd.h +--- a/include/linux/jbd.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/jbd.h 2009-05-24 18:10:25.279959389 -0700 +@@ -66,7 +66,7 @@ extern u8 journal_enable_debug; + } \ + } while (0) + #else +-#define jbd_debug(f, a...) /**/ ++#define jbd_debug(f, a...) do {} while (0) + #endif + + static inline void *jbd_alloc(size_t size, gfp_t flags) +diff -urNp a/include/linux/jbd2.h b/include/linux/jbd2.h +--- a/include/linux/jbd2.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/jbd2.h 2009-05-24 18:10:25.279959389 -0700 +@@ -66,7 +66,7 @@ extern u8 jbd2_journal_enable_debug; + } \ + } while (0) + #else +-#define jbd_debug(f, a...) /**/ ++#define jbd_debug(f, a...) do {} while (0) + #endif + + static inline void *jbd2_alloc(size_t size, gfp_t flags) +diff -urNp a/include/linux/kvm_host.h b/include/linux/kvm_host.h +--- a/include/linux/kvm_host.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/kvm_host.h 2009-05-24 18:10:25.280960635 -0700 +@@ -150,7 +150,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vc + void vcpu_load(struct kvm_vcpu *vcpu); + void vcpu_put(struct kvm_vcpu *vcpu); + +-int kvm_init(void *opaque, unsigned int vcpu_size, ++int kvm_init(const void *opaque, unsigned int vcpu_size, + struct module *module); + void kvm_exit(void); + +@@ -258,7 +258,7 @@ int kvm_arch_vcpu_ioctl_debug_guest(stru + struct kvm_debug_guest *dbg); + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); + +-int kvm_arch_init(void *opaque); ++int kvm_arch_init(const void *opaque); + void kvm_arch_exit(void); + + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); +diff -urNp a/include/linux/libata.h b/include/linux/libata.h +--- a/include/linux/libata.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/libata.h 2009-05-24 
18:10:25.281959506 -0700 +@@ -64,11 +64,11 @@ + #ifdef ATA_VERBOSE_DEBUG + #define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) + #else +-#define VPRINTK(fmt, args...) ++#define VPRINTK(fmt, args...) do {} while (0) + #endif /* ATA_VERBOSE_DEBUG */ + #else +-#define DPRINTK(fmt, args...) +-#define VPRINTK(fmt, args...) ++#define DPRINTK(fmt, args...) do {} while (0) ++#define VPRINTK(fmt, args...) do {} while (0) + #endif /* ATA_DEBUG */ + + #define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args) +diff -urNp a/include/linux/mm.h b/include/linux/mm.h +--- a/include/linux/mm.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/mm.h 2009-05-24 18:10:25.282959495 -0700 +@@ -39,6 +39,7 @@ extern unsigned long mmap_min_addr; + #include <asm/page.h> + #include <asm/pgtable.h> + #include <asm/processor.h> ++#include <asm/mman.h> + + #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) + +@@ -115,6 +116,10 @@ extern unsigned int kobjsize(const void + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ + ++#ifdef CONFIG_PAX_PAGEEXEC ++#define VM_PAGEEXEC 0x40000000 /* vma->vm_page_prot needs special handling */ ++#endif ++ + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ + #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS + #endif +@@ -873,6 +878,8 @@ struct shrinker { + extern void register_shrinker(struct shrinker *); + extern void unregister_shrinker(struct shrinker *); + ++pgprot_t vm_get_page_prot(unsigned long vm_flags); ++ + int vma_wants_writenotify(struct vm_area_struct *vma); + + extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); +@@ -1141,6 +1148,7 @@ out: + } + + extern int do_munmap(struct mm_struct *, unsigned long, size_t); ++extern int __do_munmap(struct mm_struct *, unsigned long, size_t); + + extern unsigned long 
do_brk(unsigned long, unsigned long); + +@@ -1193,6 +1201,10 @@ extern struct vm_area_struct * find_vma( + extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, + struct vm_area_struct **pprev); + ++extern struct vm_area_struct *pax_find_mirror_vma(struct vm_area_struct *vma); ++extern void pax_mirror_vma(struct vm_area_struct *vma_m, struct vm_area_struct *vma); ++extern void pax_mirror_file_pte(struct vm_area_struct *vma, unsigned long address, struct page *page_m, spinlock_t *ptl); ++ + /* Look up the first VMA which intersects the interval start_addr..end_addr-1, + NULL if none. Assume start_addr < end_addr. */ + static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) +@@ -1209,7 +1221,6 @@ static inline unsigned long vma_pages(st + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + } + +-pgprot_t vm_get_page_prot(unsigned long vm_flags); + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); + int remap_pfn_range(struct vm_area_struct *, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t); +@@ -1298,5 +1309,11 @@ int vmemmap_populate_basepages(struct pa + int vmemmap_populate(struct page *start_page, unsigned long pages, int node); + void vmemmap_populate_print_last(void); + ++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++extern void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot); ++#else ++static inline void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot) {} ++#endif ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_MM_H */ +diff -urNp a/include/linux/mm_types.h b/include/linux/mm_types.h +--- a/include/linux/mm_types.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/mm_types.h 2009-05-24 18:10:25.283959553 -0700 +@@ -157,6 +157,8 @@ struct vm_area_struct { + #ifdef CONFIG_NUMA + struct mempolicy 
*vm_policy; /* NUMA policy for the VMA */ + #endif ++ ++ struct vm_area_struct *vm_mirror;/* PaX: mirror vma or NULL */ + }; + + struct core_thread { +@@ -256,6 +258,24 @@ struct mm_struct { + #ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_mm *mmu_notifier_mm; + #endif ++ ++#if defined(CONFIG_PAX_EI_PAX) || defined(CONFIG_PAX_PT_PAX_FLAGS) || defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++ unsigned long pax_flags; ++#endif ++ ++#ifdef CONFIG_PAX_DLRESOLVE ++ unsigned long call_dl_resolve; ++#endif ++ ++#if defined(CONFIG_PPC32) && defined(CONFIG_PAX_EMUSIGRT) ++ unsigned long call_syscall; ++#endif ++ ++#ifdef CONFIG_PAX_ASLR ++ unsigned long delta_mmap; /* randomized offset */ ++ unsigned long delta_stack; /* randomized offset */ ++#endif ++ + }; + + #endif /* _LINUX_MM_TYPES_H */ +diff -urNp a/include/linux/module.h b/include/linux/module.h +--- a/include/linux/module.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/module.h 2009-05-24 18:10:25.283959553 -0700 +@@ -283,16 +283,16 @@ struct module + int (*init)(void); + + /* If this is non-NULL, vfree after init() returns */ +- void *module_init; ++ void *module_init_rx, *module_init_rw; + + /* Here is the actual code + data, vfree'd on unload. */ +- void *module_core; ++ void *module_core_rx, *module_core_rw; + + /* Here are the sizes of the init and core sections */ +- unsigned int init_size, core_size; ++ unsigned int init_size_rw, core_size_rw; + + /* The size of the executable code in each section. */ +- unsigned int init_text_size, core_text_size; ++ unsigned int init_size_rx, core_size_rx; + + /* The handle returned from unwind_add_table. */ + void *unwind_info; +diff -urNp a/include/linux/moduleloader.h b/include/linux/moduleloader.h +--- a/include/linux/moduleloader.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/moduleloader.h 2009-05-24 18:10:25.284960449 -0700 +@@ -17,9 +17,21 @@ int module_frob_arch_sections(Elf_Ehdr * + sections. Returns NULL on failure. 
*/ + void *module_alloc(unsigned long size); + ++#ifdef CONFIG_PAX_KERNEXEC ++void *module_alloc_exec(unsigned long size); ++#else ++#define module_alloc_exec(x) module_alloc(x) ++#endif ++ + /* Free memory returned from module_alloc. */ + void module_free(struct module *mod, void *module_region); + ++#ifdef CONFIG_PAX_KERNEXEC ++void module_free_exec(struct module *mod, void *module_region); ++#else ++#define module_free_exec(x, y) module_free(x, y) ++#endif ++ + /* Apply the given relocation to the (simplified) ELF. Return -error + or 0. */ + int apply_relocate(Elf_Shdr *sechdrs, +diff -urNp a/include/linux/namei.h b/include/linux/namei.h +--- a/include/linux/namei.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/namei.h 2009-05-24 18:10:25.285959390 -0700 +@@ -21,7 +21,7 @@ struct nameidata { + unsigned int flags; + int last_type; + unsigned depth; +- char *saved_names[MAX_NESTED_LINKS + 1]; ++ const char *saved_names[MAX_NESTED_LINKS + 1]; + + /* Intent data */ + union { +@@ -84,12 +84,12 @@ extern int follow_up(struct vfsmount **, + extern struct dentry *lock_rename(struct dentry *, struct dentry *); + extern void unlock_rename(struct dentry *, struct dentry *); + +-static inline void nd_set_link(struct nameidata *nd, char *path) ++static inline void nd_set_link(struct nameidata *nd, const char *path) + { + nd->saved_names[nd->depth] = path; + } + +-static inline char *nd_get_link(struct nameidata *nd) ++static inline const char *nd_get_link(struct nameidata *nd) + { + return nd->saved_names[nd->depth]; + } +diff -urNp a/include/linux/nodemask.h b/include/linux/nodemask.h +--- a/include/linux/nodemask.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/nodemask.h 2009-05-24 18:10:25.285959390 -0700 +@@ -442,11 +442,11 @@ static inline int num_node_state(enum no + + #define any_online_node(mask) \ + ({ \ +- int node; \ +- for_each_node_mask(node, (mask)) \ +- if (node_online(node)) \ ++ int __node; \ ++ for_each_node_mask(__node, (mask)) \ ++ 
if (node_online(__node)) \ + break; \ +- node; \ ++ __node; \ + }) + + #define num_online_nodes() num_node_state(N_ONLINE) +diff -urNp a/include/linux/percpu.h b/include/linux/percpu.h +--- a/include/linux/percpu.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/percpu.h 2009-05-24 18:10:25.286960496 -0700 +@@ -50,7 +50,7 @@ + #endif + + #define PERCPU_ENOUGH_ROOM \ +- (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) ++ ((unsigned long)(__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)) + #endif /* PERCPU_ENOUGH_ROOM */ + + /* +diff -urNp a/include/linux/poison.h b/include/linux/poison.h +--- a/include/linux/poison.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/poison.h 2009-05-24 18:10:25.287960414 -0700 +@@ -7,8 +7,8 @@ + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +-#define LIST_POISON1 ((void *) 0x00100100) +-#define LIST_POISON2 ((void *) 0x00200200) ++#define LIST_POISON1 ((void *) 0xFF1001FFFF1001FFULL) ++#define LIST_POISON2 ((void *) 0xFF2002FFFF2002FFULL) + + /********** include/linux/timer.h **********/ + /* +diff -urNp a/include/linux/proc_fs.h b/include/linux/proc_fs.h +--- a/include/linux/proc_fs.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/proc_fs.h 2009-05-24 18:10:25.287960414 -0700 +@@ -174,6 +174,19 @@ static inline struct proc_dir_entry *pro + return proc_create_data(name, mode, parent, proc_fops, NULL); + } + ++static inline struct proc_dir_entry *proc_create_grsec(const char *name, mode_t mode, ++ struct proc_dir_entry *parent, const struct file_operations *proc_fops) ++{ ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ return proc_create_data(name, S_IRUSR, parent, proc_fops, NULL); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ return proc_create_data(name, S_IRUSR | S_IRGRP, parent, proc_fops, NULL); ++#else ++ return proc_create_data(name, mode, parent, proc_fops, NULL); ++#endif ++} ++ ++ + static inline struct proc_dir_entry 
*create_proc_read_entry(const char *name, + mode_t mode, struct proc_dir_entry *base, + read_proc_t *read_proc, void * data) +diff -urNp a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h +--- a/include/linux/raid/md_k.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/raid/md_k.h 2009-05-24 18:10:25.288959495 -0700 +@@ -286,7 +286,13 @@ static inline void rdev_dec_pending(mdk_ + + static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) + { ++ ++#ifdef CONFIG_PAX_REFCOUNT ++ atomic_add_unchecked(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); ++#else + atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); ++#endif ++ + } + + struct mdk_personality +diff -urNp a/include/linux/random.h b/include/linux/random.h +--- a/include/linux/random.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/random.h 2009-05-24 18:10:25.288959495 -0700 +@@ -72,6 +72,11 @@ unsigned long randomize_range(unsigned l + u32 random32(void); + void srandom32(u32 seed); + ++static inline unsigned long pax_get_random_long(void) ++{ ++ return random32() + (sizeof(long) > 4 ? 
(unsigned long)random32() << 32 : 0); ++} ++ + #endif /* __KERNEL___ */ + + #endif /* _LINUX_RANDOM_H */ +diff -urNp a/include/linux/sched.h b/include/linux/sched.h +--- a/include/linux/sched.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/sched.h 2009-05-24 18:10:25.289959553 -0700 +@@ -96,6 +96,7 @@ struct exec_domain; + struct futex_pi_state; + struct robust_list_head; + struct bio; ++struct linux_binprm; + + /* + * List of flags we want to share for kernel threads, +@@ -589,6 +590,15 @@ struct signal_struct { + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; + #endif ++ ++#ifdef CONFIG_GRKERNSEC ++ u32 curr_ip; ++ u32 gr_saddr; ++ u32 gr_daddr; ++ u16 gr_sport; ++ u16 gr_dport; ++ u8 used_accept:1; ++#endif + }; + + /* Context switch must be unlocked if interrupts are to be enabled */ +@@ -1074,7 +1084,7 @@ struct sched_rt_entity { + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +- void *stack; ++ struct thread_info *stack; + atomic_t usage; + unsigned int flags; /* per process flags, defined below */ + unsigned int ptrace; +@@ -1139,10 +1149,9 @@ struct task_struct { + pid_t pid; + pid_t tgid; + +-#ifdef CONFIG_CC_STACKPROTECTOR + /* Canary value for the -fstack-protector gcc feature */ + unsigned long stack_canary; +-#endif ++ + /* + * pointers to (original) parent process, youngest child, younger sibling, + * older sibling, respectively. 
(p->father can be replaced with +@@ -1170,8 +1179,8 @@ struct task_struct { + struct list_head thread_group; + + struct completion *vfork_done; /* for vfork() */ +- int __user *set_child_tid; /* CLONE_CHILD_SETTID */ +- int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ ++ pid_t __user *set_child_tid; /* CLONE_CHILD_SETTID */ ++ pid_t __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + + cputime_t utime, stime, utimescaled, stimescaled; + cputime_t gtime; +@@ -1356,8 +1365,64 @@ struct task_struct { + unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; ++ ++#ifdef CONFIG_GRKERNSEC ++ /* grsecurity */ ++ struct acl_subject_label *acl; ++ struct acl_role_label *role; ++ struct file *exec_file; ++ u16 acl_role_id; ++ u8 acl_sp_role; ++ u8 is_writable; ++ u8 brute; ++#endif ++ + }; + ++#define MF_PAX_PAGEEXEC 0x01000000 /* Paging based non-executable pages */ ++#define MF_PAX_EMUTRAMP 0x02000000 /* Emulate trampolines */ ++#define MF_PAX_MPROTECT 0x04000000 /* Restrict mprotect() */ ++#define MF_PAX_RANDMMAP 0x08000000 /* Randomize mmap() base */ ++/*#define MF_PAX_RANDEXEC 0x10000000*/ /* Randomize ET_EXEC base */ ++#define MF_PAX_SEGMEXEC 0x20000000 /* Segmentation based non-executable pages */ ++ ++#ifdef CONFIG_PAX_SOFTMODE ++extern unsigned int pax_softmode; ++#endif ++ ++extern int pax_check_flags(unsigned long *); ++ ++/* if tsk != current then task_lock must be held on it */ ++#if defined(CONFIG_PAX_NOEXEC) || defined(CONFIG_PAX_ASLR) ++static inline unsigned long pax_get_flags(struct task_struct *tsk) ++{ ++ if (likely(tsk->mm)) ++ return tsk->mm->pax_flags; ++ else ++ return 0UL; ++} ++ ++/* if tsk != current then task_lock must be held on it */ ++static inline long pax_set_flags(struct task_struct *tsk, unsigned long flags) ++{ ++ if (likely(tsk->mm)) { ++ tsk->mm->pax_flags = flags; ++ return 0; ++ } ++ return -EINVAL; ++} ++#endif ++ ++#ifdef CONFIG_PAX_HAVE_ACL_FLAGS ++extern void pax_set_initial_flags(struct linux_binprm 
*bprm); ++#elif defined(CONFIG_PAX_HOOK_ACL_FLAGS) ++extern void (*pax_set_initial_flags_func)(struct linux_binprm *bprm); ++#endif ++ ++void pax_report_fault(struct pt_regs *regs, void *pc, void *sp); ++void pax_report_insns(void *pc, void *sp); ++void pax_report_refcount_overflow(struct pt_regs *regs); ++ + /* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH +@@ -1900,7 +1965,7 @@ extern void __cleanup_sighand(struct sig + extern void exit_itimers(struct signal_struct *); + extern void flush_itimer_signals(void); + +-extern NORET_TYPE void do_group_exit(int); ++extern NORET_TYPE void do_group_exit(int) ATTRIB_NORET; + + extern void daemonize(const char *, ...); + extern int allow_signal(int); +@@ -2003,8 +2068,8 @@ static inline void unlock_task_sighand(s + + #ifndef __HAVE_THREAD_FUNCTIONS + +-#define task_thread_info(task) ((struct thread_info *)(task)->stack) +-#define task_stack_page(task) ((task)->stack) ++#define task_thread_info(task) ((task)->stack) ++#define task_stack_page(task) ((void *)(task)->stack) + + static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) + { +diff -urNp a/include/linux/screen_info.h b/include/linux/screen_info.h +--- a/include/linux/screen_info.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/screen_info.h 2009-05-24 18:10:25.290960729 -0700 +@@ -42,7 +42,8 @@ struct screen_info { + __u16 pages; /* 0x32 */ + __u16 vesa_attributes; /* 0x34 */ + __u32 capabilities; /* 0x36 */ +- __u8 _reserved[6]; /* 0x3a */ ++ __u16 vesapm_size; /* 0x3a */ ++ __u8 _reserved[4]; /* 0x3c */ + } __attribute__((packed)); + + #define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */ +diff -urNp a/include/linux/security.h b/include/linux/security.h +--- a/include/linux/security.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/security.h 2009-05-24 18:10:25.291959600 -0700 +@@ -32,6 +32,7 @@ + #include <linux/sched.h> + #include 
<linux/key.h> + #include <linux/xfrm.h> ++#include <linux/grsecurity.h> + #include <net/flow.h> + + /* Maximum number of letters for an LSM name string */ +diff -urNp a/include/linux/seq_file.h b/include/linux/seq_file.h +--- a/include/linux/seq_file.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/seq_file.h 2009-05-24 18:10:25.292960706 -0700 +@@ -35,6 +35,7 @@ struct seq_operations { + + #define SEQ_SKIP 1 + ++char *mangle_path(char *s, char *p, char *esc); + int seq_open(struct file *, const struct seq_operations *); + ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); + loff_t seq_lseek(struct file *, loff_t, int); +diff -urNp a/include/linux/shm.h b/include/linux/shm.h +--- a/include/linux/shm.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/shm.h 2009-05-24 18:10:25.293960485 -0700 +@@ -95,6 +95,10 @@ struct shmid_kernel /* private to the ke + pid_t shm_cprid; + pid_t shm_lprid; + struct user_struct *mlock_user; ++#ifdef CONFIG_GRKERNSEC ++ time_t shm_createtime; ++ pid_t shm_lapid; ++#endif + }; + + /* shm_mode upper byte flags */ +diff -urNp a/include/linux/slab.h b/include/linux/slab.h +--- a/include/linux/slab.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/slab.h 2009-05-24 18:10:25.294962638 -0700 +@@ -73,10 +73,9 @@ + * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. + * Both make kfree a no-op. 
+ */ +-#define ZERO_SIZE_PTR ((void *)16) ++#define ZERO_SIZE_PTR ((void *)-1024L) + +-#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ +- (unsigned long)ZERO_SIZE_PTR) ++#define ZERO_OR_NULL_PTR(x) (!(x) || (x) == ZERO_SIZE_PTR) + + /* + * struct kmem_cache related prototypes +diff -urNp a/include/linux/slub_def.h b/include/linux/slub_def.h +--- a/include/linux/slub_def.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/slub_def.h 2009-05-24 18:10:25.294962638 -0700 +@@ -85,7 +85,7 @@ struct kmem_cache { + struct kmem_cache_order_objects max; + struct kmem_cache_order_objects min; + gfp_t allocflags; /* gfp flags to use on each alloc */ +- int refcount; /* Refcount for slab cache destroy */ ++ atomic_t refcount; /* Refcount for slab cache destroy */ + void (*ctor)(void *); + int inuse; /* Offset to metadata */ + int align; /* Alignment */ +diff -urNp a/include/linux/sysctl.h b/include/linux/sysctl.h +--- a/include/linux/sysctl.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/sysctl.h 2009-05-24 18:10:25.295962976 -0700 +@@ -165,7 +165,11 @@ enum + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ + }; + +- ++#ifdef CONFIG_PAX_SOFTMODE ++enum { ++ PAX_SOFTMODE=1 /* PaX: disable/enable soft mode */ ++}; ++#endif + + /* CTL_VM names: */ + enum +diff -urNp a/include/linux/thread_info.h b/include/linux/thread_info.h +--- a/include/linux/thread_info.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/thread_info.h 2009-05-24 18:10:25.295962976 -0700 +@@ -23,7 +23,7 @@ struct restart_block { + }; + /* For futex_wait */ + struct { +- u32 *uaddr; ++ u32 __user *uaddr; + u32 val; + u32 flags; + u32 bitset; +diff -urNp a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h +--- a/include/linux/tty_ldisc.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/tty_ldisc.h 2009-05-24 18:10:25.296961219 -0700 +@@ -139,12 +139,12 @@ struct tty_ldisc_ops { + + struct module *owner; + +- int refcount; ++ atomic_t refcount; + 
}; + + struct tty_ldisc { + struct tty_ldisc_ops *ops; +- int refcount; ++ atomic_t refcount; + }; + + #define TTY_LDISC_MAGIC 0x5403 +diff -urNp a/include/linux/uaccess.h b/include/linux/uaccess.h +--- a/include/linux/uaccess.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/uaccess.h 2009-05-24 18:10:25.296961219 -0700 +@@ -76,11 +76,11 @@ static inline unsigned long __copy_from_ + long ret; \ + mm_segment_t old_fs = get_fs(); \ + \ +- set_fs(KERNEL_DS); \ + pagefault_disable(); \ ++ set_fs(KERNEL_DS); \ + ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ +- pagefault_enable(); \ + set_fs(old_fs); \ ++ pagefault_enable(); \ + ret; \ + }) + +diff -urNp a/include/linux/vmalloc.h b/include/linux/vmalloc.h +--- a/include/linux/vmalloc.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/linux/vmalloc.h 2009-05-24 18:10:25.297960020 -0700 +@@ -13,6 +13,11 @@ struct vm_area_struct; /* vma defining + #define VM_MAP 0x00000004 /* vmap()ed pages */ + #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ + #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++#define VM_KERNEXEC 0x00000020 /* allocate from executable kernel memory range */ ++#endif ++ + /* bits [20..32] reserved for arch specific ioremap internals */ + + /* +diff -urNp a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h +--- a/include/net/sctp/sctp.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/net/sctp/sctp.h 2009-05-24 18:10:25.297960020 -0700 +@@ -309,8 +309,8 @@ extern int sctp_debug_flag; + + #else /* SCTP_DEBUG */ + +-#define SCTP_DEBUG_PRINTK(whatever...) +-#define SCTP_DEBUG_PRINTK_IPADDR(whatever...) ++#define SCTP_DEBUG_PRINTK(whatever...) do {} while (0) ++#define SCTP_DEBUG_PRINTK_IPADDR(whatever...) 
do {} while (0) + #define SCTP_ENABLE_DEBUG + #define SCTP_DISABLE_DEBUG + #define SCTP_ASSERT(expr, str, func) +diff -urNp a/include/sound/core.h b/include/sound/core.h +--- a/include/sound/core.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/sound/core.h 2009-05-24 18:10:25.298959729 -0700 +@@ -405,7 +405,7 @@ static inline int __snd_bug_on(void) + */ + #define snd_printdd(format, args...) snd_printk(format, ##args) + #else +-#define snd_printdd(format, args...) /* nothing */ ++#define snd_printdd(format, args...) do {} while (0) + #endif + + +diff -urNp a/include/video/uvesafb.h b/include/video/uvesafb.h +--- a/include/video/uvesafb.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/include/video/uvesafb.h 2009-05-24 18:10:25.298959729 -0700 +@@ -175,6 +175,7 @@ struct uvesafb_par { + u8 ypan; /* 0 - nothing, 1 - ypan, 2 - ywrap */ + u8 pmi_setpal; /* PMI for palette changes */ + u16 *pmi_base; /* protected mode interface location */ ++ u8 *pmi_code; /* protected mode code location */ + void *pmi_start; + void *pmi_pal; + u8 *vbe_state_orig; /* +diff -urNp a/init/Kconfig b/init/Kconfig +--- a/init/Kconfig 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/Kconfig 2009-05-24 18:10:25.299961324 -0700 +@@ -575,6 +575,7 @@ config SYSCTL_SYSCALL + config KALLSYMS + bool "Load all symbols for debugging/ksymoops" if EMBEDDED + default y ++ depends on !GRKERNSEC_HIDESYM + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. 
This increases the size of the kernel +@@ -822,9 +823,9 @@ config HAVE_GENERIC_DMA_COHERENT + + config SLABINFO + bool +- depends on PROC_FS ++ depends on PROC_FS && !GRKERNSEC_PROC_ADD + depends on SLAB || SLUB_DEBUG +- default y ++ default n + + config RT_MUTEXES + boolean +diff -urNp a/init/do_mounts.c b/init/do_mounts.c +--- a/init/do_mounts.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/do_mounts.c 2009-05-24 18:10:25.301960253 -0700 +@@ -214,11 +214,11 @@ static void __init get_fs_names(char *pa + + static int __init do_mount_root(char *name, char *fs, int flags, void *data) + { +- int err = sys_mount(name, "/root", fs, flags, data); ++ int err = sys_mount((char __user *)name, (char __user *)"/root", (char __user *)fs, flags, (void __user *)data); + if (err) + return err; + +- sys_chdir("/root"); ++ sys_chdir((char __user *)"/root"); + ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev; + printk("VFS: Mounted root (%s filesystem)%s.\n", + current->fs->pwd.mnt->mnt_sb->s_type->name, +@@ -308,18 +308,18 @@ void __init change_floppy(char *fmt, ... 
+ va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); +- fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0); ++ fd = sys_open((char __user *)"/dev/root", O_RDWR | O_NDELAY, 0); + if (fd >= 0) { + sys_ioctl(fd, FDEJECT, 0); + sys_close(fd); + } + printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); +- fd = sys_open("/dev/console", O_RDWR, 0); ++ fd = sys_open((char __user *)"/dev/console", O_RDWR, 0); + if (fd >= 0) { + sys_ioctl(fd, TCGETS, (long)&termios); + termios.c_lflag &= ~ICANON; + sys_ioctl(fd, TCSETSF, (long)&termios); +- sys_read(fd, &c, 1); ++ sys_read(fd, (char __user *)&c, 1); + termios.c_lflag |= ICANON; + sys_ioctl(fd, TCSETSF, (long)&termios); + sys_close(fd); +@@ -406,7 +406,7 @@ void __init prepare_namespace(void) + + mount_root(); + out: +- sys_mount(".", "/", NULL, MS_MOVE, NULL); +- sys_chroot("."); ++ sys_mount((char __user *)".", (char __user *)"/", NULL, MS_MOVE, NULL); ++ sys_chroot((char __user *)"."); + } + +diff -urNp a/init/do_mounts.h b/init/do_mounts.h +--- a/init/do_mounts.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/do_mounts.h 2009-05-24 18:10:25.301960253 -0700 +@@ -14,15 +14,15 @@ extern int root_mountflags; + + static inline int create_dev(char *name, dev_t dev) + { +- sys_unlink(name); +- return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); ++ sys_unlink((char __user *)name); ++ return sys_mknod((char __user *)name, S_IFBLK|0600, new_encode_dev(dev)); + } + + #if BITS_PER_LONG == 32 + static inline u32 bstat(char *name) + { + struct stat64 stat; +- if (sys_stat64(name, &stat) != 0) ++ if (sys_stat64((char __user *)name, (struct stat64 __user *)&stat) != 0) + return 0; + if (!S_ISBLK(stat.st_mode)) + return 0; +diff -urNp a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c +--- a/init/do_mounts_initrd.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/do_mounts_initrd.c 2009-05-24 18:10:25.301960253 -0700 +@@ -32,7 +32,7 @@ static int __init do_linuxrc(void * shel + 
sys_close(old_fd);sys_close(root_fd); + sys_close(0);sys_close(1);sys_close(2); + sys_setsid(); +- (void) sys_open("/dev/console",O_RDWR,0); ++ (void) sys_open((const char __user *)"/dev/console",O_RDWR,0); + (void) sys_dup(0); + (void) sys_dup(0); + return kernel_execve(shell, argv, envp_init); +@@ -47,13 +47,13 @@ static void __init handle_initrd(void) + create_dev("/dev/root.old", Root_RAM0); + /* mount initrd on rootfs' /root */ + mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); +- sys_mkdir("/old", 0700); +- root_fd = sys_open("/", 0, 0); +- old_fd = sys_open("/old", 0, 0); ++ sys_mkdir((const char __user *)"/old", 0700); ++ root_fd = sys_open((const char __user *)"/", 0, 0); ++ old_fd = sys_open((const char __user *)"/old", 0, 0); + /* move initrd over / and chdir/chroot in initrd root */ +- sys_chdir("/root"); +- sys_mount(".", "/", NULL, MS_MOVE, NULL); +- sys_chroot("."); ++ sys_chdir((const char __user *)"/root"); ++ sys_mount((char __user *)".", (char __user *)"/", NULL, MS_MOVE, NULL); ++ sys_chroot((const char __user *)"."); + + /* + * In case that a resume from disk is carried out by linuxrc or one of +@@ -70,15 +70,15 @@ static void __init handle_initrd(void) + + /* move initrd to rootfs' /old */ + sys_fchdir(old_fd); +- sys_mount("/", ".", NULL, MS_MOVE, NULL); ++ sys_mount((char __user *)"/", (char __user *)".", NULL, MS_MOVE, NULL); + /* switch root and cwd back to / of rootfs */ + sys_fchdir(root_fd); +- sys_chroot("."); ++ sys_chroot((const char __user *)"."); + sys_close(old_fd); + sys_close(root_fd); + + if (new_decode_dev(real_root_dev) == Root_RAM0) { +- sys_chdir("/old"); ++ sys_chdir((const char __user *)"/old"); + return; + } + +@@ -86,17 +86,17 @@ static void __init handle_initrd(void) + mount_root(); + + printk(KERN_NOTICE "Trying to move old root to /initrd ... 
"); +- error = sys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); ++ error = sys_mount((char __user *)"/old", (char __user *)"/root/initrd", NULL, MS_MOVE, NULL); + if (!error) + printk("okay\n"); + else { +- int fd = sys_open("/dev/root.old", O_RDWR, 0); ++ int fd = sys_open((const char __user *)"/dev/root.old", O_RDWR, 0); + if (error == -ENOENT) + printk("/initrd does not exist. Ignored.\n"); + else + printk("failed\n"); + printk(KERN_NOTICE "Unmounting old root\n"); +- sys_umount("/old", MNT_DETACH); ++ sys_umount((char __user *)"/old", MNT_DETACH); + printk(KERN_NOTICE "Trying to free ramdisk memory ... "); + if (fd < 0) { + error = fd; +@@ -119,11 +119,11 @@ int __init initrd_load(void) + * mounted in the normal path. + */ + if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { +- sys_unlink("/initrd.image"); ++ sys_unlink((const char __user *)"/initrd.image"); + handle_initrd(); + return 1; + } + } +- sys_unlink("/initrd.image"); ++ sys_unlink((const char __user *)"/initrd.image"); + return 0; + } +diff -urNp a/init/do_mounts_md.c b/init/do_mounts_md.c +--- a/init/do_mounts_md.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/do_mounts_md.c 2009-05-24 18:10:25.302961709 -0700 +@@ -171,7 +171,7 @@ static void __init md_setup_drive(void) + partitioned ? 
"_d" : "", minor, + md_setup_args[ent].device_names); + +- fd = sys_open(name, 0, 0); ++ fd = sys_open((char __user *)name, 0, 0); + if (fd < 0) { + printk(KERN_ERR "md: open failed - cannot start " + "array %s\n", name); +@@ -234,7 +234,7 @@ static void __init md_setup_drive(void) + * array without it + */ + sys_close(fd); +- fd = sys_open(name, 0, 0); ++ fd = sys_open((char __user *)name, 0, 0); + sys_ioctl(fd, BLKRRPART, 0); + } + sys_close(fd); +@@ -283,7 +283,7 @@ static void autodetect_raid(void) + printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); + while (driver_probe_done() < 0) + msleep(100); +- fd = sys_open("/dev/md0", 0, 0); ++ fd = sys_open((char __user *)"/dev/md0", 0, 0); + if (fd >= 0) { + sys_ioctl(fd, RAID_AUTORUN, raid_autopart); + sys_close(fd); +diff -urNp a/init/initramfs.c b/init/initramfs.c +--- a/init/initramfs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/initramfs.c 2009-05-24 18:10:25.302961709 -0700 +@@ -276,7 +276,7 @@ static int __init maybe_link(void) + if (nlink >= 2) { + char *old = find_link(major, minor, ino, mode, collected); + if (old) +- return (sys_link(old, collected) < 0) ? -1 : 1; ++ return (sys_link((char __user *)old, (char __user *)collected) < 0) ? 
-1 : 1; + } + return 0; + } +@@ -285,11 +285,11 @@ static void __init clean_path(char *path + { + struct stat st; + +- if (!sys_newlstat(path, &st) && (st.st_mode^mode) & S_IFMT) { ++ if (!sys_newlstat((char __user *)path, (struct stat __user *)&st) && (st.st_mode^mode) & S_IFMT) { + if (S_ISDIR(st.st_mode)) +- sys_rmdir(path); ++ sys_rmdir((char __user *)path); + else +- sys_unlink(path); ++ sys_unlink((char __user *)path); + } + } + +@@ -312,7 +312,7 @@ static int __init do_name(void) + int openflags = O_WRONLY|O_CREAT; + if (ml != 1) + openflags |= O_TRUNC; +- wfd = sys_open(collected, openflags, mode); ++ wfd = sys_open((char __user *)collected, openflags, mode); + + if (wfd >= 0) { + sys_fchown(wfd, uid, gid); +@@ -322,16 +322,16 @@ static int __init do_name(void) + } + } + } else if (S_ISDIR(mode)) { +- sys_mkdir(collected, mode); +- sys_chown(collected, uid, gid); +- sys_chmod(collected, mode); ++ sys_mkdir((char __user *)collected, mode); ++ sys_chown((char __user *)collected, uid, gid); ++ sys_chmod((char __user *)collected, mode); + dir_add(collected, mtime); + } else if (S_ISBLK(mode) || S_ISCHR(mode) || + S_ISFIFO(mode) || S_ISSOCK(mode)) { + if (maybe_link() == 0) { +- sys_mknod(collected, mode, rdev); +- sys_chown(collected, uid, gid); +- sys_chmod(collected, mode); ++ sys_mknod((char __user *)collected, mode, rdev); ++ sys_chown((char __user *)collected, uid, gid); ++ sys_chmod((char __user *)collected, mode); + do_utime(collected, mtime); + } + } +@@ -341,7 +341,7 @@ static int __init do_name(void) + static int __init do_copy(void) + { + if (count >= body_len) { +- sys_write(wfd, victim, body_len); ++ sys_write(wfd, (char __user *)victim, body_len); + sys_close(wfd); + do_utime(vcollected, mtime); + kfree(vcollected); +@@ -349,7 +349,7 @@ static int __init do_copy(void) + state = SkipIt; + return 0; + } else { +- sys_write(wfd, victim, count); ++ sys_write(wfd, (char __user *)victim, count); + body_len -= count; + eat(count); + return 1; +@@ -360,8 
+360,8 @@ static int __init do_symlink(void) + { + collected[N_ALIGN(name_len) + body_len] = '\0'; + clean_path(collected, 0); +- sys_symlink(collected + N_ALIGN(name_len), collected); +- sys_lchown(collected, uid, gid); ++ sys_symlink((char __user *)collected + N_ALIGN(name_len), (char __user *)collected); ++ sys_lchown((char __user *)collected, uid, gid); + do_utime(collected, mtime); + state = SkipIt; + next_state = Reset; +diff -urNp a/init/main.c b/init/main.c +--- a/init/main.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/main.c 2009-05-24 18:10:25.303962186 -0700 +@@ -104,6 +104,7 @@ static inline void mark_rodata_ro(void) + #ifdef CONFIG_TC + extern void tc_init(void); + #endif ++extern void grsecurity_init(void); + + enum system_states system_state; + EXPORT_SYMBOL(system_state); +@@ -190,6 +191,40 @@ static int __init set_reset_devices(char + + __setup("reset_devices", set_reset_devices); + ++#if defined(CONFIG_PAX_MEMORY_UDEREF) && defined(CONFIG_X86_32) ++static int __init setup_pax_nouderef(char *str) ++{ ++ unsigned int cpu; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++ ++ pax_open_kernel(cr0); ++#endif ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) ++ get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_DS].b = 0x00cf9300; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ return 1; ++} ++__setup("pax_nouderef", setup_pax_nouderef); ++#endif ++ ++#ifdef CONFIG_PAX_SOFTMODE ++unsigned int pax_softmode; ++ ++static int __init setup_pax_softmode(char *str) ++{ ++ get_option(&str, &pax_softmode); ++ return 1; ++} ++__setup("pax_softmode=", setup_pax_softmode); ++#endif ++ + static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; + char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; + static const char *panic_later, *panic_param; +@@ -388,7 +423,7 @@ static void __init setup_nr_cpu_ids(void + } + + #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; ++unsigned long 
__per_cpu_offset[NR_CPUS] __read_only; + + EXPORT_SYMBOL(__per_cpu_offset); + +@@ -704,6 +739,7 @@ int do_one_initcall(initcall_t fn) + { + int count = preempt_count(); + ktime_t delta; ++ const char *msg1 = "", *msg2 = ""; + char msgbuf[64]; + struct boot_trace it; + +@@ -730,15 +766,15 @@ int do_one_initcall(initcall_t fn) + sprintf(msgbuf, "error code %d ", it.result); + + if (preempt_count() != count) { +- strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); ++ msg1 = " preemption imbalance"; + preempt_count() = count; + } + if (irqs_disabled()) { +- strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); ++ msg2 = " disabled interrupts"; + local_irq_enable(); + } +- if (msgbuf[0]) { +- printk("initcall %pF returned with %s\n", fn, msgbuf); ++ if (msgbuf[0] || *msg1 || *msg2) { ++ printk("initcall %pF returned with %s%s%s\n", fn, msgbuf, msg1, msg2); + } + + return it.result; +@@ -877,6 +913,8 @@ static int __init kernel_init(void * unu + prepare_namespace(); + } + ++ grsecurity_init(); ++ + /* + * Ok, we have completed the initial bootup, and + * we're essentially up and running. 
Get rid of the +diff -urNp a/init/noinitramfs.c b/init/noinitramfs.c +--- a/init/noinitramfs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/init/noinitramfs.c 2009-05-24 18:10:25.304959591 -0700 +@@ -29,7 +29,7 @@ static int __init default_rootfs(void) + { + int err; + +- err = sys_mkdir("/dev", 0755); ++ err = sys_mkdir((const char __user *)"/dev", 0755); + if (err < 0) + goto out; + +@@ -39,7 +39,7 @@ static int __init default_rootfs(void) + if (err < 0) + goto out; + +- err = sys_mkdir("/root", 0700); ++ err = sys_mkdir((const char __user *)"/root", 0700); + if (err < 0) + goto out; + +diff -urNp a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +--- a/ipc/ipc_sysctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/ipc/ipc_sysctl.c 2009-05-24 18:10:25.304959591 -0700 +@@ -267,7 +267,7 @@ static struct ctl_table ipc_kern_table[] + .extra1 = &zero, + .extra2 = &one, + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static struct ctl_table ipc_root_table[] = { +@@ -277,7 +277,7 @@ static struct ctl_table ipc_root_table[] + .mode = 0555, + .child = ipc_kern_table, + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static int __init ipc_sysctl_init(void) +diff -urNp a/ipc/msg.c b/ipc/msg.c +--- a/ipc/msg.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/ipc/msg.c 2009-05-24 18:10:25.305959579 -0700 +@@ -314,6 +314,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, + struct ipc_namespace *ns; + struct ipc_ops msg_ops; + struct ipc_params msg_params; ++ long err; + + ns = current->nsproxy->ipc_ns; + +@@ -324,7 +325,11 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, + msg_params.key = key; + msg_params.flg = msgflg; + +- return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); ++ err = ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); ++ ++ gr_log_msgget(err, msgflg); ++ ++ return err; + } + + static inline unsigned long +@@ -434,6 +439,7 @@ static int msgctl_down(struct ipc_namesp + + switch (cmd) { + case IPC_RMID: ++ 
gr_log_msgrm(ipcp->uid, ipcp->cuid); + freeque(ns, ipcp); + goto out_up; + case IPC_SET: +diff -urNp a/ipc/sem.c b/ipc/sem.c +--- a/ipc/sem.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/ipc/sem.c 2009-05-24 18:10:25.305959579 -0700 +@@ -313,6 +313,7 @@ SYSCALL_DEFINE3(semget, key_t, key, int, + struct ipc_namespace *ns; + struct ipc_ops sem_ops; + struct ipc_params sem_params; ++ long err; + + ns = current->nsproxy->ipc_ns; + +@@ -327,7 +328,11 @@ SYSCALL_DEFINE3(semget, key_t, key, int, + sem_params.flg = semflg; + sem_params.u.nsems = nsems; + +- return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); ++ err = ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); ++ ++ gr_log_semget(err, semflg); ++ ++ return err; + } + + /* +@@ -870,6 +875,7 @@ static int semctl_down(struct ipc_namesp + + switch(cmd){ + case IPC_RMID: ++ gr_log_semrm(ipcp->uid, ipcp->cuid); + freeary(ns, ipcp); + goto out_up; + case IPC_SET: +diff -urNp a/ipc/shm.c b/ipc/shm.c +--- a/ipc/shm.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/ipc/shm.c 2009-05-24 18:10:25.306961872 -0700 +@@ -69,6 +69,14 @@ static void shm_destroy (struct ipc_name + static int sysvipc_shm_proc_show(struct seq_file *s, void *it); + #endif + ++#ifdef CONFIG_GRKERNSEC ++extern int gr_handle_shmat(const pid_t shm_cprid, const pid_t shm_lapid, ++ const time_t shm_createtime, const uid_t cuid, ++ const int shmid); ++extern int gr_chroot_shmat(const pid_t shm_cprid, const pid_t shm_lapid, ++ const time_t shm_createtime); ++#endif ++ + void shm_init_ns(struct ipc_namespace *ns) + { + ns->shm_ctlmax = SHMMAX; +@@ -87,6 +95,8 @@ static void do_shm_rmid(struct ipc_names + struct shmid_kernel *shp; + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + ++ gr_log_shmrm(shp->shm_perm.uid, shp->shm_perm.cuid); ++ + if (shp->shm_nattch){ + shp->shm_perm.mode |= SHM_DEST; + /* Do not find it any more */ +@@ -392,6 +402,14 @@ static int newseg(struct ipc_namespace * + shp->shm_lprid = 0; + shp->shm_atim = shp->shm_dtim = 0; + 
shp->shm_ctim = get_seconds(); ++#ifdef CONFIG_GRKERNSEC ++ { ++ struct timespec timeval; ++ do_posix_clock_monotonic_gettime(&timeval); ++ ++ shp->shm_createtime = timeval.tv_sec; ++ } ++#endif + shp->shm_segsz = size; + shp->shm_nattch = 0; + shp->shm_file = file; +@@ -445,6 +463,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size + struct ipc_namespace *ns; + struct ipc_ops shm_ops; + struct ipc_params shm_params; ++ long err; + + ns = current->nsproxy->ipc_ns; + +@@ -456,7 +475,11 @@ SYSCALL_DEFINE3(shmget, key_t, key, size + shm_params.flg = shmflg; + shm_params.u.size = size; + +- return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); ++ err = ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); ++ ++ gr_log_shmget(err, shmflg, size); ++ ++ return err; + } + + static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) +@@ -877,9 +900,21 @@ long do_shmat(int shmid, char __user *sh + if (err) + goto out_unlock; + ++#ifdef CONFIG_GRKERNSEC ++ if (!gr_handle_shmat(shp->shm_cprid, shp->shm_lapid, shp->shm_createtime, ++ shp->shm_perm.cuid, shmid) || ++ !gr_chroot_shmat(shp->shm_cprid, shp->shm_lapid, shp->shm_createtime)) { ++ err = -EACCES; ++ goto out_unlock; ++ } ++#endif ++ + path.dentry = dget(shp->shm_file->f_path.dentry); + path.mnt = shp->shm_file->f_path.mnt; + shp->shm_nattch++; ++#ifdef CONFIG_GRKERNSEC ++ shp->shm_lapid = current->pid; ++#endif + size = i_size_read(path.dentry->d_inode); + shm_unlock(shp); + +diff -urNp a/kernel/acct.c b/kernel/acct.c +--- a/kernel/acct.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/acct.c 2009-05-24 18:10:25.307961651 -0700 +@@ -573,7 +573,7 @@ static void do_acct_process(struct bsd_a + */ + flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; +- file->f_op->write(file, (char *)&ac, ++ file->f_op->write(file, (char __user *)&ac, + sizeof(acct_t), &file->f_pos); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; + 
set_fs(fs); +diff -urNp a/kernel/capability.c b/kernel/capability.c +--- a/kernel/capability.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/capability.c 2009-05-24 18:10:25.308962617 -0700 +@@ -498,10 +498,21 @@ SYSCALL_DEFINE2(capset, cap_user_header_ + */ + int capable(int cap) + { +- if (has_capability(current, cap)) { ++ if (has_capability(current, cap) && gr_task_is_capable(current, cap)) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; + } ++ ++int capable_nolog(int cap) ++{ ++ if (has_capability(current, cap) && gr_is_capable_nolog(cap)) { ++ current->flags |= PF_SUPERPRIV; ++ return 1; ++ } ++ return 0; ++} ++ + EXPORT_SYMBOL(capable); ++EXPORT_SYMBOL(capable_nolog); +diff -urNp a/kernel/configs.c b/kernel/configs.c +--- a/kernel/configs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/configs.c 2009-05-24 18:10:25.308962617 -0700 +@@ -73,8 +73,19 @@ static int __init ikconfig_init(void) + struct proc_dir_entry *entry; + + /* create the current config file */ ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ entry = proc_create("config.gz", S_IFREG | S_IRUSR, NULL, ++ &ikconfig_file_ops); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ entry = proc_create("config.gz", S_IFREG | S_IRUSR | S_IRGRP, NULL, ++ &ikconfig_file_ops); ++#endif ++#else + entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, + &ikconfig_file_ops); ++#endif ++ + if (!entry) + return -ENOMEM; + +diff -urNp a/kernel/cpu.c b/kernel/cpu.c +--- a/kernel/cpu.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/cpu.c 2009-05-24 18:10:25.309959882 -0700 +@@ -40,7 +40,7 @@ EXPORT_SYMBOL(cpu_possible_map); + /* Serializes the updates to cpu_online_map, cpu_present_map */ + static DEFINE_MUTEX(cpu_add_remove_lock); + +-static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); ++static RAW_NOTIFIER_HEAD(cpu_chain); + + /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. 
+ * Should always be manipulated under cpu_add_remove_lock +diff -urNp a/kernel/exit.c b/kernel/exit.c +--- a/kernel/exit.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/exit.c 2009-05-24 18:10:25.310960290 -0700 +@@ -53,6 +53,10 @@ + #include <asm/pgtable.h> + #include <asm/mmu_context.h> + ++#ifdef CONFIG_GRKERNSEC ++extern rwlock_t grsec_exec_file_lock; ++#endif ++ + static void exit_mm(struct task_struct * tsk); + + static inline int task_detached(struct task_struct *p) +@@ -163,6 +167,8 @@ void release_task(struct task_struct * p + struct task_struct *leader; + int zap_leader; + repeat: ++ gr_del_task_from_ip_table(p); ++ + tracehook_prepare_release_task(p); + atomic_dec(&p->user->processes); + proc_flush_task(p); +@@ -326,11 +332,22 @@ static void reparent_to_kthreadd(void) + { + write_lock_irq(&tasklist_lock); + ++#ifdef CONFIG_GRKERNSEC ++ write_lock(&grsec_exec_file_lock); ++ if (current->exec_file) { ++ fput(current->exec_file); ++ current->exec_file = NULL; ++ } ++ write_unlock(&grsec_exec_file_lock); ++#endif ++ + ptrace_unlink(current); + /* Reparent to init */ + current->real_parent = current->parent = kthreadd_task; + list_move_tail(¤t->sibling, ¤t->real_parent->children); + ++ gr_set_kernel_label(current); ++ + /* Set the exit signal to SIGCHLD so we signal init on exit */ + current->exit_signal = SIGCHLD; + +@@ -424,6 +441,17 @@ void daemonize(const char *name, ...) + vsnprintf(current->comm, sizeof(current->comm), name, args); + va_end(args); + ++#ifdef CONFIG_GRKERNSEC ++ write_lock(&grsec_exec_file_lock); ++ if (current->exec_file) { ++ fput(current->exec_file); ++ current->exec_file = NULL; ++ } ++ write_unlock(&grsec_exec_file_lock); ++#endif ++ ++ gr_set_kernel_label(current); ++ + /* + * If we were started as result of loading a module, close all of the + * user space pages. 
We don't need them, and if we didn't close them +@@ -1065,6 +1093,9 @@ NORET_TYPE void do_exit(long code) + tsk->exit_code = code; + taskstats_exit(tsk, group_dead); + ++ gr_acl_handle_psacct(tsk, code); ++ gr_acl_handle_exit(); ++ + exit_mm(tsk); + + if (group_dead) +@@ -1271,7 +1302,7 @@ static int wait_task_zombie(struct task_ + if (unlikely(options & WNOWAIT)) { + uid_t uid = p->uid; + int exit_code = p->exit_code; +- int why, status; ++ int why; + + get_task_struct(p); + read_unlock(&tasklist_lock); +diff -urNp a/kernel/fork.c b/kernel/fork.c +--- a/kernel/fork.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/fork.c 2009-05-24 18:10:25.311959719 -0700 +@@ -236,7 +236,7 @@ static struct task_struct *dup_task_stru + setup_thread_stack(tsk, orig); + + #ifdef CONFIG_CC_STACKPROTECTOR +- tsk->stack_canary = get_random_int(); ++ tsk->stack_canary = pax_get_random_long(); + #endif + + /* One for us, one for whoever does the "release_task()" (usually parent) */ +@@ -273,8 +273,8 @@ static int dup_mmap(struct mm_struct *mm + mm->locked_vm = 0; + mm->mmap = NULL; + mm->mmap_cache = NULL; +- mm->free_area_cache = oldmm->mmap_base; +- mm->cached_hole_size = ~0UL; ++ mm->free_area_cache = oldmm->free_area_cache; ++ mm->cached_hole_size = oldmm->cached_hole_size; + mm->map_count = 0; + cpus_clear(mm->cpu_vm_mask); + mm->mm_rb = RB_ROOT; +@@ -311,6 +311,7 @@ static int dup_mmap(struct mm_struct *mm + tmp->vm_flags &= ~VM_LOCKED; + tmp->vm_mm = mm; + tmp->vm_next = NULL; ++ tmp->vm_mirror = NULL; + anon_vma_link(tmp); + file = tmp->vm_file; + if (file) { +@@ -358,6 +359,31 @@ static int dup_mmap(struct mm_struct *mm + if (retval) + goto out; + } ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (oldmm->pax_flags & MF_PAX_SEGMEXEC) { ++ struct vm_area_struct *mpnt_m; ++ ++ for (mpnt = oldmm->mmap, mpnt_m = mm->mmap; mpnt; mpnt = mpnt->vm_next, mpnt_m = mpnt_m->vm_next) { ++ BUG_ON(!mpnt_m || mpnt_m->vm_mirror || mpnt->vm_mm != oldmm || mpnt_m->vm_mm != mm); ++ ++ if (!mpnt->vm_mirror) 
++ continue; ++ ++ if (mpnt->vm_end <= SEGMEXEC_TASK_SIZE) { ++ BUG_ON(mpnt->vm_mirror->vm_mirror != mpnt); ++ mpnt->vm_mirror = mpnt_m; ++ } else { ++ BUG_ON(mpnt->vm_mirror->vm_mirror == mpnt || mpnt->vm_mirror->vm_mirror->vm_mm != mm); ++ mpnt_m->vm_mirror = mpnt->vm_mirror->vm_mirror; ++ mpnt_m->vm_mirror->vm_mirror = mpnt_m; ++ mpnt->vm_mirror->vm_mirror = mpnt; ++ } ++ } ++ BUG_ON(mpnt_m); ++ } ++#endif ++ + /* a new mm has just been created */ + arch_dup_mmap(oldmm, mm); + retval = 0; +@@ -527,9 +553,11 @@ void mm_release(struct task_struct *tsk, + #ifdef CONFIG_FUTEX + if (unlikely(tsk->robust_list)) + exit_robust_list(tsk); ++ tsk->robust_list = NULL; + #ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) + compat_exit_robust_list(tsk); ++ tsk->compat_robust_list = NULL; + #endif + #endif + +@@ -551,7 +579,7 @@ void mm_release(struct task_struct *tsk, + if (tsk->clear_child_tid + && !(tsk->flags & PF_SIGNALED) + && atomic_read(&mm->mm_users) > 1) { +- u32 __user * tidptr = tsk->clear_child_tid; ++ pid_t __user * tidptr = tsk->clear_child_tid; + tsk->clear_child_tid = NULL; + + /* +@@ -559,7 +587,7 @@ void mm_release(struct task_struct *tsk, + * not set up a proper pointer then tough luck. + */ + put_user(0, tidptr); +- sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); ++ sys_futex((u32 __user *)tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); + } + } + +@@ -984,6 +1012,9 @@ static struct task_struct *copy_process( + DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); + #endif + retval = -EAGAIN; ++ ++ gr_learn_resource(p, RLIMIT_NPROC, atomic_read(&p->user->processes), 0); ++ + if (atomic_read(&p->user->processes) >= + p->signal->rlim[RLIMIT_NPROC].rlim_cur) { + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && +@@ -1147,6 +1178,8 @@ static struct task_struct *copy_process( + goto bad_fork_free_pid; + } + ++ gr_copy_label(p); ++ + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; + /* + * Clear TID on mm_release()? 
+@@ -1317,6 +1350,8 @@ bad_fork_cleanup_count: + bad_fork_free: + free_task(p); + fork_out: ++ gr_log_forkfail(retval); ++ + return ERR_PTR(retval); + } + +@@ -1395,6 +1430,8 @@ long do_fork(unsigned long clone_flags, + if (clone_flags & CLONE_PARENT_SETTID) + put_user(nr, parent_tidptr); + ++ gr_handle_brute_check(); ++ + if (clone_flags & CLONE_VFORK) { + p->vfork_done = &vfork; + init_completion(&vfork); +diff -urNp a/kernel/futex.c b/kernel/futex.c +--- a/kernel/futex.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/futex.c 2009-05-24 18:10:25.313959766 -0700 +@@ -188,6 +188,11 @@ static int get_futex_key(u32 __user *uad + struct page *page; + int err; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && address >= SEGMEXEC_TASK_SIZE) ++ return -EFAULT; ++#endif ++ + /* + * The futex address must be "naturally" aligned. + */ +@@ -214,8 +219,8 @@ static int get_futex_key(u32 __user *uad + * The futex is hashed differently depending on whether + * it's in a shared or private mapping. So check vma first. 
+ */ +- vma = find_extend_vma(mm, address); +- if (unlikely(!vma)) ++ vma = find_vma(mm, address); ++ if (unlikely(!vma || address < vma->vm_start)) + return -EFAULT; + + /* +@@ -1348,7 +1353,7 @@ static int futex_wait(u32 __user *uaddr, + struct restart_block *restart; + restart = ¤t_thread_info()->restart_block; + restart->fn = futex_wait_restart; +- restart->futex.uaddr = (u32 *)uaddr; ++ restart->futex.uaddr = uaddr; + restart->futex.val = val; + restart->futex.time = abs_time->tv64; + restart->futex.bitset = bitset; +@@ -1908,7 +1913,7 @@ retry: + */ + static inline int fetch_robust_entry(struct robust_list __user **entry, + struct robust_list __user * __user *head, +- int *pi) ++ unsigned int *pi) + { + unsigned long uentry; + +diff -urNp a/kernel/irq/handle.c b/kernel/irq/handle.c +--- a/kernel/irq/handle.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/irq/handle.c 2009-05-24 18:10:25.314962409 -0700 +@@ -57,7 +57,8 @@ struct irq_desc irq_desc[NR_IRQS] __cach + .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + #ifdef CONFIG_SMP +- .affinity = CPU_MASK_ALL ++ .affinity = CPU_MASK_ALL, ++ .cpu = 0, + #endif + } + }; +diff -urNp a/kernel/kallsyms.c b/kernel/kallsyms.c +--- a/kernel/kallsyms.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/kallsyms.c 2009-05-24 18:10:25.314962409 -0700 +@@ -62,6 +62,18 @@ static inline int is_kernel_text(unsigne + + static inline int is_kernel(unsigned long addr) + { ++ ++#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_MODULES) ++ if ((unsigned long)MODULES_VADDR <= ktla_ktva(addr) && ++ ktla_ktva(addr) < (unsigned long)MODULES_END) ++ return 0; ++#endif ++ ++#ifdef CONFIG_X86_32 ++ if (is_kernel_inittext(addr)) ++ return 1; ++#endif ++ + if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) + return 1; + return in_gate_area_no_task(addr); +@@ -372,7 +384,6 @@ static unsigned long get_ksymbol_core(st + + static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) + { +- iter->name[0] = 
'\0'; + iter->nameoff = get_symbol_offset(new_pos); + iter->pos = new_pos; + } +@@ -456,7 +467,7 @@ static int kallsyms_open(struct inode *i + struct kallsym_iter *iter; + int ret; + +- iter = kmalloc(sizeof(*iter), GFP_KERNEL); ++ iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + reset_iter(iter, 0); +@@ -478,7 +489,15 @@ static const struct file_operations kall + + static int __init kallsyms_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_create("kallsyms", S_IFREG | S_IRUSR, NULL, &kallsyms_operations); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ proc_create("kallsyms", S_IFREG | S_IRUSR | S_IRGRP, NULL, &kallsyms_operations); ++#endif ++#else + proc_create("kallsyms", 0444, NULL, &kallsyms_operations); ++#endif + return 0; + } + __initcall(kallsyms_init); +diff -urNp a/kernel/kmod.c b/kernel/kmod.c +--- a/kernel/kmod.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/kmod.c 2009-05-24 18:10:25.315962257 -0700 +@@ -108,7 +108,7 @@ int request_module(const char *fmt, ...) + return -ENOMEM; + } + +- ret = call_usermodehelper(modprobe_path, argv, envp, 1); ++ ret = call_usermodehelper(modprobe_path, argv, envp, UMH_WAIT_PROC); + atomic_dec(&kmod_concurrent); + return ret; + } +diff -urNp a/kernel/kprobes.c b/kernel/kprobes.c +--- a/kernel/kprobes.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/kprobes.c 2009-05-24 18:10:25.316960640 -0700 +@@ -182,7 +182,7 @@ kprobe_opcode_t __kprobes *get_insn_slot + * kernel image and loaded module images reside. This is required + * so x86_64 can correctly handle the %rip-relative fixups. 
+ */ +- kip->insns = module_alloc(PAGE_SIZE); ++ kip->insns = module_alloc_exec(PAGE_SIZE); + if (!kip->insns) { + kfree(kip); + return NULL; +@@ -214,7 +214,7 @@ static int __kprobes collect_one_slot(st + hlist_add_head(&kip->hlist, + &kprobe_insn_pages); + } else { +- module_free(NULL, kip->insns); ++ module_free_exec(NULL, kip->insns); + kfree(kip); + } + return 1; +diff -urNp a/kernel/lockdep.c b/kernel/lockdep.c +--- a/kernel/lockdep.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/lockdep.c 2009-05-24 18:10:25.318959988 -0700 +@@ -627,6 +627,10 @@ static int static_obj(void *obj) + int i; + #endif + ++#ifdef CONFIG_PAX_KERNEXEC ++ start = (unsigned long )&_data; ++#endif ++ + /* + * static variable? + */ +@@ -638,9 +642,12 @@ static int static_obj(void *obj) + * percpu var? + */ + for_each_possible_cpu(i) { ++#ifdef CONFIG_X86_32 ++ start = per_cpu_offset(i); ++#else + start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); +- end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM +- + per_cpu_offset(i); ++#endif ++ end = start + PERCPU_ENOUGH_ROOM; + + if ((addr >= start) && (addr < end)) + return 1; +diff -urNp a/kernel/module.c b/kernel/module.c +--- a/kernel/module.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/module.c 2009-05-24 18:10:25.320959546 -0700 +@@ -47,6 +47,11 @@ + #include <linux/rculist.h> + #include <asm/uaccess.h> + #include <asm/cacheflush.h> ++ ++#ifdef CONFIG_PAX_KERNEXEC ++#include <asm/desc.h> ++#endif ++ + #include <linux/license.h> + #include <asm/sections.h> + #include <linux/tracepoint.h> +@@ -76,7 +81,10 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq + static BLOCKING_NOTIFIER_HEAD(module_notify_list); + + /* Bounds of module allocation, for speeding __module_text_address */ +-static unsigned long module_addr_min = -1UL, module_addr_max = 0; ++static unsigned long module_addr_min_rw = -1UL, module_addr_max_rw = 0; ++static unsigned long module_addr_min_rx = -1UL, module_addr_max_rx = 0; ++ ++extern int 
gr_check_modstop(void); + + int register_module_notifier(struct notifier_block * nb) + { +@@ -245,7 +253,7 @@ static bool each_symbol(bool (*fn)(const + return true; + + list_for_each_entry_rcu(mod, &modules, list) { +- struct symsearch arr[] = { ++ struct symsearch modarr[] = { + { mod->syms, mod->syms + mod->num_syms, mod->crcs, + NOT_GPL_ONLY, false }, + { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, +@@ -267,7 +275,7 @@ static bool each_symbol(bool (*fn)(const + #endif + }; + +- if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) ++ if (each_symbol_in_section(modarr, ARRAY_SIZE(modarr), mod, fn, data)) + return true; + } + return false; +@@ -403,6 +411,8 @@ static inline unsigned int block_size(in + return val; + } + ++EXPORT_SYMBOL(__per_cpu_start); ++ + static void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) + { +@@ -410,7 +420,7 @@ static void *percpu_modalloc(unsigned lo + unsigned int i; + void *ptr; + +- if (align > PAGE_SIZE) { ++ if (align-1 >= PAGE_SIZE) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", + name, align, PAGE_SIZE); + align = PAGE_SIZE; +@@ -492,7 +502,11 @@ static void percpu_modcopy(void *pcpudes + int cpu; + + for_each_possible_cpu(cpu) ++#ifdef CONFIG_X86_32 ++ memcpy(pcpudest + __per_cpu_offset[cpu], from, size); ++#else + memcpy(pcpudest + per_cpu_offset(cpu), from, size); ++#endif + } + + static int percpu_modinit(void) +@@ -750,6 +764,9 @@ SYSCALL_DEFINE2(delete_module, const cha + char name[MODULE_NAME_LEN]; + int ret, forced = 0; + ++ if (gr_check_modstop()) ++ return -EPERM; ++ + if (!capable(CAP_SYS_MODULE)) + return -EPERM; + +@@ -1448,19 +1465,22 @@ static void free_module(struct module *m + module_unload_free(mod); + + /* release any pointers to mcount in this module */ +- ftrace_release(mod->module_core, mod->core_size); ++ ftrace_release(mod->module_core_rx, mod->core_size_rx); + + /* This may be NULL, but that's OK */ +- module_free(mod, mod->module_init); ++ 
module_free(mod, mod->module_init_rw); ++ module_free_exec(mod, mod->module_init_rx); + kfree(mod->args); + if (mod->percpu) + percpu_modfree(mod->percpu); + + /* Free lock-classes: */ +- lockdep_free_key_range(mod->module_core, mod->core_size); ++ lockdep_free_key_range(mod->module_core_rx, mod->core_size_rx); ++ lockdep_free_key_range(mod->module_core_rw, mod->core_size_rw); + + /* Finally, free the core (containing the module structure) */ +- module_free(mod, mod->module_core); ++ module_free_exec(mod, mod->module_core_rx); ++ module_free(mod, mod->module_core_rw); + } + + void *__symbol_get(const char *symbol) +@@ -1526,10 +1546,14 @@ static int simplify_symbols(Elf_Shdr *se + struct module *mod) + { + Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; +- unsigned long secbase; ++ unsigned long secbase, symbol; + unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); + int ret = 0; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + for (i = 1; i < n; i++) { + switch (sym[i].st_shndx) { + case SHN_COMMON: +@@ -1548,10 +1572,19 @@ static int simplify_symbols(Elf_Shdr *se + break; + + case SHN_UNDEF: +- sym[i].st_value +- = resolve_symbol(sechdrs, versindex, ++ symbol = resolve_symbol(sechdrs, versindex, + strtab + sym[i].st_name, mod); + ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ sym[i].st_value = symbol; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + /* Ok if resolved. */ + if (!IS_ERR_VALUE(sym[i].st_value)) + break; +@@ -1566,11 +1599,27 @@ static int simplify_symbols(Elf_Shdr *se + + default: + /* Divert to percpu allocation if a percpu var. 
*/ +- if (sym[i].st_shndx == pcpuindex) ++ if (sym[i].st_shndx == pcpuindex) { ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) ++ secbase = (unsigned long)mod->percpu - (unsigned long)__per_cpu_start; ++#else + secbase = (unsigned long)mod->percpu; +- else ++#endif ++ ++ } else + secbase = sechdrs[sym[i].st_shndx].sh_addr; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + sym[i].st_value += secbase; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ + break; + } + } +@@ -1622,11 +1671,12 @@ static void layout_sections(struct modul + || strncmp(secstrings + s->sh_name, + ".init", 5) == 0) + continue; +- s->sh_entsize = get_offset(&mod->core_size, s); ++ if ((s->sh_flags & SHF_WRITE) || !(s->sh_flags & SHF_ALLOC)) ++ s->sh_entsize = get_offset(&mod->core_size_rw, s); ++ else ++ s->sh_entsize = get_offset(&mod->core_size_rx, s); + DEBUGP("\t%s\n", secstrings + s->sh_name); + } +- if (m == 0) +- mod->core_text_size = mod->core_size; + } + + DEBUGP("Init section allocation order:\n"); +@@ -1640,12 +1690,13 @@ static void layout_sections(struct modul + || strncmp(secstrings + s->sh_name, + ".init", 5) != 0) + continue; +- s->sh_entsize = (get_offset(&mod->init_size, s) +- | INIT_OFFSET_MASK); ++ if ((s->sh_flags & SHF_WRITE) || !(s->sh_flags & SHF_ALLOC)) ++ s->sh_entsize = get_offset(&mod->init_size_rw, s); ++ else ++ s->sh_entsize = get_offset(&mod->init_size_rx, s); ++ s->sh_entsize |= INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + s->sh_name); + } +- if (m == 0) +- mod->init_text_size = mod->init_size; + } + } + +@@ -1785,14 +1836,31 @@ static void add_kallsyms(struct module * + { + unsigned int i; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + mod->symtab = (void *)sechdrs[symindex].sh_addr; + mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); + mod->strtab = (void *)sechdrs[strindex].sh_addr; + + /* Set types up while we still have access to sections. 
*/ +- for (i = 0; i < mod->num_symtab; i++) +- mod->symtab[i].st_info +- = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); ++ ++ for (i = 0; i < mod->num_symtab; i++) { ++ char type = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ mod->symtab[i].st_info = type; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ } ++ + } + #else + static inline void add_kallsyms(struct module *mod, +@@ -1819,16 +1887,30 @@ static void dynamic_printk_setup(struct + #endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ + } + +-static void *module_alloc_update_bounds(unsigned long size) ++static void *module_alloc_update_bounds_rw(unsigned long size) + { + void *ret = module_alloc(size); + + if (ret) { + /* Update module bounds. */ +- if ((unsigned long)ret < module_addr_min) +- module_addr_min = (unsigned long)ret; +- if ((unsigned long)ret + size > module_addr_max) +- module_addr_max = (unsigned long)ret + size; ++ if ((unsigned long)ret < module_addr_min_rw) ++ module_addr_min_rw = (unsigned long)ret; ++ if ((unsigned long)ret + size > module_addr_max_rw) ++ module_addr_max_rw = (unsigned long)ret + size; ++ } ++ return ret; ++} ++ ++static void *module_alloc_update_bounds_rx(unsigned long size) ++{ ++ void *ret = module_alloc_exec(size); ++ ++ if (ret) { ++ /* Update module bounds. 
*/ ++ if ((unsigned long)ret < module_addr_min_rx) ++ module_addr_min_rx = (unsigned long)ret; ++ if ((unsigned long)ret + size > module_addr_max_rx) ++ module_addr_max_rx = (unsigned long)ret + size; + } + return ret; + } +@@ -1856,6 +1938,10 @@ static noinline struct module *load_modu + unsigned long *mseg; + mm_segment_t old_fs; + ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif ++ + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); + if (len < sizeof(*hdr)) +@@ -2010,22 +2096,57 @@ static noinline struct module *load_modu + layout_sections(mod, hdr, sechdrs, secstrings); + + /* Do the allocs. */ +- ptr = module_alloc_update_bounds(mod->core_size); ++ ptr = module_alloc_update_bounds_rw(mod->core_size_rw); + if (!ptr) { + err = -ENOMEM; + goto free_percpu; + } +- memset(ptr, 0, mod->core_size); +- mod->module_core = ptr; ++ memset(ptr, 0, mod->core_size_rw); ++ mod->module_core_rw = ptr; + +- ptr = module_alloc_update_bounds(mod->init_size); +- if (!ptr && mod->init_size) { ++ ptr = module_alloc_update_bounds_rw(mod->init_size_rw); ++ if (!ptr && mod->init_size_rw) { + err = -ENOMEM; +- goto free_core; ++ goto free_core_rw; + } +- memset(ptr, 0, mod->init_size); +- mod->module_init = ptr; ++ memset(ptr, 0, mod->init_size_rw); ++ mod->module_init_rw = ptr; ++ ++ ptr = module_alloc_update_bounds_rx(mod->core_size_rx); ++ if (!ptr) { ++ err = -ENOMEM; ++ goto free_init_rw; ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif + ++ memset(ptr, 0, mod->core_size_rx); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ mod->module_core_rx = ptr; ++ ++ ptr = module_alloc_update_bounds_rx(mod->init_size_rx); ++ if (!ptr && mod->init_size_rx) { ++ err = -ENOMEM; ++ goto free_core_rx; ++ } ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ ++ memset(ptr, 0, mod->init_size_rx); ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ mod->module_init_rx = ptr; + /* 
Transfer each section which specifies SHF_ALLOC */ + DEBUGP("final section addresses:\n"); + for (i = 0; i < hdr->e_shnum; i++) { +@@ -2034,17 +2155,41 @@ static noinline struct module *load_modu + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + +- if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) +- dest = mod->module_init +- + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); +- else +- dest = mod->module_core + sechdrs[i].sh_entsize; ++ if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) { ++ if ((sechdrs[i].sh_flags & SHF_WRITE) || !(sechdrs[i].sh_flags & SHF_ALLOC)) ++ dest = mod->module_init_rw ++ + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); ++ else ++ dest = mod->module_init_rx ++ + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); ++ } else { ++ if ((sechdrs[i].sh_flags & SHF_WRITE) || !(sechdrs[i].sh_flags & SHF_ALLOC)) ++ dest = mod->module_core_rw + sechdrs[i].sh_entsize; ++ else ++ dest = mod->module_core_rx + sechdrs[i].sh_entsize; ++ } ++ ++ if (sechdrs[i].sh_type != SHT_NOBITS) { + +- if (sechdrs[i].sh_type != SHT_NOBITS) +- memcpy(dest, (void *)sechdrs[i].sh_addr, +- sechdrs[i].sh_size); ++#ifdef CONFIG_PAX_KERNEXEC ++ if (!(sechdrs[i].sh_flags & SHF_WRITE) && (sechdrs[i].sh_flags & SHF_ALLOC)) { ++ pax_open_kernel(cr0); ++ memcpy(dest, (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size); ++ pax_close_kernel(cr0); ++ } else ++#endif ++ ++ memcpy(dest, (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size); ++ } + /* Update sh_addr to point to copy in image. */ +- sechdrs[i].sh_addr = (unsigned long)dest; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ if (sechdrs[i].sh_flags & SHF_EXECINSTR) ++ sechdrs[i].sh_addr = ktva_ktla((unsigned long)dest); ++ else ++#endif ++ ++ sechdrs[i].sh_addr = (unsigned long)dest; + DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); + } + /* Module has been moved. */ +@@ -2144,8 +2289,8 @@ static noinline struct module *load_modu + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +- const char *strtab = (char *)sechdrs[strindex].sh_addr; + unsigned int info = sechdrs[i].sh_info; ++ strtab = (char *)sechdrs[strindex].sh_addr; + + /* Not a valid relocation section? */ + if (info >= hdr->e_shnum) +@@ -2216,12 +2361,12 @@ static noinline struct module *load_modu + * Do it before processing of module parameters, so the module + * can provide parameter accessor functions of its own. + */ +- if (mod->module_init) +- flush_icache_range((unsigned long)mod->module_init, +- (unsigned long)mod->module_init +- + mod->init_size); +- flush_icache_range((unsigned long)mod->module_core, +- (unsigned long)mod->module_core + mod->core_size); ++ if (mod->module_init_rx) ++ flush_icache_range((unsigned long)mod->module_init_rx, ++ (unsigned long)mod->module_init_rx ++ + mod->init_size_rx); ++ flush_icache_range((unsigned long)mod->module_core_rx, ++ (unsigned long)mod->module_core_rx + mod->core_size_rx); + + set_fs(old_fs); + +@@ -2266,12 +2411,16 @@ static noinline struct module *load_modu + cleanup: + kobject_del(&mod->mkobj.kobj); + kobject_put(&mod->mkobj.kobj); +- ftrace_release(mod->module_core, mod->core_size); ++ ftrace_release(mod->module_core_rx, mod->core_size_rx); + free_unload: + module_unload_free(mod); +- module_free(mod, mod->module_init); +- free_core: +- module_free(mod, mod->module_core); ++ module_free_exec(mod, mod->module_init_rx); ++ free_core_rx: ++ module_free_exec(mod, mod->module_core_rx); ++ free_init_rw: ++ module_free(mod, mod->module_init_rw); ++ free_core_rw: ++ module_free(mod, mod->module_core_rw); + free_percpu: + if (percpu) + percpu_modfree(percpu); +@@ -2294,6 +2443,9 @@ SYSCALL_DEFINE3(init_module, void __user + struct module *mod; + int ret = 0; + ++ if (gr_check_modstop()) ++ return -EPERM; ++ + /* Must have permission */ + if (!capable(CAP_SYS_MODULE)) + return -EPERM; +@@ -2349,10 +2501,12 @@ SYSCALL_DEFINE3(init_module, void __user + /* Drop initial reference. 
*/ + module_put(mod); + unwind_remove_table(mod->unwind_info, 1); +- module_free(mod, mod->module_init); +- mod->module_init = NULL; +- mod->init_size = 0; +- mod->init_text_size = 0; ++ module_free(mod, mod->module_init_rw); ++ module_free_exec(mod, mod->module_init_rx); ++ mod->module_init_rw = NULL; ++ mod->module_init_rx = NULL; ++ mod->init_size_rw = 0; ++ mod->init_size_rx = 0; + mutex_unlock(&module_mutex); + + return 0; +@@ -2360,6 +2514,13 @@ SYSCALL_DEFINE3(init_module, void __user + + static inline int within(unsigned long addr, void *start, unsigned long size) + { ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ if (ktla_ktva(addr) >= (unsigned long)start && ++ ktla_ktva(addr) < (unsigned long)start + size) ++ return 1; ++#endif ++ + return ((void *)addr >= start && (void *)addr < start + size); + } + +@@ -2383,10 +2544,14 @@ static const char *get_ksymbol(struct mo + unsigned long nextval; + + /* At worse, next value is at end of module */ +- if (within(addr, mod->module_init, mod->init_size)) +- nextval = (unsigned long)mod->module_init+mod->init_text_size; ++ if (within(addr, mod->module_init_rx, mod->init_size_rx)) ++ nextval = (unsigned long)mod->module_init_rx+mod->init_size_rx; ++ else if (within(addr, mod->module_init_rw, mod->init_size_rw)) ++ nextval = (unsigned long)mod->module_init_rw+mod->init_size_rw; ++ else if (within(addr, mod->module_core_rx, mod->core_size_rx)) ++ nextval = (unsigned long)mod->module_core_rx+mod->core_size_rx; + else +- nextval = (unsigned long)mod->module_core+mod->core_text_size; ++ nextval = (unsigned long)mod->module_core_rw+mod->core_size_rw; + + /* Scan for closest preceeding symbol, and next symbol. (ELF + starts real symbols at 1). 
*/ +@@ -2431,8 +2596,10 @@ const char *module_address_lookup(unsign + + preempt_disable(); + list_for_each_entry_rcu(mod, &modules, list) { +- if (within(addr, mod->module_init, mod->init_size) +- || within(addr, mod->module_core, mod->core_size)) { ++ if (within(addr, mod->module_init_rx, mod->init_size_rx) || ++ within(addr, mod->module_init_rw, mod->init_size_rw) || ++ within(addr, mod->module_core_rx, mod->core_size_rx) || ++ within(addr, mod->module_core_rw, mod->core_size_rw)) { + if (modname) + *modname = mod->name; + ret = get_ksymbol(mod, addr, size, offset); +@@ -2454,8 +2621,10 @@ int lookup_module_symbol_name(unsigned l + + preempt_disable(); + list_for_each_entry_rcu(mod, &modules, list) { +- if (within(addr, mod->module_init, mod->init_size) || +- within(addr, mod->module_core, mod->core_size)) { ++ if (within(addr, mod->module_init_rx, mod->init_size_rx) || ++ within(addr, mod->module_init_rw, mod->init_size_rw) || ++ within(addr, mod->module_core_rx, mod->core_size_rx) || ++ within(addr, mod->module_core_rw, mod->core_size_rw)) { + const char *sym; + + sym = get_ksymbol(mod, addr, NULL, NULL); +@@ -2478,8 +2647,10 @@ int lookup_module_symbol_attrs(unsigned + + preempt_disable(); + list_for_each_entry_rcu(mod, &modules, list) { +- if (within(addr, mod->module_init, mod->init_size) || +- within(addr, mod->module_core, mod->core_size)) { ++ if (within(addr, mod->module_init_rx, mod->init_size_rx) || ++ within(addr, mod->module_init_rw, mod->init_size_rw) || ++ within(addr, mod->module_core_rx, mod->core_size_rx) || ++ within(addr, mod->module_core_rw, mod->core_size_rw)) { + const char *sym; + + sym = get_ksymbol(mod, addr, size, offset); +@@ -2613,7 +2784,7 @@ static int m_show(struct seq_file *m, vo + char buf[8]; + + seq_printf(m, "%s %u", +- mod->name, mod->init_size + mod->core_size); ++ mod->name, mod->init_size_rx + mod->init_size_rw + mod->core_size_rx + mod->core_size_rw); + print_unload_info(m, mod); + + /* Informative for users. 
*/ +@@ -2622,7 +2793,7 @@ static int m_show(struct seq_file *m, vo + mod->state == MODULE_STATE_COMING ? "Loading": + "Live"); + /* Used by oprofile and other similar tools. */ +- seq_printf(m, " 0x%p", mod->module_core); ++ seq_printf(m, " 0x%p 0x%p", mod->module_core_rx, mod->module_core_rw); + + /* Taints info */ + if (mod->taints) +@@ -2698,8 +2869,8 @@ int is_module_address(unsigned long addr + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { +- if (within(addr, mod->module_core, mod->core_size)) { +- preempt_enable(); ++ if (within(addr, mod->module_core_rx, mod->core_size_rx) || ++ within(addr, mod->module_core_rw, mod->core_size_rw)) { + return 1; + } + } +@@ -2715,12 +2886,16 @@ struct module *__module_text_address(uns + { + struct module *mod; + +- if (addr < module_addr_min || addr > module_addr_max) ++#ifdef CONFIG_X86_32 ++ addr = ktla_ktva(addr); ++#endif ++ ++ if (addr < module_addr_min_rx || addr > module_addr_max_rx) + return NULL; + + list_for_each_entry_rcu(mod, &modules, list) +- if (within(addr, mod->module_init, mod->init_text_size) +- || within(addr, mod->module_core, mod->core_text_size)) ++ if (within(addr, mod->module_init_rx, mod->init_size_rx) ++ || within(addr, mod->module_core_rx, mod->core_size_rx)) + return mod; + return NULL; + } +diff -urNp a/kernel/mutex.c b/kernel/mutex.c +--- a/kernel/mutex.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/mutex.c 2009-05-24 18:10:25.321959464 -0700 +@@ -83,7 +83,7 @@ __mutex_lock_slowpath(atomic_t *lock_cou + * + * This function is similar to (but not equivalent to) down(). 
+ */ +-void inline __sched mutex_lock(struct mutex *lock) ++inline void __sched mutex_lock(struct mutex *lock) + { + might_sleep(); + /* +diff -urNp a/kernel/panic.c b/kernel/panic.c +--- a/kernel/panic.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/panic.c 2009-05-24 18:10:25.321959464 -0700 +@@ -367,6 +367,8 @@ EXPORT_SYMBOL(warn_slowpath); + */ + void __stack_chk_fail(void) + { ++ print_symbol("stack corrupted in: %s\n", (unsigned long)__builtin_return_address(0)); ++ dump_stack(); + panic("stack-protector: Kernel stack is corrupted"); + } + EXPORT_SYMBOL(__stack_chk_fail); +diff -urNp a/kernel/pid.c b/kernel/pid.c +--- a/kernel/pid.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/pid.c 2009-05-24 18:10:25.322959523 -0700 +@@ -33,6 +33,7 @@ + #include <linux/rculist.h> + #include <linux/bootmem.h> + #include <linux/hash.h> ++#include <linux/security.h> + #include <linux/pid_namespace.h> + #include <linux/init_task.h> + #include <linux/syscalls.h> +@@ -45,7 +46,7 @@ struct pid init_struct_pid = INIT_STRUCT + + int pid_max = PID_MAX_DEFAULT; + +-#define RESERVED_PIDS 300 ++#define RESERVED_PIDS 500 + + int pid_max_min = RESERVED_PIDS + 1; + int pid_max_max = PID_MAX_LIMIT; +@@ -381,7 +382,14 @@ EXPORT_SYMBOL(pid_task); + struct task_struct *find_task_by_pid_type_ns(int type, int nr, + struct pid_namespace *ns) + { +- return pid_task(find_pid_ns(nr, ns), type); ++ struct task_struct *task; ++ ++ task = pid_task(find_pid_ns(nr, ns), type); ++ ++ if (gr_pid_is_chrooted(task)) ++ return NULL; ++ ++ return task; + } + + EXPORT_SYMBOL(find_task_by_pid_type_ns); +diff -urNp a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c +--- a/kernel/posix-cpu-timers.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/posix-cpu-timers.c 2009-05-24 18:10:25.322959523 -0700 +@@ -6,6 +6,7 @@ + #include <linux/posix-timers.h> + #include <linux/errno.h> + #include <linux/math64.h> ++#include <linux/security.h> + #include <asm/uaccess.h> + #include <linux/kernel_stat.h> + +@@ 
-1158,6 +1159,7 @@ static void check_process_timers(struct + __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + return; + } ++ gr_learn_resource(tsk, RLIMIT_CPU, psecs, 1); + if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) { + /* + * At the soft limit, send a SIGXCPU every second. +@@ -1382,17 +1384,17 @@ void run_posix_cpu_timers(struct task_st + * timer call will interfere. + */ + list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { +- int firing; ++ int __firing; + spin_lock(&timer->it_lock); + list_del_init(&timer->it.cpu.entry); +- firing = timer->it.cpu.firing; ++ __firing = timer->it.cpu.firing; + timer->it.cpu.firing = 0; + /* + * The firing flag is -1 if we collided with a reset + * of the timer, which already reported this + * almost-firing as an overrun. So don't generate an event. + */ +- if (likely(firing >= 0)) { ++ if (likely(__firing >= 0)) { + cpu_timer_fire(timer); + } + spin_unlock(&timer->it_lock); +diff -urNp a/kernel/power/poweroff.c b/kernel/power/poweroff.c +--- a/kernel/power/poweroff.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/power/poweroff.c 2009-05-24 18:10:25.323960419 -0700 +@@ -37,7 +37,7 @@ static struct sysrq_key_op sysrq_powerof + .enable_mask = SYSRQ_ENABLE_BOOT, + }; + +-static int pm_sysrq_init(void) ++static int __init pm_sysrq_init(void) + { + register_sysrq_key('o', &sysrq_poweroff_op); + return 0; +diff -urNp a/kernel/printk.c b/kernel/printk.c +--- a/kernel/printk.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/printk.c 2009-05-24 18:10:25.324959779 -0700 +@@ -254,6 +254,11 @@ int do_syslog(int type, char __user *buf + char c; + int error = 0; + ++#ifdef CONFIG_GRKERNSEC_DMESG ++ if (grsec_enable_dmesg && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++#endif ++ + error = security_syslog(type); + if (error) + return error; +diff -urNp a/kernel/ptrace.c b/kernel/ptrace.c +--- a/kernel/ptrace.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/ptrace.c 2009-05-24 18:10:25.325959348 -0700 +@@ -132,12 
+132,12 @@ int __ptrace_may_access(struct task_stru + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || +- (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) ++ (current->gid != task->gid)) && !capable_nolog(CAP_SYS_PTRACE)) + return -EPERM; + smp_rmb(); + if (task->mm) + dumpable = get_dumpable(task->mm); +- if (!dumpable && !capable(CAP_SYS_PTRACE)) ++ if (!dumpable && !capable_nolog(CAP_SYS_PTRACE)) + return -EPERM; + + return security_ptrace_may_access(task, mode); +@@ -193,7 +193,7 @@ repeat: + + /* Go */ + task->ptrace |= PT_PTRACED; +- if (capable(CAP_SYS_PTRACE)) ++ if (capable_nolog(CAP_SYS_PTRACE)) + task->ptrace |= PT_PTRACE_CAP; + + __ptrace_link(task, current); +@@ -582,6 +582,11 @@ SYSCALL_DEFINE4(ptrace, long, request, l + if (ret < 0) + goto out_put_task_struct; + ++ if (gr_handle_ptrace(child, request)) { ++ ret = -EPERM; ++ goto out_put_task_struct; ++ } ++ + ret = arch_ptrace(child, request, addr, data); + if (ret < 0) + goto out_put_task_struct; +diff -urNp a/kernel/relay.c b/kernel/relay.c +--- a/kernel/relay.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/relay.c 2009-05-24 18:10:25.326959616 -0700 +@@ -1292,7 +1292,7 @@ static int subbuf_splice_actor(struct fi + return 0; + + ret = *nonpad_ret = splice_to_pipe(pipe, &spd); +- if (ret < 0 || ret < total_len) ++ if ((int)ret < 0 || ret < total_len) + return ret; + + if (read_start + ret == nonpad_end) +diff -urNp a/kernel/resource.c b/kernel/resource.c +--- a/kernel/resource.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/resource.c 2009-05-24 18:10:25.327959395 -0700 +@@ -132,8 +132,18 @@ static const struct file_operations proc + + static int __init ioresources_init(void) + { ++#ifdef CONFIG_GRKERNSEC_PROC_ADD ++#ifdef CONFIG_GRKERNSEC_PROC_USER ++ proc_create("ioports", S_IRUSR, NULL, &proc_ioports_operations); ++ proc_create("iomem", S_IRUSR, NULL, &proc_iomem_operations); ++#elif defined(CONFIG_GRKERNSEC_PROC_USERGROUP) ++ 
proc_create("ioports", S_IRUSR | S_IRGRP, NULL, &proc_ioports_operations); ++ proc_create("iomem", S_IRUSR | S_IRGRP, NULL, &proc_iomem_operations); ++#endif ++#else + proc_create("ioports", 0, NULL, &proc_ioports_operations); + proc_create("iomem", 0, NULL, &proc_iomem_operations); ++#endif + return 0; + } + __initcall(ioresources_init); +diff -urNp a/kernel/sched.c b/kernel/sched.c +--- a/kernel/sched.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/sched.c 2009-05-24 18:10:25.330959500 -0700 +@@ -5094,7 +5094,8 @@ SYSCALL_DEFINE1(nice, int, increment) + if (nice > 19) + nice = 19; + +- if (increment < 0 && !can_nice(current, nice)) ++ if (increment < 0 && (!can_nice(current, nice) || ++ gr_handle_chroot_nice())) + return -EPERM; + + retval = security_task_setnice(current, nice); +@@ -6359,7 +6360,7 @@ static struct ctl_table sd_ctl_dir[] = { + .procname = "sched_domain", + .mode = 0555, + }, +- {0, }, ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static struct ctl_table sd_ctl_root[] = { +@@ -6369,7 +6370,7 @@ static struct ctl_table sd_ctl_root[] = + .mode = 0555, + .child = sd_ctl_dir, + }, +- {0, }, ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static struct ctl_table *sd_alloc_ctl_entry(int n) +diff -urNp a/kernel/signal.c b/kernel/signal.c +--- a/kernel/signal.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/signal.c 2009-05-24 18:10:25.331960746 -0700 +@@ -596,6 +596,9 @@ static int check_kill_permission(int sig + } + } + ++ if (gr_handle_signal(t, sig)) ++ return -EPERM; ++ + return security_task_kill(t, info, sig, 0); + } + +@@ -887,8 +890,8 @@ static void print_fatal_signal(struct pt + for (i = 0; i < 16; i++) { + unsigned char insn; + +- __get_user(insn, (unsigned char *)(regs->ip + i)); +- printk("%02x ", insn); ++ if (!get_user(insn, (unsigned char __user *)(regs->ip + i))) ++ printk("%02x ", insn); + } + } + #endif +@@ -911,7 +914,7 @@ __group_send_sig_info(int sig, struct si + return 
send_signal(sig, info, p, 1); + } + +-static int ++int + specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) + { + return send_signal(sig, info, t, 0); +@@ -951,6 +954,9 @@ force_sig_info(int sig, struct siginfo * + ret = specific_send_sig_info(sig, info, t); + spin_unlock_irqrestore(&t->sighand->siglock, flags); + ++ gr_log_signal(sig, t); ++ gr_handle_crash(t, sig); ++ + return ret; + } + +@@ -1021,6 +1027,8 @@ int group_send_sig_info(int sig, struct + ret = __group_send_sig_info(sig, info, p); + unlock_task_sighand(p, &flags); + } ++ if (!ret) ++ gr_log_signal(sig, p); + } + + return ret; +diff -urNp a/kernel/softirq.c b/kernel/softirq.c +--- a/kernel/softirq.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/softirq.c 2009-05-24 18:10:25.333141820 -0700 +@@ -463,9 +463,9 @@ void tasklet_kill(struct tasklet_struct + printk("Attempt to kill tasklet from interrupt\n"); + + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { +- do ++ do { + yield(); +- while (test_bit(TASKLET_STATE_SCHED, &t->state)); ++ } while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } + tasklet_unlock_wait(t); + clear_bit(TASKLET_STATE_SCHED, &t->state); +diff -urNp a/kernel/sys.c b/kernel/sys.c +--- a/kernel/sys.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/sys.c 2009-05-24 18:10:25.333968963 -0700 +@@ -125,6 +125,12 @@ static int set_one_prio(struct task_stru + error = -EACCES; + goto out; + } ++ ++ if (gr_handle_chroot_setpriority(p, niceval)) { ++ error = -EACCES; ++ goto out; ++ } ++ + no_nice = security_task_setnice(p, niceval); + if (no_nice) { + error = no_nice; +@@ -181,10 +187,10 @@ SYSCALL_DEFINE3(setpriority, int, which, + if ((who != current->uid) && !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ + +- do_each_thread(g, p) ++ do_each_thread(g, p) { + if (p->uid == who) + error = set_one_prio(p, niceval, error); +- while_each_thread(g, p); ++ } while_each_thread(g, p); + if (who != current->uid) + 
free_uid(user); /* For find_user() */ + break; +@@ -243,13 +249,13 @@ SYSCALL_DEFINE2(getpriority, int, which, + if ((who != current->uid) && !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ + +- do_each_thread(g, p) ++ do_each_thread(g, p) { + if (p->uid == who) { + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; + } +- while_each_thread(g, p); ++ } while_each_thread(g, p); + if (who != current->uid) + free_uid(user); /* for find_user() */ + break; +@@ -500,6 +506,10 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, g + else + return -EPERM; + } ++ ++ if (gr_check_group_change(new_rgid, new_egid, -1)) ++ return -EPERM; ++ + if (new_egid != old_egid) { + set_dumpable(current->mm, suid_dumpable); + smp_wmb(); +@@ -507,6 +517,9 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, g + if (rgid != (gid_t) -1 || + (egid != (gid_t) -1 && egid != old_rgid)) + current->sgid = new_egid; ++ ++ gr_set_role_label(current, current->uid, new_rgid); ++ + current->fsgid = new_egid; + current->egid = new_egid; + current->gid = new_rgid; +@@ -529,11 +542,17 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) + if (retval) + return retval; + ++ if (gr_check_group_change(gid, gid, gid)) ++ return -EPERM; ++ + if (capable(CAP_SETGID)) { + if (old_egid != gid) { + set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } ++ ++ gr_set_role_label(current, current->uid, gid); ++ + current->gid = current->egid = current->sgid = current->fsgid = gid; + } else if ((gid == current->gid) || (gid == current->sgid)) { + if (old_egid != gid) { +@@ -571,6 +590,9 @@ static int set_user(uid_t new_ruid, int + set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } ++ ++ gr_set_role_label(current, new_ruid, current->gid); ++ + current->uid = new_ruid; + return 0; + } +@@ -620,6 +642,9 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, u + return -EPERM; + } + ++ if (gr_check_user_change(new_ruid, new_euid, -1)) ++ return -EPERM; ++ + if (new_ruid != old_ruid && set_user(new_ruid, 
new_euid != old_euid) < 0) + return -EAGAIN; + +@@ -666,6 +691,12 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) + old_suid = current->suid; + new_suid = old_suid; + ++ if (gr_check_crash_uid(uid)) ++ return -EPERM; ++ ++ if (gr_check_user_change(uid, uid, uid)) ++ return -EPERM; ++ + if (capable(CAP_SETUID)) { + if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) + return -EAGAIN; +@@ -713,6 +744,10 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, + (suid != current->euid) && (suid != current->suid)) + return -EPERM; + } ++ ++ if (gr_check_user_change(ruid, euid, -1)) ++ return -EPERM; ++ + if (ruid != (uid_t) -1) { + if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + return -EAGAIN; +@@ -767,6 +802,10 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, + (sgid != current->egid) && (sgid != current->sgid)) + return -EPERM; + } ++ ++ if (gr_check_group_change(rgid, egid, -1)) ++ return -EPERM; ++ + if (egid != (gid_t) -1) { + if (egid != current->egid) { + set_dumpable(current->mm, suid_dumpable); +@@ -775,8 +814,10 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, + current->egid = egid; + } + current->fsgid = current->egid; +- if (rgid != (gid_t) -1) ++ if (rgid != (gid_t) -1) { ++ gr_set_role_label(current, current->uid, rgid); + current->gid = rgid; ++ } + if (sgid != (gid_t) -1) + current->sgid = sgid; + +@@ -811,6 +852,9 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) + if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) + return old_fsuid; + ++ if (gr_check_user_change(-1, -1, uid)) ++ return old_fsuid; ++ + if (uid == current->uid || uid == current->euid || + uid == current->suid || uid == current->fsuid || + capable(CAP_SETUID)) { +@@ -843,6 +887,9 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) + if (gid == current->gid || gid == current->egid || + gid == current->sgid || gid == current->fsgid || + capable(CAP_SETGID)) { ++ if (gr_check_group_change(-1, -1, gid)) ++ return old_fsgid; ++ + if (gid != old_fsgid) { + set_dumpable(current->mm, suid_dumpable); 
+ smp_wmb(); +@@ -914,7 +961,10 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid + write_lock_irq(&tasklist_lock); + + err = -ESRCH; +- p = find_task_by_vpid(pid); ++ /* grsec: replaced find_task_by_vpid with equivalent call which ++ lacks the chroot restriction ++ */ ++ p = pid_task(find_pid_ns(pid, current->nsproxy->pid_ns), PIDTYPE_PID); + if (!p) + goto out; + +@@ -1640,7 +1690,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsi + error = get_dumpable(current->mm); + break; + case PR_SET_DUMPABLE: +- if (arg2 < 0 || arg2 > 1) { ++ if (arg2 > 1) { + error = -EINVAL; + break; + } +diff -urNp a/kernel/sysctl.c b/kernel/sysctl.c +--- a/kernel/sysctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/sysctl.c 2009-05-24 18:10:25.336211971 -0700 +@@ -61,6 +61,13 @@ + static int deprecated_sysctl_warning(struct __sysctl_args *args); + + #if defined(CONFIG_SYSCTL) ++#include <linux/grsecurity.h> ++#include <linux/grinternal.h> ++ ++extern __u32 gr_handle_sysctl(const ctl_table *table, const int op); ++extern int gr_handle_sysctl_mod(const char *dirname, const char *name, ++ const int op); ++extern int gr_handle_chroot_sysctl(const int op); + + /* External variables not in a header file. 
*/ + extern int C_A_D; +@@ -152,6 +159,7 @@ static int proc_do_cad_pid(struct ctl_ta + static int proc_taint(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #endif ++extern ctl_table grsecurity_table[]; + + static struct ctl_table root_table[]; + static struct ctl_table_root sysctl_table_root; +@@ -184,6 +192,21 @@ extern struct ctl_table epoll_table[]; + int sysctl_legacy_va_layout; + #endif + ++#ifdef CONFIG_PAX_SOFTMODE ++static ctl_table pax_table[] = { ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "softmode", ++ .data = &pax_softmode, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0600, ++ .proc_handler = &proc_dointvec, ++ }, ++ ++ { .ctl_name = 0 } ++}; ++#endif ++ + extern int prove_locking; + extern int lock_stat; + +@@ -856,6 +879,25 @@ static struct ctl_table kern_table[] = { + .proc_handler = &scan_unevictable_handler, + }, + #endif ++ ++#if defined(CONFIG_GRKERNSEC_SYSCTL) || defined(CONFIG_GRKERNSEC_MODSTOP) ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "grsecurity", ++ .mode = 0500, ++ .child = grsecurity_table, ++ }, ++#endif ++ ++#ifdef CONFIG_PAX_SOFTMODE ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "pax", ++ .mode = 0500, ++ .child = pax_table, ++ }, ++#endif ++ + /* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt +@@ -1562,6 +1604,8 @@ static int do_sysctl_strategy(struct ctl + return 0; + } + ++static int sysctl_perm_nochk(struct ctl_table_root *root, struct ctl_table *table, int op); ++ + static int parse_table(int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, +@@ -1580,7 +1624,7 @@ repeat: + if (n == table->ctl_name) { + int error; + if (table->child) { +- if (sysctl_perm(root, table, MAY_EXEC)) ++ if (sysctl_perm_nochk(root, table, MAY_EXEC)) + return -EPERM; + name++; + nlen--; +@@ -1665,6 +1709,33 @@ int sysctl_perm(struct ctl_table_root *r 
+ int error; + int mode; + ++ if (table->parent != NULL && table->parent->procname != NULL && ++ table->procname != NULL && ++ gr_handle_sysctl_mod(table->parent->procname, table->procname, op)) ++ return -EACCES; ++ if (gr_handle_chroot_sysctl(op)) ++ return -EACCES; ++ error = gr_handle_sysctl(table, op); ++ if (error) ++ return error; ++ ++ error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); ++ if (error) ++ return error; ++ ++ if (root->permissions) ++ mode = root->permissions(root, current->nsproxy, table); ++ else ++ mode = table->mode; ++ ++ return test_perm(mode, op); ++} ++ ++int sysctl_perm_nochk(struct ctl_table_root *root, struct ctl_table *table, int op) ++{ ++ int error; ++ int mode; ++ + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); + if (error) + return error; +diff -urNp a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c +--- a/kernel/time/tick-broadcast.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/time/tick-broadcast.c 2009-05-24 18:10:25.336989531 -0700 +@@ -114,7 +114,7 @@ int tick_device_uses_broadcast(struct cl + * then clear the broadcast bit. + */ + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { +- int cpu = smp_processor_id(); ++ cpu = smp_processor_id(); + + cpu_clear(cpu, tick_broadcast_mask); + tick_broadcast_clear_oneshot(cpu); +diff -urNp a/kernel/time.c b/kernel/time.c +--- a/kernel/time.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/time.c 2009-05-24 18:10:25.338083449 -0700 +@@ -92,6 +92,9 @@ SYSCALL_DEFINE1(stime, time_t __user *, + return err; + + do_settimeofday(&tv); ++ ++ gr_log_timechange(); ++ + return 0; + } + +@@ -200,6 +203,8 @@ SYSCALL_DEFINE2(settimeofday, struct tim + return -EFAULT; + } + ++ gr_log_timechange(); ++ + return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? 
&new_tz : NULL); + } + +@@ -238,7 +243,7 @@ EXPORT_SYMBOL(current_fs_time); + * Avoid unnecessary multiplications/divisions in the + * two most common HZ cases: + */ +-unsigned int inline jiffies_to_msecs(const unsigned long j) ++inline unsigned int jiffies_to_msecs(const unsigned long j) + { + #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +@@ -254,7 +259,7 @@ unsigned int inline jiffies_to_msecs(con + } + EXPORT_SYMBOL(jiffies_to_msecs); + +-unsigned int inline jiffies_to_usecs(const unsigned long j) ++inline unsigned int jiffies_to_usecs(const unsigned long j) + { + #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) + return (USEC_PER_SEC / HZ) * j; +diff -urNp a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c +--- a/kernel/utsname_sysctl.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/kernel/utsname_sysctl.c 2009-05-24 18:10:25.338083449 -0700 +@@ -123,7 +123,7 @@ static struct ctl_table uts_kern_table[] + .proc_handler = proc_do_uts_string, + .strategy = sysctl_uts_string, + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static struct ctl_table uts_root_table[] = { +@@ -133,7 +133,7 @@ static struct ctl_table uts_root_table[] + .mode = 0555, + .child = uts_kern_table, + }, +- {} ++ { 0, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL } + }; + + static int __init utsname_sysctl_init(void) +diff -urNp a/lib/radix-tree.c b/lib/radix-tree.c +--- a/lib/radix-tree.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/lib/radix-tree.c 2009-05-24 18:10:25.339179812 -0700 +@@ -81,7 +81,7 @@ struct radix_tree_preload { + int nr; + struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; + }; +-DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; ++DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads); + + static inline gfp_t root_gfp_mask(struct radix_tree_root *root) + { +diff -urNp a/localversion-grsec b/localversion-grsec +--- a/localversion-grsec 1969-12-31 16:00:00.000000000 -0800 
++++ b/localversion-grsec 2009-05-24 18:10:25.339179812 -0700 +@@ -0,0 +1 @@ ++-grsec +diff -urNp a/mm/filemap.c b/mm/filemap.c +--- a/mm/filemap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/filemap.c 2009-05-24 18:10:25.340119811 -0700 +@@ -1609,7 +1609,7 @@ int generic_file_mmap(struct file * file + struct address_space *mapping = file->f_mapping; + + if (!mapping->a_ops->readpage) +- return -ENOEXEC; ++ return -ENODEV; + file_accessed(file); + vma->vm_ops = &generic_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; +@@ -1970,6 +1970,7 @@ inline int generic_write_checks(struct f + *pos = i_size_read(inode); + + if (limit != RLIM_INFINITY) { ++ gr_learn_resource(current, RLIMIT_FSIZE,*pos, 0); + if (*pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; +diff -urNp a/mm/fremap.c b/mm/fremap.c +--- a/mm/fremap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/fremap.c 2009-05-24 18:10:25.341118891 -0700 +@@ -153,6 +153,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsign + retry: + vma = find_vma(mm, start); + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma && (mm->pax_flags & MF_PAX_SEGMEXEC) && (vma->vm_flags & VM_MAYEXEC)) ++ goto out; ++#endif ++ + /* + * Make sure the vma is shared, that it supports prefaulting, + * and that the remapped range is valid and fully within +diff -urNp a/mm/hugetlb.c b/mm/hugetlb.c +--- a/mm/hugetlb.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/hugetlb.c 2009-05-24 18:10:25.342211553 -0700 +@@ -1832,6 +1832,26 @@ static int unmap_ref_private(struct mm_s + return 1; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++static void pax_mirror_huge_pte(struct vm_area_struct *vma, unsigned long address, struct page *page_m) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ struct vm_area_struct *vma_m; ++ unsigned long address_m; ++ pte_t *ptep_m; ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (!vma_m) ++ return; ++ ++ BUG_ON(address >= SEGMEXEC_TASK_SIZE); ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ ptep_m = huge_pte_offset(mm, address_m & HPAGE_MASK); 
++ get_page(page_m); ++ set_huge_pte_at(mm, address_m, ptep_m, make_huge_pte(vma_m, page_m, 0)); ++} ++#endif ++ + static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, pte_t pte, + struct page *pagecache_page) +@@ -1903,6 +1923,11 @@ retry_avoidcopy: + huge_ptep_clear_flush(vma, address, ptep); + set_huge_pte_at(mm, address, ptep, + make_huge_pte(vma, new_page, 1)); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_huge_pte(vma, address, new_page); ++#endif ++ + /* Make the old page be freed below */ + new_page = old_page; + } +@@ -2012,6 +2037,10 @@ retry: + && (vma->vm_flags & VM_SHARED))); + set_huge_pte_at(mm, address, ptep, new_pte); + ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_huge_pte(vma, address, page); ++#endif ++ + if (write_access && !(vma->vm_flags & VM_SHARED)) { + /* Optimization, do the COW without a second fault */ + ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page); +@@ -2040,6 +2069,28 @@ int hugetlb_fault(struct mm_struct *mm, + static DEFINE_MUTEX(hugetlb_instantiation_mutex); + struct hstate *h = hstate_vma(vma); + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) { ++ unsigned long address_m; ++ ++ if (vma->vm_start > vma_m->vm_start) { ++ address_m = address; ++ address -= SEGMEXEC_TASK_SIZE; ++ vma = vma_m; ++ h = hstate_vma(vma); ++ } else ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ ++ if (!huge_pte_alloc(mm, address_m, huge_page_size(h))) ++ return VM_FAULT_OOM; ++ address_m &= HPAGE_MASK; ++ unmap_hugepage_range(vma, address_m, address_m + HPAGE_SIZE, NULL); ++ } ++#endif ++ + ptep = huge_pte_alloc(mm, address, huge_page_size(h)); + if (!ptep) + return VM_FAULT_OOM; +diff -urNp a/mm/madvise.c b/mm/madvise.c +--- a/mm/madvise.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/madvise.c 2009-05-24 18:10:25.343212100 -0700 +@@ -43,6 +43,10 @@ static long madvise_behavior(struct vm_a + pgoff_t pgoff; + int new_flags 
= vma->vm_flags; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++#endif ++ + switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; +@@ -92,6 +96,13 @@ success: + /* + * vm_flags is protected by the mmap_sem held in write mode. + */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) ++ vma_m->vm_flags = new_flags & ~(VM_WRITE | VM_MAYWRITE | VM_ACCOUNT); ++#endif ++ + vma->vm_flags = new_flags; + + out: +@@ -236,6 +247,17 @@ madvise_vma(struct vm_area_struct *vma, + + case MADV_DONTNEED: + error = madvise_dontneed(vma, prev, start, end); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (!error) { ++ struct vm_area_struct *vma_m, *prev_m; ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) ++ error = madvise_dontneed(vma_m, &prev_m, start + SEGMEXEC_TASK_SIZE, end + SEGMEXEC_TASK_SIZE); ++ } ++#endif ++ + break; + + default: +@@ -308,6 +330,16 @@ SYSCALL_DEFINE3(madvise, unsigned long, + if (end < start) + goto out; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { ++ if (end > SEGMEXEC_TASK_SIZE) ++ goto out; ++ } else ++#endif ++ ++ if (end > TASK_SIZE) ++ goto out; ++ + error = 0; + if (end == start) + goto out; +diff -urNp a/mm/memory.c b/mm/memory.c +--- a/mm/memory.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/memory.c 2009-05-24 18:10:25.344336327 -0700 +@@ -47,6 +47,7 @@ + #include <linux/pagemap.h> + #include <linux/rmap.h> + #include <linux/module.h> ++#include <linux/security.h> + #include <linux/delayacct.h> + #include <linux/init.h> + #include <linux/writeback.h> +@@ -1151,11 +1152,11 @@ int __get_user_pages(struct task_struct + vm_flags &= force ? 
(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + i = 0; + +- do { ++ while (len) { + struct vm_area_struct *vma; + unsigned int foll_flags; + +- vma = find_extend_vma(mm, start); ++ vma = find_vma(mm, start); + if (!vma && in_gate_area(tsk, start)) { + unsigned long pg = start & PAGE_MASK; + struct vm_area_struct *gate_vma = get_gate_vma(tsk); +@@ -1197,7 +1198,7 @@ int __get_user_pages(struct task_struct + continue; + } + +- if (!vma || ++ if (!vma || start < vma->vm_start || + (vma->vm_flags & (VM_IO | VM_PFNMAP)) || + (!ignore && !(vm_flags & vma->vm_flags))) + return i ? : -EFAULT; +@@ -1271,7 +1272,7 @@ int __get_user_pages(struct task_struct + start += PAGE_SIZE; + len--; + } while (len && start < vma->vm_end); +- } while (len); ++ } + return i; + } + +@@ -1760,6 +1761,186 @@ static inline void cow_user_page(struct + copy_user_highpage(dst, src, va, vma); + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++static void pax_unmap_mirror_pte(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ spinlock_t *ptl; ++ pte_t *pte, entry; ++ ++ pte = pte_offset_map_lock(mm, pmd, address, &ptl); ++ entry = *pte; ++ if (!pte_present(entry)) { ++ if (!pte_none(entry)) { ++ BUG_ON(pte_file(entry)); ++ free_swap_and_cache(pte_to_swp_entry(entry)); ++ pte_clear_not_present_full(mm, address, pte, 0); ++ } ++ } else { ++ struct page *page; ++ ++ flush_cache_page(vma, address, pte_pfn(entry)); ++ entry = ptep_clear_flush(vma, address, pte); ++ BUG_ON(pte_dirty(entry)); ++ page = vm_normal_page(vma, address, entry); ++ if (page) { ++ update_hiwater_rss(mm); ++ if (PageAnon(page)) ++ dec_mm_counter(mm, anon_rss); ++ else ++ dec_mm_counter(mm, file_rss); ++ page_remove_rmap(page, vma); ++ page_cache_release(page); ++ } ++ } ++ pte_unmap_unlock(pte, ptl); ++} ++ ++/* PaX: if vma is mirrored, synchronize the mirror's PTE ++ * ++ * the ptl of the lower mapped page is held on entry and is not released on exit ++ * or inside to ensure atomic 
changes to the PTE states (swapout, mremap, munmap, etc) ++ */ ++static void pax_mirror_anon_pte(struct vm_area_struct *vma, unsigned long address, struct page *page_m, spinlock_t *ptl) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long address_m; ++ spinlock_t *ptl_m; ++ struct vm_area_struct *vma_m; ++ pmd_t *pmd_m; ++ pte_t *pte_m, entry_m; ++ ++ BUG_ON(!page_m || !PageAnon(page_m)); ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (!vma_m) ++ return; ++ ++ BUG_ON(!PageLocked(page_m)); ++ BUG_ON(address >= SEGMEXEC_TASK_SIZE); ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ pmd_m = pmd_offset(pud_offset(pgd_offset(mm, address_m), address_m), address_m); ++ pte_m = pte_offset_map_nested(pmd_m, address_m); ++ ptl_m = pte_lockptr(mm, pmd_m); ++ if (ptl != ptl_m) { ++ spin_lock_nested(ptl_m, SINGLE_DEPTH_NESTING); ++ if (!pte_none(*pte_m)) ++ goto out; ++ } ++ ++ entry_m = pfn_pte(page_to_pfn(page_m), vma_m->vm_page_prot); ++ page_cache_get(page_m); ++ page_add_anon_rmap(page_m, vma_m, address_m); ++ inc_mm_counter(mm, anon_rss); ++ set_pte_at(mm, address_m, pte_m, entry_m); ++ update_mmu_cache(vma_m, address_m, entry_m); ++out: ++ if (ptl != ptl_m) ++ spin_unlock(ptl_m); ++ pte_unmap_nested(pte_m); ++ unlock_page(page_m); ++} ++ ++void pax_mirror_file_pte(struct vm_area_struct *vma, unsigned long address, struct page *page_m, spinlock_t *ptl) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long address_m; ++ spinlock_t *ptl_m; ++ struct vm_area_struct *vma_m; ++ pmd_t *pmd_m; ++ pte_t *pte_m, entry_m; ++ ++ BUG_ON(!page_m || PageAnon(page_m)); ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (!vma_m) ++ return; ++ ++ BUG_ON(address >= SEGMEXEC_TASK_SIZE); ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ pmd_m = pmd_offset(pud_offset(pgd_offset(mm, address_m), address_m), address_m); ++ pte_m = pte_offset_map_nested(pmd_m, address_m); ++ ptl_m = pte_lockptr(mm, pmd_m); ++ if (ptl != ptl_m) { ++ spin_lock_nested(ptl_m, SINGLE_DEPTH_NESTING); ++ if 
(!pte_none(*pte_m)) ++ goto out; ++ } ++ ++ entry_m = pfn_pte(page_to_pfn(page_m), vma_m->vm_page_prot); ++ page_cache_get(page_m); ++ page_add_file_rmap(page_m); ++ inc_mm_counter(mm, file_rss); ++ set_pte_at(mm, address_m, pte_m, entry_m); ++ update_mmu_cache(vma_m, address_m, entry_m); ++out: ++ if (ptl != ptl_m) ++ spin_unlock(ptl_m); ++ pte_unmap_nested(pte_m); ++} ++ ++static void pax_mirror_pfn_pte(struct vm_area_struct *vma, unsigned long address, unsigned long pfn_m, spinlock_t *ptl) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long address_m; ++ spinlock_t *ptl_m; ++ struct vm_area_struct *vma_m; ++ pmd_t *pmd_m; ++ pte_t *pte_m, entry_m; ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (!vma_m) ++ return; ++ ++ BUG_ON(address >= SEGMEXEC_TASK_SIZE); ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ pmd_m = pmd_offset(pud_offset(pgd_offset(mm, address_m), address_m), address_m); ++ pte_m = pte_offset_map_nested(pmd_m, address_m); ++ ptl_m = pte_lockptr(mm, pmd_m); ++ if (ptl != ptl_m) { ++ spin_lock_nested(ptl_m, SINGLE_DEPTH_NESTING); ++ if (!pte_none(*pte_m)) ++ goto out; ++ } ++ ++ entry_m = pfn_pte(pfn_m, vma_m->vm_page_prot); ++ set_pte_at(mm, address_m, pte_m, entry_m); ++out: ++ if (ptl != ptl_m) ++ spin_unlock(ptl_m); ++ pte_unmap_nested(pte_m); ++} ++ ++static void pax_mirror_pte(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, spinlock_t *ptl) ++{ ++ struct page *page_m; ++ pte_t entry; ++ ++ if (!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC)) ++ goto out; ++ ++ entry = *pte; ++ page_m = vm_normal_page(vma, address, entry); ++ if (!page_m) ++ pax_mirror_pfn_pte(vma, address, pte_pfn(entry), ptl); ++ else if (PageAnon(page_m)) { ++ if (pax_find_mirror_vma(vma)) { ++ pte_unmap_unlock(pte, ptl); ++ lock_page(page_m); ++ pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl); ++ if (pte_same(entry, *pte)) ++ pax_mirror_anon_pte(vma, address, page_m, ptl); ++ else ++ unlock_page(page_m); ++ } ++ } else ++ 
pax_mirror_file_pte(vma, address, page_m, ptl); ++ ++out: ++ pte_unmap_unlock(pte, ptl); ++} ++#endif ++ + /* + * This routine handles present pages, when users try to write + * to a shared page. It is done by copying the page to a new address +@@ -1897,6 +2078,12 @@ gotten: + */ + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + if (likely(pte_same(*page_table, orig_pte))) { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (pax_find_mirror_vma(vma)) ++ BUG_ON(!trylock_page(new_page)); ++#endif ++ + if (old_page) { + if (!PageAnon(old_page)) { + dec_mm_counter(mm, file_rss); +@@ -1947,6 +2134,10 @@ gotten: + page_remove_rmap(old_page, vma); + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_anon_pte(vma, address, new_page, ptl); ++#endif ++ + /* Free the old page.. */ + new_page = old_page; + ret |= VM_FAULT_WRITE; +@@ -2206,6 +2397,7 @@ int vmtruncate(struct inode * inode, lof + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; ++ gr_learn_resource(current, RLIMIT_FSIZE, offset, 1); + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) +@@ -2357,6 +2549,11 @@ static int do_swap_page(struct mm_struct + swap_free(entry); + if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + remove_exclusive_swap_page(page); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (write_access || !pax_find_mirror_vma(vma)) ++#endif ++ + unlock_page(page); + + if (write_access) { +@@ -2368,6 +2565,11 @@ static int do_swap_page(struct mm_struct + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, pte); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_anon_pte(vma, address, page, ptl); ++#endif ++ + unlock: + pte_unmap_unlock(page_table, ptl); + out: +@@ -2412,6 +2614,12 @@ static int do_anonymous_page(struct mm_s + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + if (!pte_none(*page_table)) + goto release; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if 
(pax_find_mirror_vma(vma)) ++ BUG_ON(!trylock_page(page)); ++#endif ++ + inc_mm_counter(mm, anon_rss); + SetPageSwapBacked(page); + lru_cache_add_active_or_unevictable(page, vma); +@@ -2420,6 +2628,11 @@ static int do_anonymous_page(struct mm_s + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, entry); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_anon_pte(vma, address, page, ptl); ++#endif ++ + unlock: + pte_unmap_unlock(page_table, ptl); + return 0; +@@ -2556,6 +2769,12 @@ static int __do_fault(struct mm_struct * + */ + /* Only go through if we didn't race with anybody else... */ + if (likely(pte_same(*page_table, orig_pte))) { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (anon && pax_find_mirror_vma(vma)) ++ BUG_ON(!trylock_page(page)); ++#endif ++ + flush_icache_page(vma, page); + entry = mk_pte(page, vma->vm_page_prot); + if (flags & FAULT_FLAG_WRITE) +@@ -2578,6 +2797,14 @@ static int __do_fault(struct mm_struct * + + /* no need to invalidate: a not-present page won't be cached */ + update_mmu_cache(vma, address, entry); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (anon) ++ pax_mirror_anon_pte(vma, address, page, ptl); ++ else ++ pax_mirror_file_pte(vma, address, page, ptl); ++#endif ++ + } else { + if (charged) + mem_cgroup_uncharge_page(page); +@@ -2711,6 +2938,12 @@ static inline int handle_pte_fault(struc + if (write_access) + flush_tlb_page(vma, address); + } ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ pax_mirror_pte(vma, address, pte, pmd, ptl); ++ return 0; ++#endif ++ + unlock: + pte_unmap_unlock(pte, ptl); + return 0; +@@ -2727,6 +2960,10 @@ int handle_mm_fault(struct mm_struct *mm + pmd_t *pmd; + pte_t *pte; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++#endif ++ + __set_current_state(TASK_RUNNING); + + count_vm_event(PGFAULT); +@@ -2734,6 +2971,34 @@ int handle_mm_fault(struct mm_struct *mm + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); + ++#ifdef 
CONFIG_PAX_SEGMEXEC ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) { ++ unsigned long address_m; ++ pgd_t *pgd_m; ++ pud_t *pud_m; ++ pmd_t *pmd_m; ++ ++ if (vma->vm_start > vma_m->vm_start) { ++ address_m = address; ++ address -= SEGMEXEC_TASK_SIZE; ++ vma = vma_m; ++ } else ++ address_m = address + SEGMEXEC_TASK_SIZE; ++ ++ pgd_m = pgd_offset(mm, address_m); ++ pud_m = pud_alloc(mm, pgd_m, address_m); ++ if (!pud_m) ++ return VM_FAULT_OOM; ++ pmd_m = pmd_alloc(mm, pud_m, address_m); ++ if (!pmd_m) ++ return VM_FAULT_OOM; ++ if (!pmd_present(*pmd_m) && __pte_alloc(mm, pmd_m, address_m)) ++ return VM_FAULT_OOM; ++ pax_unmap_mirror_pte(vma_m, address_m, pmd_m); ++ } ++#endif ++ + pgd = pgd_offset(mm, address); + pud = pud_alloc(mm, pgd, address); + if (!pud) +@@ -2831,7 +3096,7 @@ static int __init gate_vma_init(void) + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; +- gate_vma.vm_page_prot = __P101; ++ gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags); + /* + * Make sure the vDSO gets into every core dump. 
+ * Dumping its contents makes post-mortem fully interpretable later +diff -urNp a/mm/mempolicy.c b/mm/mempolicy.c +--- a/mm/mempolicy.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/mempolicy.c 2009-05-24 18:10:25.346336444 -0700 +@@ -551,6 +551,10 @@ static int mbind_range(struct vm_area_st + struct vm_area_struct *next; + int err; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++#endif ++ + err = 0; + for (; vma && vma->vm_start < end; vma = next) { + next = vma->vm_next; +@@ -562,6 +566,16 @@ static int mbind_range(struct vm_area_st + err = policy_vma(vma, new); + if (err) + break; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) { ++ err = policy_vma(vma_m, new); ++ if (err) ++ break; ++ } ++#endif ++ + } + return err; + } +@@ -954,6 +968,17 @@ static long do_mbind(unsigned long start + + if (end < start) + return -EINVAL; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (mm->pax_flags & MF_PAX_SEGMEXEC) { ++ if (end > SEGMEXEC_TASK_SIZE) ++ return -EINVAL; ++ } else ++#endif ++ ++ if (end > TASK_SIZE) ++ return -EINVAL; ++ + if (end == start) + return 0; + +diff -urNp a/mm/migrate.c b/mm/migrate.c +--- a/mm/migrate.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/migrate.c 2009-05-24 18:10:25.346994583 -0700 +@@ -1139,7 +1139,7 @@ int migrate_vmas(struct mm_struct *mm, c + struct vm_area_struct *vma; + int err = 0; + +- for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { ++ for(vma = mm->mmap; vma && !err; vma = vma->vm_next) { + if (vma->vm_ops && vma->vm_ops->migrate) { + err = vma->vm_ops->migrate(vma, to, from, flags); + if (err) +diff -urNp a/mm/mlock.c b/mm/mlock.c +--- a/mm/mlock.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/mlock.c 2009-05-24 18:10:25.346994583 -0700 +@@ -13,6 +13,7 @@ + #include <linux/pagemap.h> + #include <linux/mempolicy.h> + #include <linux/syscalls.h> ++#include <linux/security.h> + #include <linux/sched.h> + #include <linux/module.h> + #include <linux/rmap.h> +@@ -452,6 
+453,17 @@ static int do_mlock(unsigned long start, + return -EINVAL; + if (end == start) + return 0; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { ++ if (end > SEGMEXEC_TASK_SIZE) ++ return -EINVAL; ++ } else ++#endif ++ ++ if (end > TASK_SIZE) ++ return -EINVAL; ++ + vma = find_vma_prev(current->mm, start, &prev); + if (!vma || vma->vm_start > start) + return -ENOMEM; +@@ -511,6 +523,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, st + lock_limit >>= PAGE_SHIFT; + + /* check against resource limits */ ++ gr_learn_resource(current, RLIMIT_MEMLOCK, (current->mm->locked_vm << PAGE_SHIFT) + len, 1); + if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) + error = do_mlock(start, len, 1); + up_write(&current->mm->mmap_sem); +@@ -532,10 +545,10 @@ SYSCALL_DEFINE2(munlock, unsigned long, + static int do_mlockall(int flags) + { + struct vm_area_struct * vma, * prev = NULL; +- unsigned int def_flags = 0; ++ unsigned int def_flags = current->mm->def_flags & ~VM_LOCKED; + + if (flags & MCL_FUTURE) +- def_flags = VM_LOCKED; ++ def_flags |= VM_LOCKED; + current->mm->def_flags = def_flags; + if (flags == MCL_FUTURE) + goto out; +@@ -543,6 +556,12 @@ static int do_mlockall(int flags) + for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { + unsigned int newflags; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (vma->vm_start >= SEGMEXEC_TASK_SIZE)) ++ break; ++#endif ++ ++ BUG_ON(vma->vm_end > TASK_SIZE); + newflags = vma->vm_flags | VM_LOCKED; + if (!(flags & MCL_CURRENT)) + newflags &= ~VM_LOCKED; +@@ -574,6 +593,7 @@ SYSCALL_DEFINE1(mlockall, int, flags) + lock_limit >>= PAGE_SHIFT; + + ret = -ENOMEM; ++ gr_learn_resource(current, RLIMIT_MEMLOCK, current->mm->total_vm, 1); + if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || + capable(CAP_IPC_LOCK)) + ret = do_mlockall(flags); +diff -urNp a/mm/mmap.c b/mm/mmap.c +--- a/mm/mmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/mmap.c
2009-05-24 18:10:25.350336747 -0700 +@@ -43,6 +43,16 @@ + #define arch_rebalance_pgtables(addr, len) (addr) + #endif + ++static inline void verify_mm_writelocked(struct mm_struct *mm) ++{ ++#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAX) ++ if (unlikely(down_read_trylock(&mm->mmap_sem))) { ++ up_read(&mm->mmap_sem); ++ BUG(); ++ } ++#endif ++} ++ + static void unmap_region(struct mm_struct *mm, + struct vm_area_struct *vma, struct vm_area_struct *prev, + unsigned long start, unsigned long end); +@@ -68,16 +78,25 @@ static void unmap_region(struct mm_struc + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + */ +-pgprot_t protection_map[16] = { ++pgprot_t protection_map[16] __read_only = { + __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, + __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 + }; + + pgprot_t vm_get_page_prot(unsigned long vm_flags) + { +- return __pgprot(pgprot_val(protection_map[vm_flags & ++ pgprot_t prot = __pgprot(pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | + pgprot_val(arch_vm_get_page_prot(vm_flags))); ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) ++ if (!nx_enabled && ++ (vm_flags & (VM_PAGEEXEC | VM_EXEC)) == VM_PAGEEXEC && ++ (vm_flags & (VM_READ | VM_WRITE))) ++ prot = __pgprot(pte_val(pte_exprotect(__pte(pgprot_val(prot))))); ++#endif ++ ++ return prot; + } + EXPORT_SYMBOL(vm_get_page_prot); + +@@ -233,6 +252,7 @@ static struct vm_area_struct *remove_vma + struct vm_area_struct *next = vma->vm_next; + + might_sleep(); ++ BUG_ON(vma->vm_mirror); + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); + if (vma->vm_file) { +@@ -269,6 +289,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) + * not page aligned -Ram Gupta + */ + rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; ++ gr_learn_resource(current, RLIMIT_DATA, (brk - mm->start_brk) + (mm->end_data - mm->start_data), 1); + if (rlim < RLIM_INFINITY && (brk - mm->start_brk) + + 
(mm->end_data - mm->start_data) > rlim) + goto out; +@@ -696,6 +717,12 @@ static int + can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, + struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff) + { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_start == SEGMEXEC_TASK_SIZE) ++ return 0; ++#endif ++ + if (is_mergeable_vma(vma, file, vm_flags) && + is_mergeable_anon_vma(anon_vma, vma->anon_vma)) { + if (vma->vm_pgoff == vm_pgoff) +@@ -715,6 +742,12 @@ static int + can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, + struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff) + { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end == SEGMEXEC_TASK_SIZE) ++ return 0; ++#endif ++ + if (is_mergeable_vma(vma, file, vm_flags) && + is_mergeable_anon_vma(anon_vma, vma->anon_vma)) { + pgoff_t vm_pglen; +@@ -757,12 +790,19 @@ can_vma_merge_after(struct vm_area_struc + struct vm_area_struct *vma_merge(struct mm_struct *mm, + struct vm_area_struct *prev, unsigned long addr, + unsigned long end, unsigned long vm_flags, +- struct anon_vma *anon_vma, struct file *file, ++ struct anon_vma *anon_vma, struct file *file, + pgoff_t pgoff, struct mempolicy *policy) + { + pgoff_t pglen = (end - addr) >> PAGE_SHIFT; + struct vm_area_struct *area, *next; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ unsigned long addr_m = addr + SEGMEXEC_TASK_SIZE, end_m = end + SEGMEXEC_TASK_SIZE; ++ struct vm_area_struct *area_m = NULL, *next_m = NULL, *prev_m = NULL; ++ ++ BUG_ON((mm->pax_flags & MF_PAX_SEGMEXEC) && SEGMEXEC_TASK_SIZE < end); ++#endif ++ + /* + * We later require that vma->vm_flags == vm_flags, + * so this tests vma->vm_flags & VM_SPECIAL, too. 
+@@ -778,6 +818,15 @@ struct vm_area_struct *vma_merge(struct + if (next && next->vm_end == end) /* cases 6, 7, 8 */ + next = next->vm_next; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (prev) ++ prev_m = pax_find_mirror_vma(prev); ++ if (area) ++ area_m = pax_find_mirror_vma(area); ++ if (next) ++ next_m = pax_find_mirror_vma(next); ++#endif ++ + /* + * Can it merge with the predecessor? + */ +@@ -797,9 +846,24 @@ struct vm_area_struct *vma_merge(struct + /* cases 1, 6 */ + vma_adjust(prev, prev->vm_start, + next->vm_end, prev->vm_pgoff, NULL); +- } else /* cases 2, 5, 7 */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (prev_m) ++ vma_adjust(prev_m, prev_m->vm_start, ++ next_m->vm_end, prev_m->vm_pgoff, NULL); ++#endif ++ ++ } else { /* cases 2, 5, 7 */ + vma_adjust(prev, prev->vm_start, + end, prev->vm_pgoff, NULL); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (prev_m) ++ vma_adjust(prev_m, prev_m->vm_start, ++ end_m, prev_m->vm_pgoff, NULL); ++#endif ++ ++ } + return prev; + } + +@@ -810,12 +874,27 @@ struct vm_area_struct *vma_merge(struct + mpol_equal(policy, vma_policy(next)) && + can_vma_merge_before(next, vm_flags, + anon_vma, file, pgoff+pglen)) { +- if (prev && addr < prev->vm_end) /* case 4 */ ++ if (prev && addr < prev->vm_end) { /* case 4 */ + vma_adjust(prev, prev->vm_start, + addr, prev->vm_pgoff, NULL); +- else /* cases 3, 8 */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (prev_m) ++ vma_adjust(prev_m, prev_m->vm_start, ++ addr_m, prev_m->vm_pgoff, NULL); ++#endif ++ ++ } else { /* cases 3, 8 */ + vma_adjust(area, addr, next->vm_end, + next->vm_pgoff - pglen, NULL); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (area_m) ++ vma_adjust(area_m, addr_m, next_m->vm_end, ++ next_m->vm_pgoff - pglen, NULL); ++#endif ++ ++ } + return area; + } + +@@ -890,14 +969,11 @@ none: + void vm_stat_account(struct mm_struct *mm, unsigned long flags, + struct file *file, long pages) + { +- const unsigned long stack_flags +- = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); +- + if (file) { + mm->shared_vm += 
pages; + if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) + mm->exec_vm += pages; +- } else if (flags & stack_flags) ++ } else if (flags & (VM_GROWSUP|VM_GROWSDOWN)) + mm->stack_vm += pages; + if (flags & (VM_RESERVED|VM_IO)) + mm->reserved_vm += pages; +@@ -925,7 +1001,7 @@ unsigned long do_mmap_pgoff(struct file + * (the exception is when the underlying filesystem is noexec + * mounted, in which case we dont add PROT_EXEC.) + */ +- if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) ++ if ((prot & (PROT_READ | PROT_WRITE)) && (current->personality & READ_IMPLIES_EXEC)) + if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC))) + prot |= PROT_EXEC; + +@@ -935,15 +1011,15 @@ unsigned long do_mmap_pgoff(struct file + if (!(flags & MAP_FIXED)) + addr = round_hint_to_min(addr); + +- error = arch_mmap_check(addr, len, flags); +- if (error) +- return error; +- + /* Careful about overflows.. */ + len = PAGE_ALIGN(len); + if (!len || len > TASK_SIZE) + return -ENOMEM; + ++ error = arch_mmap_check(addr, len, flags); ++ if (error) ++ return error; ++ + /* offset overflow? */ + if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) + return -EOVERFLOW; +@@ -955,7 +1031,7 @@ unsigned long do_mmap_pgoff(struct file + /* Obtain the address to map to. we verify (or select) it and ensure + * that it represents a valid section of the address space. + */ +- addr = get_unmapped_area(file, addr, len, pgoff, flags); ++ addr = get_unmapped_area(file, addr, len, pgoff, flags | ((prot & PROT_EXEC) ? 
MAP_EXECUTABLE : 0)); + if (addr & ~PAGE_MASK) + return addr; + +@@ -966,6 +1042,26 @@ unsigned long do_mmap_pgoff(struct file + vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (mm->pax_flags & MF_PAX_MPROTECT) { ++ if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC) ++ vm_flags &= ~(VM_EXEC | VM_MAYEXEC); ++ else ++ vm_flags &= ~(VM_WRITE | VM_MAYWRITE); ++ } ++#endif ++ ++ } ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && file) ++ vm_flags &= ~VM_PAGEEXEC; ++#endif ++ + if (flags & MAP_LOCKED) { + if (!can_do_mlock()) + return -EPERM; +@@ -979,6 +1075,7 @@ unsigned long do_mmap_pgoff(struct file + locked += mm->locked_vm; + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; ++ gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + return -EAGAIN; + } +@@ -1051,6 +1148,9 @@ unsigned long do_mmap_pgoff(struct file + if (error) + return error; + ++ if (!gr_acl_handle_mmap(file, prot)) ++ return -EACCES; ++ + return mmap_region(file, addr, len, flags, vm_flags, pgoff, + accountable); + } +@@ -1064,10 +1164,10 @@ EXPORT_SYMBOL(do_mmap_pgoff); + */ + int vma_wants_writenotify(struct vm_area_struct *vma) + { +- unsigned int vm_flags = vma->vm_flags; ++ unsigned long vm_flags = vma->vm_flags; + + /* If it was private or non-writable, the write bit is already clear */ +- if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) ++ if ((vm_flags & (VM_WRITE|VM_SHARED)) != (VM_WRITE|VM_SHARED)) + return 0; + + /* The backer wishes to know when pages are first written to? 
*/ +@@ -1102,14 +1202,24 @@ unsigned long mmap_region(struct file *f + unsigned long charged = 0; + struct inode *inode = file ? file->f_path.dentry->d_inode : NULL; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m = NULL; ++#endif ++ ++ /* ++ * mm->mmap_sem is required to protect against another thread ++ * changing the mappings in case we sleep. ++ */ ++ verify_mm_writelocked(mm); ++ + /* Clear old maps */ + error = -ENOMEM; +-munmap_back: + vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); + if (vma && vma->vm_start < addr + len) { + if (do_munmap(mm, addr, len)) + return -ENOMEM; +- goto munmap_back; ++ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); ++ BUG_ON(vma && vma->vm_start < addr + len); + } + + /* Check against address space limit. */ +@@ -1158,6 +1268,16 @@ munmap_back: + goto unacct_error; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (vm_flags & VM_EXEC)) { ++ vma_m = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ if (!vma_m) { ++ error = -ENOMEM; ++ goto free_vma; ++ } ++ } ++#endif ++ + vma->vm_mm = mm; + vma->vm_start = addr; + vma->vm_end = addr + len; +@@ -1180,6 +1300,19 @@ munmap_back: + error = file->f_op->mmap(file, vma); + if (error) + goto unmap_and_free_vma; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m && (vm_flags & VM_EXECUTABLE)) ++ added_exe_file_vma(mm); ++#endif ++ ++#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_X86_32) ++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && !(vma->vm_flags & VM_SPECIAL)) { ++ vma->vm_flags |= VM_PAGEEXEC; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ } ++#endif ++ + if (vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } else if (vm_flags & VM_SHARED) { +@@ -1215,13 +1348,30 @@ munmap_back: + if (merged_vma) { + mpol_put(vma_policy(vma)); + kmem_cache_free(vm_area_cachep, vma); ++ vma = NULL; + fput(file); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m) { ++ kmem_cache_free(vm_area_cachep, vma_m); ++ ++ if 
(vm_flags & VM_EXECUTABLE) ++ removed_exe_file_vma(mm); ++ } ++#endif ++ + if (vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + vma = merged_vma; + } else { + vma_link(mm, vma, prev, rb_link, rb_parent); + file = vma->vm_file; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m) ++ pax_mirror_vma(vma_m, vma); ++#endif ++ + } + + /* Once vma denies write, undo our temporary denial count */ +@@ -1230,6 +1380,7 @@ munmap_back: + out: + mm->total_vm += len >> PAGE_SHIFT; + vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); ++ track_exec_limit(mm, addr, addr + len, vm_flags); + if (vm_flags & VM_LOCKED) { + /* + * makes pages present; downgrades, drops, reacquires mmap_sem +@@ -1252,6 +1403,12 @@ unmap_and_free_vma: + unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); + charged = 0; + free_vma: ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m) ++ kmem_cache_free(vm_area_cachep, vma_m); ++#endif ++ + kmem_cache_free(vm_area_cachep, vma); + unacct_error: + if (charged) +@@ -1285,6 +1442,10 @@ arch_get_unmapped_area(struct file *filp + if (flags & MAP_FIXED) + return addr; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); +@@ -1293,10 +1454,10 @@ arch_get_unmapped_area(struct file *filp + return addr; + } + if (len > mm->cached_hole_size) { +- start_addr = addr = mm->free_area_cache; ++ start_addr = addr = mm->free_area_cache; + } else { +- start_addr = addr = TASK_UNMAPPED_BASE; +- mm->cached_hole_size = 0; ++ start_addr = addr = mm->mmap_base; ++ mm->cached_hole_size = 0; + } + + full_search: +@@ -1307,9 +1468,8 @@ full_search: + * Start a new search - just in case we missed + * some holes. 
+ */ +- if (start_addr != TASK_UNMAPPED_BASE) { +- addr = TASK_UNMAPPED_BASE; +- start_addr = addr; ++ if (start_addr != mm->mmap_base) { ++ start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } +@@ -1331,10 +1491,16 @@ full_search: + + void arch_unmap_area(struct mm_struct *mm, unsigned long addr) + { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && SEGMEXEC_TASK_SIZE <= addr) ++ return; ++#endif ++ + /* + * Is this a new hole at the lowest possible address? + */ +- if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) { ++ if (addr >= mm->mmap_base && addr < mm->free_area_cache) { + mm->free_area_cache = addr; + mm->cached_hole_size = ~0UL; + } +@@ -1352,7 +1518,7 @@ arch_get_unmapped_area_topdown(struct fi + { + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; +- unsigned long addr = addr0; ++ unsigned long base = mm->mmap_base, addr = addr0; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) +@@ -1361,6 +1527,10 @@ arch_get_unmapped_area_topdown(struct fi + if (flags & MAP_FIXED) + return addr; + ++#ifdef CONFIG_PAX_RANDMMAP ++ if (!(mm->pax_flags & MF_PAX_RANDMMAP)) ++#endif ++ + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); +@@ -1418,13 +1588,21 @@ bottomup: + * can happen with large stack limits and large mmap() + * allocations. 
+ */ ++ mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ ++ mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; +- mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ +- mm->free_area_cache = mm->mmap_base; ++ mm->mmap_base = base; ++ mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +@@ -1433,6 +1611,12 @@ bottomup: + + void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr) + { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && SEGMEXEC_TASK_SIZE <= addr) ++ return; ++#endif ++ + /* + * Is this a new hole at the highest possible address? + */ +@@ -1440,8 +1624,10 @@ void arch_unmap_area_topdown(struct mm_s + mm->free_area_cache = addr; + + /* dont allow allocations above current base */ +- if (mm->free_area_cache > mm->mmap_base) ++ if (mm->free_area_cache > mm->mmap_base) { + mm->free_area_cache = mm->mmap_base; ++ mm->cached_hole_size = ~0UL; ++ } + } + + unsigned long +@@ -1541,6 +1727,27 @@ out: + return prev ? 
prev->vm_next : vma; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++struct vm_area_struct *pax_find_mirror_vma(struct vm_area_struct *vma) ++{ ++ struct vm_area_struct *vma_m; ++ ++ BUG_ON(!vma || vma->vm_start >= vma->vm_end); ++ if (!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) || !(vma->vm_flags & VM_EXEC)) { ++ BUG_ON(vma->vm_mirror); ++ return NULL; ++ } ++ BUG_ON(vma->vm_start < SEGMEXEC_TASK_SIZE && SEGMEXEC_TASK_SIZE < vma->vm_end); ++ vma_m = vma->vm_mirror; ++ BUG_ON(!vma_m || vma_m->vm_mirror != vma); ++ BUG_ON(vma->vm_file != vma_m->vm_file); ++ BUG_ON(vma->vm_end - vma->vm_start != vma_m->vm_end - vma_m->vm_start); ++ BUG_ON(vma->vm_pgoff != vma_m->vm_pgoff || vma->anon_vma != vma_m->anon_vma); ++ BUG_ON((vma->vm_flags ^ vma_m->vm_flags) & ~(VM_WRITE | VM_MAYWRITE | VM_ACCOUNT | VM_LOCKED)); ++ return vma_m; ++} ++#endif ++ + /* + * Verify that the stack growth is acceptable and + * update accounting. This is shared with both the +@@ -1557,6 +1764,7 @@ static int acct_stack_growth(struct vm_a + return -ENOMEM; + + /* Stack limit test */ ++ gr_learn_resource(current, RLIMIT_STACK, size, 1); + if (size > rlim[RLIMIT_STACK].rlim_cur) + return -ENOMEM; + +@@ -1566,6 +1774,7 @@ static int acct_stack_growth(struct vm_a + unsigned long limit; + locked = mm->locked_vm + grow; + limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; ++ gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); + if (locked > limit && !capable(CAP_IPC_LOCK)) + return -ENOMEM; + } +@@ -1601,35 +1810,40 @@ static + #endif + int expand_upwards(struct vm_area_struct *vma, unsigned long address) + { +- int error; ++ int error, locknext; + + if (!(vma->vm_flags & VM_GROWSUP)) + return -EFAULT; + ++ /* Also guard against wrapping around to address 0. */ ++ if (address < PAGE_ALIGN(address+1)) ++ address = PAGE_ALIGN(address+1); ++ else ++ return -ENOMEM; ++ + /* + * We must make sure the anon_vma is allocated + * so that the anon_vma locking is not a noop. 
+ */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; ++ locknext = vma->vm_next && (vma->vm_next->vm_flags & VM_GROWSDOWN); ++ if (locknext && unlikely(anon_vma_prepare(vma->vm_next))) ++ return -ENOMEM; + anon_vma_lock(vma); ++ if (locknext) ++ anon_vma_lock(vma->vm_next); + + /* + * vma->vm_start/vm_end cannot change under us because the caller + * is required to hold the mmap_sem in read mode. We need the +- * anon_vma lock to serialize against concurrent expand_stacks. +- * Also guard against wrapping around to address 0. ++ * anon_vma locks to serialize against concurrent expand_stacks ++ * and expand_upwards. + */ +- if (address < PAGE_ALIGN(address+4)) +- address = PAGE_ALIGN(address+4); +- else { +- anon_vma_unlock(vma); +- return -ENOMEM; +- } + error = 0; + + /* Somebody else might have raced and expanded it already */ +- if (address > vma->vm_end) { ++ if (address > vma->vm_end && (!locknext || vma->vm_next->vm_start >= address)) { + unsigned long size, grow; + + size = address - vma->vm_start; +@@ -1639,6 +1853,8 @@ int expand_upwards(struct vm_area_struct + if (!error) + vma->vm_end = address; + } ++ if (locknext) ++ anon_vma_unlock(vma->vm_next); + anon_vma_unlock(vma); + return error; + } +@@ -1650,7 +1866,8 @@ int expand_upwards(struct vm_area_struct + static int expand_downwards(struct vm_area_struct *vma, + unsigned long address) + { +- int error; ++ int error, lockprev = 0; ++ struct vm_area_struct *prev = NULL; + + /* + * We must make sure the anon_vma is allocated +@@ -1664,6 +1881,15 @@ static int expand_downwards(struct vm_ar + if (error) + return error; + ++#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) ++ find_vma_prev(vma->vm_mm, address, &prev); ++ lockprev = prev && (prev->vm_flags & VM_GROWSUP); ++#endif ++ if (lockprev && unlikely(anon_vma_prepare(prev))) ++ return -ENOMEM; ++ if (lockprev) ++ anon_vma_lock(prev); ++ + anon_vma_lock(vma); + + /* +@@ -1673,9 +1899,15 @@ static int expand_downwards(struct vm_ar + */ + 
+ /* Somebody else might have raced and expanded it already */ +- if (address < vma->vm_start) { ++ if (address < vma->vm_start && (!lockprev || prev->vm_end <= address)) { + unsigned long size, grow; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++ ++ vma_m = pax_find_mirror_vma(vma); ++#endif ++ + size = vma->vm_end - address; + grow = (vma->vm_start - address) >> PAGE_SHIFT; + +@@ -1683,9 +1915,20 @@ static int expand_downwards(struct vm_ar + if (!error) { + vma->vm_start = address; + vma->vm_pgoff -= grow; ++ track_exec_limit(vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_flags); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m) { ++ vma_m->vm_start -= grow << PAGE_SHIFT; ++ vma_m->vm_pgoff -= grow; ++ } ++#endif ++ + } + } + anon_vma_unlock(vma); ++ if (lockprev) ++ anon_vma_unlock(prev); + return error; + } + +@@ -1761,6 +2004,13 @@ static void remove_vma_list(struct mm_st + do { + long nrpages = vma_pages(vma); + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (vma->vm_start >= SEGMEXEC_TASK_SIZE)) { ++ vma = remove_vma(vma); ++ continue; ++ } ++#endif ++ + mm->total_vm -= nrpages; + vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); + vma = remove_vma(vma); +@@ -1805,6 +2055,16 @@ detach_vmas_to_be_unmapped(struct mm_str + + insertion_point = (prev ? &prev->vm_next : &mm->mmap); + do { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma->vm_mirror) { ++ BUG_ON(!vma->vm_mirror->vm_mirror || vma->vm_mirror->vm_mirror != vma); ++ vma->vm_mirror->vm_mirror = NULL; ++ vma->vm_mirror->vm_flags &= ~VM_EXEC; ++ vma->vm_mirror = NULL; ++ } ++#endif ++ + rb_erase(&vma->vm_rb, &mm->mm_rb); + mm->map_count--; + tail_vma = vma; +@@ -1824,6 +2084,108 @@ detach_vmas_to_be_unmapped(struct mm_str + * Split a vma into two pieces at address 'addr', a new vma is allocated + * either for the first part or the tail. 
+ */ ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, ++ unsigned long addr, int new_below) ++{ ++ struct mempolicy *pol; ++ struct vm_area_struct *new, *vma_m, *new_m = NULL; ++ unsigned long addr_m = addr + SEGMEXEC_TASK_SIZE; ++ ++ if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK)) ++ return -EINVAL; ++ ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) { ++ BUG_ON(vma->vm_end > SEGMEXEC_TASK_SIZE); ++ if (mm->map_count >= sysctl_max_map_count-1) ++ return -ENOMEM; ++ } else if (mm->map_count >= sysctl_max_map_count) ++ return -ENOMEM; ++ ++ new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ if (!new) ++ return -ENOMEM; ++ ++ if (vma_m) { ++ new_m = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ if (!new_m) { ++ kmem_cache_free(vm_area_cachep, new); ++ return -ENOMEM; ++ } ++ } ++ ++ /* most fields are the same, copy all, and then fixup */ ++ *new = *vma; ++ ++ if (new_below) ++ new->vm_end = addr; ++ else { ++ new->vm_start = addr; ++ new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); ++ } ++ ++ if (vma_m) { ++ *new_m = *vma_m; ++ new_m->vm_mirror = new; ++ new->vm_mirror = new_m; ++ ++ if (new_below) ++ new_m->vm_end = addr_m; ++ else { ++ new_m->vm_start = addr_m; ++ new_m->vm_pgoff += ((addr_m - vma_m->vm_start) >> PAGE_SHIFT); ++ } ++ } ++ ++ pol = mpol_dup(vma_policy(vma)); ++ if (IS_ERR(pol)) { ++ if (new_m) ++ kmem_cache_free(vm_area_cachep, new_m); ++ kmem_cache_free(vm_area_cachep, new); ++ return PTR_ERR(pol); ++ } ++ vma_set_policy(new, pol); ++ ++ if (new->vm_file) { ++ get_file(new->vm_file); ++ if (vma->vm_flags & VM_EXECUTABLE) ++ added_exe_file_vma(mm); ++ } ++ ++ if (new->vm_ops && new->vm_ops->open) ++ new->vm_ops->open(new); ++ ++ if (new_below) ++ vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + ++ ((addr - new->vm_start) >> PAGE_SHIFT), new); ++ else ++ vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); ++ ++ if (vma_m) { ++ mpol_get(pol); ++ vma_set_policy(new_m, 
pol); ++ ++ if (new_m->vm_file) { ++ get_file(new_m->vm_file); ++ if (vma_m->vm_flags & VM_EXECUTABLE) ++ added_exe_file_vma(mm); ++ } ++ ++ if (new_m->vm_ops && new_m->vm_ops->open) ++ new_m->vm_ops->open(new_m); ++ ++ if (new_below) ++ vma_adjust(vma_m, addr_m, vma_m->vm_end, vma_m->vm_pgoff + ++ ((addr_m - new_m->vm_start) >> PAGE_SHIFT), new_m); ++ else ++ vma_adjust(vma_m, vma_m->vm_start, addr_m, vma_m->vm_pgoff, new_m); ++ } ++ ++ return 0; ++} ++#else + int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, + unsigned long addr, int new_below) + { +@@ -1875,17 +2237,37 @@ int split_vma(struct mm_struct * mm, str + + return 0; + } ++#endif + + /* Munmap is split into 2 main parts -- this part which finds + * what needs doing, and the areas themselves, which do the + * work. This now handles partial unmappings. + * Jeremy Fitzhardinge <jeremy@goop.org> + */ ++#ifdef CONFIG_PAX_SEGMEXEC + int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) + { ++ int ret = __do_munmap(mm, start, len); ++ if (ret || !(mm->pax_flags & MF_PAX_SEGMEXEC)) ++ return ret; ++ ++ return __do_munmap(mm, start + SEGMEXEC_TASK_SIZE, len); ++} ++ ++int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len) ++#else ++int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) ++#endif ++{ + unsigned long end; + struct vm_area_struct *vma, *prev, *last; + ++ /* ++ * mm->mmap_sem is required to protect against another thread ++ * changing the mappings in case we sleep. 
++ */ ++ verify_mm_writelocked(mm); ++ + if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) + return -EINVAL; + +@@ -1949,6 +2331,8 @@ int do_munmap(struct mm_struct *mm, unsi + /* Fix up all other VM information */ + remove_vma_list(mm, vma); + ++ track_exec_limit(mm, start, end, 0UL); ++ + return 0; + } + +@@ -1961,22 +2345,18 @@ SYSCALL_DEFINE2(munmap, unsigned long, a + + profile_munmap(addr); + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && ++ (len > SEGMEXEC_TASK_SIZE || addr > SEGMEXEC_TASK_SIZE-len)) ++ return -EINVAL; ++#endif ++ + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + return ret; + } + +-static inline void verify_mm_writelocked(struct mm_struct *mm) +-{ +-#ifdef CONFIG_DEBUG_VM +- if (unlikely(down_read_trylock(&mm->mmap_sem))) { +- WARN_ON(1); +- up_read(&mm->mmap_sem); +- } +-#endif +-} +- + /* + * this is really a simplified "do_mmap". it only handles + * anonymous maps. eventually we may be able to do some +@@ -1990,6 +2370,11 @@ unsigned long do_brk(unsigned long addr, + struct rb_node ** rb_link, * rb_parent; + pgoff_t pgoff = addr >> PAGE_SHIFT; + int error; ++ unsigned long charged; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m = NULL; ++#endif + + len = PAGE_ALIGN(len); + if (!len) +@@ -2007,19 +2392,34 @@ unsigned long do_brk(unsigned long addr, + + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + ++#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) ++ if (mm->pax_flags & (MF_PAX_PAGEEXEC | MF_PAX_SEGMEXEC)) { ++ flags &= ~VM_EXEC; ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (mm->pax_flags & MF_PAX_MPROTECT) ++ flags &= ~VM_MAYEXEC; ++#endif ++ ++ } ++#endif ++ + error = arch_mmap_check(addr, len, flags); + if (error) + return error; + ++ charged = len >> PAGE_SHIFT; ++ + /* + * mlock MCL_FUTURE? 
+ */ + if (mm->def_flags & VM_LOCKED) { + unsigned long locked, lock_limit; +- locked = len >> PAGE_SHIFT; ++ locked = charged; + locked += mm->locked_vm; + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; ++ gr_learn_resource(current, RLIMIT_MEMLOCK, locked << PAGE_SHIFT, 1); + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + return -EAGAIN; + } +@@ -2033,22 +2433,22 @@ unsigned long do_brk(unsigned long addr, + /* + * Clear old maps. this also does some error checking for us + */ +- munmap_back: + vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); + if (vma && vma->vm_start < addr + len) { + if (do_munmap(mm, addr, len)) + return -ENOMEM; +- goto munmap_back; ++ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); ++ BUG_ON(vma && vma->vm_start < addr + len); + } + + /* Check against address space limits *after* clearing old maps... */ +- if (!may_expand_vm(mm, len >> PAGE_SHIFT)) ++ if (!may_expand_vm(mm, charged)) + return -ENOMEM; + + if (mm->map_count > sysctl_max_map_count) + return -ENOMEM; + +- if (security_vm_enough_memory(len >> PAGE_SHIFT)) ++ if (security_vm_enough_memory(charged)) + return -ENOMEM; + + /* Can we just expand an old private anonymous mapping? 
*/ +@@ -2062,10 +2462,21 @@ unsigned long do_brk(unsigned long addr, + */ + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!vma) { +- vm_unacct_memory(len >> PAGE_SHIFT); ++ vm_unacct_memory(charged); + return -ENOMEM; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (flags & VM_EXEC)) { ++ vma_m = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ if (!vma_m) { ++ kmem_cache_free(vm_area_cachep, vma); ++ vm_unacct_memory(charged); ++ return -ENOMEM; ++ } ++ } ++#endif ++ + vma->vm_mm = mm; + vma->vm_start = addr; + vma->vm_end = addr + len; +@@ -2074,11 +2485,12 @@ unsigned long do_brk(unsigned long addr, + vma->vm_page_prot = vm_get_page_prot(flags); + vma_link(mm, vma, prev, rb_link, rb_parent); + out: +- mm->total_vm += len >> PAGE_SHIFT; ++ mm->total_vm += charged; + if (flags & VM_LOCKED) { + if (!mlock_vma_pages_range(vma, addr, addr + len)) +- mm->locked_vm += (len >> PAGE_SHIFT); ++ mm->locked_vm += charged; + } ++ track_exec_limit(mm, addr, addr + len, flags); + return addr; + } + +@@ -2124,8 +2536,10 @@ void exit_mmap(struct mm_struct *mm) + * Walk the list again, actually closing and freeing it, + * with preemption enabled, without holding any MM locks. 
+ */ +- while (vma) ++ while (vma) { ++ vma->vm_mirror = NULL; + vma = remove_vma(vma); ++ } + + BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + } +@@ -2139,6 +2553,10 @@ int insert_vm_struct(struct mm_struct * + struct vm_area_struct * __vma, * prev; + struct rb_node ** rb_link, * rb_parent; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m = NULL; ++#endif ++ + /* + * The vm_pgoff of a purely anonymous vma should be irrelevant + * until its first write fault, when page's anon_vma and index +@@ -2161,7 +2579,22 @@ int insert_vm_struct(struct mm_struct * + if ((vma->vm_flags & VM_ACCOUNT) && + security_vm_enough_memory_mm(mm, vma_pages(vma))) + return -ENOMEM; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (vma->vm_flags & VM_EXEC)) { ++ vma_m = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ if (!vma_m) ++ return -ENOMEM; ++ } ++#endif ++ + vma_link(mm, vma, prev, rb_link, rb_parent); ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (vma_m) ++ pax_mirror_vma(vma_m, vma); ++#endif ++ + return 0; + } + +@@ -2179,6 +2612,8 @@ struct vm_area_struct *copy_vma(struct v + struct rb_node **rb_link, *rb_parent; + struct mempolicy *pol; + ++ BUG_ON(vma->vm_mirror); ++ + /* + * If anonymous vma has not yet been faulted, update new pgoff + * to match new location, to increase its chance of merging. 
+@@ -2222,6 +2657,35 @@ struct vm_area_struct *copy_vma(struct v + return new_vma; + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++void pax_mirror_vma(struct vm_area_struct *vma_m, struct vm_area_struct *vma) ++{ ++ struct vm_area_struct *prev_m; ++ struct rb_node **rb_link_m, *rb_parent_m; ++ struct mempolicy *pol_m; ++ ++ BUG_ON(!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) || !(vma->vm_flags & VM_EXEC)); ++ BUG_ON(vma->vm_mirror || vma_m->vm_mirror); ++ BUG_ON(!mpol_equal(vma_policy(vma), vma_policy(vma_m))); ++ *vma_m = *vma; ++ pol_m = vma_policy(vma_m); ++ mpol_get(pol_m); ++ vma_set_policy(vma_m, pol_m); ++ vma_m->vm_start += SEGMEXEC_TASK_SIZE; ++ vma_m->vm_end += SEGMEXEC_TASK_SIZE; ++ vma_m->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_ACCOUNT | VM_LOCKED); ++ vma_m->vm_page_prot = vm_get_page_prot(vma_m->vm_flags); ++ if (vma_m->vm_file) ++ get_file(vma_m->vm_file); ++ if (vma_m->vm_ops && vma_m->vm_ops->open) ++ vma_m->vm_ops->open(vma_m); ++ find_vma_prepare(vma->vm_mm, vma_m->vm_start, &prev_m, &rb_link_m, &rb_parent_m); ++ vma_link(vma->vm_mm, vma_m, prev_m, rb_link_m, rb_parent_m); ++ vma_m->vm_mirror = vma; ++ vma->vm_mirror = vma_m; ++} ++#endif ++ + /* + * Return true if the calling process may expand its vm space by the passed + * number of pages +@@ -2232,7 +2696,7 @@ int may_expand_vm(struct mm_struct *mm, + unsigned long lim; + + lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; +- ++ gr_learn_resource(current, RLIMIT_AS, (cur + npages) << PAGE_SHIFT, 1); + if (cur + npages > lim) + return 0; + return 1; +@@ -2301,6 +2765,15 @@ int install_special_mapping(struct mm_st + vma->vm_start = addr; + vma->vm_end = addr + len; + ++#ifdef CONFIG_PAX_MPROTECT ++ if (mm->pax_flags & MF_PAX_MPROTECT) { ++ if ((vm_flags & (VM_WRITE | VM_EXEC)) != VM_EXEC) ++ vm_flags &= ~(VM_EXEC | VM_MAYEXEC); ++ else ++ vm_flags &= ~(VM_WRITE | VM_MAYWRITE); ++ } ++#endif ++ + vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; + vma->vm_page_prot = 
vm_get_page_prot(vma->vm_flags); + +diff -urNp a/mm/mprotect.c b/mm/mprotect.c +--- a/mm/mprotect.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/mprotect.c 2009-05-24 18:10:25.351315505 -0700 +@@ -22,10 +22,16 @@ + #include <linux/swap.h> + #include <linux/swapops.h> + #include <linux/mmu_notifier.h> ++ ++#ifdef CONFIG_PAX_MPROTECT ++#include <linux/elf.h> ++#endif ++ + #include <asm/uaccess.h> + #include <asm/pgtable.h> + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> ++#include <asm/mmu_context.h> + + #ifndef pgprot_modify + static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +@@ -133,6 +139,48 @@ static void change_protection(struct vm_ + flush_tlb_range(vma, start, end); + } + ++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++/* called while holding the mmap semaphor for writing except stack expansion */ ++void track_exec_limit(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long prot) ++{ ++ unsigned long oldlimit, newlimit = 0UL; ++ ++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || nx_enabled) ++ return; ++ ++ spin_lock(&mm->page_table_lock); ++ oldlimit = mm->context.user_cs_limit; ++ if ((prot & VM_EXEC) && oldlimit < end) ++ /* USER_CS limit moved up */ ++ newlimit = end; ++ else if (!(prot & VM_EXEC) && start < oldlimit && oldlimit <= end) ++ /* USER_CS limit moved down */ ++ newlimit = start; ++ ++ if (newlimit) { ++ mm->context.user_cs_limit = newlimit; ++ ++#ifdef CONFIG_SMP ++ wmb(); ++ cpus_clear(mm->context.cpu_user_cs_mask); ++ cpu_set(smp_processor_id(), mm->context.cpu_user_cs_mask); ++#endif ++ ++ set_user_cs(mm->context.user_cs_base, mm->context.user_cs_limit, smp_processor_id()); ++ } ++ spin_unlock(&mm->page_table_lock); ++ if (newlimit == end) { ++ struct vm_area_struct *vma = find_vma(mm, oldlimit); ++ ++ for (; vma && vma->vm_start < end; vma = vma->vm_next) ++ if (is_vm_hugetlb_page(vma)) ++ hugetlb_change_protection(vma, vma->vm_start, vma->vm_end, vma->vm_page_prot); ++ else ++ 
change_protection(vma, vma->vm_start, vma->vm_end, vma->vm_page_prot, vma_wants_writenotify(vma)); ++ } ++} ++#endif ++ + int + mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned long newflags) +@@ -145,6 +193,14 @@ mprotect_fixup(struct vm_area_struct *vm + int error; + int dirty_accountable = 0; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m = NULL; ++ unsigned long start_m, end_m; ++ ++ start_m = start + SEGMEXEC_TASK_SIZE; ++ end_m = end + SEGMEXEC_TASK_SIZE; ++#endif ++ + if (newflags == oldflags) { + *pprev = vma; + return 0; +@@ -165,6 +221,38 @@ mprotect_fixup(struct vm_area_struct *vm + } + } + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if ((mm->pax_flags & MF_PAX_SEGMEXEC) && ((oldflags ^ newflags) & VM_EXEC)) { ++ if (start != vma->vm_start) { ++ error = split_vma(mm, vma, start, 1); ++ if (error) ++ goto fail; ++ BUG_ON(!*pprev || (*pprev)->vm_next == vma); ++ *pprev = (*pprev)->vm_next; ++ } ++ ++ if (end != vma->vm_end) { ++ error = split_vma(mm, vma, end, 0); ++ if (error) ++ goto fail; ++ } ++ ++ if (pax_find_mirror_vma(vma)) { ++ error = __do_munmap(mm, start_m, end_m - start_m); ++ if (error) ++ goto fail; ++ } else { ++ vma_m = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); ++ if (!vma_m) { ++ error = -ENOMEM; ++ goto fail; ++ } ++ vma->vm_flags = newflags; ++ pax_mirror_vma(vma_m, vma); ++ } ++ } ++#endif ++ + /* + * First try to merge with previous and/or next vma. + */ +@@ -196,8 +284,14 @@ success: + * held in write mode. 
+ */ + vma->vm_flags = newflags; ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (current->binfmt && current->binfmt->handle_mprotect) ++ current->binfmt->handle_mprotect(vma, newflags); ++#endif ++ + vma->vm_page_prot = pgprot_modify(vma->vm_page_prot, +- vm_get_page_prot(newflags)); ++ vm_get_page_prot(vma->vm_flags)); + + if (vma_wants_writenotify(vma)) { + vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); +@@ -238,6 +332,17 @@ SYSCALL_DEFINE3(mprotect, unsigned long, + end = start + len; + if (end <= start) + return -ENOMEM; ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) { ++ if (end > SEGMEXEC_TASK_SIZE) ++ return -EINVAL; ++ } else ++#endif ++ ++ if (end > TASK_SIZE) ++ return -EINVAL; ++ + if (!arch_validate_prot(prot)) + return -EINVAL; + +@@ -245,7 +350,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, + /* + * Does the application expect PROT_READ to imply PROT_EXEC: + */ +- if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) ++ if ((prot & (PROT_READ | PROT_WRITE)) && (current->personality & READ_IMPLIES_EXEC)) + prot |= PROT_EXEC; + + vm_flags = calc_vm_prot_bits(prot); +@@ -277,6 +382,16 @@ SYSCALL_DEFINE3(mprotect, unsigned long, + if (start > vma->vm_start) + prev = vma; + ++ if (!gr_acl_handle_mprotect(vma->vm_file, prot)) { ++ error = -EACCES; ++ goto out; ++ } ++ ++#ifdef CONFIG_PAX_MPROTECT ++ if (current->binfmt && current->binfmt->handle_mprotect) ++ current->binfmt->handle_mprotect(vma, vm_flags); ++#endif ++ + for (nstart = start ; ; ) { + unsigned long newflags; + +@@ -300,6 +415,9 @@ SYSCALL_DEFINE3(mprotect, unsigned long, + error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); + if (error) + goto out; ++ ++ track_exec_limit(current->mm, nstart, tmp, vm_flags); ++ + nstart = tmp; + + if (nstart < prev->vm_end) +diff -urNp a/mm/mremap.c b/mm/mremap.c +--- a/mm/mremap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/mremap.c 2009-05-24 18:10:25.352002836 -0700 +@@ -113,6 +113,12 @@ static 
void move_ptes(struct vm_area_str + continue; + pte = ptep_clear_flush(vma, old_addr, old_pte); + pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); ++ ++#ifdef CONFIG_ARCH_TRACK_EXEC_LIMIT ++ if (!nx_enabled && (new_vma->vm_flags & (VM_PAGEEXEC | VM_EXEC)) == VM_PAGEEXEC) ++ pte = pte_exprotect(pte); ++#endif ++ + set_pte_at(mm, new_addr, new_pte, pte); + } + +@@ -262,6 +268,7 @@ unsigned long do_mremap(unsigned long ad + struct vm_area_struct *vma; + unsigned long ret = -EINVAL; + unsigned long charged = 0; ++ unsigned long pax_task_size = TASK_SIZE; + + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) + goto out; +@@ -280,6 +287,15 @@ unsigned long do_mremap(unsigned long ad + if (!new_len) + goto out; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (current->mm->pax_flags & MF_PAX_SEGMEXEC) ++ pax_task_size = SEGMEXEC_TASK_SIZE; ++#endif ++ ++ if (new_len > pax_task_size || addr > pax_task_size-new_len || ++ old_len > pax_task_size || addr > pax_task_size-old_len) ++ goto out; ++ + /* new_addr is only valid if MREMAP_FIXED is specified */ + if (flags & MREMAP_FIXED) { + if (new_addr & ~PAGE_MASK) +@@ -287,16 +303,13 @@ unsigned long do_mremap(unsigned long ad + if (!(flags & MREMAP_MAYMOVE)) + goto out; + +- if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) ++ if (new_addr > pax_task_size - new_len) + goto out; + + /* Check if the location we're moving into overlaps the + * old location at all, and fail if it does. 
+ */ +- if ((new_addr <= addr) && (new_addr+new_len) > addr) +- goto out; +- +- if ((addr <= new_addr) && (addr+old_len) > new_addr) ++ if (addr + old_len > new_addr && new_addr + new_len > addr) + goto out; + + ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); +@@ -334,6 +347,14 @@ unsigned long do_mremap(unsigned long ad + ret = -EINVAL; + goto out; + } ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ if (pax_find_mirror_vma(vma)) { ++ ret = -EINVAL; ++ goto out; ++ } ++#endif ++ + /* We can't remap across vm area boundaries */ + if (old_len > vma->vm_end - addr) + goto out; +@@ -367,7 +388,7 @@ unsigned long do_mremap(unsigned long ad + if (old_len == vma->vm_end - addr && + !((flags & MREMAP_FIXED) && (addr != new_addr)) && + (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { +- unsigned long max_addr = TASK_SIZE; ++ unsigned long max_addr = pax_task_size; + if (vma->vm_next) + max_addr = vma->vm_next->vm_start; + /* can we just expand the current mapping? */ +@@ -385,6 +406,7 @@ unsigned long do_mremap(unsigned long ad + addr + new_len); + } + ret = addr; ++ track_exec_limit(vma->vm_mm, vma->vm_start, addr + new_len, vma->vm_flags); + goto out; + } + } +@@ -395,8 +417,8 @@ unsigned long do_mremap(unsigned long ad + */ + ret = -ENOMEM; + if (flags & MREMAP_MAYMOVE) { ++ unsigned long map_flags = 0; + if (!(flags & MREMAP_FIXED)) { +- unsigned long map_flags = 0; + if (vma->vm_flags & VM_MAYSHARE) + map_flags |= MAP_SHARED; + +@@ -411,7 +433,12 @@ unsigned long do_mremap(unsigned long ad + if (ret) + goto out; + } ++ map_flags = vma->vm_flags; + ret = move_vma(vma, addr, old_len, new_len, new_addr); ++ if (!(ret & ~PAGE_MASK)) { ++ track_exec_limit(current->mm, addr, addr + old_len, 0UL); ++ track_exec_limit(current->mm, new_addr, new_addr + new_len, map_flags); ++ } + } + out: + if (ret & ~PAGE_MASK) +diff -urNp a/mm/nommu.c b/mm/nommu.c +--- a/mm/nommu.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/nommu.c 2009-05-24 18:10:25.353335665 -0700 +@@ -459,15 +459,6 @@ 
struct vm_area_struct *find_vma(struct m + } + EXPORT_SYMBOL(find_vma); + +-/* +- * find a VMA +- * - we don't extend stack VMAs under NOMMU conditions +- */ +-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) +-{ +- return find_vma(mm, addr); +-} +- + int expand_stack(struct vm_area_struct *vma, unsigned long address) + { + return -ENOMEM; +diff -urNp a/mm/page_alloc.c b/mm/page_alloc.c +--- a/mm/page_alloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/page_alloc.c 2009-05-24 18:10:25.355210914 -0700 +@@ -525,6 +525,10 @@ static void __free_pages_ok(struct page + int i; + int reserved = 0; + ++#ifdef CONFIG_PAX_MEMORY_SANITIZE ++ unsigned long index = 1UL << order; ++#endif ++ + for (i = 0 ; i < (1 << order) ; ++i) + reserved += free_pages_check(page + i); + if (reserved) +@@ -535,6 +539,12 @@ static void __free_pages_ok(struct page + debug_check_no_obj_freed(page_address(page), + PAGE_SIZE << order); + } ++ ++#ifdef CONFIG_PAX_MEMORY_SANITIZE ++ for (; index; --index) ++ sanitize_highpage(page + index - 1); ++#endif ++ + arch_free_page(page, order); + kernel_map_pages(page, 1 << order, 0); + +@@ -635,8 +645,10 @@ static int prep_new_page(struct page *pa + arch_alloc_page(page, order); + kernel_map_pages(page, 1 << order, 1); + ++#ifndef CONFIG_PAX_MEMORY_SANITIZE + if (gfp_flags & __GFP_ZERO) + prep_zero_page(page, order, gfp_flags); ++#endif + + if (order && (gfp_flags & __GFP_COMP)) + prep_compound_page(page, order); +@@ -997,6 +1009,11 @@ static void free_hot_cold_page(struct pa + debug_check_no_locks_freed(page_address(page), PAGE_SIZE); + debug_check_no_obj_freed(page_address(page), PAGE_SIZE); + } ++ ++#ifdef CONFIG_PAX_MEMORY_SANITIZE ++ sanitize_highpage(page); ++#endif ++ + arch_free_page(page, 0); + kernel_map_pages(page, 1, 0); + +diff -urNp a/mm/rmap.c b/mm/rmap.c +--- a/mm/rmap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/rmap.c 2009-05-24 18:10:25.356959410 -0700 +@@ -103,6 +103,10 @@ int anon_vma_prepare(struct 
vm_area_stru + struct mm_struct *mm = vma->vm_mm; + struct anon_vma *allocated; + ++#ifdef CONFIG_PAX_SEGMEXEC ++ struct vm_area_struct *vma_m; ++#endif ++ + anon_vma = find_mergeable_anon_vma(vma); + allocated = NULL; + if (!anon_vma) { +@@ -116,6 +120,15 @@ int anon_vma_prepare(struct vm_area_stru + /* page_table_lock to protect against threads */ + spin_lock(&mm->page_table_lock); + if (likely(!vma->anon_vma)) { ++ ++#ifdef CONFIG_PAX_SEGMEXEC ++ vma_m = pax_find_mirror_vma(vma); ++ if (vma_m) { ++ vma_m->anon_vma = anon_vma; ++ __anon_vma_link(vma_m); ++ } ++#endif ++ + vma->anon_vma = anon_vma; + list_add_tail(&vma->anon_vma_node, &anon_vma->head); + allocated = NULL; +diff -urNp a/mm/shmem.c b/mm/shmem.c +--- a/mm/shmem.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/shmem.c 2009-05-24 18:10:25.357959608 -0700 +@@ -2486,7 +2486,7 @@ static struct file_system_type tmpfs_fs_ + .get_sb = shmem_get_sb, + .kill_sb = kill_litter_super, + }; +-static struct vfsmount *shm_mnt; ++struct vfsmount *shm_mnt; + + static int __init init_tmpfs(void) + { +diff -urNp a/mm/slab.c b/mm/slab.c +--- a/mm/slab.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/slab.c 2009-05-24 18:10:25.359959515 -0700 +@@ -305,7 +305,7 @@ struct kmem_list3 { + * Need this for bootstrapping a per node allocator. 
+ */ + #define NUM_INIT_LISTS (3 * MAX_NUMNODES) +-struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; ++struct kmem_list3 initkmem_list3[NUM_INIT_LISTS]; + #define CACHE_CACHE 0 + #define SIZE_AC MAX_NUMNODES + #define SIZE_L3 (2 * MAX_NUMNODES) +@@ -654,14 +654,14 @@ struct cache_names { + static struct cache_names __initdata cache_names[] = { + #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, + #include <linux/kmalloc_sizes.h> +- {NULL,} ++ {NULL, NULL} + #undef CACHE + }; + + static struct arraycache_init initarray_cache __initdata = +- { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; ++ { {0, BOOT_CPUCACHE_ENTRIES, 1, 0}, {NULL} }; + static struct arraycache_init initarray_generic = +- { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; ++ { {0, BOOT_CPUCACHE_ENTRIES, 1, 0}, {NULL} }; + + /* internal cache of cache description objs */ + static struct kmem_cache cache_cache = { +@@ -2997,7 +2997,7 @@ retry: + * there must be at least one object available for + * allocation. + */ +- BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); ++ BUG_ON(slabp->inuse >= cachep->num); + + while (slabp->inuse < cachep->num && batchcount--) { + STATS_INC_ALLOCED(cachep); +@@ -4491,10 +4491,12 @@ static const struct file_operations proc + + static int __init slab_proc_init(void) + { ++#if !defined(CONFIG_GRKERNSEC_PROC_ADD) + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + #ifdef CONFIG_DEBUG_SLAB_LEAK + proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); + #endif ++#endif + return 0; + } + module_init(slab_proc_init); +diff -urNp a/mm/slub.c b/mm/slub.c +--- a/mm/slub.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/slub.c 2009-05-24 18:10:25.361959562 -0700 +@@ -2312,7 +2312,7 @@ static int kmem_cache_open(struct kmem_c + if (!calculate_sizes(s, -1)) + goto error; + +- s->refcount = 1; ++ atomic_set(&s->refcount, 1); + #ifdef CONFIG_NUMA + s->remote_node_defrag_ratio = 1000; + #endif +@@ -2449,8 +2449,7 @@ static inline 
int kmem_cache_close(struc + void kmem_cache_destroy(struct kmem_cache *s) + { + down_write(&slub_lock); +- s->refcount--; +- if (!s->refcount) { ++ if (atomic_dec_and_test(&s->refcount)) { + list_del(&s->list); + up_write(&slub_lock); + if (kmem_cache_close(s)) { +@@ -2959,7 +2958,7 @@ void __init kmem_cache_init(void) + */ + create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", + sizeof(struct kmem_cache_node), GFP_KERNEL); +- kmalloc_caches[0].refcount = -1; ++ atomic_set(&kmalloc_caches[0].refcount, -1); + caches++; + + hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); +@@ -3049,7 +3048,7 @@ static int slab_unmergeable(struct kmem_ + /* + * We may have set a slab to be unmergeable during bootstrap. + */ +- if (s->refcount < 0) ++ if (atomic_read(&s->refcount) < 0) + return 1; + + return 0; +@@ -3106,7 +3105,7 @@ struct kmem_cache *kmem_cache_create(con + if (s) { + int cpu; + +- s->refcount++; ++ atomic_inc(&s->refcount); + /* + * Adjust the object sizes so that we clear + * the complete object on kzalloc. 
+@@ -3123,8 +3122,12 @@ struct kmem_cache *kmem_cache_create(con + s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); + up_write(&slub_lock); + +- if (sysfs_slab_alias(s, name)) ++ if (sysfs_slab_alias(s, name)) { ++ down_write(&slub_lock); ++ atomic_dec(&s->refcount); ++ up_write(&slub_lock); + goto err; ++ } + return s; + } + +@@ -3134,8 +3137,13 @@ struct kmem_cache *kmem_cache_create(con + size, align, flags, ctor)) { + list_add(&s->list, &slab_caches); + up_write(&slub_lock); +- if (sysfs_slab_add(s)) ++ if (sysfs_slab_add(s)) { ++ down_write(&slub_lock); ++ list_del(&s->list); ++ up_write(&slub_lock); ++ kfree(s); + goto err; ++ } + return s; + } + kfree(s); +@@ -3830,7 +3838,7 @@ SLAB_ATTR_RO(ctor); + + static ssize_t aliases_show(struct kmem_cache *s, char *buf) + { +- return sprintf(buf, "%d\n", s->refcount - 1); ++ return sprintf(buf, "%d\n", atomic_read(&s->refcount) - 1); + } + SLAB_ATTR_RO(aliases); + +@@ -4508,7 +4516,9 @@ static const struct file_operations proc + + static int __init slab_proc_init(void) + { ++#if !defined(CONFIG_GRKERNSEC_PROC_ADD) + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); ++#endif + return 0; + } + module_init(slab_proc_init); +diff -urNp a/mm/tiny-shmem.c b/mm/tiny-shmem.c +--- a/mm/tiny-shmem.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/tiny-shmem.c 2009-05-24 18:10:25.362960877 -0700 +@@ -26,7 +26,7 @@ static struct file_system_type tmpfs_fs_ + .kill_sb = kill_litter_super, + }; + +-static struct vfsmount *shm_mnt; ++struct vfsmount *shm_mnt; + + static int __init init_tmpfs(void) + { +diff -urNp a/mm/util.c b/mm/util.c +--- a/mm/util.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/util.c 2009-05-24 18:10:25.362960877 -0700 +@@ -167,6 +167,12 @@ EXPORT_SYMBOL(strndup_user); + void arch_pick_mmap_layout(struct mm_struct *mm) + { + mm->mmap_base = TASK_UNMAPPED_BASE; ++ ++#ifdef CONFIG_PAX_RANDMMAP ++ if (mm->pax_flags & MF_PAX_RANDMMAP) ++ mm->mmap_base += mm->delta_mmap; ++#endif ++ 
+ mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } +diff -urNp a/mm/vmalloc.c b/mm/vmalloc.c +--- a/mm/vmalloc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/mm/vmalloc.c 2009-05-24 18:10:25.363960446 -0700 +@@ -90,6 +90,11 @@ static int vmap_pte_range(pmd_t *pmd, un + unsigned long end, pgprot_t prot, struct page **pages, int *nr) + { + pte_t *pte; ++ int ret = -ENOMEM; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ unsigned long cr0; ++#endif + + /* + * nr is a running index into the array which helps higher level +@@ -99,17 +104,33 @@ static int vmap_pte_range(pmd_t *pmd, un + pte = pte_alloc_kernel(pmd, addr); + if (!pte) + return -ENOMEM; ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_open_kernel(cr0); ++#endif ++ + do { + struct page *page = pages[*nr]; + +- if (WARN_ON(!pte_none(*pte))) +- return -EBUSY; +- if (WARN_ON(!page)) +- return -ENOMEM; ++ if (WARN_ON(!pte_none(*pte))) { ++ ret = -EBUSY; ++ goto out; ++ } ++ if (WARN_ON(!page)) { ++ ret = -ENOMEM; ++ goto out; ++ } + set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); + (*nr)++; + } while (pte++, addr += PAGE_SIZE, addr != end); +- return 0; ++ ret = 0; ++out: ++ ++#ifdef CONFIG_PAX_KERNEXEC ++ pax_close_kernel(cr0); ++#endif ++ ++ return ret; + } + + static int vmap_pmd_range(pud_t *pud, unsigned long addr, +@@ -1033,6 +1054,16 @@ static struct vm_struct *__get_vm_area_n + unsigned long align = 1; + + BUG_ON(in_interrupt()); ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ if (flags & VM_KERNEXEC) { ++ if (start != VMALLOC_START || end != VMALLOC_END) ++ return NULL; ++ start = (unsigned long)MODULES_VADDR; ++ end = (unsigned long)MODULES_END; ++ } ++#endif ++ + if (flags & VM_IOREMAP) { + int bit = fls(size); + +@@ -1256,6 +1287,11 @@ void *vmap(struct page **pages, unsigned + if (count > num_physpages) + return NULL; + ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ if (!(pgprot_val(prot) & 
_PAGE_NX)) ++ flags |= VM_KERNEXEC; ++#endif ++ + area = get_vm_area_caller((count << PAGE_SHIFT), flags, + __builtin_return_address(0)); + if (!area) +@@ -1352,6 +1388,13 @@ static void *__vmalloc_node(unsigned lon + if (!size || (size >> PAGE_SHIFT) > num_physpages) + return NULL; + ++#if defined(CONFIG_MODULES) && defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) ++ if (!(pgprot_val(prot) & _PAGE_NX)) ++ area = __get_vm_area_node(size, VM_ALLOC | VM_KERNEXEC, VMALLOC_START, VMALLOC_END, ++ node, gfp_mask, caller); ++ else ++#endif ++ + area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, + node, gfp_mask, caller); + +@@ -1441,7 +1484,7 @@ EXPORT_SYMBOL(vmalloc_node); + + void *vmalloc_exec(unsigned long size) + { +- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); ++ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL_EXEC); + } + + #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) +diff -urNp a/net/atm/atm_misc.c b/net/atm/atm_misc.c +--- a/net/atm/atm_misc.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/atm/atm_misc.c 2009-05-24 18:10:25.364960435 -0700 +@@ -19,7 +19,7 @@ int atm_charge(struct atm_vcc *vcc,int t + if (atomic_read(&sk_atm(vcc)->sk_rmem_alloc) <= sk_atm(vcc)->sk_rcvbuf) + return 1; + atm_return(vcc,truesize); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + return 0; + } + +@@ -41,7 +41,7 @@ struct sk_buff *atm_alloc_charge(struct + } + } + atm_return(vcc,guess); +- atomic_inc(&vcc->stats->rx_drop); ++ atomic_inc_unchecked(&vcc->stats->rx_drop); + return NULL; + } + +diff -urNp a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c +--- a/net/bridge/br_stp_if.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/bridge/br_stp_if.c 2009-05-24 18:10:25.364960435 -0700 +@@ -146,7 +146,7 @@ static void br_stp_stop(struct net_bridg + char *envp[] = { NULL }; + + if (br->stp_enabled == BR_USER_STP) { +- r = call_usermodehelper(BR_STP_PROG, argv, envp, 
1); ++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + printk(KERN_INFO "%s: userspace STP stopped, return code %d\n", + br->dev->name, r); + +diff -urNp a/net/core/flow.c b/net/core/flow.c +--- a/net/core/flow.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/core/flow.c 2009-05-24 18:10:25.366210787 -0700 +@@ -39,7 +39,7 @@ atomic_t flow_cache_genid = ATOMIC_INIT( + + static u32 flow_hash_shift; + #define flow_hash_size (1 << flow_hash_shift) +-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; ++static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables); + + #define flow_table(cpu) (per_cpu(flow_tables, cpu)) + +@@ -52,7 +52,7 @@ struct flow_percpu_info { + u32 hash_rnd; + int count; + }; +-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; ++static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info); + + #define flow_hash_rnd_recalc(cpu) \ + (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) +@@ -69,7 +69,7 @@ struct flow_flush_info { + atomic_t cpuleft; + struct completion completion; + }; +-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; ++static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets); + + #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) + +diff -urNp a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c +--- a/net/dccp/ccids/ccid3.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/dccp/ccids/ccid3.c 2009-05-24 18:10:25.366210787 -0700 +@@ -43,7 +43,7 @@ + static int ccid3_debug; + #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) + #else +-#define ccid3_pr_debug(format, a...) ++#define ccid3_pr_debug(format, a...) do {} while (0) + #endif + + /* +diff -urNp a/net/dccp/dccp.h b/net/dccp/dccp.h +--- a/net/dccp/dccp.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/dccp/dccp.h 2009-05-24 18:10:25.367169153 -0700 +@@ -43,8 +43,8 @@ extern int dccp_debug; + #define dccp_pr_debug(format, a...) 
DCCP_PR_DEBUG(dccp_debug, format, ##a) + #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) + #else +-#define dccp_pr_debug(format, a...) +-#define dccp_pr_debug_cat(format, a...) ++#define dccp_pr_debug(format, a...) do {} while (0) ++#define dccp_pr_debug_cat(format, a...) do {} while (0) + #endif + + extern struct inet_hashinfo dccp_hashinfo; +diff -urNp a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +--- a/net/ipv4/inet_connection_sock.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/inet_connection_sock.c 2009-05-24 18:10:25.367169153 -0700 +@@ -15,6 +15,7 @@ + + #include <linux/module.h> + #include <linux/jhash.h> ++#include <linux/security.h> + + #include <net/inet_connection_sock.h> + #include <net/inet_hashtables.h> +diff -urNp a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +--- a/net/ipv4/inet_hashtables.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/inet_hashtables.c 2009-05-24 18:10:25.368175985 -0700 +@@ -18,11 +18,14 @@ + #include <linux/sched.h> + #include <linux/slab.h> + #include <linux/wait.h> ++#include <linux/security.h> + + #include <net/inet_connection_sock.h> + #include <net/inet_hashtables.h> + #include <net/ip.h> + ++extern void gr_update_task_in_ip_table(struct task_struct *task, const struct inet_sock *inet); ++ + /* + * Allocate and initialize a new local port bind bucket. + * The bindhash mutex for snum's hash chain must be held here. +@@ -487,6 +490,8 @@ ok: + } + spin_unlock(&head->lock); + ++ gr_update_task_in_ip_table(current, inet_sk(sk)); ++ + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); +diff -urNp a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig +--- a/net/ipv4/netfilter/Kconfig 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/netfilter/Kconfig 2009-05-24 18:10:25.368175985 -0700 +@@ -101,6 +101,21 @@ config IP_NF_MATCH_TTL + + To compile it as a module, choose M here. If unsure, say N. 
+ ++config IP_NF_MATCH_STEALTH ++ tristate "stealth match support" ++ depends on IP_NF_IPTABLES ++ help ++ Enabling this option will drop all SYN packets coming to unserved TCP ++ ports as well as all packets coming to unserved UDP ports. If you ++ are using your system to route any type of packets (e.g. via NAT) ++ you should put this module at the end of your ruleset, since it will ++ drop packets that aren't going to ports that are listening on your ++ machine itself; it doesn't take into account that the packet might be ++ destined for someone on your internal network if you're using NAT, for ++ instance. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + # `filter', generic and specific targets + config IP_NF_FILTER + tristate "Packet filtering" +diff -urNp a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile +--- a/net/ipv4/netfilter/Makefile 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/netfilter/Makefile 2009-05-24 18:10:25.369210753 -0700 +@@ -61,6 +61,7 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += + obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o + obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o + obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o ++obj-$(CONFIG_IP_NF_MATCH_STEALTH) += ipt_stealth.o + obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o + obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o + +diff -urNp a/net/ipv4/netfilter/ipt_stealth.c b/net/ipv4/netfilter/ipt_stealth.c +--- a/net/ipv4/netfilter/ipt_stealth.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/net/ipv4/netfilter/ipt_stealth.c 2009-05-24 18:10:25.369210753 -0700 +@@ -0,0 +1,114 @@ ++/* Kernel module to add stealth support. 
++ * ++ * Copyright (C) 2002-2006 Brad Spengler <spender@grsecurity.net> ++ * ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/net.h> ++#include <linux/sched.h> ++#include <linux/inet.h> ++#include <linux/stddef.h> ++ ++#include <net/ip.h> ++#include <net/sock.h> ++#include <net/tcp.h> ++#include <net/udp.h> ++#include <net/route.h> ++#include <net/inet_common.h> ++ ++#include <linux/netfilter_ipv4/ip_tables.h> ++ ++MODULE_LICENSE("GPL"); ++ ++extern struct sock *udp_v4_lookup(struct net *net, u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); ++ ++static bool ++match(const struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct xt_match *match, ++ const void *matchinfo, ++ int offset, ++ unsigned int protoff, ++ bool *hotdrop) ++{ ++ struct iphdr *ip = ip_hdr(skb); ++ struct tcphdr th; ++ struct udphdr uh; ++ struct sock *sk = NULL; ++ ++ if (!ip || offset) return false; ++ ++ switch(ip->protocol) { ++ case IPPROTO_TCP: ++ if (skb_copy_bits(skb, (ip_hdr(skb))->ihl*4, &th, sizeof(th)) < 0) { ++ *hotdrop = true; ++ return false; ++ } ++ if (!(th.syn && !th.ack)) return false; ++ sk = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, ip->daddr, th.dest, inet_iif(skb)); ++ break; ++ case IPPROTO_UDP: ++ if (skb_copy_bits(skb, (ip_hdr(skb))->ihl*4, &uh, sizeof(uh)) < 0) { ++ *hotdrop = true; ++ return false; ++ } ++ sk = udp_v4_lookup(dev_net(skb->dev), ip->saddr, uh.source, ip->daddr, uh.dest, skb->dev->ifindex); ++ break; ++ default: ++ return false; ++ } ++ ++ if(!sk) // lookup found no socket: nothing is listening on this port, match this ++ return true; ++ else { ++ sock_put(sk); ++ return false; ++ } ++} ++ ++/* Called when user tries to insert an entry of this type. 
*/ ++static bool ++checkentry(const char *tablename, ++ const void *nip, ++ const struct xt_match *match, ++ void *matchinfo, ++ unsigned int hook_mask) ++{ ++ const struct ipt_ip *ip = (const struct ipt_ip *)nip; ++ ++ if(((ip->proto == IPPROTO_TCP && !(ip->invflags & IPT_INV_PROTO)) || ++ ((ip->proto == IPPROTO_UDP) && !(ip->invflags & IPT_INV_PROTO))) ++ && (hook_mask & (1 << NF_INET_LOCAL_IN))) ++ return true; ++ ++ printk("stealth: Only works on TCP and UDP for the INPUT chain.\n"); ++ ++ return false; ++} ++ ++ ++static struct xt_match stealth_match __read_mostly = { ++ .name = "stealth", ++ .family = AF_INET, ++ .match = match, ++ .checkentry = checkentry, ++ .destroy = NULL, ++ .me = THIS_MODULE ++}; ++ ++static int __init init(void) ++{ ++ return xt_register_match(&stealth_match); ++} ++ ++static void __exit fini(void) ++{ ++ xt_unregister_match(&stealth_match); ++} ++ ++module_init(init); ++module_exit(fini); +diff -urNp a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +--- a/net/ipv4/tcp_ipv4.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/tcp_ipv4.c 2009-05-24 18:10:25.369983074 -0700 +@@ -55,6 +55,7 @@ + #include <linux/fcntl.h> + #include <linux/module.h> + #include <linux/random.h> ++#include <linux/security.h> + #include <linux/cache.h> + #include <linux/jhash.h> + #include <linux/init.h> +diff -urNp a/net/ipv4/udp.c b/net/ipv4/udp.c +--- a/net/ipv4/udp.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv4/udp.c 2009-05-24 18:10:25.372210718 -0700 +@@ -84,6 +84,7 @@ + #include <linux/types.h> + #include <linux/fcntl.h> + #include <linux/module.h> ++#include <linux/security.h> + #include <linux/socket.h> + #include <linux/sockios.h> + #include <linux/igmp.h> +@@ -104,6 +105,9 @@ + #include <net/xfrm.h> + #include "udp_impl.h" + ++extern int gr_search_udp_recvmsg(struct sock *sk, const struct sk_buff *skb); ++extern int gr_search_udp_sendmsg(struct sock *sk, struct sockaddr_in *addr); ++ + /* + * Snmp MIB for the UDP layer + */ +@@ -284,6 +288,13 @@ 
struct sock *udp4_lib_lookup(struct net + } + EXPORT_SYMBOL_GPL(udp4_lib_lookup); + ++struct sock *udp_v4_lookup(struct net *net, __be32 saddr, __be16 sport, ++ __be32 daddr, __be16 dport, int dif) ++{ ++ return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); ++} ++ ++ + static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, +@@ -574,9 +585,18 @@ int udp_sendmsg(struct kiocb *iocb, stru + dport = usin->sin_port; + if (dport == 0) + return -EINVAL; ++ ++ err = gr_search_udp_sendmsg(sk, usin); ++ if (err) ++ return err; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; ++ ++ err = gr_search_udp_sendmsg(sk, NULL); ++ if (err) ++ return err; ++ + daddr = inet->daddr; + dport = inet->dport; + /* Open fast path for connected socket. +@@ -842,6 +862,10 @@ try_again: + if (!skb) + goto out; + ++ err = gr_search_udp_recvmsg(sk, skb); ++ if (err) ++ goto out_free; ++ + ulen = skb->len - sizeof(struct udphdr); + copied = len; + if (copied > ulen) +diff -urNp a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c +--- a/net/ipv6/exthdrs.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv6/exthdrs.c 2009-05-24 18:10:25.375368304 -0700 +@@ -630,7 +630,7 @@ static struct tlvtype_proc tlvprochopopt + .type = IPV6_TLV_JUMBO, + .func = ipv6_hop_jumbo, + }, +- { -1, } ++ { -1, NULL } + }; + + int ipv6_parse_hopopts(struct sk_buff *skb) +diff -urNp a/net/ipv6/raw.c b/net/ipv6/raw.c +--- a/net/ipv6/raw.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/ipv6/raw.c 2009-05-24 18:10:25.375368304 -0700 +@@ -600,7 +600,7 @@ out: + return err; + } + +-static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, ++static int rawv6_send_hdrinc(struct sock *sk, void *from, unsigned int length, + struct flowi *fl, struct rt6_info *rt, + unsigned int flags) + { +diff -urNp a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c +--- 
a/net/irda/ircomm/ircomm_tty.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/irda/ircomm/ircomm_tty.c 2009-05-24 18:10:25.376209833 -0700 +@@ -371,7 +371,7 @@ static int ircomm_tty_open(struct tty_st + IRDA_DEBUG(2, "%s()\n", __func__ ); + + line = tty->index; +- if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) { ++ if (line >= IRCOMM_TTY_PORTS) { + return -ENODEV; + } + +diff -urNp a/net/sctp/socket.c b/net/sctp/socket.c +--- a/net/sctp/socket.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/sctp/socket.c 2009-05-24 18:10:25.379209659 -0700 +@@ -1434,7 +1434,7 @@ SCTP_STATIC int sctp_sendmsg(struct kioc + struct sctp_sndrcvinfo *sinfo; + struct sctp_initmsg *sinit; + sctp_assoc_t associd = 0; +- sctp_cmsgs_t cmsgs = { NULL }; ++ sctp_cmsgs_t cmsgs = { NULL, NULL }; + int err; + sctp_scope_t scope; + long timeo; +@@ -5616,7 +5616,6 @@ pp_found: + */ + int reuse = sk->sk_reuse; + struct sock *sk2; +- struct hlist_node *node; + + SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); + if (pp->fastreuse && sk->sk_reuse && +diff -urNp a/net/socket.c b/net/socket.c +--- a/net/socket.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/socket.c 2009-05-24 18:10:25.381210963 -0700 +@@ -87,6 +87,7 @@ + #include <linux/audit.h> + #include <linux/wireless.h> + #include <linux/nsproxy.h> ++#include <linux/in.h> + + #include <asm/uaccess.h> + #include <asm/unistd.h> +@@ -97,6 +98,21 @@ + #include <net/sock.h> + #include <linux/netfilter.h> + ++extern void gr_attach_curr_ip(const struct sock *sk); ++extern int gr_handle_sock_all(const int family, const int type, ++ const int protocol); ++extern int gr_handle_sock_server(const struct sockaddr *sck); ++extern int gr_handle_sock_server_other(const struct socket *sck); ++extern int gr_handle_sock_client(const struct sockaddr *sck); ++extern int gr_search_connect(struct socket * sock, ++ struct sockaddr_in * addr); ++extern int gr_search_bind(struct socket * sock, ++ struct sockaddr_in * addr); ++extern int 
gr_search_listen(struct socket * sock); ++extern int gr_search_accept(struct socket * sock); ++extern int gr_search_socket(const int domain, const int type, ++ const int protocol); ++ + static int sock_no_open(struct inode *irrelevant, struct file *dontcare); + static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +@@ -300,7 +316,7 @@ static int sockfs_get_sb(struct file_sys + mnt); + } + +-static struct vfsmount *sock_mnt __read_mostly; ++struct vfsmount *sock_mnt __read_mostly; + + static struct file_system_type sock_fs_type = { + .name = "sockfs", +@@ -1235,6 +1251,16 @@ SYSCALL_DEFINE3(socket, int, family, int + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + ++ if(!gr_search_socket(family, type, protocol)) { ++ retval = -EACCES; ++ goto out; ++ } ++ ++ if (gr_handle_sock_all(family, type, protocol)) { ++ retval = -EACCES; ++ goto out; ++ } ++ + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + goto out; +@@ -1374,6 +1400,14 @@ SYSCALL_DEFINE3(bind, int, fd, struct so + if (sock) { + err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); + if (err >= 0) { ++ if (gr_handle_sock_server((struct sockaddr *)&address)) { ++ err = -EACCES; ++ goto error; ++ } ++ err = gr_search_bind(sock, (struct sockaddr_in *)&address); ++ if (err) ++ goto error; ++ + err = security_socket_bind(sock, + (struct sockaddr *)&address, + addrlen); +@@ -1382,6 +1416,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct so + (struct sockaddr *) + &address, addrlen); + } ++error: + fput_light(sock->file, fput_needed); + } + return err; +@@ -1405,10 +1440,20 @@ SYSCALL_DEFINE2(listen, int, fd, int, ba + if ((unsigned)backlog > somaxconn) + backlog = somaxconn; + ++ if (gr_handle_sock_server_other(sock)) { ++ err = -EPERM; ++ goto error; ++ } ++ ++ err = gr_search_listen(sock); ++ if (err) ++ goto error; ++ + err = security_socket_listen(sock, 
backlog); + if (!err) + err = sock->ops->listen(sock, backlog); + ++error: + fput_light(sock->file, fput_needed); + } + return err; +@@ -1451,6 +1496,18 @@ SYSCALL_DEFINE4(accept4, int, fd, struct + newsock->type = sock->type; + newsock->ops = sock->ops; + ++ if (gr_handle_sock_server_other(sock)) { ++ err = -EPERM; ++ sock_release(newsock); ++ goto out_put; ++ } ++ ++ err = gr_search_accept(sock); ++ if (err) { ++ sock_release(newsock); ++ goto out_put; ++ } ++ + /* + * We don't need try_module_get here, as the listening socket (sock) + * has the protocol module (sock->ops->owner) held. +@@ -1494,6 +1551,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct + err = newfd; + + security_socket_post_accept(sock, newsock); ++ gr_attach_curr_ip(newsock->sk); + + out_put: + fput_light(sock->file, fput_needed); +@@ -1532,6 +1590,7 @@ SYSCALL_DEFINE3(connect, int, fd, struct + int, addrlen) + { + struct socket *sock; ++ struct sockaddr *sck; + struct sockaddr_storage address; + int err, fput_needed; + +@@ -1542,6 +1601,17 @@ SYSCALL_DEFINE3(connect, int, fd, struct + if (err < 0) + goto out_put; + ++ sck = (struct sockaddr *)&address; ++ ++ if (gr_handle_sock_client(sck)) { ++ err = -EACCES; ++ goto out_put; ++ } ++ ++ err = gr_search_connect(sock, (struct sockaddr_in *)sck); ++ if (err) ++ goto out_put; ++ + err = + security_socket_connect(sock, (struct sockaddr *)&address, addrlen); + if (err) +diff -urNp a/net/unix/af_unix.c b/net/unix/af_unix.c +--- a/net/unix/af_unix.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/net/unix/af_unix.c 2009-05-24 18:10:25.383209683 -0700 +@@ -727,6 +727,12 @@ static struct sock *unix_find_other(stru + err = -ECONNREFUSED; + if (!S_ISSOCK(inode->i_mode)) + goto put_fail; ++ ++ if (!gr_acl_handle_unix(path.dentry, path.mnt)) { ++ err = -EACCES; ++ goto put_fail; ++ } ++ + u = unix_find_socket_byinode(net, inode); + if (!u) + goto put_fail; +@@ -747,6 +753,13 @@ static struct sock *unix_find_other(stru + if (u) { + struct dentry *dentry; + dentry = 
unix_sk(u)->dentry; ++ ++ if (!gr_handle_chroot_unix(u->sk_peercred.pid)) { ++ err = -EPERM; ++ sock_put(u); ++ goto fail; ++ } ++ + if (dentry) + touch_atime(unix_sk(u)->mnt, dentry); + } else +@@ -829,10 +842,20 @@ static int unix_bind(struct socket *sock + err = mnt_want_write(nd.path.mnt); + if (err) + goto out_mknod_dput; ++ ++ if (!gr_acl_handle_mknod(dentry, nd.path.dentry, nd.path.mnt, mode)) { ++ err = -EACCES; ++ mnt_drop_write(nd.path.mnt); ++ goto out_mknod_dput; ++ } ++ + err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); + mnt_drop_write(nd.path.mnt); + if (err) + goto out_mknod_dput; ++ ++ gr_handle_create(dentry, nd.path.mnt); ++ + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + dput(nd.path.dentry); + nd.path.dentry = dentry; +@@ -850,6 +873,10 @@ static int unix_bind(struct socket *sock + goto out_unlock; + } + ++#ifdef CONFIG_GRKERNSEC_CHROOT_UNIX ++ sk->sk_peercred.pid = current->pid; ++#endif ++ + list = &unix_socket_table[addr->hash]; + } else { + list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; +diff -urNp a/scripts/mod/modpost.c b/scripts/mod/modpost.c +--- a/scripts/mod/modpost.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/scripts/mod/modpost.c 2009-05-24 18:10:25.384209532 -0700 +@@ -830,6 +830,7 @@ enum mismatch { + INIT_TO_EXIT, + EXIT_TO_INIT, + EXPORT_TO_INIT_EXIT, ++ DATA_TO_TEXT + }; + + struct sectioncheck { +@@ -891,6 +892,12 @@ const struct sectioncheck sectioncheck[] + .fromsec = { "__ksymtab*", NULL }, + .tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL }, + .mismatch = EXPORT_TO_INIT_EXIT ++}, ++/* Do not reference code from writable data */ ++{ ++ .fromsec = { DATA_SECTIONS, NULL }, ++ .tosec = { TEXT_SECTIONS, NULL }, ++ .mismatch = DATA_TO_TEXT + } + }; + +@@ -1249,6 +1256,14 @@ static void report_sec_mismatch(const ch + "Fix this by removing the %sannotation of %s " + "or drop the export.\n", + tosym, sec2annotation(tosec), sec2annotation(tosec), tosym); ++ case DATA_TO_TEXT: ++/* ++ 
fprintf(stderr, ++ "The variable %s references\n" ++ "the %s %s%s%s\n" ++ fromsym, to, sec2annotation(tosec), tosym, to_p); ++*/ ++ break; + case NO_MISMATCH: + /* To get warnings on missing members */ + break; +diff -urNp a/scripts/pnmtologo.c b/scripts/pnmtologo.c +--- a/scripts/pnmtologo.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/scripts/pnmtologo.c 2009-05-24 18:10:25.385209660 -0700 +@@ -237,14 +237,14 @@ static void write_header(void) + fprintf(out, " * Linux logo %s\n", logoname); + fputs(" */\n\n", out); + fputs("#include <linux/linux_logo.h>\n\n", out); +- fprintf(out, "static unsigned char %s_data[] __initdata = {\n", ++ fprintf(out, "static unsigned char %s_data[] = {\n", + logoname); + } + + static void write_footer(void) + { + fputs("\n};\n\n", out); +- fprintf(out, "struct linux_logo %s __initdata = {\n", logoname); ++ fprintf(out, "struct linux_logo %s = {\n", logoname); + fprintf(out, " .type\t= %s,\n", logo_types[logo_type]); + fprintf(out, " .width\t= %d,\n", logo_width); + fprintf(out, " .height\t= %d,\n", logo_height); +@@ -374,7 +374,7 @@ static void write_logo_clut224(void) + fputs("\n};\n\n", out); + + /* write logo clut */ +- fprintf(out, "static unsigned char %s_clut[] __initdata = {\n", ++ fprintf(out, "static unsigned char %s_clut[] = {\n", + logoname); + write_hex_cnt = 0; + for (i = 0; i < logo_clutsize; i++) { +diff -urNp a/security/Kconfig b/security/Kconfig +--- a/security/Kconfig 2009-05-02 11:54:43.000000000 -0700 ++++ b/security/Kconfig 2009-05-24 18:10:25.386209788 -0700 +@@ -4,6 +4,447 @@ + + menu "Security options" + ++source grsecurity/Kconfig ++ ++menu "PaX" ++ ++config PAX ++ bool "Enable various PaX features" ++ depends on GRKERNSEC && (ALPHA || ARM || AVR32 || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86) ++ help ++ This allows you to enable various PaX features. 
PaX adds ++ intrusion prevention mechanisms to the kernel that reduce ++ the risks posed by exploitable memory corruption bugs. ++ ++menu "PaX Control" ++ depends on PAX ++ ++config PAX_SOFTMODE ++ bool 'Support soft mode' ++ help ++ Enabling this option will allow you to run PaX in soft mode, that ++ is, PaX features will not be enforced by default, only on executables ++ marked explicitly. You must also enable PT_PAX_FLAGS support as it ++ is the only way to mark executables for soft mode use. ++ ++ Soft mode can be activated by using the "pax_softmode=1" kernel command ++ line option on boot. Furthermore you can control various PaX features ++ at runtime via the entries in /proc/sys/kernel/pax. ++ ++config PAX_EI_PAX ++ bool 'Use legacy ELF header marking' ++ help ++ Enabling this option will allow you to control PaX features on ++ a per executable basis via the 'chpax' utility available at ++ http://pax.grsecurity.net/. The control flags will be read from ++ an otherwise reserved part of the ELF header. This marking has ++ numerous drawbacks (no support for soft-mode, toolchain does not ++ know about the non-standard use of the ELF header) therefore it ++ has been deprecated in favour of PT_PAX_FLAGS support. ++ ++ If you have applications not marked by the PT_PAX_FLAGS ELF ++ program header then you MUST enable this option otherwise they ++ will not get any protection. ++ ++ Note that if you enable PT_PAX_FLAGS marking support as well, ++ the PT_PAX_FLAG marks will override the legacy EI_PAX marks. ++ ++config PAX_PT_PAX_FLAGS ++ bool 'Use ELF program header marking' ++ help ++ Enabling this option will allow you to control PaX features on ++ a per executable basis via the 'paxctl' utility available at ++ http://pax.grsecurity.net/. The control flags will be read from ++ a PaX specific ELF program header (PT_PAX_FLAGS). 
This marking ++ has the benefits of supporting both soft mode and being fully ++ integrated into the toolchain (the binutils patch is available ++ from http://pax.grsecurity.net). ++ ++ If you have applications not marked by the PT_PAX_FLAGS ELF ++ program header then you MUST enable the EI_PAX marking support ++ otherwise they will not get any protection. ++ ++ Note that if you enable the legacy EI_PAX marking support as well, ++ the EI_PAX marks will be overridden by the PT_PAX_FLAGS marks. ++ ++choice ++ prompt 'MAC system integration' ++ default PAX_HAVE_ACL_FLAGS ++ help ++ Mandatory Access Control systems have the option of controlling ++ PaX flags on a per executable basis, choose the method supported ++ by your particular system. ++ ++ - "none": if your MAC system does not interact with PaX, ++ - "direct": if your MAC system defines pax_set_initial_flags() itself, ++ - "hook": if your MAC system uses the pax_set_initial_flags_func callback. ++ ++ NOTE: this option is for developers/integrators only. ++ ++ config PAX_NO_ACL_FLAGS ++ bool 'none' ++ ++ config PAX_HAVE_ACL_FLAGS ++ bool 'direct' ++ ++ config PAX_HOOK_ACL_FLAGS ++ bool 'hook' ++endchoice ++ ++endmenu ++ ++menu "Non-executable pages" ++ depends on PAX ++ ++config PAX_NOEXEC ++ bool "Enforce non-executable pages" ++ depends on (PAX_EI_PAX || PAX_PT_PAX_FLAGS || PAX_HAVE_ACL_FLAGS || PAX_HOOK_ACL_FLAGS) && (ALPHA || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86) ++ help ++ By design some architectures do not allow for protecting memory ++ pages against execution or even if they do, Linux does not make ++ use of this feature. In practice this means that if a page is ++ readable (such as the stack or heap) it is also executable. 
++ ++ There is a well known exploit technique that makes use of this ++ fact and a common programming mistake where an attacker can ++ introduce code of his choice somewhere in the attacked program's ++ memory (typically the stack or the heap) and then execute it. ++ ++ If the attacked program was running with different (typically ++ higher) privileges than that of the attacker, then he can elevate ++ his own privilege level (e.g. get a root shell, write to files for ++ which he does not have write access to, etc). ++ ++ Enabling this option will let you choose from various features ++ that prevent the injection and execution of 'foreign' code in ++ a program. ++ ++ This will also break programs that rely on the old behaviour and ++ expect that dynamically allocated memory via the malloc() family ++ of functions is executable (which it is not). Notable examples ++ are the XFree86 4.x server, the java runtime and wine. ++ ++config PAX_PAGEEXEC ++ bool "Paging based non-executable pages" ++ depends on !COMPAT_VDSO && PAX_NOEXEC && (!X86_32 || M586 || M586TSC || M586MMX || M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC || MK7 || MK8 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MVIAC3_2 || MVIAC7) ++ help ++ This implementation is based on the paging feature of the CPU. ++ On i386 without hardware non-executable bit support there is a ++ variable but usually low performance impact, however on Intel's ++ P4 core based CPUs it is very high so you should not enable this ++ for kernels meant to be used on such CPUs. ++ ++ On alpha, avr32, ia64, parisc, sparc, sparc64, x86_64 and i386 ++ with hardware non-executable bit support there is no performance ++ impact, on ppc the impact is negligible. 
++ ++ Note that several architectures require various emulations due to ++ badly designed userland ABIs, this will cause a performance impact ++ but will disappear as soon as userland is fixed (e.g., ppc users ++ can make use of the secure-plt feature found in binutils). ++ ++config PAX_SEGMEXEC ++ bool "Segmentation based non-executable pages" ++ depends on !COMPAT_VDSO && PAX_NOEXEC && X86_32 ++ help ++ This implementation is based on the segmentation feature of the ++ CPU and has a very small performance impact, however applications ++ will be limited to a 1.5 GB address space instead of the normal ++ 3 GB. ++ ++config PAX_EMUTRAMP ++ bool "Emulate trampolines" if (PAX_PAGEEXEC || PAX_SEGMEXEC) && (PARISC || PPC32 || X86) ++ default y if PARISC || PPC32 ++ help ++ There are some programs and libraries that for one reason or ++ another attempt to execute special small code snippets from ++ non-executable memory pages. Most notable examples are the ++ signal handler return code generated by the kernel itself and ++ the GCC trampolines. ++ ++ If you enabled CONFIG_PAX_PAGEEXEC or CONFIG_PAX_SEGMEXEC then ++ such programs will no longer work under your kernel. ++ ++ As a remedy you can say Y here and use the 'chpax' or 'paxctl' ++ utilities to enable trampoline emulation for the affected programs ++ yet still have the protection provided by the non-executable pages. ++ ++ On parisc and ppc you MUST enable this option and EMUSIGRT as ++ well, otherwise your system will not even boot. ++ ++ Alternatively you can say N here and use the 'chpax' or 'paxctl' ++ utilities to disable CONFIG_PAX_PAGEEXEC and CONFIG_PAX_SEGMEXEC ++ for the affected files. ++ ++ NOTE: enabling this feature *may* open up a loophole in the ++ protection provided by non-executable pages that an attacker ++ could abuse. Therefore the best solution is to not have any ++ files on your system that would require this option. 
This can ++ be achieved by not using libc5 (which relies on the kernel ++ signal handler return code) and not using or rewriting programs ++ that make use of the nested function implementation of GCC. ++ Skilled users can just fix GCC itself so that it implements ++ nested function calls in a way that does not interfere with PaX. ++ ++config PAX_EMUSIGRT ++ bool "Automatically emulate sigreturn trampolines" ++ depends on PAX_EMUTRAMP && (PARISC || PPC32) ++ default y ++ help ++ Enabling this option will have the kernel automatically detect ++ and emulate signal return trampolines executing on the stack ++ that would otherwise lead to task termination. ++ ++ This solution is intended as a temporary one for users with ++ legacy versions of libc (libc5, glibc 2.0, uClibc before 0.9.17, ++ Modula-3 runtime, etc) or executables linked to such, basically ++ everything that does not specify its own SA_RESTORER function in ++ normal executable memory like glibc 2.1+ does. ++ ++ On parisc and ppc you MUST enable this option, otherwise your ++ system will not even boot. ++ ++ NOTE: this feature cannot be disabled on a per executable basis ++ and since it *does* open up a loophole in the protection provided ++ by non-executable pages, the best solution is to not have any ++ files on your system that would require this option. ++ ++config PAX_MPROTECT ++ bool "Restrict mprotect()" ++ depends on (PAX_PAGEEXEC || PAX_SEGMEXEC) && !PPC64 ++ help ++ Enabling this option will prevent programs from ++ - changing the executable status of memory pages that were ++ not originally created as executable, ++ - making read-only executable pages writable again, ++ - creating executable pages from anonymous memory. ++ ++ You should say Y here to complete the protection provided by ++ the enforcement of non-executable pages. ++ ++ NOTE: you can use the 'chpax' or 'paxctl' utilities to control ++ this feature on a per file basis. 
++ ++config PAX_NOELFRELOCS ++ bool "Disallow ELF text relocations" ++ depends on PAX_MPROTECT && !PAX_ETEXECRELOCS && (IA64 || X86) ++ help ++ Non-executable pages and mprotect() restrictions are effective ++ in preventing the introduction of new executable code into an ++ attacked task's address space. There remain only two avenues ++ for this kind of attack: if the attacker can execute already ++ existing code in the attacked task then he can either have it ++ create and mmap() a file containing his code or have it mmap() ++ an already existing ELF library that does not have position ++ independent code in it and use mprotect() on it to make it ++ writable and copy his code there. While protecting against ++ the former approach is beyond PaX, the latter can be prevented ++ by having only PIC ELF libraries on one's system (which do not ++ need to relocate their code). If you are sure this is your case, ++ then enable this option otherwise be careful as you may not even ++ be able to boot or log on your system (for example, some PAM ++ modules are erroneously compiled as non-PIC by default). ++ ++ NOTE: if you are using dynamic ELF executables (as suggested ++ when using ASLR) then you must have made sure that you linked ++ your files using the PIC version of crt1 (the et_dyn.tar.gz package ++ referenced there has already been updated to support this). ++ ++config PAX_ETEXECRELOCS ++ bool "Allow ELF ET_EXEC text relocations" ++ depends on PAX_MPROTECT && (ALPHA || IA64 || PARISC) ++ default y ++ help ++ On some architectures there are incorrectly created applications ++ that require text relocations and would not work without enabling ++ this option. If you are an alpha, ia64 or parisc user, you should ++ enable this option and disable it once you have made sure that ++ none of your applications need it.
++ ++config PAX_EMUPLT ++ bool "Automatically emulate ELF PLT" ++ depends on PAX_MPROTECT && (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) ++ default y ++ help ++ Enabling this option will have the kernel automatically detect ++ and emulate the Procedure Linkage Table entries in ELF files. ++ On some architectures such entries are in writable memory, and ++ become non-executable leading to task termination. Therefore ++ it is mandatory that you enable this option on alpha, parisc, ++ ppc (if secure-plt is not used throughout in userland), sparc ++ and sparc64, otherwise your system would not even boot. ++ ++ NOTE: this feature *does* open up a loophole in the protection ++ provided by the non-executable pages, therefore the proper ++ solution is to modify the toolchain to produce a PLT that does ++ not need to be writable. ++ ++config PAX_DLRESOLVE ++ bool ++ depends on PAX_EMUPLT && (SPARC32 || SPARC64) ++ default y ++ ++config PAX_SYSCALL ++ bool ++ depends on PAX_PAGEEXEC && PPC32 ++ default y ++ ++config PAX_KERNEXEC ++ bool "Enforce non-executable kernel pages" ++ depends on PAX_NOEXEC && X86 && !EFI && !COMPAT_VDSO && (!X86_32 || X86_WP_WORKS_OK) && !PARAVIRT ++ help ++ This is the kernel land equivalent of PAGEEXEC and MPROTECT, ++ that is, enabling this option will make it harder to inject ++ and execute 'foreign' code in kernel memory itself. ++ ++endmenu ++ ++menu "Address Space Layout Randomization" ++ depends on PAX ++ ++config PAX_ASLR ++ bool "Address Space Layout Randomization" ++ depends on PAX_EI_PAX || PAX_PT_PAX_FLAGS || PAX_HAVE_ACL_FLAGS || PAX_HOOK_ACL_FLAGS ++ help ++ Many if not most exploit techniques rely on the knowledge of ++ certain addresses in the attacked program. The following options ++ will allow the kernel to apply a certain amount of randomization ++ to specific parts of the program thereby forcing an attacker to ++ guess them in most cases. 
Any failed guess will most likely crash ++ the attacked program which allows the kernel to detect such attempts ++ and react to them. PaX itself provides no reaction mechanisms, ++ instead it is strongly encouraged that you make use of Nergal's ++ segvguard (ftp://ftp.pl.openwall.com/misc/segvguard/) or grsecurity's ++ (http://www.grsecurity.net/) built-in crash detection features or ++ develop one yourself. ++ ++ By saying Y here you can choose to randomize the following areas: ++ - top of the task's kernel stack ++ - top of the task's userland stack ++ - base address for mmap() requests that do not specify one ++ (this includes all libraries) ++ - base address of the main executable ++ ++ It is strongly recommended to say Y here as address space layout ++ randomization has negligible impact on performance yet it provides ++ a very effective protection. ++ ++ NOTE: you can use the 'chpax' or 'paxctl' utilities to control ++ this feature on a per file basis. ++ ++config PAX_RANDKSTACK ++ bool "Randomize kernel stack base" ++ depends on PAX_ASLR && X86_TSC && X86_32 ++ help ++ By saying Y here the kernel will randomize every task's kernel ++ stack on every system call. This will not only force an attacker ++ to guess it but also prevent him from making use of possible ++ leaked information about it. ++ ++ Since the kernel stack is a rather scarce resource, randomization ++ may cause unexpected stack overflows, therefore you should very ++ carefully test your system. Note that once enabled in the kernel ++ configuration, this feature cannot be disabled on a per file basis. ++ ++config PAX_RANDUSTACK ++ bool "Randomize user stack base" ++ depends on PAX_ASLR ++ help ++ By saying Y here the kernel will randomize every task's userland ++ stack.
The randomization is done in two steps where the second ++ one may apply a big amount of shift to the top of the stack and ++ cause problems for programs that want to use lots of memory (more ++ than 2.5 GB if SEGMEXEC is not active, or 1.25 GB when it is). ++ For this reason the second step can be controlled by 'chpax' or ++ 'paxctl' on a per file basis. ++ ++config PAX_RANDMMAP ++ bool "Randomize mmap() base" ++ depends on PAX_ASLR ++ help ++ By saying Y here the kernel will use a randomized base address for ++ mmap() requests that do not specify one themselves. As a result ++ all dynamically loaded libraries will appear at random addresses ++ and therefore be harder to exploit by a technique where an attacker ++ attempts to execute library code for his purposes (e.g. spawn a ++ shell from an exploited program that is running at an elevated ++ privilege level). ++ ++ Furthermore, if a program is relinked as a dynamic ELF file, its ++ base address will be randomized as well, completing the full ++ randomization of the address space layout. Attacking such programs ++ becomes a guessing game. You can find an example of doing this at ++ http://pax.grsecurity.net/et_dyn.tar.gz and practical samples at ++ http://www.grsecurity.net/grsec-gcc-specs.tar.gz . ++ ++ NOTE: you can use the 'chpax' or 'paxctl' utilities to control this ++ feature on a per file basis. ++ ++endmenu ++ ++menu "Miscellaneous hardening features" ++ ++config PAX_MEMORY_SANITIZE ++ bool "Sanitize all freed memory" ++ help ++ By saying Y here the kernel will erase memory pages as soon as they ++ are freed. This in turn reduces the lifetime of data stored in the ++ pages, making it less likely that sensitive information such as ++ passwords, cryptographic secrets, etc stay in memory for too long.
++ ++ This is especially useful for programs whose runtime is short, long ++ lived processes and the kernel itself benefit from this as long as ++ they operate on whole memory pages and ensure timely freeing of pages ++ that may hold sensitive information. ++ ++ The tradeoff is performance impact, on a single CPU system kernel ++ compilation sees a 3% slowdown, other systems and workloads may vary ++ and you are advised to test this feature on your expected workload ++ before deploying it. ++ ++ Note that this feature does not protect data stored in live pages, ++ e.g., process memory swapped to disk may stay there for a long time. ++ ++config PAX_MEMORY_UDEREF ++ bool "Prevent invalid userland pointer dereference" ++ depends on X86_32 && !COMPAT_VDSO && !UML_X86 ++ help ++ By saying Y here the kernel will be prevented from dereferencing ++ userland pointers in contexts where the kernel expects only kernel ++ pointers. This is both a useful runtime debugging feature and a ++ security measure that prevents exploiting a class of kernel bugs. ++ ++ The tradeoff is that some virtualization solutions may experience ++ a huge slowdown and therefore you should not enable this feature ++ for kernels meant to run in such environments. Whether a given VM ++ solution is affected or not is best determined by simply trying it ++ out, the performance impact will be obvious right on boot as this ++ mechanism engages from very early on. A good rule of thumb is that ++ VMs running on CPUs without hardware virtualization support (i.e., ++ the majority of IA-32 CPUs) will likely experience the slowdown. ++ ++config PAX_REFCOUNT ++ bool "Prevent various kernel object reference counter overflows" ++ depends on X86 ++ help ++ By saying Y here the kernel will detect and prevent overflowing ++ various (but not all) kinds of object reference counters. Such ++ overflows can normally occur due to bugs only and are often, if ++ not always, exploitable. 
++ ++ The tradeoff is that data structures protected by an overflowed ++ refcount will never be freed and therefore will leak memory. Note ++ that this leak also happens even without this protection but in ++ that case the overflow can eventually trigger the freeing of the ++ data structure while it is still being used elsewhere, resulting ++ in the exploitable situation that this feature prevents. ++ ++ Since this has a negligible performance impact, you should enable ++ this feature. ++endmenu ++ ++endmenu ++ + config KEYS + bool "Enable access key retention support" + help +diff -urNp a/security/commoncap.c b/security/commoncap.c +--- a/security/commoncap.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/security/commoncap.c 2009-05-24 18:10:25.387209637 -0700 +@@ -27,9 +27,11 @@ + #include <linux/prctl.h> + #include <linux/securebits.h> + ++extern kernel_cap_t gr_cap_rtnetlink(struct sock *sk); ++ + int cap_netlink_send(struct sock *sk, struct sk_buff *skb) + { +- NETLINK_CB(skb).eff_cap = current->cap_effective; ++ NETLINK_CB(skb).eff_cap = gr_cap_rtnetlink(sk); + return 0; + } + +@@ -56,6 +58,14 @@ int cap_capable (struct task_struct *tsk + return -EPERM; + } + ++int cap_capable_nolog (struct task_struct *tsk, int cap) ++{ ++ /* tsk = current for all callers */ ++ if (cap_raised(tsk->cap_effective, cap) && gr_is_capable_nolog(cap)) ++ return 0; ++ return -EPERM; ++} ++ + int cap_settime(struct timespec *ts, struct timezone *tz) + { + if (!capable(CAP_SYS_TIME)) +@@ -379,8 +389,11 @@ void cap_bprm_apply_creds (struct linux_ + } + } + +- current->suid = current->euid = current->fsuid = bprm->e_uid; +- current->sgid = current->egid = current->fsgid = bprm->e_gid; ++ if (!gr_check_user_change(-1, bprm->e_uid, bprm->e_uid)) ++ current->suid = current->euid = current->fsuid = bprm->e_uid; ++ ++ if (!gr_check_group_change(-1, bprm->e_gid, bprm->e_gid)) ++ current->sgid = current->egid = current->fsgid = bprm->e_gid; + + /* For init, we want to retain the capabilities set 
+ * in the init_task struct. Thus we skip the usual +@@ -393,6 +406,8 @@ void cap_bprm_apply_creds (struct linux_ + cap_clear(current->cap_effective); + } + ++ gr_handle_chroot_caps(current); ++ + /* AUD: Audit candidate if current->cap_effective is set */ + + current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); +@@ -705,7 +720,7 @@ int cap_vm_enough_memory(struct mm_struc + { + int cap_sys_admin = 0; + +- if (cap_capable(current, CAP_SYS_ADMIN) == 0) ++ if (cap_capable_nolog(current, CAP_SYS_ADMIN) == 0) + cap_sys_admin = 1; + return __vm_enough_memory(mm, pages, cap_sys_admin); + } +diff -urNp a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c +--- a/sound/core/oss/pcm_oss.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/core/oss/pcm_oss.c 2009-05-24 18:10:25.388087761 -0700 +@@ -2929,8 +2929,8 @@ static void snd_pcm_oss_proc_done(struct + } + } + #else /* !CONFIG_SND_VERBOSE_PROCFS */ +-#define snd_pcm_oss_proc_init(pcm) +-#define snd_pcm_oss_proc_done(pcm) ++#define snd_pcm_oss_proc_init(pcm) do {} while (0) ++#define snd_pcm_oss_proc_done(pcm) do {} while (0) + #endif /* CONFIG_SND_VERBOSE_PROCFS */ + + /* +diff -urNp a/sound/core/seq/seq_lock.h b/sound/core/seq/seq_lock.h +--- a/sound/core/seq/seq_lock.h 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/core/seq/seq_lock.h 2009-05-24 18:10:25.389209823 -0700 +@@ -23,10 +23,10 @@ void snd_use_lock_sync_helper(snd_use_lo + #else /* SMP || CONFIG_SND_DEBUG */ + + typedef spinlock_t snd_use_lock_t; /* dummy */ +-#define snd_use_lock_init(lockp) /**/ +-#define snd_use_lock_use(lockp) /**/ +-#define snd_use_lock_free(lockp) /**/ +-#define snd_use_lock_sync(lockp) /**/ ++#define snd_use_lock_init(lockp) do {} while (0) ++#define snd_use_lock_use(lockp) do {} while (0) ++#define snd_use_lock_free(lockp) do {} while (0) ++#define snd_use_lock_sync(lockp) do {} while (0) + + #endif /* SMP || CONFIG_SND_DEBUG */ + +diff -urNp a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c +--- 
a/sound/pci/ac97/ac97_patch.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/pci/ac97/ac97_patch.c 2009-05-24 18:10:25.390086969 -0700 +@@ -1498,7 +1498,7 @@ static const struct snd_ac97_res_table a + { AC97_VIDEO, 0x9f1f }, + { AC97_AUX, 0x9f1f }, + { AC97_PCM, 0x9f1f }, +- { } /* terminator */ ++ { 0, 0 } /* terminator */ + }; + + static int patch_ad1819(struct snd_ac97 * ac97) +@@ -3870,7 +3870,7 @@ static struct snd_ac97_res_table lm4550_ + { AC97_AUX, 0x1f1f }, + { AC97_PCM, 0x1f1f }, + { AC97_REC_GAIN, 0x0f0f }, +- { } /* terminator */ ++ { 0, 0 } /* terminator */ + }; + + static int patch_lm4550(struct snd_ac97 *ac97) +diff -urNp a/sound/pci/ens1370.c b/sound/pci/ens1370.c +--- a/sound/pci/ens1370.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/pci/ens1370.c 2009-05-24 18:10:25.392210766 -0700 +@@ -452,7 +452,7 @@ static struct pci_device_id snd_audiopci + { 0x1274, 0x5880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, /* ES1373 - CT5880 */ + { 0x1102, 0x8938, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, /* Ectiva EV1938 */ + #endif +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, snd_audiopci_ids); +diff -urNp a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c +--- a/sound/pci/intel8x0.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/pci/intel8x0.c 2009-05-24 18:10:25.394210604 -0700 +@@ -443,7 +443,7 @@ static struct pci_device_id snd_intel8x0 + { 0x1022, 0x746d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL }, /* AMD8111 */ + { 0x1022, 0x7445, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL }, /* AMD768 */ + { 0x10b9, 0x5455, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_ALI }, /* Ali5455 */ +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, snd_intel8x0_ids); +@@ -2088,7 +2088,7 @@ static struct ac97_quirk ac97_quirks[] _ + .type = AC97_TUNE_HP_ONLY + }, + #endif +- { } /* terminator */ ++ { 0, 0, 0, 0, NULL, 0 } /* terminator */ + }; + + static int __devinit snd_intel8x0_mixer(struct intel8x0 *chip, int ac97_clock, +diff -urNp 
a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c +--- a/sound/pci/intel8x0m.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/sound/pci/intel8x0m.c 2009-05-24 18:10:25.395209894 -0700 +@@ -239,7 +239,7 @@ static struct pci_device_id snd_intel8x0 + { 0x1022, 0x746d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_INTEL }, /* AMD8111 */ + { 0x10b9, 0x5455, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DEVICE_ALI }, /* Ali5455 */ + #endif +- { 0, } ++ { 0, 0, 0, 0, 0, 0, 0 } + }; + + MODULE_DEVICE_TABLE(pci, snd_intel8x0m_ids); +@@ -1258,7 +1258,7 @@ static struct shortname_table { + { 0x5455, "ALi M5455" }, + { 0x746d, "AMD AMD8111" }, + #endif +- { 0 }, ++ { 0, NULL }, + }; + + static int __devinit snd_intel8x0m_probe(struct pci_dev *pci, +diff -urNp a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +--- a/virt/kvm/kvm_main.c 2009-05-02 11:54:43.000000000 -0700 ++++ b/virt/kvm/kvm_main.c 2009-05-24 18:10:25.396210650 -0700 +@@ -1788,6 +1788,9 @@ static struct miscdevice kvm_dev = { + KVM_MINOR, + "kvm", + &kvm_chardev_ops, ++ {NULL, NULL}, ++ NULL, ++ NULL + }; + + static void hardware_enable(void *junk) +@@ -2019,7 +2022,7 @@ static void kvm_sched_out(struct preempt + kvm_arch_vcpu_put(vcpu); + } + +-int kvm_init(void *opaque, unsigned int vcpu_size, ++int kvm_init(const void *opaque, unsigned int vcpu_size, + struct module *module) + { + int r; diff --git a/kernel/2.6.28/4421_grsec-remove-localversion-grsec.patch b/kernel/2.6.28/4421_grsec-remove-localversion-grsec.patch new file mode 100644 index 0000000..31cf878 --- /dev/null +++ b/kernel/2.6.28/4421_grsec-remove-localversion-grsec.patch @@ -0,0 +1,9 @@ +From: Kerin Millar <kerframil@gmail.com> + +Remove grsecurity's localversion-grsec file as it is inconsistent with +Gentoo's kernel practices and naming scheme. 
+ +--- a/localversion-grsec 2008-02-24 14:26:59.000000000 +0000 ++++ b/localversion-grsec 1970-01-01 01:00:00.000000000 +0100 +@@ -1 +0,0 @@ +--grsec diff --git a/kernel/2.6.28/4422_grsec-mute-warnings.patch b/kernel/2.6.28/4422_grsec-mute-warnings.patch new file mode 100644 index 0000000..f570095 --- /dev/null +++ b/kernel/2.6.28/4422_grsec-mute-warnings.patch @@ -0,0 +1,28 @@ +From: Gordon Malm <gengor@gentoo.org> + +Updated patch for kernel series 2.6.24. + +The credits/description from the original version of this patch remain accurate +and are included below. + +--- +From: Alexander Gabert <gaberta@fh-trier.de> + +This patch removes the warnings introduced by grsec patch 2.1.9 and later. +It removes the -W options added by the patch and restores the original +warning flags of vanilla kernel versions. + +Acked-by: Christian Heim <phreak@gentoo.org> +--- + +--- a/Makefile ++++ b/Makefile +@@ -214,7 +214,7 @@ + + HOSTCC = gcc + HOSTCXX = g++ +-HOSTCFLAGS = -Wall -W -Wno-unused -Wno-sign-compare -Wstrict-prototypes -O2 -fomit-frame-pointer ++HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer + HOSTCXXFLAGS = -O2 + + # Decide whether to build built-in, modular, or both. diff --git a/kernel/2.6.28/4425_grsec-pax-without-grsec.patch b/kernel/2.6.28/4425_grsec-pax-without-grsec.patch new file mode 100644 index 0000000..c0fb38b --- /dev/null +++ b/kernel/2.6.28/4425_grsec-pax-without-grsec.patch @@ -0,0 +1,60 @@ +From: Gordon Malm <gengor@gentoo.org> + +Allow PaX options to be selected without first selecting CONFIG_GRKERNSEC. + +This patch has been updated to keep current with newer kernel versions. +The original version of this patch contained no credits/description. 
+ +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -431,10 +431,12 @@ static void show_fault_oops(struct pt_re + if (init_mm.start_code <= address && address < init_mm.end_code) + #endif + { ++#ifdef CONFIG_GRKERNSEC + if (current->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + NIPQUAD(current->signal->curr_ip), current->comm, task_pid_nr(current), current->uid, current->euid); + else ++#endif + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + current->comm, task_pid_nr(current), current->uid, current->euid); + } +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1693,9 +1693,11 @@ void pax_report_fault(struct pt_regs *re + } + up_read(&mm->mmap_sem); + } ++#ifdef CONFIG_GRKERNSEC + if (tsk->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: execution attempt in: %s, %08lx-%08lx %08lx\n", NIPQUAD(tsk->signal->curr_ip), path_fault, start, end, offset); + else ++#endif + printk(KERN_ERR "PAX: execution attempt in: %s, %08lx-%08lx %08lx\n", path_fault, start, end, offset); + printk(KERN_ERR "PAX: terminating task: %s(%s):%d, uid/euid: %u/%u, " + "PC: %p, SP: %p\n", path_exec, tsk->comm, task_pid_nr(tsk), +@@ -1710,10 +1712,12 @@ void pax_report_fault(struct pt_regs *re + #ifdef CONFIG_PAX_REFCOUNT + void pax_report_refcount_overflow(struct pt_regs *regs) + { ++#ifdef CONFIG_GRKERNSEC + if (current->signal->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: refcount overflow detected in: %s:%d, uid/euid: %u/%u\n", + NIPQUAD(current->signal->curr_ip), current->comm, task_pid_nr(current), current->uid, current->euid); + else ++#endif + printk(KERN_ERR "PAX: refcount overflow detected in: %s:%d, uid/euid: %u/%u\n", + current->comm, task_pid_nr(current), current->uid, current->euid); + print_symbol(KERN_ERR "PAX: refcount overflow occured at: %s\n", instruction_pointer(regs)); +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -10,7 +10,7 @@ menu "PaX" + + 
config PAX + bool "Enable various PaX features" +- depends on GRKERNSEC && (ALPHA || ARM || AVR32 || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86) ++ depends on (ALPHA || ARM || AVR32 || IA64 || MIPS32 || MIPS64 || PARISC || PPC32 || PPC64 || SPARC32 || SPARC64 || X86) + help + This allows you to enable various PaX features. PaX adds + intrusion prevention mechanisms to the kernel that reduce diff --git a/kernel/2.6.28/4430_grsec-kconfig-default-gids.patch b/kernel/2.6.28/4430_grsec-kconfig-default-gids.patch new file mode 100644 index 0000000..614adbc --- /dev/null +++ b/kernel/2.6.28/4430_grsec-kconfig-default-gids.patch @@ -0,0 +1,76 @@ +From: Kerin Millar <kerframil@gmail.com> + +grsecurity contains a number of options which allow certain protections +to be applied to or exempted from members of a given group. However, the +default GIDs specified in the upstream patch are entirely arbitrary and +there is no telling which (if any) groups the GIDs will correlate with +on an end-user's system. Because some users don't pay a great deal of +attention to the finer points of kernel configuration, it is probably +wise to specify some reasonable defaults so as to stop careless users +from shooting themselves in the foot. 
+ +--- a/grsecurity/Kconfig ++++ b/grsecurity/Kconfig +@@ -352,7 +564,7 @@ + config GRKERNSEC_PROC_GID + int "GID for special group" + depends on GRKERNSEC_PROC_USERGROUP +- default 1001 ++ default 10 + + config GRKERNSEC_PROC_ADD + bool "Additional restrictions" +@@ -547,7 +759,7 @@ + config GRKERNSEC_AUDIT_GID + int "GID for auditing" + depends on GRKERNSEC_AUDIT_GROUP +- default 1007 ++ default 100 + + config GRKERNSEC_EXECLOG + bool "Exec logging" +@@ -700,7 +912,7 @@ + config GRKERNSEC_TPE_GID + int "GID for untrusted users" + depends on GRKERNSEC_TPE && !GRKERNSEC_TPE_INVERT +- default 1005 ++ default 100 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. +@@ -712,7 +924,7 @@ + config GRKERNSEC_TPE_GID + int "GID for trusted users" + depends on GRKERNSEC_TPE && GRKERNSEC_TPE_INVERT +- default 1005 ++ default 10 + help + If you have selected the "Invert GID option" above, setting this + GID determines what group TPE restrictions will be *disabled* for. +@@ -754,7 +966,7 @@ + config GRKERNSEC_SOCKET_ALL_GID + int "GID to deny all sockets for" + depends on GRKERNSEC_SOCKET_ALL +- default 1004 ++ default 65534 + help + Here you can choose the GID to disable socket access for. Remember to + add the users you want socket access disabled for to the GID +@@ -775,7 +987,7 @@ + config GRKERNSEC_SOCKET_CLIENT_GID + int "GID to deny client sockets for" + depends on GRKERNSEC_SOCKET_CLIENT +- default 1003 ++ default 65534 + help + Here you can choose the GID to disable client socket access for. + Remember to add the users you want client socket access disabled for to +@@ -793,7 +1005,7 @@ + config GRKERNSEC_SOCKET_SERVER_GID + int "GID to deny server sockets for" + depends on GRKERNSEC_SOCKET_SERVER +- default 1002 ++ default 65534 + help + Here you can choose the GID to disable server socket access for. 
+ Remember to add the users you want server socket access disabled for to diff --git a/kernel/2.6.28/4435_grsec-kconfig-gentoo.patch b/kernel/2.6.28/4435_grsec-kconfig-gentoo.patch new file mode 100644 index 0000000..bf80919 --- /dev/null +++ b/kernel/2.6.28/4435_grsec-kconfig-gentoo.patch @@ -0,0 +1,243 @@ +From: Gordon Malm <gengor@gentoo.org> +From: Kerin Millar <kerframil@gmail.com> + +Add Hardened Gentoo [server/workstation] predefined grsecurity +levels. They're designed to provide a comparatively high level of +security while remaining generally suitable for as great a majority +of the userbase as possible (particularly new users). + +Make Hardened Gentoo [workstation] predefined grsecurity level the +default. The Hardened Gentoo [server] level is more restrictive +and conflicts with some software and thus would be less suitable. + +The original version of this patch was conceived and created by: +Ned Ludd <solar@gentoo.org> + +--- a/grsecurity/Kconfig ++++ b/grsecurity/Kconfig +@@ -20,7 +20,7 @@ config GRKERNSEC + choice + prompt "Security Level" + depends on GRKERNSEC +- default GRKERNSEC_CUSTOM ++ default GRKERNSEC_HARDENED_WORKSTATION + + config GRKERNSEC_LOW + bool "Low" +@@ -183,6 +183,216 @@ config GRKERNSEC_HIGH + - Mount/unmount/remount logging + - Kernel symbol hiding + - Prevention of memory exhaustion-based exploits ++ ++config GRKERNSEC_HARDENED_SERVER ++ bool "Hardened Gentoo [server]" ++ select GRKERNSEC_AUDIT_MOUNT ++ select GRKERNSEC_BRUTE ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_CAPS ++ select GRKERNSEC_CHROOT_CHDIR ++ select GRKERNSEC_CHROOT_CHMOD ++ select GRKERNSEC_CHROOT_DOUBLE ++ select GRKERNSEC_CHROOT_FCHDIR ++ select GRKERNSEC_CHROOT_FINDTASK ++ select GRKERNSEC_CHROOT_MKNOD ++ select GRKERNSEC_CHROOT_MOUNT ++ select GRKERNSEC_CHROOT_NICE ++ select GRKERNSEC_CHROOT_PIVOT ++ select GRKERNSEC_CHROOT_SHMAT ++ select GRKERNSEC_CHROOT_SYSCTL ++ select GRKERNSEC_CHROOT_UNIX ++ select GRKERNSEC_DMESG ++ select GRKERNSEC_EXECVE
++ select GRKERNSEC_FIFO ++ select GRKERNSEC_FORKFAIL ++ select GRKERNSEC_HIDESYM ++ select GRKERNSEC_IO if (X86) ++ select GRKERNSEC_KMEM ++ select GRKERNSEC_LINK ++ select GRKERNSEC_MODSTOP if (MODULES) ++ select GRKERNSEC_PROC ++ select GRKERNSEC_PROC_ADD ++ select GRKERNSEC_PROC_IPADDR ++ select GRKERNSEC_PROC_MEMMAP ++ select GRKERNSEC_PROC_USERGROUP ++ select GRKERNSEC_RANDNET ++ select GRKERNSEC_RESLOG ++ select GRKERNSEC_SIGNAL ++# select GRKERNSEC_SOCKET ++# select GRKERNSEC_SOCKET_SERVER ++ select GRKERNSEC_SYSCTL ++ select GRKERNSEC_SYSCTL_ON ++ select GRKERNSEC_TIME ++ select PAX ++ select PAX_ASLR ++ select PAX_DLRESOLVE if (SPARC32 || SPARC64) ++ select PAX_EI_PAX ++ select PAX_EMUPLT if (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) ++ select PAX_EMUSIGRT if (PARISC || PPC32) ++ select PAX_EMUTRAMP if (PARISC || PPC32) ++ select PAX_ETEXECRELOCS if (ALPHA || IA64 || PARISC) ++ select PAX_KERNEXEC if (X86 && !EFI && !COMPAT_VDSO && !PARAVIRT && (!X86_32 || X86_WP_WORKS_OK)) ++ select PAX_MEMORY_SANITIZE ++ select PAX_MEMORY_UDEREF if (X86_32 && !COMPAT_VDSO && !UML_X86) ++ select PAX_MPROTECT if (!PPC64) ++ select PAX_HAVE_ACL_FLAGS ++ select PAX_NOELFRELOCS if (X86) ++ select PAX_NOEXEC ++ select PAX_PAGEEXEC ++ select PAX_PT_PAX_FLAGS ++ select PAX_RANDKSTACK if (X86_32 && X86_TSC) ++ select PAX_RANDMMAP ++ select PAX_RANDUSTACK ++ select PAX_REFCOUNT if (X86) ++ select PAX_SEGMEXEC if (X86_32) ++ select PAX_SYSCALL if (PPC32) ++ help ++ If you say Y here, a configuration will be used that is endorsed by ++ the Hardened Gentoo project. Therefore, many of the protections ++ made available by grsecurity and PaX will be enabled. ++ ++ Hardened Gentoo's pre-defined security levels are designed to provide ++ a high level of security while minimizing incompatibilities with the ++ majority of available software. 
For further information, please ++ view <http://www.grsecurity.net> and <http://pax.grsecurity.net> as ++ well as the Hardened Gentoo Primer at ++ <http://www.gentoo.org/proj/en/hardened/primer.xml>. ++ ++ This Hardened Gentoo [server] level is identical to the ++ Hardened Gentoo [workstation] level, but with the GRKERNSEC_IO, ++ PAX_KERNEXEC and PAX_NOELFRELOCS security features enabled. ++ Accordingly, this is the preferred security level if the system will ++ not be utilizing software incompatible with the aforementioned ++ grsecurity/PaX features. ++ ++ You may wish to emerge paxctl, a utility which allows you to toggle ++ PaX features on problematic binaries on an individual basis. Note that ++ this only works for ELF binaries that contain a PT_PAX_FLAGS header. ++ Translated, this means that if you wish to toggle PaX features on ++ binaries provided by applications that are distributed only in binary ++ format (rather than being built locally from sources), you will need to ++ run paxctl -C on the binaries beforehand so as to inject the missing ++ headers. ++ ++ When this level is selected, some options cannot be changed. However, ++ you may opt to fully customize the options that are selected by ++ choosing "Custom" in the Security Level menu. You may find it helpful ++ to inherit the options selected by the "Hardened Gentoo [server]" ++ security level as a starting point for further configuration. To ++ accomplish this, select this security level then exit the menuconfig ++ interface, saving changes when prompted. Then, run make menuconfig ++ again and select the "Custom" level. ++ ++ Note that this security level probably should not be used if the ++ target system is a 32bit x86 virtualized guest. If you intend to run ++ the kernel in a 32bit x86 virtualized guest you will likely need to ++ disable the PAX_MEMORY_UDEREF option in order to avoid an unacceptable ++ impact on performance. 
++ ++config GRKERNSEC_HARDENED_WORKSTATION ++ bool "Hardened Gentoo [workstation]" ++ select GRKERNSEC_AUDIT_MOUNT ++ select GRKERNSEC_BRUTE ++ select GRKERNSEC_CHROOT ++ select GRKERNSEC_CHROOT_CAPS ++ select GRKERNSEC_CHROOT_CHDIR ++ select GRKERNSEC_CHROOT_CHMOD ++ select GRKERNSEC_CHROOT_DOUBLE ++ select GRKERNSEC_CHROOT_FCHDIR ++ select GRKERNSEC_CHROOT_FINDTASK ++ select GRKERNSEC_CHROOT_MKNOD ++ select GRKERNSEC_CHROOT_MOUNT ++ select GRKERNSEC_CHROOT_NICE ++ select GRKERNSEC_CHROOT_PIVOT ++ select GRKERNSEC_CHROOT_SHMAT ++ select GRKERNSEC_CHROOT_SYSCTL ++ select GRKERNSEC_CHROOT_UNIX ++ select GRKERNSEC_DMESG ++ select GRKERNSEC_EXECVE ++ select GRKERNSEC_FIFO ++ select GRKERNSEC_FORKFAIL ++ select GRKERNSEC_HIDESYM ++ select GRKERNSEC_KMEM ++ select GRKERNSEC_LINK ++ select GRKERNSEC_MODSTOP if (MODULES) ++ select GRKERNSEC_PROC ++ select GRKERNSEC_PROC_ADD ++ select GRKERNSEC_PROC_IPADDR ++ select GRKERNSEC_PROC_MEMMAP ++ select GRKERNSEC_PROC_USERGROUP ++ select GRKERNSEC_RANDNET ++ select GRKERNSEC_RESLOG ++ select GRKERNSEC_SIGNAL ++# select GRKERNSEC_SOCKET ++# select GRKERNSEC_SOCKET_SERVER ++ select GRKERNSEC_SYSCTL ++ select GRKERNSEC_SYSCTL_ON ++ select GRKERNSEC_TIME ++ select PAX ++ select PAX_ASLR ++ select PAX_DLRESOLVE if (SPARC32 || SPARC64) ++ select PAX_EI_PAX ++ select PAX_EMUPLT if (ALPHA || PARISC || PPC32 || SPARC32 || SPARC64) ++ select PAX_EMUSIGRT if (PARISC || PPC32) ++ select PAX_EMUTRAMP if (PARISC || PPC32) ++ select PAX_ETEXECRELOCS if (ALPHA || IA64 || PARISC) ++ select PAX_MEMORY_SANITIZE ++ select PAX_MEMORY_UDEREF if (X86_32 && !COMPAT_VDSO && !UML_X86) ++ select PAX_MPROTECT if (!PPC64) ++ select PAX_HAVE_ACL_FLAGS ++ select PAX_NOEXEC ++ select PAX_PAGEEXEC ++ select PAX_PT_PAX_FLAGS ++ select PAX_RANDKSTACK if (X86_32 && X86_TSC) ++ select PAX_RANDMMAP ++ select PAX_RANDUSTACK ++ select PAX_REFCOUNT if (X86) ++ select PAX_SEGMEXEC if (X86_32) ++ select PAX_SYSCALL if (PPC32) ++ help ++ If you say Y here, a configuration 
will be used that is endorsed by ++ the Hardened Gentoo project. Therefore, many of the protections ++ made available by grsecurity and PaX will be enabled. ++ ++ Hardened Gentoo's pre-defined security levels are designed to provide ++ a high level of security while minimizing incompatibilities with the ++ majority of available software. For further information, please ++ view <http://www.grsecurity.net> and <http://pax.grsecurity.net> as ++ well as the Hardened Gentoo Primer at ++ <http://www.gentoo.org/proj/en/hardened/primer.xml>. ++ ++ This Hardened Gentoo [workstation] level is designed for machines ++ which are intended to run software not compatible with the ++ GRKERNSEC_IO, PAX_KERNEXEC and PAX_NOELFRELOCS features of grsecurity. ++ Accordingly, this security level is suitable for use with the X server ++ "Xorg" and/or any system that will act as host OS to the virtualization ++ software vmware-server or virtualbox. ++ ++ You may wish to emerge paxctl, a utility which allows you to toggle ++ PaX features on problematic binaries on an individual basis. Note that ++ this only works for ELF binaries that contain a PT_PAX_FLAGS header. ++ Translated, this means that if you wish to toggle PaX features on ++ binaries provided by applications that are distributed only in binary ++ format (rather than being built locally from sources), you will need to ++ run paxctl -C on the binaries beforehand so as to inject the missing ++ headers. ++ ++ When this level is selected, some options cannot be changed. However, ++ you may opt to fully customize the options that are selected by ++ choosing "Custom" in the Security Level menu. You may find it helpful ++ to inherit the options selected by the "Hardened Gentoo [workstation]" ++ security level as a starting point for further configuration. To ++ accomplish this, select this security level then exit the menuconfig ++ interface, saving changes when prompted. Then, run make menuconfig ++ again and select the "Custom" level.
++ ++ Note that this security level probably should not be used if the ++ target system is a 32bit x86 virtualized guest. If you intend to run ++ the kernel in a 32bit x86 virtualized guest you will likely need to ++ disable the PAX_MEMORY_UDEREF option in order to avoid an unacceptable ++ impact on performance. ++ + config GRKERNSEC_CUSTOM + bool "Custom" + help diff --git a/kernel/2.6.28/4440_selinux-avc_audit-log-curr_ip.patch b/kernel/2.6.28/4440_selinux-avc_audit-log-curr_ip.patch new file mode 100644 index 0000000..560bc89 --- /dev/null +++ b/kernel/2.6.28/4440_selinux-avc_audit-log-curr_ip.patch @@ -0,0 +1,65 @@ +From: Gordon Malm <gengor@gentoo.org> + +This is a reworked version of the original +*_selinux-avc_audit-log-curr_ip.patch carried in earlier releases of +hardened-sources. + +Dropping the patch, or simply fixing the #ifdef of the original patch +could break automated logging setups so this route was necessary. + +Suggestions for improving the help text are welcome. + +The original patch's description is still accurate and included below. + +--- +Provides support for a new field ipaddr within the SELinux +AVC audit log, relying on task_struct->curr_ip (ipv4 only) +provided by the grSecurity patch, which must be applied first. + +Signed-off-by: Lorenzo Hernandez Garcia-Hierro <lorenzo@gnu.org> +--- + +--- a/grsecurity/Kconfig ++++ b/grsecurity/Kconfig +@@ -1044,6 +1044,27 @@ endmenu + menu "Logging Options" + depends on GRKERNSEC + ++config GRKERNSEC_SELINUX_AVC_LOG_IPADDR ++ def_bool n ++ prompt "Add source IP address to SELinux AVC log messages" ++ depends on GRKERNSEC && SECURITY_SELINUX ++ help ++ If you say Y here, a new field "ipaddr=" will be added to many SELinux ++ AVC log messages. The value of this field in any given message ++ represents the source IP address of the remote machine/user that created ++ the offending process.
++ ++ This information is sourced from task_struct->curr_ip provided by ++ grsecurity's GRKERNSEC top-level configuration option. One limitation ++ is that only IPv4 is supported. ++ ++ In many instances SELinux AVC log messages already log a superior level ++ of information that also includes source port and destination ip/port. ++ Additionally, SELinux's AVC log code supports IPv6. ++ ++ However, grsecurity's task_struct->curr_ip will sometimes (often?) ++ provide the offender's IP address where stock SELinux logging fails to. ++ + config GRKERNSEC_FLOODTIME + int "Seconds in between log messages (minimum)" + default 10 +--- a/security/selinux/avc.c ++++ b/security/selinux/avc.c +@@ -202,6 +202,11 @@ static void avc_dump_query(struct audit_ + char *scontext; + u32 scontext_len; + ++#ifdef CONFIG_GRKERNSEC_SELINUX_AVC_LOG_IPADDR ++ if (current->signal->curr_ip) ++ audit_log_format(ab, "ipaddr=%u.%u.%u.%u ", NIPQUAD(current->signal->curr_ip)); ++#endif ++ + rc = security_sid_to_context(ssid, &scontext, &scontext_len); + if (rc) + audit_log_format(ab, "ssid=%d", ssid); diff --git a/kernel/2.6.28/4445_disable-compat_vdso.patch b/kernel/2.6.28/4445_disable-compat_vdso.patch new file mode 100644 index 0000000..3780030 --- /dev/null +++ b/kernel/2.6.28/4445_disable-compat_vdso.patch @@ -0,0 +1,74 @@ +From: Gordon Malm <gengor@gentoo.org> +From: Kerin Millar <kerframil@gmail.com> + +COMPAT_VDSO is inappropriate for any modern Hardened Gentoo system. It +conflicts with various parts of PaX, crashing the system if enabled +while PaX's NOEXEC or UDEREF features are active. Moreover, it prevents +a number of important PaX options from appearing in the configuration +menu, including all PaX NOEXEC implementations. Unfortunately, the +reason for the disappearance of these PaX configuration options is +often far from obvious to inexperienced users. + +Therefore, we disable the COMPAT_VDSO menu entry entirely. 
However, +COMPAT_VDSO operation can still be enabled via bootparam and sysctl +interfaces. Consequently, we must also disable the ability to select +COMPAT_VDSO operation at boot or runtime. Here we patch the kernel so +that selecting COMPAT_VDSO operation at boot/runtime has no effect if +conflicting PaX options are enabled, leaving VDSO_ENABLED operation +intact. + +Closes bug: http://bugs.gentoo.org/show_bug.cgi?id=210138 + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1215,16 +1215,7 @@ config HOTPLUG_CPU + + config COMPAT_VDSO + def_bool n +- prompt "Compat VDSO support" + depends on (X86_32 || IA32_EMULATION) && !PAX_NOEXEC +- help +- Map the 32-bit VDSO to the predictable old-style address too. +- ---help--- +- Say N here if you are running a sufficiently recent glibc +- version (2.3.3 or later), to remove the high-mapped +- VDSO mapping and to exclusively use the randomized VDSO. +- +- If unsure, say Y. + + endmenu + +--- a/arch/x86/vdso/vdso32-setup.c ++++ b/arch/x86/vdso/vdso32-setup.c +@@ -333,17 +333,21 @@ int arch_setup_additional_pages(struct l + + map_compat_vdso(compat); + ++#if !defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_MEMORY_UDEREF) + if (compat) + addr = VDSO_HIGH_BASE; + else { ++#endif + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } ++#if !defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_MEMORY_UDEREF) + } + + if (compat_uses_vma || !compat) { ++#endif + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * +@@ -361,7 +365,9 @@ int arch_setup_additional_pages(struct l + + if (ret) + goto up_fail; ++#if !defined(CONFIG_PAX_NOEXEC) && !defined(CONFIG_PAX_MEMORY_UDEREF) + } ++#endif + + current->mm->context.vdso = addr; + current_thread_info()->sysenter_return = |