Diffstat (limited to 'trunk/2.6.18/30082b_x86-fix-copy_user.patch')
-rw-r--r-- | trunk/2.6.18/30082b_x86-fix-copy_user.patch | 537
1 file changed, 537 insertions, 0 deletions
diff --git a/trunk/2.6.18/30082b_x86-fix-copy_user.patch b/trunk/2.6.18/30082b_x86-fix-copy_user.patch
new file mode 100644
index 0000000..f0b7a07
--- /dev/null
+++ b/trunk/2.6.18/30082b_x86-fix-copy_user.patch
@@ -0,0 +1,537 @@
+commit ad2fc2cd925300b8127cf682f5a1c7511ae9dd27
+Author: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+Date: Wed Jul 2 15:53:13 2008 +0200
+
+ x86: fix copy_user on x86
+
+ Switch copy_user_generic_string(), copy_user_generic_unrolled() and
+ __copy_user_nocache() from custom tail handlers to generic
+ copy_user_tail_handle().
+
+ Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+ Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+ Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+Backported to Debian's 2.6.18 by dann frazier <dannf@debian.org>
+
+diff -urpN linux-source-2.6.18.orig/arch/x86_64/lib/copy_user.S linux-source-2.6.18/arch/x86_64/lib/copy_user.S
+--- linux-source-2.6.18.orig/arch/x86_64/lib/copy_user.S 2008-07-15 23:01:24.000000000 -0700
++++ linux-source-2.6.18/arch/x86_64/lib/copy_user.S 2008-07-15 23:33:23.000000000 -0700
+@@ -1,8 +1,10 @@
+-/* Copyright 2002 Andi Kleen, SuSE Labs.
++/*
++ * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
++ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License v2.
+- *
+- * Functions to copy from and to user space.
+- */
++ *
++ * Functions to copy from and to user space.
++ */
+
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+@@ -20,60 +22,88 @@
+ .long \orig-1f /* by default jump to orig */
+ 1:
+ .section .altinstr_replacement,"ax"
+-2: .byte 0xe9 /* near jump with 32bit immediate */
++2: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt-1b /* offset */ /* or alternatively to alt */
+ .previous
+ .section .altinstructions,"a"
+ .align 8
+ .quad 0b
+ .quad 2b
+- .byte \feature /* when feature is set */
++ .byte \feature /* when feature is set */
+ .byte 5
+ .byte 5
+ .previous
+ .endm
+
+-/* Standard copy_to_user with segment limit checking */
++ .macro ALIGN_DESTINATION
++#ifdef FIX_ALIGNMENT
++ /* check for bad alignment of destination */
++ movl %edi,%ecx
++ andl $7,%ecx
++ jz 102f /* already aligned */
++ subl $8,%ecx
++ negl %ecx
++ subl %ecx,%edx
++100: movb (%rsi),%al
++101: movb %al,(%rdi)
++ incq %rsi
++ incq %rdi
++ decl %ecx
++ jnz 100b
++102:
++ .section .fixup,"ax"
++103: addl %r8d,%edx /* ecx is zerorest also */
++ jmp copy_user_handle_tail
++ .previous
++
++ .section __ex_table,"a"
++ .align 8
++ .quad 100b,103b
++ .quad 101b,103b
++ .previous
++#endif
++ .endm
++
++/* Standard copy_to_user with segment limit checking */
+ ENTRY(copy_to_user)
+ CFI_STARTPROC
+ GET_THREAD_INFO(%rax)
+ movq %rdi,%rcx
+ addq %rdx,%rcx
+- jc bad_to_user
+- cmpq threadinfo_addr_limit(%rax),%rcx
++ jc bad_to_user
++ cmpq threadinfo_addr_limit(%rax),%rcx
+ jae bad_to_user
+- xorl %eax,%eax /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+
+-ENTRY(copy_user_generic)
++/* Standard copy_from_user with segment limit checking */
++ENTRY(copy_from_user)
+ CFI_STARTPROC
+- movl $1,%ecx /* set zero flag */
++ GET_THREAD_INFO(%rax)
++ movq %rsi,%rcx
++ addq %rdx,%rcx
++ jc bad_from_user
++ cmpq threadinfo_addr_limit(%rax),%rcx
++ jae bad_from_user
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
++ENDPROC(copy_from_user)
+
+-ENTRY(__copy_from_user_inatomic)
++ENTRY(copy_user_generic)
+ CFI_STARTPROC
+- xorl %ecx,%ecx /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
++ENDPROC(copy_user_generic)
+
+-/* Standard copy_from_user with segment limit checking */
+-ENTRY(copy_from_user)
++ENTRY(__copy_from_user_inatomic)
+ CFI_STARTPROC
+- GET_THREAD_INFO(%rax)
+- movq %rsi,%rcx
+- addq %rdx,%rcx
+- jc bad_from_user
+- cmpq threadinfo_addr_limit(%rax),%rcx
+- jae bad_from_user
+- movl $1,%ecx /* set zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+-ENDPROC(copy_from_user)
+-
++ENDPROC(__copy_from_user_inatomic)
++
+ .section .fixup,"ax"
+ /* must zero dest */
++ENTRY(bad_from_user)
+ bad_from_user:
+ CFI_STARTPROC
+ movl %edx,%ecx
+@@ -81,274 +111,158 @@ bad_from_user:
+ rep
+ stosb
+ bad_to_user:
+- movl %edx,%eax
++ movl %edx,%eax
+ ret
+ CFI_ENDPROC
+-END(bad_from_user)
++ENDPROC(bad_from_user)
+ .previous
+-
+-
++
+ /*
+ * copy_user_generic_unrolled - memory copy with exception handling.
+- * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
+- *
+- * Input:
++ * This version is for CPUs like P4 that don't have efficient micro
++ * code for rep movsq
++ *
++ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+- * ecx zero flag -- if true zero destination on error
+ *
+- * Output:
+- * eax uncopied bytes or 0 if successful.
++ * Output:
++ * eax uncopied bytes or 0 if successfull.
+ */
+ ENTRY(copy_user_generic_unrolled)
+ CFI_STARTPROC
+- pushq %rbx
+- CFI_ADJUST_CFA_OFFSET 8
+- CFI_REL_OFFSET rbx, 0
+- pushq %rcx
+- CFI_ADJUST_CFA_OFFSET 8
+- CFI_REL_OFFSET rcx, 0
+- xorl %eax,%eax /*zero for the exception handler */
+-
+-#ifdef FIX_ALIGNMENT
+- /* check for bad alignment of destination */
+- movl %edi,%ecx
+- andl $7,%ecx
+- jnz .Lbad_alignment
+-.Lafter_bad_alignment:
+-#endif
+-
+- movq %rdx,%rcx
+-
+- movl $64,%ebx
+- shrq $6,%rdx
+- decq %rdx
+- js .Lhandle_tail
+-
+- .p2align 4
+-.Lloop:
+-.Ls1: movq (%rsi),%r11
+-.Ls2: movq 1*8(%rsi),%r8
+-.Ls3: movq 2*8(%rsi),%r9
+-.Ls4: movq 3*8(%rsi),%r10
+-.Ld1: movq %r11,(%rdi)
+-.Ld2: movq %r8,1*8(%rdi)
+-.Ld3: movq %r9,2*8(%rdi)
+-.Ld4: movq %r10,3*8(%rdi)
+-
+-.Ls5: movq 4*8(%rsi),%r11
+-.Ls6: movq 5*8(%rsi),%r8
+-.Ls7: movq 6*8(%rsi),%r9
+-.Ls8: movq 7*8(%rsi),%r10
+-.Ld5: movq %r11,4*8(%rdi)
+-.Ld6: movq %r8,5*8(%rdi)
+-.Ld7: movq %r9,6*8(%rdi)
+-.Ld8: movq %r10,7*8(%rdi)
+-
+- decq %rdx
+-
++ cmpl $8,%edx
++ jb 20f /* less then 8 bytes, go to byte copy loop */
++ ALIGN_DESTINATION
++ movl %edx,%ecx
++ andl $63,%edx
++ shrl $6,%ecx
++ jz 17f
++1: movq (%rsi),%r8
++2: movq 1*8(%rsi),%r9
++3: movq 2*8(%rsi),%r10
++4: movq 3*8(%rsi),%r11
++5: movq %r8,(%rdi)
++6: movq %r9,1*8(%rdi)
++7: movq %r10,2*8(%rdi)
++8: movq %r11,3*8(%rdi)
++9: movq 4*8(%rsi),%r8
++10: movq 5*8(%rsi),%r9
++11: movq 6*8(%rsi),%r10
++12: movq 7*8(%rsi),%r11
++13: movq %r8,4*8(%rdi)
++14: movq %r9,5*8(%rdi)
++15: movq %r10,6*8(%rdi)
++16: movq %r11,7*8(%rdi)
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+-
+- jns .Lloop
+-
+- .p2align 4
+-.Lhandle_tail:
+- movl %ecx,%edx
+- andl $63,%ecx
+- shrl $3,%ecx
+- jz .Lhandle_7
+- movl $8,%ebx
+- .p2align 4
+-.Lloop_8:
+-.Ls9: movq (%rsi),%r8
+-.Ld9: movq %r8,(%rdi)
+ decl %ecx
+- leaq 8(%rdi),%rdi
++ jnz 1b
++17: movl %edx,%ecx
++ andl $7,%edx
++ shrl $3,%ecx
++ jz 20f
++18: movq (%rsi),%r8
++19: movq %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+- jnz .Lloop_8
+-
+-.Lhandle_7:
++ leaq 8(%rdi),%rdi
++ decl %ecx
++ jnz 18b
++20: andl %edx,%edx
++ jz 23f
+ movl %edx,%ecx
+- andl $7,%ecx
+- jz .Lende
+ .p2align 4
+-.Lloop_1:
+-.Ls10: movb (%rsi),%bl
+-.Ld10: movb %bl,(%rdi)
+- incq %rdi
++21: movb (%rsi),%al
++22: movb %al,(%rdi)
+ incq %rsi
++ incq %rdi
+ decl %ecx
+- jnz .Lloop_1
+-
+- CFI_REMEMBER_STATE
+-.Lende:
+- popq %rcx
+- CFI_ADJUST_CFA_OFFSET -8
+- CFI_RESTORE rcx
+- popq %rbx
+- CFI_ADJUST_CFA_OFFSET -8
+- CFI_RESTORE rbx
++ jnz 21b
++23: xor %eax,%eax
+ ret
+- CFI_RESTORE_STATE
+
+-#ifdef FIX_ALIGNMENT
+- /* align destination */
+- .p2align 4
+-.Lbad_alignment:
+- movl $8,%r9d
+- subl %ecx,%r9d
+- movl %r9d,%ecx
+- cmpq %r9,%rdx
+- jz .Lhandle_7
+- js .Lhandle_7
+-.Lalign_1:
+-.Ls11: movb (%rsi),%bl
+-.Ld11: movb %bl,(%rdi)
+- incq %rsi
+- incq %rdi
+- decl %ecx
+- jnz .Lalign_1
+- subq %r9,%rdx
+- jmp .Lafter_bad_alignment
+-#endif
++ .section .fixup,"ax"
++30: shll $6,%ecx
++ addl %ecx,%edx
++ jmp 60f
++40: lea (%rdx,%rcx,8),%rdx
++ jmp 60f
++50: movl %ecx,%edx
++60: jmp copy_user_handle_tail /* ecx is zerorest also */
++ .previous
+
+- /* table sorted by exception address */
+ .section __ex_table,"a"
+ .align 8
+- .quad .Ls1,.Ls1e
+- .quad .Ls2,.Ls2e
+- .quad .Ls3,.Ls3e
+- .quad .Ls4,.Ls4e
+- .quad .Ld1,.Ls1e
+- .quad .Ld2,.Ls2e
+- .quad .Ld3,.Ls3e
+- .quad .Ld4,.Ls4e
+- .quad .Ls5,.Ls5e
+- .quad .Ls6,.Ls6e
+- .quad .Ls7,.Ls7e
+- .quad .Ls8,.Ls8e
+- .quad .Ld5,.Ls5e
+- .quad .Ld6,.Ls6e
+- .quad .Ld7,.Ls7e
+- .quad .Ld8,.Ls8e
+- .quad .Ls9,.Le_quad
+- .quad .Ld9,.Le_quad
+- .quad .Ls10,.Le_byte
+- .quad .Ld10,.Le_byte
+-#ifdef FIX_ALIGNMENT
+- .quad .Ls11,.Lzero_rest
+- .quad .Ld11,.Lzero_rest
+-#endif
+- .quad .Le5,.Le_zero
++ .quad 1b,30b
++ .quad 2b,30b
++ .quad 3b,30b
++ .quad 4b,30b
++ .quad 5b,30b
++ .quad 6b,30b
++ .quad 7b,30b
++ .quad 8b,30b
++ .quad 9b,30b
++ .quad 10b,30b
++ .quad 11b,30b
++ .quad 12b,30b
++ .quad 13b,30b
++ .quad 14b,30b
++ .quad 15b,30b
++ .quad 16b,30b
++ .quad 18b,40b
++ .quad 19b,40b
++ .quad 21b,50b
++ .quad 22b,50b
+ .previous
+-
+- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
+- pessimistic side. this is gross. it would be better to fix the
+- interface. */
+- /* eax: zero, ebx: 64 */
+-.Ls1e: addl $8,%eax
+-.Ls2e: addl $8,%eax
+-.Ls3e: addl $8,%eax
+-.Ls4e: addl $8,%eax
+-.Ls5e: addl $8,%eax
+-.Ls6e: addl $8,%eax
+-.Ls7e: addl $8,%eax
+-.Ls8e: addl $8,%eax
+- addq %rbx,%rdi /* +64 */
+- subq %rax,%rdi /* correct destination with computed offset */
+-
+- shlq $6,%rdx /* loop counter * 64 (stride length) */
+- addq %rax,%rdx /* add offset to loopcnt */
+- andl $63,%ecx /* remaining bytes */
+- addq %rcx,%rdx /* add them */
+- jmp .Lzero_rest
+-
+- /* exception on quad word loop in tail handling */
+- /* ecx: loopcnt/8, %edx: length, rdi: correct */
+-.Le_quad:
+- shll $3,%ecx
+- andl $7,%edx
+- addl %ecx,%edx
+- /* edx: bytes to zero, rdi: dest, eax:zero */
+-.Lzero_rest:
+- cmpl $0,(%rsp)
+- jz .Le_zero
+- movq %rdx,%rcx
+-.Le_byte:
+- xorl %eax,%eax
+-.Le5: rep
+- stosb
+- /* when there is another exception while zeroing the rest just return */
+-.Le_zero:
+- movq %rdx,%rax
+- jmp .Lende
+ CFI_ENDPROC
+-ENDPROC(copy_user_generic)
++ENDPROC(copy_user_generic_unrolled)
+
+-
+- /* Some CPUs run faster using the string copy instructions.
+- This is also a lot simpler. Use them when possible.
+- Patch in jmps to this code instead of copying it fully
+- to avoid unwanted aliasing in the exception tables. */
+-
+- /* rdi destination
+- * rsi source
+- * rdx count
+- * ecx zero flag
+- *
+- * Output:
+- * eax uncopied bytes or 0 if successfull.
+- *
+- * Only 4GB of copy is supported. This shouldn't be a problem
+- * because the kernel normally only writes from/to page sized chunks
+- * even if user space passed a longer buffer.
+- * And more would be dangerous because both Intel and AMD have
+- * errata with rep movsq > 4GB. If someone feels the need to fix
+- * this please consider this.
+- */
++/* Some CPUs run faster using the string copy instructions.
++ * This is also a lot simpler. Use them when possible.
++ *
++ * Only 4GB of copy is supported. This shouldn't be a problem
++ * because the kernel normally only writes from/to page sized chunks
++ * even if user space passed a longer buffer.
++ * And more would be dangerous because both Intel and AMD have
++ * errata with rep movsq > 4GB. If someone feels the need to fix
++ * this please consider this.
++ *
++ * Input:
++ * rdi destination
++ * rsi source
++ * rdx count
++ *
++ * Output:
++ * eax uncopied bytes or 0 if successful.
++ */
+ ENTRY(copy_user_generic_string)
+ CFI_STARTPROC
+- movl %ecx,%r8d /* save zero flag */
++ andl %edx,%edx
++ jz 4f
++ cmpl $8,%edx
++ jb 2f /* less than 8 bytes, go to byte copy loop */
++ ALIGN_DESTINATION
+ movl %edx,%ecx
+ shrl $3,%ecx
+- andl $7,%edx
+- jz 10f
+-1: rep
+- movsq
+- movl %edx,%ecx
+-2: rep
+- movsb
+-9: movl %ecx,%eax
+- ret
+-
+- /* multiple of 8 byte */
+-10: rep
++ andl $7,%edx
++1: rep
+ movsq
+- xor %eax,%eax
++2: movl %edx,%ecx
++3: rep
++ movsb
++4: xorl %eax,%eax
+ ret
+
+- /* exception handling */
+-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
+- jmp 6f
+-5: movl %ecx,%eax /* exception on byte loop */
+- /* eax: left over bytes */
+-6: testl %r8d,%r8d /* zero flag set? */
+- jz 7f
+- movl %eax,%ecx /* initialize x86 loop counter */
+- push %rax
+- xorl %eax,%eax
+-8: rep
+- stosb /* zero the rest */
+-11: pop %rax
+-7: ret
+- CFI_ENDPROC
+-END(copy_user_generic_c)
++ .section .fixup,"ax"
++11: lea (%rdx,%rcx,8),%rcx
++12: movl %ecx,%edx /* ecx is zerorest also */
++ jmp copy_user_handle_tail
++ .previous
+
+ .section __ex_table,"a"
+- .quad 1b,3b
+- .quad 2b,5b
+- .quad 8b,11b
+- .quad 10b,3b
++ .align 8
++ .quad 1b,11b
++ .quad 3b,12b
+ .previous
++ CFI_ENDPROC
++ENDPROC(copy_user_generic_string)
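All of the new fixup paths above funnel into copy_user_handle_tail, the generic tail handler the commit message refers to; it is a C helper rather than per-function assembly. As a rough orientation only (a minimal sketch in the style of the upstream helper, not the literal code carried in this Debian backport), it retries the leftover range byte by byte so the fault position is found exactly, optionally zeroes whatever remains of the kernel destination when the zerorest flag (carried in %ecx above) is set, and returns the number of bytes that could not be copied, which becomes the return value of copy_to_user()/copy_from_user():

/*
 * Sketch of a copy_user_handle_tail()-style helper (assumed shape,
 * not the exact 2.6.18 backport). __get_user()/__put_user() stand in
 * for the unchecked single-byte accessors the real helper uses; both
 * fault safely via the exception tables.
 */
#include <asm/uaccess.h>

unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
{
	char c;
	unsigned zero_len;

	/* Re-copy the remainder one byte at a time; stop at the first fault. */
	for (; len; --len) {
		if (__get_user(c, from++))
			break;
		if (__put_user(c, to++))
			break;
	}

	/* For the copy_from_user() direction, clear what is left of the
	 * kernel buffer so no stale data is exposed to the caller. */
	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
		if (__put_user(c, to++))
			break;

	/* Bytes that could not be copied; this is what ends up in %eax. */
	return len;
}

Keeping the slow path in C is the point of the rewrite: the assembly fast paths no longer compute an exact uncopied-byte count in their fixup code, they only leave an upper-bound estimate in %edx and jump here, where the count is recomputed precisely.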