Diffstat (limited to 'trunk/2.6.18/30082b_x86-fix-copy_user.patch')
-rw-r--r--  trunk/2.6.18/30082b_x86-fix-copy_user.patch | 537
1 files changed, 537 insertions, 0 deletions
diff --git a/trunk/2.6.18/30082b_x86-fix-copy_user.patch b/trunk/2.6.18/30082b_x86-fix-copy_user.patch
new file mode 100644
index 0000000..f0b7a07
--- /dev/null
+++ b/trunk/2.6.18/30082b_x86-fix-copy_user.patch
@@ -0,0 +1,537 @@
+commit ad2fc2cd925300b8127cf682f5a1c7511ae9dd27
+Author: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+Date: Wed Jul 2 15:53:13 2008 +0200
+
+ x86: fix copy_user on x86
+
+ Switch copy_user_generic_string(), copy_user_generic_unrolled() and
+ __copy_user_nocache() from custom tail handlers to the generic
+ copy_user_handle_tail().
+
+ Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+ Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+ Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+Backported to Debian's 2.6.18 by dann frazier <dannf@debian.org>
+
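+The fixup paths below recover the count of not-yet-copied bytes and jump
+to copy_user_handle_tail, which retries the copy one byte at a time and
+zeroes whatever remains uncopied when the zerorest flag is set. The
+handler itself comes from a companion change (in this backport it lives
+outside this file, presumably in arch/x86_64/lib/usercopy.c); a rough C
+sketch, not part of this patch, looks like:
+
+	/*
+	 * Try to copy the last bytes; a fault here is not the common
+	 * case, so there is no attempt to make this fast.
+	 */
+	unsigned long
+	copy_user_handle_tail(char *to, char *from, unsigned len,
+			      unsigned zerorest)
+	{
+		char c;
+		unsigned zero_len;
+
+		/* byte-by-byte copy until done or until a fault */
+		for (; len; --len) {
+			if (__get_user_nocheck(c, from++, sizeof(char)))
+				break;
+			if (__put_user_nocheck(c, to++, sizeof(char)))
+				break;
+		}
+
+		/* if requested, zero the bytes that could not be copied */
+		for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
+			if (__put_user_nocheck(c, to++, sizeof(char)))
+				break;
+
+		return len;	/* uncopied bytes, 0 on success */
+	}
+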
+diff -urpN linux-source-2.6.18.orig/arch/x86_64/lib/copy_user.S linux-source-2.6.18/arch/x86_64/lib/copy_user.S
+--- linux-source-2.6.18.orig/arch/x86_64/lib/copy_user.S 2008-07-15 23:01:24.000000000 -0700
++++ linux-source-2.6.18/arch/x86_64/lib/copy_user.S 2008-07-15 23:33:23.000000000 -0700
+@@ -1,8 +1,10 @@
+-/* Copyright 2002 Andi Kleen, SuSE Labs.
++/*
++ * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
++ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License v2.
+- *
+- * Functions to copy from and to user space.
+- */
++ *
++ * Functions to copy from and to user space.
++ */
+
+ #include <linux/linkage.h>
+ #include <asm/dwarf2.h>
+@@ -20,60 +22,88 @@
+ .long \orig-1f /* by default jump to orig */
+ 1:
+ .section .altinstr_replacement,"ax"
+-2: .byte 0xe9 /* near jump with 32bit immediate */
++2: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt-1b /* offset */ /* or alternatively to alt */
+ .previous
+ .section .altinstructions,"a"
+ .align 8
+ .quad 0b
+ .quad 2b
+- .byte \feature /* when feature is set */
++ .byte \feature /* when feature is set */
+ .byte 5
+ .byte 5
+ .previous
+ .endm
+
+-/* Standard copy_to_user with segment limit checking */
++ .macro ALIGN_DESTINATION
++#ifdef FIX_ALIGNMENT
++ /* check for bad alignment of destination */
++ movl %edi,%ecx
++ andl $7,%ecx
++ jz 102f /* already aligned */
++ subl $8,%ecx
++ negl %ecx
++ subl %ecx,%edx
++100: movb (%rsi),%al
++101: movb %al,(%rdi)
++ incq %rsi
++ incq %rdi
++ decl %ecx
++ jnz 100b
++102:
++ .section .fixup,"ax"
++103: addl %r8d,%edx /* ecx is zerorest also */
++ jmp copy_user_handle_tail
++ .previous
++
++ .section __ex_table,"a"
++ .align 8
++ .quad 100b,103b
++ .quad 101b,103b
++ .previous
++#endif
++ .endm
++
++/* Standard copy_to_user with segment limit checking */
+ ENTRY(copy_to_user)
+ CFI_STARTPROC
+ GET_THREAD_INFO(%rax)
+ movq %rdi,%rcx
+ addq %rdx,%rcx
+- jc bad_to_user
+- cmpq threadinfo_addr_limit(%rax),%rcx
++ jc bad_to_user
++ cmpq threadinfo_addr_limit(%rax),%rcx
+ jae bad_to_user
+- xorl %eax,%eax /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+
+-ENTRY(copy_user_generic)
++/* Standard copy_from_user with segment limit checking */
++ENTRY(copy_from_user)
+ CFI_STARTPROC
+- movl $1,%ecx /* set zero flag */
++ GET_THREAD_INFO(%rax)
++ movq %rsi,%rcx
++ addq %rdx,%rcx
++ jc bad_from_user
++ cmpq threadinfo_addr_limit(%rax),%rcx
++ jae bad_from_user
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
++ENDPROC(copy_from_user)
+
+-ENTRY(__copy_from_user_inatomic)
++ENTRY(copy_user_generic)
+ CFI_STARTPROC
+- xorl %ecx,%ecx /* clear zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
++ENDPROC(copy_user_generic)
+
+-/* Standard copy_from_user with segment limit checking */
+-ENTRY(copy_from_user)
++ENTRY(__copy_from_user_inatomic)
+ CFI_STARTPROC
+- GET_THREAD_INFO(%rax)
+- movq %rsi,%rcx
+- addq %rdx,%rcx
+- jc bad_from_user
+- cmpq threadinfo_addr_limit(%rax),%rcx
+- jae bad_from_user
+- movl $1,%ecx /* set zero flag */
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ CFI_ENDPROC
+-ENDPROC(copy_from_user)
+-
++ENDPROC(__copy_from_user_inatomic)
++
+ .section .fixup,"ax"
+ /* must zero dest */
++ENTRY(bad_from_user)
+ bad_from_user:
+ CFI_STARTPROC
+ movl %edx,%ecx
+@@ -81,274 +111,158 @@ bad_from_user:
+ rep
+ stosb
+ bad_to_user:
+- movl %edx,%eax
++ movl %edx,%eax
+ ret
+ CFI_ENDPROC
+-END(bad_from_user)
++ENDPROC(bad_from_user)
+ .previous
+-
+-
++
+ /*
+ * copy_user_generic_unrolled - memory copy with exception handling.
+- * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
+- *
+- * Input:
++ * This version is for CPUs like P4 that don't have efficient
++ * microcode for rep movsq.
++ *
++ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+- * ecx zero flag -- if true zero destination on error
+ *
+- * Output:
+- * eax uncopied bytes or 0 if successful.
++ * Output:
++ * eax uncopied bytes or 0 if successful.
+ */
+ ENTRY(copy_user_generic_unrolled)
+ CFI_STARTPROC
+- pushq %rbx
+- CFI_ADJUST_CFA_OFFSET 8
+- CFI_REL_OFFSET rbx, 0
+- pushq %rcx
+- CFI_ADJUST_CFA_OFFSET 8
+- CFI_REL_OFFSET rcx, 0
+- xorl %eax,%eax /*zero for the exception handler */
+-
+-#ifdef FIX_ALIGNMENT
+- /* check for bad alignment of destination */
+- movl %edi,%ecx
+- andl $7,%ecx
+- jnz .Lbad_alignment
+-.Lafter_bad_alignment:
+-#endif
+-
+- movq %rdx,%rcx
+-
+- movl $64,%ebx
+- shrq $6,%rdx
+- decq %rdx
+- js .Lhandle_tail
+-
+- .p2align 4
+-.Lloop:
+-.Ls1: movq (%rsi),%r11
+-.Ls2: movq 1*8(%rsi),%r8
+-.Ls3: movq 2*8(%rsi),%r9
+-.Ls4: movq 3*8(%rsi),%r10
+-.Ld1: movq %r11,(%rdi)
+-.Ld2: movq %r8,1*8(%rdi)
+-.Ld3: movq %r9,2*8(%rdi)
+-.Ld4: movq %r10,3*8(%rdi)
+-
+-.Ls5: movq 4*8(%rsi),%r11
+-.Ls6: movq 5*8(%rsi),%r8
+-.Ls7: movq 6*8(%rsi),%r9
+-.Ls8: movq 7*8(%rsi),%r10
+-.Ld5: movq %r11,4*8(%rdi)
+-.Ld6: movq %r8,5*8(%rdi)
+-.Ld7: movq %r9,6*8(%rdi)
+-.Ld8: movq %r10,7*8(%rdi)
+-
+- decq %rdx
+-
++ cmpl $8,%edx
++ jb 20f /* less than 8 bytes, go to byte copy loop */
++ ALIGN_DESTINATION
++ movl %edx,%ecx
++ andl $63,%edx
++ shrl $6,%ecx
++ jz 17f
++1: movq (%rsi),%r8
++2: movq 1*8(%rsi),%r9
++3: movq 2*8(%rsi),%r10
++4: movq 3*8(%rsi),%r11
++5: movq %r8,(%rdi)
++6: movq %r9,1*8(%rdi)
++7: movq %r10,2*8(%rdi)
++8: movq %r11,3*8(%rdi)
++9: movq 4*8(%rsi),%r8
++10: movq 5*8(%rsi),%r9
++11: movq 6*8(%rsi),%r10
++12: movq 7*8(%rsi),%r11
++13: movq %r8,4*8(%rdi)
++14: movq %r9,5*8(%rdi)
++15: movq %r10,6*8(%rdi)
++16: movq %r11,7*8(%rdi)
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+-
+- jns .Lloop
+-
+- .p2align 4
+-.Lhandle_tail:
+- movl %ecx,%edx
+- andl $63,%ecx
+- shrl $3,%ecx
+- jz .Lhandle_7
+- movl $8,%ebx
+- .p2align 4
+-.Lloop_8:
+-.Ls9: movq (%rsi),%r8
+-.Ld9: movq %r8,(%rdi)
+ decl %ecx
+- leaq 8(%rdi),%rdi
++ jnz 1b
++17: movl %edx,%ecx
++ andl $7,%edx
++ shrl $3,%ecx
++ jz 20f
++18: movq (%rsi),%r8
++19: movq %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+- jnz .Lloop_8
+-
+-.Lhandle_7:
++ leaq 8(%rdi),%rdi
++ decl %ecx
++ jnz 18b
++20: andl %edx,%edx
++ jz 23f
+ movl %edx,%ecx
+- andl $7,%ecx
+- jz .Lende
+- .p2align 4
+-.Lloop_1:
+-.Ls10: movb (%rsi),%bl
+-.Ld10: movb %bl,(%rdi)
+- incq %rdi
++21: movb (%rsi),%al
++22: movb %al,(%rdi)
+ incq %rsi
++ incq %rdi
+ decl %ecx
+- jnz .Lloop_1
+-
+- CFI_REMEMBER_STATE
+-.Lende:
+- popq %rcx
+- CFI_ADJUST_CFA_OFFSET -8
+- CFI_RESTORE rcx
+- popq %rbx
+- CFI_ADJUST_CFA_OFFSET -8
+- CFI_RESTORE rbx
++ jnz 21b
++23: xor %eax,%eax
+ ret
+- CFI_RESTORE_STATE
+
+-#ifdef FIX_ALIGNMENT
+- /* align destination */
+- .p2align 4
+-.Lbad_alignment:
+- movl $8,%r9d
+- subl %ecx,%r9d
+- movl %r9d,%ecx
+- cmpq %r9,%rdx
+- jz .Lhandle_7
+- js .Lhandle_7
+-.Lalign_1:
+-.Ls11: movb (%rsi),%bl
+-.Ld11: movb %bl,(%rdi)
+- incq %rsi
+- incq %rdi
+- decl %ecx
+- jnz .Lalign_1
+- subq %r9,%rdx
+- jmp .Lafter_bad_alignment
+-#endif
++ .section .fixup,"ax"
++30: shll $6,%ecx
++ addl %ecx,%edx
++ jmp 60f
++40: lea (%rdx,%rcx,8),%rdx
++ jmp 60f
++50: movl %ecx,%edx
++60: jmp copy_user_handle_tail /* ecx is zerorest also */
++ .previous
+
+- /* table sorted by exception address */
+ .section __ex_table,"a"
+ .align 8
+- .quad .Ls1,.Ls1e
+- .quad .Ls2,.Ls2e
+- .quad .Ls3,.Ls3e
+- .quad .Ls4,.Ls4e
+- .quad .Ld1,.Ls1e
+- .quad .Ld2,.Ls2e
+- .quad .Ld3,.Ls3e
+- .quad .Ld4,.Ls4e
+- .quad .Ls5,.Ls5e
+- .quad .Ls6,.Ls6e
+- .quad .Ls7,.Ls7e
+- .quad .Ls8,.Ls8e
+- .quad .Ld5,.Ls5e
+- .quad .Ld6,.Ls6e
+- .quad .Ld7,.Ls7e
+- .quad .Ld8,.Ls8e
+- .quad .Ls9,.Le_quad
+- .quad .Ld9,.Le_quad
+- .quad .Ls10,.Le_byte
+- .quad .Ld10,.Le_byte
+-#ifdef FIX_ALIGNMENT
+- .quad .Ls11,.Lzero_rest
+- .quad .Ld11,.Lzero_rest
+-#endif
+- .quad .Le5,.Le_zero
++ .quad 1b,30b
++ .quad 2b,30b
++ .quad 3b,30b
++ .quad 4b,30b
++ .quad 5b,30b
++ .quad 6b,30b
++ .quad 7b,30b
++ .quad 8b,30b
++ .quad 9b,30b
++ .quad 10b,30b
++ .quad 11b,30b
++ .quad 12b,30b
++ .quad 13b,30b
++ .quad 14b,30b
++ .quad 15b,30b
++ .quad 16b,30b
++ .quad 18b,40b
++ .quad 19b,40b
++ .quad 21b,50b
++ .quad 22b,50b
+ .previous
+-
+- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
+- pessimistic side. this is gross. it would be better to fix the
+- interface. */
+- /* eax: zero, ebx: 64 */
+-.Ls1e: addl $8,%eax
+-.Ls2e: addl $8,%eax
+-.Ls3e: addl $8,%eax
+-.Ls4e: addl $8,%eax
+-.Ls5e: addl $8,%eax
+-.Ls6e: addl $8,%eax
+-.Ls7e: addl $8,%eax
+-.Ls8e: addl $8,%eax
+- addq %rbx,%rdi /* +64 */
+- subq %rax,%rdi /* correct destination with computed offset */
+-
+- shlq $6,%rdx /* loop counter * 64 (stride length) */
+- addq %rax,%rdx /* add offset to loopcnt */
+- andl $63,%ecx /* remaining bytes */
+- addq %rcx,%rdx /* add them */
+- jmp .Lzero_rest
+-
+- /* exception on quad word loop in tail handling */
+- /* ecx: loopcnt/8, %edx: length, rdi: correct */
+-.Le_quad:
+- shll $3,%ecx
+- andl $7,%edx
+- addl %ecx,%edx
+- /* edx: bytes to zero, rdi: dest, eax:zero */
+-.Lzero_rest:
+- cmpl $0,(%rsp)
+- jz .Le_zero
+- movq %rdx,%rcx
+-.Le_byte:
+- xorl %eax,%eax
+-.Le5: rep
+- stosb
+- /* when there is another exception while zeroing the rest just return */
+-.Le_zero:
+- movq %rdx,%rax
+- jmp .Lende
+ CFI_ENDPROC
+-ENDPROC(copy_user_generic)
++ENDPROC(copy_user_generic_unrolled)
+
+-
+- /* Some CPUs run faster using the string copy instructions.
+- This is also a lot simpler. Use them when possible.
+- Patch in jmps to this code instead of copying it fully
+- to avoid unwanted aliasing in the exception tables. */
+-
+- /* rdi destination
+- * rsi source
+- * rdx count
+- * ecx zero flag
+- *
+- * Output:
+- * eax uncopied bytes or 0 if successfull.
+- *
+- * Only 4GB of copy is supported. This shouldn't be a problem
+- * because the kernel normally only writes from/to page sized chunks
+- * even if user space passed a longer buffer.
+- * And more would be dangerous because both Intel and AMD have
+- * errata with rep movsq > 4GB. If someone feels the need to fix
+- * this please consider this.
+- */
++/* Some CPUs run faster using the string copy instructions.
++ * This is also a lot simpler. Use them when possible.
++ *
++ * Only 4GB of copy is supported. This shouldn't be a problem
++ * because the kernel normally only writes from/to page sized chunks
++ * even if user space passed a longer buffer.
++ * And more would be dangerous because both Intel and AMD have
++ * errata with rep movsq > 4GB. If someone feels the need to fix
++ * this please consider this.
++ *
++ * Input:
++ * rdi destination
++ * rsi source
++ * rdx count
++ *
++ * Output:
++ * eax uncopied bytes or 0 if successful.
++ */
+ ENTRY(copy_user_generic_string)
+ CFI_STARTPROC
+- movl %ecx,%r8d /* save zero flag */
++ andl %edx,%edx
++ jz 4f
++ cmpl $8,%edx
++ jb 2f /* less than 8 bytes, go to byte copy loop */
++ ALIGN_DESTINATION
+ movl %edx,%ecx
+ shrl $3,%ecx
+- andl $7,%edx
+- jz 10f
+-1: rep
+- movsq
+- movl %edx,%ecx
+-2: rep
+- movsb
+-9: movl %ecx,%eax
+- ret
+-
+- /* multiple of 8 byte */
+-10: rep
++ andl $7,%edx
++1: rep
+ movsq
+- xor %eax,%eax
++2: movl %edx,%ecx
++3: rep
++ movsb
++4: xorl %eax,%eax
+ ret
+
+- /* exception handling */
+-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
+- jmp 6f
+-5: movl %ecx,%eax /* exception on byte loop */
+- /* eax: left over bytes */
+-6: testl %r8d,%r8d /* zero flag set? */
+- jz 7f
+- movl %eax,%ecx /* initialize x86 loop counter */
+- push %rax
+- xorl %eax,%eax
+-8: rep
+- stosb /* zero the rest */
+-11: pop %rax
+-7: ret
+- CFI_ENDPROC
+-END(copy_user_generic_c)
++ .section .fixup,"ax"
++11: lea (%rdx,%rcx,8),%rcx
++12: movl %ecx,%edx /* ecx is zerorest also */
++ jmp copy_user_handle_tail
++ .previous
+
+ .section __ex_table,"a"
+- .quad 1b,3b
+- .quad 2b,5b
+- .quad 8b,11b
+- .quad 10b,3b
++ .align 8
++ .quad 1b,11b
++ .quad 3b,12b
+ .previous
++ CFI_ENDPROC
++ENDPROC(copy_user_generic_string)
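+
+For reference, a C model of the byte counts the numbered fixup labels
+above reconstruct before jumping to copy_user_handle_tail (illustrative
+only, not part of the patch):
+
+	/* bytes still uncopied when a fault hits each copy loop */
+	static unsigned bytes_left(unsigned ecx, unsigned edx, int fixup)
+	{
+		switch (fixup) {
+		case 30: return (ecx << 6) + edx; /* 64-byte unrolled loop */
+		case 40: return edx + ecx * 8;    /* quad-word tail loop   */
+		case 50: return ecx;              /* byte tail loop        */
+		}
+		return 0;
+	}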