OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [x86/] [lib/] [copy_page_64.S] - Rev 3

Compare with Previous | Blame | View Log

/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

        /*
         * copy_page_c - copy one 4096-byte page with a single string move.
         *
         * In:    %rsi = source, %rdi = destination (the implicit operands
         *        of movsq)
         * Clobb: %rcx, %rsi, %rdi
         *
         * NOTE(review): assumes the direction flag is clear so that the
         * copy runs upwards; nothing in this file sets or checks it.
         * This label is the target of the short jump emitted into
         * .altinstr_replacement further down in this file.
         */
        ALIGN
copy_page_c:
        CFI_STARTPROC
        movl $4096/8,%ecx       /* count of 8-byte moves covering 4096 bytes */
        rep movsq               /* copy %rcx quadwords from (%rsi) to (%rdi) */
        ret
        CFI_ENDPROC
ENDPROC(copy_page_c)

/* Don't use streaming (non-temporal) stores: it is better when the
   target page ends up in the cache. */

/* Could vary the prefetch distance based on SMP/UP */

/*
 * copy_page - copy one 4096-byte page with unrolled 64-bit register moves.
 *
 * In:    %rsi = source page, %rdi = destination page
 * Out:   nothing
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags
 *        (%rsi and %rdi are left advanced by 4096, %rcx is left at 0)
 * Saved: %rbx and %r12 are used as data registers and are spilled to a
 *        16-byte stack frame around the copy.
 *
 * The page is copied as 64 lines of 64 bytes, eight registers per line.
 * The first (4096/64)-5 lines are copied while prefetching the source
 * five lines ahead; the last five lines are copied without prefetch, so
 * the prefetch address never goes past the end of the source page.
 *
 * Both loops decrement the counter at the top and branch on the resulting
 * ZF at the bottom.  This relies on movq, prefetcht0 and leaq leaving the
 * flags untouched; do not insert a flag-writing instruction (e.g. addq
 * instead of leaq) between the decrement and the jnz.
 */
ENTRY(copy_page)
        CFI_STARTPROC
        /* Only %rbx and %r12 need saving; no call is made while the
           frame is live, so the frame needs no alignment padding. */
        subq    $2*8,%rsp
        CFI_ADJUST_CFA_OFFSET 2*8
        movq    %rbx,(%rsp)
        CFI_REL_OFFSET rbx, 0
        movq    %r12,1*8(%rsp)
        CFI_REL_OFFSET r12, 1*8

        /* Main loop: every line except the last five, with prefetch. */
        movl    $(4096/64)-5,%ecx
        .p2align 4
.Loop64:
        decl    %ecx                    /* ZF consumed by jnz below */

        movq    (%rsi),%rax
        movq    8(%rsi),%rbx
        movq    16(%rsi),%rdx
        movq    24(%rsi),%r8
        movq    32(%rsi),%r9
        movq    40(%rsi),%r10
        movq    48(%rsi),%r11
        movq    56(%rsi),%r12

        prefetcht0 5*64(%rsi)           /* source line five ahead */

        movq    %rax,(%rdi)
        movq    %rbx,8(%rdi)
        movq    %rdx,16(%rdi)
        movq    %r8,24(%rdi)
        movq    %r9,32(%rdi)
        movq    %r10,40(%rdi)
        movq    %r11,48(%rdi)
        movq    %r12,56(%rdi)

        leaq    64(%rsi),%rsi           /* lea: advance without touching flags */
        leaq    64(%rdi),%rdi

        jnz     .Loop64

        /* Tail loop: the last five lines, already prefetched above. */
        movl    $5,%ecx
        .p2align 4
.Loop2:
        decl    %ecx                    /* ZF consumed by jnz below */

        movq    (%rsi),%rax
        movq    8(%rsi),%rbx
        movq    16(%rsi),%rdx
        movq    24(%rsi),%r8
        movq    32(%rsi),%r9
        movq    40(%rsi),%r10
        movq    48(%rsi),%r11
        movq    56(%rsi),%r12

        movq    %rax,(%rdi)
        movq    %rbx,8(%rdi)
        movq    %rdx,16(%rdi)
        movq    %r8,24(%rdi)
        movq    %r9,32(%rdi)
        movq    %r10,40(%rdi)
        movq    %r11,48(%rdi)
        movq    %r12,56(%rdi)

        leaq    64(%rdi),%rdi           /* lea: advance without touching flags */
        leaq    64(%rsi),%rsi

        jnz     .Loop2

        movq    (%rsp),%rbx
        CFI_RESTORE rbx
        movq    1*8(%rsp),%r12
        CFI_RESTORE r12
        addq    $2*8,%rsp
        CFI_ADJUST_CFA_OFFSET -2*8
        ret
        /* End marker: the .altinstructions record below uses it to
           compute the length of copy_page. */
.Lcopy_page_end:
        CFI_ENDPROC
ENDPROC(copy_page)

        /* Some CPUs run faster using the string copy instructions.
           It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

        /*
         * Replacement code: a two-byte short jump (opcode 0xeb followed by
         * an 8-bit displacement).  The displacement is the distance from
         * copy_page to copy_page_c minus the length of the jump itself
         * (2f - 1b), i.e. it is computed as if the jump were located at
         * the very start of copy_page, so that executed there it lands on
         * copy_page_c.
         * NOTE(review): the displacement is a single signed byte, so
         * copy_page_c has to stay within short-jump range of copy_page.
         */
        .section .altinstr_replacement,"ax"
1:      .byte 0xeb                                      /* jmp <disp8> */
        .byte (copy_page_c - copy_page) - (2f - 1b)     /* offset */
2:
        .previous
        /*
         * One patch record: address of the original code (copy_page),
         * address of the replacement (1b), the feature number
         * X86_FEATURE_REP_GOOD, the length of the original code and the
         * length of the replacement.  Both lengths are emitted as single
         * bytes, so copy_page must stay shorter than 256 bytes.
         * Presumably the record is consumed by the kernel's alternatives
         * patching code, which installs the replacement on CPUs that have
         * the feature -- the consumer is not visible in this file.
         */
        .section .altinstructions,"a"
        .align 8
        .quad copy_page
        .quad 1b
        .byte X86_FEATURE_REP_GOOD
        .byte .Lcopy_page_end - copy_page
        .byte 2b - 1b
        .previous

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.