OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libc/] [machine/] [x86_64/] [memcpy.S] - Diff between revs 207 and 345

Only display areas with differences | Details | Blame | View Log

Rev 207 Rev 345
/*
 * ====================================================
 * Copyright (C) 2007 by Ellips BV. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
  #include "x86_64mach.h"
  #include "x86_64mach.h"
  .global SYM (memcpy)
  SOTYPE_FUNCTION(memcpy)

/*
 * memcpy -- copy rdx bytes from the buffer at rsi to the buffer at rdi.
 *
 * Syntax: GNU as, AT&T operand order (source, destination).  Register
 *         names are written bare; presumably they are macros supplied by
 *         x86_64mach.h -- confirm against that header.
 *
 * In:     rdi = destination, rsi = source, rdx = byte count
 * Out:    rax = original destination pointer
 * Clobb:  rcx, rdx, rsi, rdi, r8, flags; additionally r9-r11 on the
 *         bulk (>= 256 byte) path.  r12-r14 are used on the bulk path
 *         but saved and restored.
 * Stack:  32 bytes (four pushes) on the bulk path only; none otherwise.
 * Note:   the string instructions assume the direction flag is clear on
 *         entry (presumably guaranteed by the calling convention).
 *
 * Strategy:
 *   n < 16           plain byte copy
 *   otherwise        byte-copy until the destination is 8-byte aligned,
 *     then n < 256   quadword copy plus a byte tail
 *     else           128-byte blocks with non-temporal stores, then a
 *                    byte tail
 */
SYM (memcpy):
  movq    rdi, rax                /* Return value = original destination */
  cmpq    $16, rdx
  jb      .Lbyte_copy             /* Tiny copy: not worth aligning */

  movq    rdi, r8                 /* r8 = destination & 7 (misalignment) */
  andq    $7, r8
  jz      .Lquadword_aligned
  movq    $8, rcx                 /* rcx = 8 - misalignment = bytes to next boundary */
  subq    r8, rcx
  subq    rcx, rdx                /* rdx >= 16 and rcx <= 7 here: cannot underflow */
  rep     movsb

.Lquadword_aligned:
  cmpq    $256, rdx
  jb      .Lquadword_copy

  /* Bulk path.  rax is reused as a data register inside the loop, so
     the return value is parked on the stack next to r12-r14.  */
  pushq   rax
  pushq   r12
  pushq   r13
  pushq   r14

  movq    rdx, rcx                /* Copy 128 bytes at a time with minimum cache pollution */
  shrq    $7, rcx                 /* rcx = number of 128-byte blocks (>= 2 here) */

  .p2align 4
.Lbulk_loop:
  prefetchnta   768 (rsi)         /* Prefetch the two 64-byte halves of a block 768 bytes ahead */
  prefetchnta   832 (rsi)

  movq       (rsi), rax           /* Load bytes 0..63 of the block */
  movq     8 (rsi), r8
  movq    16 (rsi), r9
  movq    24 (rsi), r10
  movq    32 (rsi), r11
  movq    40 (rsi), r12
  movq    48 (rsi), r13
  movq    56 (rsi), r14

  movntiq rax,    (rdi)           /* Store bytes 0..63 with non-temporal stores */
  movntiq r8 ,  8 (rdi)
  movntiq r9 , 16 (rdi)
  movntiq r10, 24 (rdi)
  movntiq r11, 32 (rdi)
  movntiq r12, 40 (rdi)
  movntiq r13, 48 (rdi)
  movntiq r14, 56 (rdi)

  movq     64 (rsi), rax          /* Load bytes 64..127 of the block */
  movq     72 (rsi), r8
  movq     80 (rsi), r9
  movq     88 (rsi), r10
  movq     96 (rsi), r11
  movq    104 (rsi), r12
  movq    112 (rsi), r13
  movq    120 (rsi), r14

  movntiq rax,  64 (rdi)          /* Store bytes 64..127 */
  movntiq r8 ,  72 (rdi)
  movntiq r9 ,  80 (rdi)
  movntiq r10,  88 (rdi)
  movntiq r11,  96 (rdi)
  movntiq r12, 104 (rdi)
  movntiq r13, 112 (rdi)
  movntiq r14, 120 (rdi)

  leaq    128 (rsi), rsi          /* Advance both pointers; lea leaves the flags alone */
  leaq    128 (rdi), rdi

  decq    rcx
  jnz     .Lbulk_loop

  sfence                          /* Order the non-temporal stores before any later stores */

  movq    rdx, rcx                /* Tail: the remaining (count mod 128) bytes */
  andq    $127, rcx
  rep     movsb

  popq    r14                     /* Restore in reverse push order */
  popq    r13
  popq    r12
  popq    rax                     /* rax = original destination again */
  ret

.Lbyte_copy:
  movq    rdx, rcx                /* rcx = byte count for rep movsb (may be 0) */
  rep     movsb
  ret

.Lquadword_copy:
  movq    rdx, rcx                /* rcx = number of whole quadwords */
  shrq    $3, rcx
  rep     movsq
  movq    rdx, rcx
  andq    $7, rcx
  rep     movsb                   /* Copy the remaining bytes */
  ret
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.