OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libc/] [machine/] [x86_64/] [memset.S] - Diff between revs 207 and 345

Go to most recent revision | Only display areas with differences | Details | Blame | View Log

Rev 207 Rev 345
/*
 * ====================================================
 * Copyright (C) 2007 by Ellips BV. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
  #include "x86_64mach.h"

/*
 * memset -- store the low byte of rsi into rdx consecutive bytes
 * starting at rdi, and return the original rdi in rax.
 *
 * Syntax: operands are in AT&T order (source, destination).  Register
 * names are written without a '%' prefix; presumably x86_64mach.h maps
 * them to the assembler's register syntax -- confirm against that header.
 *
 * In:    rdi = destination
 *        rsi = fill value (only the low byte, sil, is used)
 *        rdx = byte count
 * Out:   rax = destination pointer as passed in
 * Uses:  rax, rcx, rdx, rdi, r8, r9, flags
 *
 * The rep stosb / rep stosq stores move rdi upwards only when the
 * direction flag is clear; this routine does not clear it itself.
 *
 * Strategy:
 *   count < 16          one rep stosb
 *   otherwise           byte-fill up to the next 8-byte boundary, then
 *     remaining < 256   rep stosq followed by a rep stosb tail
 *     remaining >= 256  128-byte blocks of non-temporal stores, sfence,
 *                       then a rep stosb tail
 */
  .global SYM (memset)
  SOTYPE_FUNCTION(memset)

SYM (memset):
  movq    rdi, r9                 /* Save return value */
  movq    rsi, rax                /* al = fill byte consumed by rep stosb */
  movq    rdx, rcx                /* rcx = byte count for the string stores */
  cmpq    $16, rdx
  jb      byte_set                /* Fewer than 16 bytes: plain byte fill */

  movq    rdi, r8                 /* Align on quad word boundary */
  andq    $7, r8                  /* r8 = offset of rdi within its quad word */
  jz      quadword_aligned
  movq    $8, rcx
  subq    r8, rcx                 /* rcx = 8 - r8: bytes up to the boundary */
  subq    rcx, rdx                /* rdx = bytes left once the head is written */
  rep     stosb
  movq    rdx, rcx

quadword_aligned:
  movabs  $0x0101010101010101, r8
  movzbl  sil, eax
  imul    r8, rax                 /* Replicate the fill byte into all 8 bytes of rax */
  cmpq    $256, rdx
  jb      quadword_set

  shrq    $7, rcx                 /* Store 128 bytes at a time with minimum cache pollution */

  .p2align 4
loop:                             /* rcx = 128-byte blocks still to write (>= 2 on entry) */
  movntiq rax,     (rdi)
  movntiq rax,   8 (rdi)
  movntiq rax,  16 (rdi)
  movntiq rax,  24 (rdi)
  movntiq rax,  32 (rdi)
  movntiq rax,  40 (rdi)
  movntiq rax,  48 (rdi)
  movntiq rax,  56 (rdi)
  movntiq rax,  64 (rdi)
  movntiq rax,  72 (rdi)
  movntiq rax,  80 (rdi)
  movntiq rax,  88 (rdi)
  movntiq rax,  96 (rdi)
  movntiq rax, 104 (rdi)
  movntiq rax, 112 (rdi)
  movntiq rax, 120 (rdi)

  leaq    128 (rdi), rdi          /* Advance past the block just written */

  dec     rcx
  jnz     loop

  sfence                          /* Order the non-temporal stores ahead of later stores */
  movq    rdx, rcx
  andq    $127, rcx               /* Bytes left over after the 128-byte blocks */
  rep     stosb                   /* al is still the fill byte: rax holds it in every byte */
  movq    r9, rax
  ret

byte_set:
  rep     stosb                   /* rcx = count, al = fill byte, both set at entry */
  movq    r9, rax
  ret

quadword_set:
  shrq    $3, rcx                 /* rcx = number of whole quad words */

  .p2align 4
  rep     stosq
  movq    rdx, rcx
  andq    $7, rcx
  rep     stosb                   /* Store the remaining bytes */
  movq    r9, rax
  ret
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.