OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [cris/] [lib/] [checksumcopy.S] - Rev 1765

Compare with Previous | Blame | View Log

/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998-2003 Axis Communications AB
 *
 * Authors:     Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *                           int len, unsigned int sum)
 */

        .globl  csum_partial_copy_nocheck
csum_partial_copy_nocheck:      
        
        ;; Copy 'length' bytes from src to dst and, in the same pass, add the
        ;; copied data onto the incoming checksum.
        ;;
        ;; Register use on entry:
        ;; r10 - src
        ;; r11 - dst
        ;; r12 - length
        ;; r13 - checksum
        ;;
        ;; On exit the accumulated checksum is moved from r13 to r10.
        ;; r9 is used as a temporary; r0 - r8 are saved and restored around
        ;; the movem loop.

        ;; check for breakeven length between movem and normal word looping versions:
        ;; anything shorter than 80 bytes skips the movem loop entirely.
        ;; the movem loop always handles a full 40 bytes per pass, so this
        ;; check also guarantees that we do _NOT_ compute a checksum over
        ;; more than the actual length when length < 40
        
        cmpu.w  80, $r12
        blo     _word_loop
        nop                     ; branch delay slot

        ;; need to save the registers we use below in the movem loop
        ;; this overhead is why we have a check above for breakeven length
        ;; only r0 - r8 have to be saved, the other ones are clobber-able
        ;; according to the ABI
        
        subq    9*4, $sp
        movem   $r8, [$sp]
        
        ;; do a movem copy and checksum
        ;; r12 is kept one 40-byte chunk "ahead": it is decremented once here
        ;; and once per pass, so the bge at the bottom of the loop continues
        ;; only while at least one more full chunk remains.

        subq    10*4, $r12      ; update length for the first loop
        
_mloop: movem   [$r10+],$r9     ; read 10 longwords
1:      ;; A failing userspace access will have this as PC.
        movem   $r9,[$r11+]     ; write 10 longwords

        ;; perform dword checksumming on the 10 longwords
        ;; each ax makes the add that follows it include the carry produced
        ;; by the add before it
        
        add.d   $r0,$r13
        ax
        add.d   $r1,$r13
        ax
        add.d   $r2,$r13
        ax
        add.d   $r3,$r13
        ax
        add.d   $r4,$r13
        ax
        add.d   $r5,$r13
        ax
        add.d   $r6,$r13
        ax
        add.d   $r7,$r13
        ax
        add.d   $r8,$r13
        ax
        add.d   $r9,$r13

        ;; fold the carry into the checksum, to avoid having to loop the carry
        ;; back into the top
        
        ax
        addq    0,$r13
        ax                      ; do it again, since we might have generated a carry
        addq    0,$r13

        subq    10*4,$r12
        bge     _mloop
        nop                     ; branch delay slot

        addq    10*4,$r12       ; compensate for last loop underflowing length

        movem   [$sp+],$r8      ; restore regs

_word_loop:
        ;; only fold if there is anything to fold.
        ;; note: there is no nop after the beq, so the first move.d below sits
        ;; in its delay slot and overwrites r9 even when the fold is skipped;
        ;; that is harmless because r9 is only a temporary.

        cmpq    0,$r13
        beq     _no_fold

        ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
        ;; r9 can be used as temporary.
        
        move.d  $r13,$r9
        lsrq    16,$r9          ; r9 = checksum >> 16
        and.d   0xffff,$r13     ; checksum = checksum & 0xffff
        add.d   $r9,$r13        ; checksum += r9
        move.d  $r13,$r9        ; do the same again, maybe we got a carry last add
        lsrq    16,$r9
        and.d   0xffff,$r13
        add.d   $r9,$r13
        
_no_fold:
        ;; fewer than 2 bytes left: nothing for the word loop to do
        cmpq    2,$r12
        blt     _no_words
        nop                     ; branch delay slot
        
        ;; copy and checksum the rest of the words
        ;; as in the movem loop, r12 is pre-decremented by one word so that
        ;; bge continues only while another full word remains
        
        subq    2,$r12
        
_wloop: move.w  [$r10+],$r9
2:      ;; A failing userspace access will have this as PC.
        addu.w  $r9,$r13
        subq    2,$r12
        bge     _wloop
        move.w  $r9,[$r11+]     ; delay slot: the store is done on every pass, including the last
        
        addq    2,$r12          ; compensate for the loop underflowing length
                
_no_words:
        ;; see if we have one odd byte more
        cmpq    1,$r12
        beq     _do_byte
        nop                     ; branch delay slot
        ret
        move.d  $r13, $r10      ; delay slot of ret: hand back the checksum in r10

_do_byte:       
        ;; copy and checksum the last byte
        ;; (neither pointer is advanced; this is the final access)
        move.b  [$r10],$r9
3:      ;; A failing userspace access will have this as PC.
        addu.b  $r9,$r13
        move.b  $r9,[$r11]
        ret
        move.d  $r13, $r10      ; delay slot of ret: hand back the checksum in r10

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.