URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk
Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [m32r/] [lib/] [checksum.S] - Rev 3

Compare with Previous | Blame | View Log
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 *              Hirokazu Takata,Hiroyuki Kondo rewrite for the m32r architecture.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */


#ifdef CONFIG_ISA_DUAL_ISSUE

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */

        .text
        /*
         * csum_partial -- variant built when CONFIG_ISA_DUAL_ISSUE is set
         * (uses "insn || insn" parallel pairs).
         *
         * In:   r0 = buff, r1 = len, r2 = sum
         * Out:  r0 = 16-bit folded sum of the buffer, added to the caller's
         *            sum with the carry wrapped back in
         * Uses: r1-r7, the condition bit (c-bit) and one stack word.
         *
         * Register roles:
         *   r2  running accumulator; the caller's sum is parked on the stack
         *       and only added back at the very end
         *   r6  loop counter (32-byte chunks, then single words)
         *   r7  1 if buff started on an odd address, else 0.  When set, the
         *       folded result is byte-swapped before the caller's sum is
         *       added, so every byte summed on that path must be placed in
         *       the lane opposite to the one it finally belongs in.
         *
         * "cmp r0, r0" is used throughout purely to clear the c-bit before a
         * run of addx (add with carry) instructions; each run is closed by an
         * addx of zero that folds the last carry back into r2.
         */
ENTRY(csum_partial)
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum

        push    r2                  ||  ldi     r2, #0
        and3    r7, r0, #1              ; Check alignment.
        beqz    r7, 1f                  ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ; An empty (or negative) length has no byte to consume.  Without this
        ; guard len would drop below zero and the unsigned shift that derives
        ; the loop count further down would run far past the buffer.
        ; r2 is still 0 here, so 7f returns just the saved sum.
        blez    r1, 7f
        ldub    r4, @r0             ||  addi    r0, #1
#ifdef __LITTLE_ENDIAN__
        ; The exit path swaps the two result bytes when r7 is set.  On a
        ; little-endian build the leading byte must end up in the low lane,
        ; so it is accumulated in the high lane here.  Big-endian wants the
        ; opposite and needs no shift.
        slli    r4, #8
#endif
        ; clear c-bit || Alignment uses up bytes.
        cmp     r0, r0              ||  addi    r1, #-1
        ldi     r3, #0              ||  addx    r2, r4
        addx    r2, r3
        .fillinsn
1:
        and3    r4, r0, #2              ; Check alignment.
        beqz    r4, 2f                  ; Jump if alignment is ok.
        ; clear c-bit || Alignment uses up two bytes.
        cmp     r0, r0              ||  addi    r1, #-2
        bgtz    r1, 1f                  ; Jump if we had at least two bytes.
        ; Not more than two bytes left: restore len and let the tail code
        ; at 4f deal with them.
        bra     4f                  ||  addi    r1, #2
        .fillinsn                       ; len(r1) was < 2.  Deal with it.
1:
        ; 2-byte aligned
        lduh    r4, @r0             ||  ldi     r3, #0
        addx    r2, r4              ||  addi    r0, #2
        addx    r2, r3
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0                  ; clear c-bit
        srl3    r6, r1, #5              ; r6 = number of 32-byte chunks
        beqz    r6, 2f
        .fillinsn

        ; Main loop: eight word loads per pass, with the adds interleaved
        ; one step behind the loads.
1:      ld      r3, @r0+
        ld      r4, @r0+                                        ; +4
        ld      r5, @r0+                                        ; +8
        ld      r3, @r0+            ||  addx    r2, r3          ; +12
        ld      r4, @r0+            ||  addx    r2, r4          ; +16
        ld      r5, @r0+            ||  addx    r2, r5          ; +20
        ld      r3, @r0+            ||  addx    r2, r3          ; +24
        ld      r4, @r0+            ||  addx    r2, r4          ; +28
        addx    r2, r5              ||  addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b

        addx    r2, r6                  ; r6=0
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn
2:      and3    r6, r1, #0x1c           ; withdraw len
        beqz    r6, 4f
        srli    r6, #2                  ; r6 = remaining whole words (1..7)
        .fillinsn

3:      ld      r4, @r0+            ||  addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b

        addx    r2, r6                  ; r6=0
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn
4:      and3    r1, r1, #3              ; r1 = trailing bytes (0..3)
        beqz    r1, 7f                  ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f                  ; if len < 2  goto 5f(1byte)
        lduh    r4, @r0             ||  addi    r0, #2
        ; The halfword goes into the upper half of the accumulator; the
        ; fold at 7f brings it back down.
        addi    r1, #-2             ||  slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0             ||  ldi     r1, #0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8                  ; last byte is the high byte of its halfword
#endif
        addx    r2, r4
        .fillinsn
6:      addx    r2, r1                  ; r1=0: fold in the last carry
        .fillinsn
7:
        ; Fold the 32-bit accumulator to 16 bits, wrapping one carry.
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f                  ; swap the upper byte for the lower
        and3    r2, r0, #0xff
        srl3    r0, r0, #8
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        ; Recover the caller's sum and add it with end-around carry.
        pop     r2                  ||  cmp     r0, r0
        addx    r0, r2              ||  ldi     r2, #0
        addx    r0, r2
        jmp     r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */

        .text
        /*
         * csum_partial -- variant built when CONFIG_ISA_DUAL_ISSUE is not
         * set (one instruction per line, no parallel pairs).
         *
         * In:   r0 = buff, r1 = len, r2 = sum
         * Out:  r0 = 16-bit folded sum of the buffer, added to the caller's
         *            sum with the carry wrapped back in
         * Uses: r1-r7, the condition bit (c-bit) and one stack word.
         *
         * Register roles:
         *   r2  running accumulator; the caller's sum is parked on the stack
         *       and only added back at the very end
         *   r6  loop counter (32-byte chunks, then single words)
         *   r7  1 if buff started on an odd address, else 0.  When set, the
         *       folded result is byte-swapped before the caller's sum is
         *       added, so every byte summed on that path must be placed in
         *       the lane opposite to the one it finally belongs in.
         *
         * "cmp r0, r0" is used throughout purely to clear the c-bit before a
         * run of addx (add with carry) instructions; each run is closed by an
         * addx of zero that folds the last carry back into r2.
         */
ENTRY(csum_partial)
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum

        push    r2
        ldi     r2, #0
        and3    r7, r0, #1              ; Check alignment.
        beqz    r7, 1f                  ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ; An empty (or negative) length has no byte to consume.  Without this
        ; guard len would drop below zero and the unsigned shift that derives
        ; the loop count further down would run far past the buffer.
        ; r2 is still 0 here, so 7f returns just the saved sum.
        blez    r1, 7f
        ldub    r4, @r0
        addi    r0, #1
#ifdef __LITTLE_ENDIAN__
        ; The exit path swaps the two result bytes when r7 is set.  On a
        ; little-endian build the leading byte must end up in the low lane,
        ; so it is accumulated in the high lane here.  Big-endian wants the
        ; opposite and needs no shift.
        slli    r4, #8
#endif
        addi    r1, #-1                 ; Alignment uses up bytes.
        cmp     r0, r0                  ; clear c-bit
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3
        .fillinsn
1:
        and3    r4, r0, #2              ; Check alignment.
        beqz    r4, 2f                  ; Jump if alignment is ok.
        addi    r1, #-2                 ; Alignment uses up two bytes.
        cmp     r0, r0                  ; clear c-bit
        bgtz    r1, 1f                  ; Jump if we had at least two bytes.
        ; Not more than two bytes left: restore len and let the tail code
        ; at 4f deal with them.
        addi    r1, #2                  ; len(r1) was < 2.  Deal with it.
        bra     4f
        .fillinsn
1:
        ; 2-byte aligned
        lduh    r4, @r0
        addi    r0, #2
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0                  ; clear c-bit
        srl3    r6, r1, #5              ; r6 = number of 32-byte chunks
        beqz    r6, 2f
        .fillinsn

        ; Main loop: eight word loads per pass.
1:      ld      r3, @r0+
        ld      r4, @r0+                ; +4
        ld      r5, @r0+                ; +8
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+                ; +12
        ld      r4, @r0+                ; +16
        ld      r5, @r0+                ; +20
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+                ; +24
        ld      r4, @r0+                ; +28
        addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b
        addx    r2, r6                  ; r6=0
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn

2:      and3    r6, r1, #0x1c           ; withdraw len
        beqz    r6, 4f
        srli    r6, #2                  ; r6 = remaining whole words (1..7)
        .fillinsn

3:      ld      r4, @r0+
        addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b
        addx    r2, r6                  ; r6=0
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn

4:      and3    r1, r1, #3              ; r1 = trailing bytes (0..3)
        beqz    r1, 7f                  ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f                  ; if len < 2  goto 5f(1byte)

        lduh    r4, @r0
        addi    r0, #2
        addi    r1, #-2
        ; The halfword goes into the upper half of the accumulator; the
        ; fold at 7f brings it back down.
        slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8                  ; last byte is the high byte of its halfword
#endif
        addx    r2, r4
        .fillinsn
6:      ldi     r5, #0
        addx    r2, r5                  ; fold in the last carry
        .fillinsn
7:
        ; Fold the 32-bit accumulator to 16 bits, wrapping one carry.
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f                  ; swap the upper byte for the lower
        mv      r2, r0
        srl3    r0, r2, #8
        and3    r2, r2, #0xff
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        ; Recover the caller's sum and add it with end-around carry.
        pop     r2
        cmp     r0, r0
        addx    r0, r2
        ldi     r2, #0
        addx    r0, r2
        jmp     r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
                                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */

ENTRY(csum_partial_copy_generic)
        ; Placeholder body: nothing is copied and nothing is summed.
        ; No register or memory location is written, so the caller gets
        ; back whatever it passed in r0.
        ; NOTE(review): the SRC/DST access macros described in the comment
        ; above this entry are not used anywhere in this body.
        nop
        nop
        nop
        nop
        jmp r14                         ; return to the caller
        ; The nops below sit after the unconditional return.
        ; NOTE(review): presumably padding -- confirm before removing.
        nop
        nop
        nop

        .end
Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [m32r/] [lib/] [checksum.S] - Rev 3