OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [alpha/] [lib/] [ev67-strrchr.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * arch/alpha/lib/ev67-strrchr.S
3
 * 21264 version by Rick Gorton 
4
 *
5
 * Finds the last occurrence of a character in a 0-terminated string.  Optimized for the
6
 * Alpha architecture:
7
 *
8
 *      - memory accessed as aligned quadwords only
9
 *      - uses cmpbge to compare 8 bytes in parallel
10
 *
11
 * Much of the information about 21264 scheduling/coding comes from:
12
 *      Compiler Writer's Guide for the Alpha 21264
13
 *      abbreviated as 'CWG' in other comments here
14
 *      ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
15
 * Scheduling notation:
16
 *      E       - either cluster
17
 *      U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
18
 *      L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
19
 */
20
 
21
 
22
#include 
23
 
24
        .set noreorder          # keep the instruction order exactly as scheduled below
25
        .set noat               # the assembler may not use the $at temporary on its own
26
 
/*
 * strrchr
 *
 * Scans the 0-terminated string addressed by a0 and returns, in v0, the
 * address of the LAST byte that equals the low byte of a1.  When no byte
 * matches, v0 is 0.  The terminator itself takes part in the comparison
 * (the final mask keeps bytes "up to and including the null").
 *
 * The string is read only as aligned quadwords: the first one with ldq_u,
 * all later ones with ldq from the aligned address kept in v0.
 *
 * Register roles (as used by the code below):
 *      a0      - string pointer, never modified
 *      a1      - search character on entry; overwritten with that byte
 *                replicated into all eight byte lanes
 *      v0      - aligned address of the quadword being examined; holds
 *                the result on return
 *      t0      - current quadword of string data
 *      t1      - byte mask: lanes of t0 that are zero
 *      t3      - byte mask: lanes of t0 that equal the search character
 *      t6      - aligned address of the last quadword that contained a
 *                match (stays 0 while nothing has matched)
 *      t8      - match mask recorded together with t6
 *      t2,t4,t5 - scratch (t3 is scratch too while a1 is being replicated)
 *
 * No stack is used (.frame size 0) and the routine makes no calls.
 *
 * NOTE(review): the register names are symbolic; presumably they come
 * from the header named on the #include line above, whose operand is
 * missing in this copy - confirm against the original file.
 * NOTE(review): the bare decimal numbers between the instructions are
 * line-number residue of the repository blame view this copy was taken
 * from ("Line No. Rev Author Line"); they are not assembly source.
 */
27
        .align 4
28
        .ent strrchr
29
        .globl strrchr
30
strrchr:
31
        .frame sp, 0, ra        # frame size 0, return address stays in ra
32
        .prologue 0
33
 
        /*
         * Setup, interleaved for scheduling:
         *   - replicate the search byte into every lane of a1
         *     (the "ch" pictures show which lanes are filled so far)
         *   - fetch the first quadword and align the working address
         *   - clear the "last match" state (t6, t8)
         */
34
        and     a1, 0xff, t2    # E : 00000000000000ch
35
        insbl   a1, 1, t4       # U : 000000000000ch00
36
        insbl   a1, 2, t5       # U : 0000000000ch0000
37
        ldq_u   t0, 0(a0)       # L : load first quadword Latency=3
38
 
39
        mov     zero, t6        # E : t6 is last match aligned addr (0 = no match yet)
40
        or      t2, t4, a1      # E : 000000000000chch
41
        sll     t5, 8, t3       # U : 00000000ch000000
42
        mov     zero, t8        # E : t8 is last match byte compare mask
43
 
44
        andnot  a0, 7, v0       # E : align source addr
45
        or      t5, t3, t3      # E : 00000000chch0000
46
        sll     a1, 32, t2      # U : 0000chch00000000
47
        sll     a1, 48, t4      # U : chch000000000000
48
 
49
        or      t4, a1, a1      # E : chch00000000chch
50
        or      t2, t3, t2      # E : 0000chchchch0000
51
        or      a1, t2, a1      # E : chchchchchchchch
52
        lda     t5, -1          # E : build garbage mask (start from all ones)
53
 
        /*
         * First quadword: it was loaded from the aligned address, so it
         * can contain "garbage" lanes that are not part of the string at
         * a0.  t4 becomes the mask of those lanes and is removed from
         * both the null mask (t1) and the match mask (t3).
         */
54
        cmpbge  zero, t0, t1    # E : bits set iff byte == zero
55
        mskqh   t5, a0, t4      # E : Complete garbage mask
56
        xor     t0, a1, t2      # E : make bytes == c zero
57
        cmpbge  zero, t4, t4    # E : bits set iff byte is garbage
58
 
59
        cmpbge  zero, t2, t3    # E : bits set iff byte == c
60
        andnot  t1, t4, t1      # E : clear garbage from null test
61
        andnot  t3, t4, t3      # E : clear garbage from char test
62
        bne     t1, $eos        # U : did we already hit the terminator?
63
 
64
        /* Character search main loop */
        /*
         * On entry to each iteration t3 is the match mask of the quadword
         * at v0, which is known to hold no terminator.  If it has any
         * match, that quadword becomes the new "last match" (t6/t8); then
         * v0 advances by 8 and the masks are recomputed for the quadword
         * just loaded.
         */
65
$loop:
66
        ldq     t0, 8(v0)       # L : load next quadword
67
        cmovne  t3, v0, t6      # E : save previous comparisons match
68
        nop                     #   : Latency=2, extra map slot (keep nop with cmov)
69
        nop                     #   : filler, completes the group of four
70
 
71
        cmovne  t3, t3, t8      # E : Latency=2, extra map slot
72
        nop                     #   : keep with cmovne
73
        addq    v0, 8, v0       # E : v0 now addresses the quadword held in t0
74
        xor     t0, a1, t2      # E : make bytes == c zero
75
 
76
        cmpbge  zero, t0, t1    # E : bits set iff byte == zero
77
        cmpbge  zero, t2, t3    # E : bits set iff byte == c
78
        beq     t1, $loop       # U : if we haven't seen a null, loop
79
        nop                     #   : filler, completes the group of four
80
 
81
        /* Mask out character matches after terminator */
        /*
         * t1 has a bit for every zero lane of the final quadword.
         * t1 & -t1 keeps only the lowest such bit (the terminator);
         * x | (x - 1) then yields all bits up to and including it.
         * Matches outside that range lie past the end of the string
         * and are dropped from t3.
         */
82
$eos:
83
        negq    t1, t4          # E : isolate first null byte match
84
        and     t1, t4, t4      # E :
85
        subq    t4, 1, t5       # E : build a mask of the bytes up to...
86
        or      t4, t5, t4      # E : ... and including the null
87
 
88
        and     t3, t4, t3      # E : mask out char matches after null
89
        cmovne  t3, t3, t8      # E : save it, if match found Latency=2, extra map slot
90
        nop                     #   : Keep with cmovne
91
        nop                     #   : filler, completes the group of four
92
 
93
        cmovne  t3, v0, t6      # E : ... and remember which quadword it belongs to
94
        nop                     #   : Keep with cmovne
95
        /* Locate the address of the last matched character */
        /*
         * t8 is a byte-lane mask, so 0x3f - ctlz(t8) is the index of its
         * highest set bit, used here as the byte offset of the last match
         * inside the quadword at t6.  With no match at all t8 is 0 and
         * t6 is still 0; forcing the count to 0x3f makes the offset 0,
         * so the result is 0.
         */
96
        ctlz    t8, t2          # U0 : Latency=3 (0x40 for t8=0)
97
        nop                     #   : filler, completes the group of four
98
 
99
        cmoveq  t8, 0x3f, t2    # E : Compensate for case when no match is seen
100
        nop                     # E : hide the cmov latency (2) behind ctlz latency
101
        lda     t5, 0x3f($31)   # E : t5 = 0x3f
102
        subq    t5, t2, t5      # E : Normalize leading zero count
103
 
104
        addq    t6, t5, v0      # E : and add to quadword address
105
        ret                     # L0 : Latency=3
106
        nop                     #   : filler after ret
107
        nop                     #   : filler after ret
108
 
109
        .end strrchr

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.