OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [powerpc/] [kernel/] [vector.S] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
#include 
2
#include 
3
 
4
/*
5
 * The routines below are in assembler so we can closely control the
6
 * usage of floating-point registers.  These routines must be called
7
 * with preempt disabled.
8
 */
9
/*
 * Floating-point constants used by the routines below, plus the
 * LDCONST(fr, name) macro that loads one of them into FP register fr.
 * Two variants are provided, selected by CONFIG_PPC32.
 */
#ifdef CONFIG_PPC32
10
        .data
11
fpzero:
12
        .long   0               /* all-zero bit pattern, read with lfs by LDCONST */
13
fpone:
14
        .long   0x3f800000      /* 1.0 in single-precision FP */
15
fphalf:
16
        .long   0x3f000000      /* 0.5 in single-precision FP */
17
 
/*
 * CONFIG_PPC32 variant: build the constant's address from its @ha and @l
 * halves and do a single-precision load (lfs).  Clobbers r11.
 */
18
#define LDCONST(fr, name)       \
19
        lis     r11,name@ha;    \
20
        lfs     fr,name@l(r11)
21
#else
22
 
23
        .section ".toc","aw"
24
fpzero:
25
        .tc     FD_0_0[TC],0    /* all-zero bit pattern, read with lfd by LDCONST */
26
fpone:
27
        .tc     FD_3ff00000_0[TC],0x3ff0000000000000    /* 1.0 */
28
fphalf:
29
        .tc     FD_3fe00000_0[TC],0x3fe0000000000000    /* 0.5 */
30
 
/*
 * Variant used when CONFIG_PPC32 is not defined: the constants are 64-bit
 * entries in the .toc section and are loaded as doubles (lfd) at a @toc
 * offset from r2.  No integer register is clobbered.
 */
31
#define LDCONST(fr, name)       \
32
        lfd     fr,name@toc(r2)
33
#endif
34
 
35
        .text
36
/*
37
 * Internal routine to enable floating point and set FPSCR to 0.
38
 * Don't call it from C; it doesn't use the normal calling convention.
 *
 * Every caller in this file executes "mflr r12" before "bl fpenable"
 * and later leaves through fpdisable, which undoes what is done here.
 *
 * State on return:
 *   r1   = lowered by 64 bytes (new stack frame)
 *   r10  = MSR value read on entry
 *   r11  = clobbered
 *   fr1  = the fpzero constant
 *   fr31 = FPSCR value read on entry (via mffs)
 *   8(r1), 16(r1), 24(r1) = entry values of fr31, fr1, fr0
 * Frame offsets 32..63 are not written here; the callers use them to
 * save fr2-fr5 when they need those registers.
39
 */
40
fpenable:
41
#ifdef CONFIG_PPC32
42
        stwu    r1,-64(r1)      /* push a 64-byte frame (32-bit back chain) */
43
#else
44
        stdu    r1,-64(r1)      /* push a 64-byte frame (64-bit back chain) */
45
#endif
46
        mfmsr   r10             /* r10 = current MSR, kept for fpdisable */
47
        ori     r11,r10,MSR_FP  /* r11 = MSR with the FP-available bit set */
48
        mtmsr   r11
49
        isync                   /* MSR update takes effect before the first FP instruction */
50
        stfd    fr0,24(r1)      /* save the FP registers used as scratch */
51
        stfd    fr1,16(r1)
52
        stfd    fr31,8(r1)
53
        LDCONST(fr1, fpzero)    /* fr1 = 0.0 (on PPC32 this clobbers r11) */
54
        mffs    fr31            /* fr31 = FPSCR on entry, restored by fpdisable */
55
        MTFSF_L(fr1)            /* macro defined outside this file; per the header comment this zeroes FPSCR from fr1 */
56
        blr                     /* back to the routine that did "bl fpenable" */
57
 
58
/*
 * Common exit path, the inverse of fpenable.  The routines in this file
 * reach it with a plain branch ("b fpdisable"), not a call, so the final
 * blr returns to the original caller of the vector routine.
 *
 * Expects the state left by fpenable and its caller:
 *   r12  = return address saved by the caller's "mflr r12"
 *   r10  = MSR value to restore
 *   fr31 = FPSCR value to restore
 *   8(r1), 16(r1), 24(r1) = saved fr31, fr1, fr0
 */
fpdisable:
58
        mtlr    r12             /* LR = return address of the vector routine's caller */
59
        MTFSF_L(fr31)           /* macro defined outside this file; presumably writes the saved FPSCR back */
60
        lfd     fr31,8(r1)      /* reload the FP registers saved by fpenable */
61
        lfd     fr1,16(r1)
62
        lfd     fr0,24(r1)
63
        mtmsr   r10             /* restore the MSR value read by fpenable */
64
        isync
65
        addi    r1,r1,64        /* pop the 64-byte frame pushed by fpenable */
66
        blr
68
 
69
/*
70
 * Vector add, floating point.
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * Processes four consecutive single-precision elements:
 *   dst[i] = src1[i] + src2[i]
 * Uses r0, r6, r10-r12 and CTR as scratch; fr0/fr1 are saved and
 * restored by fpenable/fpdisable.
71
 */
72
_GLOBAL(vaddfp)
73
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
74
        bl      fpenable
75
        li      r0,4
76
        mtctr   r0              /* CTR = 4 elements to process */
77
        li      r6,0            /* r6 = byte offset of the current element */
78
1:      lfsx    fr0,r4,r6       /* fr0 = src1 element */
79
        lfsx    fr1,r5,r6       /* fr1 = src2 element */
80
        fadds   fr0,fr0,fr1
81
        stfsx   fr0,r3,r6       /* store the sum to the destination */
82
        addi    r6,r6,4         /* advance by one 4-byte element */
83
        bdnz    1b              /* loop until CTR reaches zero */
84
        b       fpdisable       /* tail branch: fpdisable restores state and returns */
85
 
86
/*
87
 * Vector subtract, floating point.
 * r3 -> destination, r4 -> minuend source, r5 -> subtrahend source.
 * Processes four consecutive single-precision elements:
 *   dst[i] = src1[i] - src2[i]
 * Uses r0, r6, r10-r12 and CTR as scratch; fr0/fr1 are saved and
 * restored by fpenable/fpdisable.
88
 */
89
_GLOBAL(vsubfp)
90
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
91
        bl      fpenable
92
        li      r0,4
93
        mtctr   r0              /* CTR = 4 elements to process */
94
        li      r6,0            /* r6 = byte offset of the current element */
95
1:      lfsx    fr0,r4,r6       /* fr0 = src1 element */
96
        lfsx    fr1,r5,r6       /* fr1 = src2 element */
97
        fsubs   fr0,fr0,fr1     /* fr0 = src1 - src2 */
98
        stfsx   fr0,r3,r6       /* store the difference to the destination */
99
        addi    r6,r6,4         /* advance by one 4-byte element */
100
        bdnz    1b              /* loop until CTR reaches zero */
101
        b       fpdisable       /* tail branch: fpdisable restores state and returns */
102
 
103
/*
104
 * Vector multiply and add, floating point.
 * r3 -> destination, r4 -> multiplicand, r6 -> multiplier, r5 -> addend.
 * Processes four consecutive single-precision elements:
 *   dst[i] = r4[i] * r6[i] + r5[i]
 * Uses r0, r7, r10-r12 and CTR as scratch.  fr2 is saved at 32(r1) in
 * the frame created by fpenable and restored before leaving.
105
 */
106
_GLOBAL(vmaddfp)
107
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
108
        bl      fpenable
109
        stfd    fr2,32(r1)      /* fpenable only saves fr0/fr1/fr31; save fr2 ourselves */
110
        li      r0,4
111
        mtctr   r0              /* CTR = 4 elements to process */
112
        li      r7,0            /* r7 = byte offset of the current element */
113
1:      lfsx    fr0,r4,r7       /* fr0 = multiplicand */
114
        lfsx    fr1,r5,r7       /* fr1 = addend */
115
        lfsx    fr2,r6,r7       /* fr2 = multiplier */
116
        fmadds  fr0,fr0,fr2,fr1 /* fr0 = fr0 * fr2 + fr1 */
117
        stfsx   fr0,r3,r7
118
        addi    r7,r7,4         /* advance by one 4-byte element */
119
        bdnz    1b              /* loop until CTR reaches zero */
120
        lfd     fr2,32(r1)      /* restore fr2 before the frame is popped */
121
        b       fpdisable       /* tail branch: fpdisable restores state and returns */
122
 
123
/*
124
 * Vector negative multiply and subtract, floating point.
 * r3 -> destination, r4 -> multiplicand, r6 -> multiplier, r5 -> subtrahend.
 * Processes four consecutive single-precision elements:
 *   dst[i] = -(r4[i] * r6[i] - r5[i])
 * Uses r0, r7, r10-r12 and CTR as scratch.  fr2 is saved at 32(r1) in
 * the frame created by fpenable and restored before leaving.
125
 */
126
_GLOBAL(vnmsubfp)
127
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
128
        bl      fpenable
129
        stfd    fr2,32(r1)      /* fpenable only saves fr0/fr1/fr31; save fr2 ourselves */
130
        li      r0,4
131
        mtctr   r0              /* CTR = 4 elements to process */
132
        li      r7,0            /* r7 = byte offset of the current element */
133
1:      lfsx    fr0,r4,r7       /* fr0 = multiplicand */
134
        lfsx    fr1,r5,r7       /* fr1 = subtrahend */
135
        lfsx    fr2,r6,r7       /* fr2 = multiplier */
136
        fnmsubs fr0,fr0,fr2,fr1 /* fr0 = -(fr0 * fr2 - fr1) */
137
        stfsx   fr0,r3,r7
138
        addi    r7,r7,4         /* advance by one 4-byte element */
139
        bdnz    1b              /* loop until CTR reaches zero */
140
        lfd     fr2,32(r1)      /* restore fr2 before the frame is popped */
141
        b       fpdisable       /* tail branch: fpdisable restores state and returns */
142
 
143
/*
144
 * Vector reciprocal estimate.  We just compute 1.0/x.
145
 * r3 -> destination, r4 -> source.
 * Processes four consecutive single-precision elements using a full
 * single-precision divide (fdivs) rather than an estimate instruction.
 * Uses r0, r6, r10-r12 and CTR as scratch; fr0/fr1 are saved and
 * restored by fpenable/fpdisable.
146
 */
147
_GLOBAL(vrefp)
148
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
149
        bl      fpenable
150
        li      r0,4
151
        LDCONST(fr1, fpone)     /* fr1 = 1.0, the loop-invariant dividend */
152
        mtctr   r0              /* CTR = 4 elements to process */
153
        li      r6,0            /* r6 = byte offset of the current element */
154
1:      lfsx    fr0,r4,r6       /* fr0 = x */
155
        fdivs   fr0,fr1,fr0     /* fr0 = 1.0 / x */
156
        stfsx   fr0,r3,r6
157
        addi    r6,r6,4         /* advance by one 4-byte element */
158
        bdnz    1b              /* loop until CTR reaches zero */
159
        b       fpdisable       /* tail branch: fpdisable restores state and returns */
160
 
161
/*
162
 * Vector reciprocal square-root estimate, floating point.
163
 * We use the frsqrte instruction for the initial estimate followed
164
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
165
 * r3 -> destination, r4 -> source.
 * Processes four consecutive single-precision elements.
 *
 * Register use inside the loop:
 *   fr0 = s (input element)      fr1 = r (current estimate)
 *   fr2, fr3 = temporaries       fr4 = 1.0      fr5 = 0.5
 * fr2-fr5 are saved at 32..56(r1) in the frame created by fpenable and
 * restored before leaving; fr0/fr1 are handled by fpenable/fpdisable.
 * Also uses r0, r6, r10-r12 and CTR as scratch.
166
 */
167
_GLOBAL(vrsqrtefp)
168
        mflr    r12             /* keep the return address; "bl" below overwrites LR */
169
        bl      fpenable
170
        stfd    fr2,32(r1)      /* save the extra FP registers this routine uses */
171
        stfd    fr3,40(r1)
172
        stfd    fr4,48(r1)
173
        stfd    fr5,56(r1)
174
        li      r0,4
175
        LDCONST(fr4, fpone)     /* fr4 = 1.0 */
176
        LDCONST(fr5, fphalf)    /* fr5 = 0.5 */
177
        mtctr   r0              /* CTR = 4 elements to process */
178
        li      r6,0            /* r6 = byte offset of the current element */
179
1:      lfsx    fr0,r4,r6       /* s = source element */
180
        frsqrte fr1,fr0         /* r = frsqrte(s) */
181
        fmuls   fr3,fr1,fr0     /* r * s */
182
        fmuls   fr2,fr1,fr5     /* r * 0.5 */
183
        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
184
        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
185
        fmuls   fr3,fr1,fr0     /* r * s */
186
        fmuls   fr2,fr1,fr5     /* r * 0.5 */
187
        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
188
        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
189
        stfsx   fr1,r3,r6       /* store the refined estimate */
190
        addi    r6,r6,4         /* advance by one 4-byte element */
191
        bdnz    1b              /* loop until CTR reaches zero */
192
        lfd     fr5,56(r1)      /* restore fr2-fr5 before the frame is popped */
193
        lfd     fr4,48(r1)
194
        lfd     fr3,40(r1)
195
        lfd     fr2,32(r1)
196
        b       fpdisable       /* tail branch: fpdisable restores state and returns */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.