OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [powerpc/] [kernel/] [vecemu.c] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
/*
2
 * Routines to emulate some Altivec/VMX instructions, specifically
3
 * those that can trap when given denormalized operands in Java mode.
4
 */
5
#include <linux/kernel.h>
6
#include <linux/errno.h>
7
#include <linux/sched.h>
8
#include <asm/ptrace.h>
9
#include <asm/processor.h>
10
#include <asm/uaccess.h>
11
 
12
/* Functions in vector.S */
13
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
14
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
15
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
16
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17
extern void vrefp(vector128 *dst, vector128 *src);
18
extern void vrsqrtefp(vector128 *dst, vector128 *src);
19
extern void vexptep(vector128 *dst, vector128 *src);
20
 
21
/*
 * Mantissas (including the implicit leading 1, i.e. scaled by 2^23) used
 * by eexp2().  Entry i is looked up with the top three fraction bits of
 * the exponent argument, so entry 0 is 1.0 and entry 4 is 2^0.5.
 */
static const unsigned int exp2s[8] = {
        0x800000,
        0x8b95c2,
        0x9837f0,
        0xa5fed7,
        0xb504f3,
        0xc5672a,
        0xd744fd,
        0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the single-precision bit pattern of the estimate:
 *   - NaN input       -> the same NaN with the quiet bit (0x400000) set
 *   - exponent > 7    -> +Inf for positive x, 0 for negative x
 *   - exponent < -23  -> 1.0
 *   - otherwise a table lookup on the top 3 fraction bits followed by
 *     linear interpolation on the remaining 20 fraction bits; results
 *     below the normal range are returned as denormals.
 */
static unsigned int eexp2(unsigned int s)
{
        int exp, pwr;
        unsigned int mant, frac;

        /* extract exponent field from input */
        exp = (int)((s >> 23) & 0xff) - 127;
        if (exp > 7) {
                /* check for NaN input */
                if (exp == 128 && (s & 0x7fffff) != 0)
                        return s | 0x400000;    /* return QNaN */
                /* 2^-big = 0, 2^+big = +Inf */
                return (s & 0x80000000) ? 0 : 0x7f800000;
        }
        if (exp < -23)
                return 0x3f800000;      /* 1.0 */

        /* convert to fixed point integer in 9.23 representation */
        pwr = (s & 0x7fffff) | 0x800000;
        if (exp > 0)
                pwr <<= exp;
        else
                pwr >>= -exp;
        if (s & 0x80000000)
                pwr = -pwr;

        /*
         * Extract integer part, which becomes exponent part of result.
         * This relies on >> of a negative int being an arithmetic shift
         * (floor), so the fraction left in pwr is always non-negative.
         */
        exp = (pwr >> 23) + 126;
        if (exp >= 254)
                return 0x7f800000;
        if (exp < -23)
                return 0;

        /* table lookup on top 3 bits of fraction to get mantissa */
        mant = exp2s[(pwr >> 20) & 7];

        /*
         * Linear interpolation using remaining 20 bits of fraction.
         * Each step keeps the high 32 bits of an unsigned 32x32-bit
         * product.  The shift is done on an unsigned copy of pwr because
         * it deliberately discards the upper bits (and pwr may be
         * negative), which would be undefined on a signed int.
         */
        frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12)
                               * 0x172b83ffu) >> 32);
        frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
        mant += frac;

        /* mant still carries the implicit 1, which bumps exp from 126-based */
        if (exp >= 0)
                return mant + (exp << 23);

        /* denormalized result: shift right with rounding */
        exp = -exp;
        mant += 1 << (exp - 1);
        return mant >> exp;
}
86
 
87
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the single-precision bit pattern of the estimate:
 *   - Inf is returned unchanged, NaN is returned with the quiet bit set
 *   - +0 or -0 returns -Inf
 *   - otherwise the result is built as a signed fixed-point value with
 *     23 fraction bits (integer part from the exponent, fraction from
 *     three successive square-root steps plus a linear term) and then
 *     converted back to floating point.
 *
 * NOTE(review): for finite non-zero inputs the sign bit of `s' is
 * ignored, so a negative x yields the estimate for |x|; confirm against
 * the ISA whether a QNaN is expected there.
 */
static unsigned int elog2(unsigned int s)
{
        int exp, mant, lz, frac;

        exp = s & 0x7f800000;
        mant = s & 0x7fffff;
        if (exp == 0x7f800000) {        /* Inf or NaN */
                if (mant != 0)
                        s |= 0x400000;  /* turn NaN into QNaN */
                return s;
        }
        if ((exp | mant) == 0)          /* +0 or -0 */
                return 0xff800000;      /* return -Inf */

        if (exp == 0) {
                /* denormalized: mant is non-zero here, so clz is defined */
                lz = __builtin_clz((unsigned int)mant);
                mant <<= lz - 8;        /* normalize leading 1 to bit 23 */
                /* negate after shifting; shifting a negative int is undefined */
                exp = -((118 + lz) << 23);
        } else {
                mant |= 0x800000;
                exp -= 127 << 23;
        }

        /* each multiply below keeps the high 32 bits of the 64-bit product */
        if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
                exp |= 0x400000;                        /* 0.5 * 2^23 */
                mant = (int)(((unsigned long long)(unsigned int)mant
                              * 0xb504f334u) >> 32);    /* 2^-0.5 * 2^32 */
        }
        if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
                exp |= 0x200000;                        /* 0.25 * 2^23 */
                mant = (int)(((unsigned long long)(unsigned int)mant
                              * 0xd744fccbu) >> 32);    /* 2^-0.25 * 2^32 */
        }
        if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
                exp |= 0x100000;                        /* 0.125 * 2^23 */
                mant = (int)(((unsigned long long)(unsigned int)mant
                              * 0xeac0c6e8u) >> 32);    /* 2^-0.125 * 2^32 */
        }
        if (mant > 0x800000) {                          /* 1.0 * 2^23 */
                /* calculate (mant - 1) * 1.381097463 */
                /* 1.381097463 == 0.125 / (2^0.125 - 1) */
                frac = (int)(((unsigned long long)
                              ((unsigned int)(mant - 0x800000) << 1)
                              * 0xb0c7cd3au) >> 32);
                exp += frac;
        }

        /* convert the signed fixed-point value in exp back to a float */
        s = (unsigned int)exp & 0x80000000;
        if (exp != 0) {
                if (s)
                        exp = -exp;
                /* exp is strictly positive here, so clz is defined */
                lz = 8 - __builtin_clz((unsigned int)exp);
                if (lz > 0)
                        exp >>= lz;
                else if (lz < 0)
                        exp <<= -lz;
                /* exp's leading 1 (bit 23) carries into the exponent field */
                s += ((lz + 126) << 23) + exp;
        }
        return s;
}
151
 
152
/* Bit OR-ed into the word at *vscrp when a conversion saturates */
#define VSCR_SAT        1

/*
 * Convert a single-precision value to a signed 32-bit integer with
 * saturation, rounding towards zero.
 *
 * x      32-bit single-precision bit pattern of the input
 * scale  added to the unbiased exponent, i.e. the input is multiplied
 *        by 2^scale before conversion
 * vscrp  word that gets VSCR_SAT OR-ed in when the result saturates
 *
 * Returns 0 for NaN and for magnitudes below 1, 0x7fffffff / 0x80000000
 * when the scaled magnitude is 2^31 or more, and the truncated integer
 * otherwise.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
        int exp, mant;

        exp = (x >> 23) & 0xff;
        mant = x & 0x7fffff;
        if (exp == 255 && mant != 0)
                return 0;               /* NaN -> 0 */
        exp = exp - 127 + scale;
        if (exp < 0)
                return 0;               /* round towards zero */
        if (exp >= 31) {
                /*
                 * saturate, unless the result would be -2^31:
                 * 0xcf000000 is -2^31, and adding scale << 23 to x
                 * applies the scaling directly to the exponent field
                 */
                if (x + (scale << 23) != 0xcf000000)
                        *vscrp |= VSCR_SAT;
                return (x & 0x80000000) ? 0x80000000 : 0x7fffffff;
        }
        /* put the leading 1 at bit 30, then shift down to the units place */
        mant |= 0x800000;
        mant = (mant << 7) >> (30 - exp);
        return (x & 0x80000000) ? -mant : mant;
}
175
 
176
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
177
{
178
        int exp;
179
        unsigned int mant;
180
 
181
        exp = (x >> 23) & 0xff;
182
        mant = x & 0x7fffff;
183
        if (exp == 255 && mant != 0)
184
                return 0;                /* NaN -> 0 */
185
        exp = exp - 127 + scale;
186
        if (exp < 0)
187
                return 0;                /* round towards zero */
188
        if (x & 0x80000000) {
189
                /* negative => saturate to 0 */
190
                *vscrp |= VSCR_SAT;
191
                return 0;
192
        }
193
        if (exp >= 32) {
194
                /* saturate */
195
                *vscrp |= VSCR_SAT;
196
                return 0xffffffff;
197
        }
198
        mant |= 0x800000;
199
        mant = (mant << 8) >> (31 - exp);
200
        return mant;
201
}
202
 
203
/*
 * Round to floating integer, towards 0.
 *
 * `x' is a single-precision bit pattern; the result is the bit pattern
 * of x with its fraction discarded.  NaNs are returned with the quiet
 * bit set; values with |x| < 1.0 become a zero carrying x's sign.
 */
static unsigned int rfiz(unsigned int x)
{
        int exp;

        exp = (int)((x >> 23) & 0xff) - 127;
        if (exp == 128 && (x & 0x7fffff) != 0)
                return x | 0x400000;    /* NaN -> make it a QNaN */
        if (exp >= 23)
                return x;               /* it's an integer already (or Inf) */
        if (exp < 0)
                return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
        /* clear the mantissa bits that lie below the units place */
        return x & ~(0x7fffffu >> exp);
}
217
 
218
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * `x' is a single-precision bit pattern.  NaNs are returned with the
 * quiet bit set, +/-0 is returned unchanged, and any other value with
 * |x| < 1.0 (including denormals) becomes +/-1.0.
 */
static unsigned int rfii(unsigned int x)
{
        int exp;
        unsigned int mask;

        exp = (int)((x >> 23) & 0xff) - 127;
        if (exp == 128 && (x & 0x7fffff) != 0)
                return x | 0x400000;    /* NaN -> make it a QNaN */
        if (exp >= 23)
                return x;               /* it's an integer already (or Inf) */
        if ((x & 0x7fffffff) == 0)
                return x;               /* +/-0 -> +/-0 */
        if (exp < 0)
                /* 0 < |x| < 1.0 rounds to +/- 1.0 */
                return (x & 0x80000000) | 0x3f800000;
        mask = 0x7fffffu >> exp;        /* bits below the units place */
        /* mantissa overflows into exponent - that's OK,
           it can't overflow into the sign bit */
        return (x + mask) & ~mask;
}
238
 
239
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer.
 *
 * `x' is a single-precision bit pattern.  NaNs are returned with the
 * quiet bit set; values that round to zero keep x's sign.
 */
static unsigned int rfin(unsigned int x)
{
        int exp;
        unsigned int half, fracmask;

        exp = (int)((x >> 23) & 0xff) - 127;
        if (exp == 128 && (x & 0x7fffff) != 0)
                return x | 0x400000;    /* NaN -> make it a QNaN */
        if (exp >= 23)
                return x;               /* it's an integer already (or Inf) */
        if (exp < -1)
                return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
        if (exp == -1) {
                /* exactly 0.5 is a tie between 0 and 1; 0 is even */
                if ((x & 0x7fffff) == 0)
                        return x & 0x80000000;
                /* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
                return (x & 0x80000000) | 0x3f800000;
        }
        half = 0x400000u >> exp;        /* the 0.5 bit */
        fracmask = 0x7fffffu >> exp;    /* all bits below the units place */
        /*
         * Tie: the fraction is exactly one half.  fracmask + 1 is the
         * units bit (for exp == 0 it is the low exponent bit, which is
         * set, matching the odd integer part 1).  If the units bit is
         * clear the integer part is already even, so round down.
         */
        if ((x & fracmask) == half && (x & (fracmask + 1)) == 0)
                return x & ~fracmask;
        /* add 0.5 to the magnitude and chop off the fraction bits */
        return (x + half) & ~fracmask;
}
258
 
259
int emulate_altivec(struct pt_regs *regs)
260
{
261
        unsigned int instr, i;
262
        unsigned int va, vb, vc, vd;
263
        vector128 *vrs;
264
 
265
        if (get_user(instr, (unsigned int __user *) regs->nip))
266
                return -EFAULT;
267
        if ((instr >> 26) != 4)
268
                return -EINVAL;         /* not an altivec instruction */
269
        vd = (instr >> 21) & 0x1f;
270
        va = (instr >> 16) & 0x1f;
271
        vb = (instr >> 11) & 0x1f;
272
        vc = (instr >> 6) & 0x1f;
273
 
274
        vrs = current->thread.vr;
275
        switch (instr & 0x3f) {
276
        case 10:
277
                switch (vc) {
278
                case 0:  /* vaddfp */
279
                        vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
280
                        break;
281
                case 1: /* vsubfp */
282
                        vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
283
                        break;
284
                case 4: /* vrefp */
285
                        vrefp(&vrs[vd], &vrs[vb]);
286
                        break;
287
                case 5: /* vrsqrtefp */
288
                        vrsqrtefp(&vrs[vd], &vrs[vb]);
289
                        break;
290
                case 6: /* vexptefp */
291
                        for (i = 0; i < 4; ++i)
292
                                vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
293
                        break;
294
                case 7: /* vlogefp */
295
                        for (i = 0; i < 4; ++i)
296
                                vrs[vd].u[i] = elog2(vrs[vb].u[i]);
297
                        break;
298
                case 8:         /* vrfin */
299
                        for (i = 0; i < 4; ++i)
300
                                vrs[vd].u[i] = rfin(vrs[vb].u[i]);
301
                        break;
302
                case 9:         /* vrfiz */
303
                        for (i = 0; i < 4; ++i)
304
                                vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
305
                        break;
306
                case 10:        /* vrfip */
307
                        for (i = 0; i < 4; ++i) {
308
                                u32 x = vrs[vb].u[i];
309
                                x = (x & 0x80000000)? rfiz(x): rfii(x);
310
                                vrs[vd].u[i] = x;
311
                        }
312
                        break;
313
                case 11:        /* vrfim */
314
                        for (i = 0; i < 4; ++i) {
315
                                u32 x = vrs[vb].u[i];
316
                                x = (x & 0x80000000)? rfii(x): rfiz(x);
317
                                vrs[vd].u[i] = x;
318
                        }
319
                        break;
320
                case 14:        /* vctuxs */
321
                        for (i = 0; i < 4; ++i)
322
                                vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
323
                                                &current->thread.vscr.u[3]);
324
                        break;
325
                case 15:        /* vctsxs */
326
                        for (i = 0; i < 4; ++i)
327
                                vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
328
                                                &current->thread.vscr.u[3]);
329
                        break;
330
                default:
331
                        return -EINVAL;
332
                }
333
                break;
334
        case 46:        /* vmaddfp */
335
                vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
336
                break;
337
        case 47:        /* vnmsubfp */
338
                vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
339
                break;
340
        default:
341
                return -EINVAL;
342
        }
343
 
344
        return 0;
345
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.