OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-newlib/] [newlib-1.17.0/] [newlib/] [libm/] [machine/] [spu/] [headers/] [remquo.h] - Blame information for rev 9

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 9 jlechner
/*
2
  (C) Copyright 2001,2006,
3
  International Business Machines Corporation,
4
  Sony Computer Entertainment, Incorporated,
5
  Toshiba Corporation,
6
 
7
  All rights reserved.
8
 
9
  Redistribution and use in source and binary forms, with or without
10
  modification, are permitted provided that the following conditions are met:
11
 
12
    * Redistributions of source code must retain the above copyright notice,
13
  this list of conditions and the following disclaimer.
14
    * Redistributions in binary form must reproduce the above copyright
15
  notice, this list of conditions and the following disclaimer in the
16
  documentation and/or other materials provided with the distribution.
17
    * Neither the names of the copyright holders nor the names of their
18
  contributors may be used to endorse or promote products derived from this
19
  software without specific prior written permission.
20
 
21
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22
  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25
  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
*/
33
#ifndef _REMQUO_H_
34
#define _REMQUO_H_      1
35
 
36
#include <spu_intrinsics.h>
37
#include "headers/vec_literal.h"
38
 
39
static __inline double _remquo(double x, double y, int *quo)
40
{
41
  int n, shift;
42
  vec_uchar16 swap_words = VEC_LITERAL(vec_uchar16, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11);
43
  vec_uchar16 propagate = VEC_LITERAL(vec_uchar16, 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192);
44
  vec_uchar16 splat_hi = VEC_LITERAL(vec_uchar16, 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11);
45
  vec_uchar16 splat_lo = VEC_LITERAL(vec_uchar16, 4,5,6,7,4,5,6,7, 12,13,14,15, 12,13,14,15);
46
  vec_int4 quotient;
47
  vec_int4 four = { 4, 4, 4, 4 };
48
  vec_uint4 vx, vy, z;
49
  vec_uint4 x_hi, y_hi, y8_hi, y_lo, y2, y4;
50
  vec_uint4 abs_x, abs_y, abs_2x, abs_2y, abs_8y;
51
  vec_uint4 exp_x, exp_y;
52
  vec_uint4 zero_x, zero_y;
53
  vec_uint4 logb_x, logb_y;
54
  vec_uint4 mant_x, mant_y;
55
  vec_uint4 normal, norm, denorm;
56
  vec_uint4 gt, eq, bias;
57
  vec_uint4 nan_out, not_ge, quo_pos, overflow;
58
  vec_uint4 result, result0, resultx, cnt, sign, borrow;
59
  vec_uint4 exp_special = VEC_SPLAT_U32(0x7FF00000);
60
  vec_uint4 half_smax = VEC_SPLAT_U32(0x7FEFFFFF);
61
  vec_uint4 lsb       = (vec_uint4)(VEC_SPLAT_U64(0x0000000000000001ULL));
62
  vec_uint4 sign_mask = (vec_uint4)(VEC_SPLAT_U64(0x8000000000000000ULL));
63
  vec_uint4 implied_1 = (vec_uint4)(VEC_SPLAT_U64(0x0010000000000000ULL));
64
  vec_uint4 mant_mask = (vec_uint4)(VEC_SPLAT_U64(0x000FFFFFFFFFFFFFULL));
65
 
66
  vx = (vec_uint4)spu_promote(x, 0);
67
  vy = (vec_uint4)spu_promote(y, 0);
68
 
69
  abs_x = spu_andc(vx, sign_mask);
70
  abs_y = spu_andc(vy, sign_mask);
71
 
72
  abs_2y = spu_add(abs_y, implied_1);
73
  abs_8y = spu_add(abs_y, VEC_LITERAL(vec_uint4, 0x00300000, 0, 0x00300000, 0));
74
 
75
  sign = spu_and(vx, sign_mask);
76
 
77
  quo_pos = spu_cmpgt((vec_int4)spu_and(spu_xor(vx, vy), sign_mask), -1);
78
  quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);
79
 
80
  /* Compute abs_x = fmodf(abs_x, 8*abs_y). If y is greater than 0.125*SMAX
81
   * (SMAX is the maximum representable float), then return abs_x.
82
   */
83
  {
84
    x_hi = spu_shuffle(abs_x, abs_x, splat_hi);
85
    y_lo = spu_shuffle(abs_y, abs_y, splat_lo);
86
    y_hi = spu_shuffle(abs_y, abs_y, splat_hi);
87
    y8_hi = spu_shuffle(abs_8y, abs_8y, splat_hi);
88
 
89
    /* Force a NaN output if (1) abs_x is infinity or NaN or (2)
90
     * abs_y is a NaN.
91
     */
92
    nan_out = spu_or(spu_cmpgt(x_hi, half_smax),
93
                     spu_or(spu_cmpgt(y_hi, exp_special),
94
                            spu_and(spu_cmpeq(y_hi, exp_special),
95
                                    spu_cmpgt(y_lo, 0))));
96
 
97
    /* Determine ilogb of abs_x and abs_8y and
98
     * extract the mantissas (mant_x, mant_y)
99
     */
100
    exp_x  = spu_rlmask(x_hi, -20);
101
    exp_y  = spu_rlmask(y8_hi, -20);
102
 
103
    resultx = spu_or(spu_cmpgt(y8_hi, x_hi), spu_cmpgt(y_hi, half_smax));
104
 
105
    zero_x = spu_cmpeq(exp_x, 0);
106
    zero_y = spu_cmpeq(exp_y, 0);
107
 
108
    logb_x = spu_add(exp_x, -1023);
109
    logb_y = spu_add(exp_y, -1023);
110
 
111
    mant_x = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
112
    mant_y = spu_andc(spu_sel(implied_1, abs_8y, mant_mask), zero_y);
113
 
114
    /* Compute fixed point fmod of mant_x and mant_y. Set the flag,
115
     * result0, to all ones if we detect that the final result is
116
     * ever 0.
117
     */
118
    result0 = spu_or(zero_x, zero_y);
119
 
120
    n = spu_extract(spu_sub(logb_x, logb_y), 0);
121
 
122
    while (n-- > 0) {
123
      borrow = spu_genb(mant_x, mant_y);
124
      borrow = spu_shuffle(borrow, borrow, propagate);
125
      z = spu_subx(mant_x, mant_y, borrow);
126
 
127
      result0 = spu_or(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), result0);
128
 
129
      mant_x = spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
130
    }
131
 
132
 
133
    borrow = spu_genb(mant_x, mant_y);
134
    borrow = spu_shuffle(borrow, borrow, propagate);
135
    z = spu_subx(mant_x, mant_y, borrow);
136
 
137
    mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
138
    mant_x = spu_andc(mant_x, VEC_LITERAL(vec_uint4, 0,0,-1,-1));
139
 
140
    result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
141
 
142
    /* Convert the result back to floating point and restore
143
     * the sign. If we flagged the result to be zero (result0),
144
     * zero it. If we flagged the result to equal its input x,
145
     * (resultx) then return x.
146
     *
147
     * Double precision generates a denorm for an output.
148
     */
149
    cnt = spu_cntlz(mant_x);
150
    cnt = spu_add(cnt, spu_and(spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
151
    cnt = spu_add(spu_shuffle(cnt, cnt, splat_hi), -11);
152
 
153
    shift = spu_extract(exp_y, 0) - 1;
154
    denorm = spu_slqwbytebc(spu_slqw(mant_x, shift), shift);
155
 
156
    exp_y = spu_sub(exp_y, cnt);
157
 
158
    normal = spu_cmpgt((vec_int4)exp_y, 0);
159
 
160
    /* Normalize normal results, denormalize denorm results.
161
     */
162
    shift = spu_extract(cnt, 0);
163
    norm = spu_slqwbytebc(spu_slqw(spu_andc(mant_x, VEC_LITERAL(vec_uint4, 0x00100000, 0, -1, -1)), shift), shift);
164
 
165
    mant_x = spu_sel(denorm, norm, normal);
166
 
167
    exp_y = spu_and(spu_rl(exp_y, 20), normal);
168
 
169
    result = spu_sel(exp_y, mant_x, mant_mask);
170
 
171
    abs_x = spu_sel(spu_andc(result, spu_rlmask(result0, -1)), abs_x, resultx);
172
 
173
  }
174
 
175
  /* if (x >= 4*y)
176
   *   x -= 4*y
177
   *   quotient = 4
178
   * else
179
   *   quotient = 0
180
   */
181
  y4 = spu_andc(spu_add(abs_y, spu_rl(implied_1, 1)), zero_y);
182
 
183
  overflow = spu_cmpgt(y_hi, VEC_SPLAT_U32(0x7FCFFFFF));
184
  gt = spu_cmpgt(y4, abs_x);
185
  eq = spu_cmpeq(y4, abs_x);
186
  not_ge = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
187
  not_ge = spu_shuffle(not_ge, not_ge, splat_hi);
188
  not_ge = spu_or(not_ge, overflow);
189
 
190
  abs_x = spu_sel((vec_uint4)spu_sub((vec_double2)abs_x, (vec_double2)y4), abs_x, not_ge);
191
  quotient = spu_andc(four, (vec_int4)not_ge);
192
 
193
  /* if (x >= 2*y
194
   *    x -= 2*y
195
   *    quotient += 2
196
   */
197
  y2 = spu_andc(spu_add(abs_y, implied_1), zero_y);
198
 
199
  overflow = spu_cmpgt(y_hi, VEC_SPLAT_U32(0x7FDFFFFF));
200
  gt = spu_cmpgt(y2, abs_x);
201
  eq = spu_cmpeq(y2, abs_x);
202
  not_ge = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
203
  not_ge = spu_shuffle(not_ge, not_ge, splat_hi);
204
  not_ge = spu_or(not_ge, overflow);
205
 
206
 
207
  abs_x = spu_sel((vec_uint4)spu_sub((vec_double2)abs_x, (vec_double2)y2), abs_x, not_ge);
208
  quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);
209
 
210
  /* if (2*x > y)
211
   *     x -= y
212
   *     if (2*x >= y) x -= y
213
   */
214
  abs_2x = spu_and(spu_add(abs_x, implied_1), normal);
215
 
216
  gt = spu_cmpgt(abs_2x, abs_y);
217
  eq = spu_cmpeq(abs_2x, abs_y);
218
  bias = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
219
  bias = spu_shuffle(bias, bias, splat_hi);
220
  abs_x = spu_sel(abs_x, (vec_uint4)spu_sub((vec_double2)abs_x, (vec_double2)abs_y), bias);
221
  quotient = spu_sub(quotient, (vec_int4)bias);
222
 
223
  bias = spu_andc(bias, spu_rlmaska((vec_uint4)spu_msub((vec_double2)abs_x, VEC_SPLAT_F64(2.0), (vec_double2)abs_y), -31));
224
  bias = spu_shuffle(bias, bias, splat_hi);
225
  abs_x = spu_sel(abs_x, (vec_uint4)spu_sub((vec_double2)abs_x, (vec_double2)abs_y), bias);
226
  quotient = spu_sub(quotient, (vec_int4)bias);
227
 
228
  /* Generate a correct final sign
229
   */
230
  result = spu_sel(spu_xor(abs_x, sign), exp_special, nan_out);
231
 
232
  quotient = spu_and(quotient, 7);
233
  quotient = spu_sel(spu_sub(0, quotient), quotient, quo_pos);
234
 
235
  *quo = spu_extract(quotient, 0);
236
 
237
  return (spu_extract((vec_double2)result, 0));
238
}
239
#endif /* _REMQUO_H_ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.