OpenCores
URL https://opencores.org/ocsvn/an-fpga-implementation-of-low-latency-noc-based-mpsoc/an-fpga-implementation-of-low-latency-noc-based-mpsoc/trunk

Subversion Repositories an-fpga-implementation-of-low-latency-noc-based-mpsoc

[/] [an-fpga-implementation-of-low-latency-noc-based-mpsoc/] [trunk/] [mpsoc/] [src_processor/] [mor1kx-5.0/] [rtl/] [verilog/] [pfpu32/] [pfpu32_muldiv.v] - Blame information for rev 48

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 48 alirezamon
//////////////////////////////////////////////////////////////////////
2
//                                                                  //
3
//    pfpu32_muldiv                                                 //
4
//                                                                  //
5
//    This file is part of the mor1kx project                       //
6
//    https://github.com/openrisc/mor1kx                            //
7
//                                                                  //
8
//    Description                                                   //
9
//    combined multiplier/divisor pipeline for                      //
10
//    single precision floating point numbers                       //
11
//                                                                  //
12
//    Author(s):                                                    //
13
//          Andrey Bacherov, avbacherov@opencores.org               //
14
//                                                                  //
15
//////////////////////////////////////////////////////////////////////
16
//                                                                  //
17
//  Copyright (C) 2015                                              //
18
//                                                                  //
19
//  This source file may be used and distributed without            //
20
//  restriction provided that this copyright statement is not       //
21
//  removed from the file and that any derivative work contains     //
22
//  the original copyright notice and the associated disclaimer.    //
23
//                                                                  //
24
//    THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY           //
25
//  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED       //
26
//  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS       //
27
//  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR          //
28
//  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,             //
29
//  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES        //
30
//  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE       //
31
//  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR            //
32
//  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF      //
33
//  LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT      //
34
//  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT      //
35
//  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE             //
36
//  POSSIBILITY OF SUCH DAMAGE.                                     //
37
//////////////////////////////////////////////////////////////////////
38
 
39
`include "mor1kx-defines.v"
40
 
41
//
// pfpu32_muldiv
//
//   Combined multiplier/divider pipeline for single precision floating
//   point operands. The operands arrive already unpacked into sign,
//   10-bit exponent and 24-bit fractional plus inf/zero/NaN flags.
//
//   Control:
//     - data registers are updated only while adv_i is high;
//     - the "ready" chain (s0o_ready ... muldiv_rdy_o) and the division
//       iteration state (itr_state) are cleared by rst and by flush_i.
//
//   Multiplication:
//     - both fractionals are left-shifted by their leading zero count;
//     - the 32-bit multiplier inputs are split into 16-bit halves, the four
//       16x16 partial products are registered (stage #2) and then summed
//       (stage #3).
//
//   Division:
//     - Goldschmidt iterations re-use the same multiplier; the sequence is
//       controlled by the 11-bit shift register itr_state which is loaded
//       with 1 when a division is ready at the pre-operation stage output;
//     - while itr_state is non-zero the stage #1 registers hold their values;
//     - the initial reciprocal approximation is taken from arecip_lut.
//
//   NOTE: `OR_ASYNC_RST is not defined in this file; it is expected to be
//         provided by mor1kx-defines.v.
//
module pfpu32_muldiv
(
   input             clk,
   input             rst,
   input             flush_i,  // flush pipe
   input             adv_i,    // advance pipe
   input             start_i,  // start
   input             is_div_i, // 1: division, 0: multiplication
   // input 'a' related values
   input             signa_i,
   input       [9:0] exp10a_i,
   input      [23:0] fract24a_i,
   input             infa_i,
   input             zeroa_i,
   // input 'b' related values
   input             signb_i,
   input       [9:0] exp10b_i,
   input      [23:0] fract24b_i,
   input             infb_i,
   input             zerob_i,
   // 'a'/'b' related
   input             snan_i,
   input             qnan_i,
   input             anan_sign_i,
   // MUL/DIV common outputs
   output reg        muldiv_rdy_o,       // ready
   output reg        muldiv_sign_o,      // signum
   output reg  [4:0] muldiv_shr_o,       // do right shift in align stage
   output reg  [9:0] muldiv_exp10shr_o,  // exponent for right shift align
   output reg        muldiv_shl_o,       // do left shift in align stage
   output reg  [9:0] muldiv_exp10shl_o,  // exponent for left shift align
   output reg  [9:0] muldiv_exp10sh0_o,  // exponent for no shift in align
   output reg [27:0] muldiv_fract28_o,   // fractional with appended {r,s} bits
   output reg        muldiv_inv_o,       // invalid operation flag
   output reg        muldiv_inf_o,       // infinity output reg
   output reg        muldiv_snan_o,      // signaling NaN output reg
   output reg        muldiv_qnan_o,      // quiet NaN output reg
   output reg        muldiv_anan_sign_o, // signum for output nan
   // DIV additional outputs
   output reg        div_op_o,           // operation is division
   output reg        div_sign_rmnd_o,    // signum of remainder for IEEE compliant rounding
   output reg        div_dbz_o           // div division by zero flag
);

  /*
     Any stage's output is registered.
     Definitions:
       s??o_name - "S"tage number "??", "O"utput
       s??t_name - "S"tage number "??", "T"emporary (internally)
  */


  /* Stage #1: pre-operation stage */


    // detection of some exceptions
  wire s0t_inv = is_div_i ? ((zeroa_i & zerob_i) | (infa_i & infb_i)) : // div: 0/0, inf/inf -> invalid operation; snan output
                            ((zeroa_i & infb_i) | (zerob_i & infa_i));  // mul: 0 * inf -> invalid operation; snan output
    // division by zero
  wire s0t_dbz   = is_div_i & (~zeroa_i) & (~infa_i) & zerob_i;
    //   inf input
  wire s0t_inf_i = infa_i | (infb_i & (~is_div_i)); // for DIV only infA is used

    // force intermediate results to zero
  wire s0t_opc_0 = zeroa_i | zerob_i | (is_div_i & (infa_i | infb_i));

  // count leading zeros of fractional 'a'
  //   (the value is used as left shift amount for normalization)
  reg [4:0] s0t_nlza;
  always @(fract24a_i) begin
    casez(fract24a_i) // synopsys full_case parallel_case
      24'b1???????????????????????: s0t_nlza =  0;
      24'b01??????????????????????: s0t_nlza =  1;
      24'b001?????????????????????: s0t_nlza =  2;
      24'b0001????????????????????: s0t_nlza =  3;
      24'b00001???????????????????: s0t_nlza =  4;
      24'b000001??????????????????: s0t_nlza =  5;
      24'b0000001?????????????????: s0t_nlza =  6;
      24'b00000001????????????????: s0t_nlza =  7;
      24'b000000001???????????????: s0t_nlza =  8;
      24'b0000000001??????????????: s0t_nlza =  9;
      24'b00000000001?????????????: s0t_nlza = 10;
      24'b000000000001????????????: s0t_nlza = 11;
      24'b0000000000001???????????: s0t_nlza = 12;
      24'b00000000000001??????????: s0t_nlza = 13;
      24'b000000000000001?????????: s0t_nlza = 14;
      24'b0000000000000001????????: s0t_nlza = 15;
      24'b00000000000000001???????: s0t_nlza = 16;
      24'b000000000000000001??????: s0t_nlza = 17;
      24'b0000000000000000001?????: s0t_nlza = 18;
      24'b00000000000000000001????: s0t_nlza = 19;
      24'b000000000000000000001???: s0t_nlza = 20;
      24'b0000000000000000000001??: s0t_nlza = 21;
      24'b00000000000000000000001?: s0t_nlza = 22;
      24'b000000000000000000000001: s0t_nlza = 23;
      24'b000000000000000000000000: s0t_nlza =  0; // zero result
    endcase
  end // nlz for 'a'

  // count leading zeros of fractional 'b'
  //   (the value is used as left shift amount for normalization)
  reg [4:0] s0t_nlzb;
  always @(fract24b_i) begin
    casez(fract24b_i) // synopsys full_case parallel_case
      24'b1???????????????????????: s0t_nlzb =  0;
      24'b01??????????????????????: s0t_nlzb =  1;
      24'b001?????????????????????: s0t_nlzb =  2;
      24'b0001????????????????????: s0t_nlzb =  3;
      24'b00001???????????????????: s0t_nlzb =  4;
      24'b000001??????????????????: s0t_nlzb =  5;
      24'b0000001?????????????????: s0t_nlzb =  6;
      24'b00000001????????????????: s0t_nlzb =  7;
      24'b000000001???????????????: s0t_nlzb =  8;
      24'b0000000001??????????????: s0t_nlzb =  9;
      24'b00000000001?????????????: s0t_nlzb = 10;
      24'b000000000001????????????: s0t_nlzb = 11;
      24'b0000000000001???????????: s0t_nlzb = 12;
      24'b00000000000001??????????: s0t_nlzb = 13;
      24'b000000000000001?????????: s0t_nlzb = 14;
      24'b0000000000000001????????: s0t_nlzb = 15;
      24'b00000000000000001???????: s0t_nlzb = 16;
      24'b000000000000000001??????: s0t_nlzb = 17;
      24'b0000000000000000001?????: s0t_nlzb = 18;
      24'b00000000000000000001????: s0t_nlzb = 19;
      24'b000000000000000000001???: s0t_nlzb = 20;
      24'b0000000000000000000001??: s0t_nlzb = 21;
      24'b00000000000000000000001?: s0t_nlzb = 22;
      24'b000000000000000000000001: s0t_nlzb = 23;
      24'b000000000000000000000000: s0t_nlzb =  0; // zero result
    endcase
  end // nlz of 'b'


  // pre-norm stage outputs
  //   input related
  reg s0o_inv, s0o_inf_i,
      s0o_snan_i, s0o_qnan_i, s0o_anan_i_sign;
  //   computation related
  reg        s0o_is_div;
  reg        s0o_opc_0;
  reg        s0o_signc;
  reg  [9:0] s0o_exp10a;
  reg [23:0] s0o_fract24a;
  reg  [4:0] s0o_shla;
  reg  [9:0] s0o_exp10b;
  reg [23:0] s0o_fract24b;
  reg  [4:0] s0o_shlb;
  // DIV additional outputs
  reg        s0o_dbz;
  // registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s0o_inv         <= s0t_inv;
      s0o_inf_i       <= s0t_inf_i;
      s0o_snan_i      <= snan_i;
      s0o_qnan_i      <= qnan_i;
      s0o_anan_i_sign <= anan_sign_i;
        // computation related
      s0o_is_div   <= is_div_i;
      s0o_opc_0    <= s0t_opc_0;
      s0o_signc    <= signa_i ^ signb_i; // result sign is XOR of operand signs
      s0o_exp10a   <= exp10a_i;
      s0o_fract24a <= fract24a_i;
      s0o_shla     <= s0t_nlza;
      s0o_exp10b   <= exp10b_i;
      s0o_fract24b <= fract24b_i;
      s0o_shlb     <= s0t_nlzb;
        // DIV additional outputs
      s0o_dbz   <= s0t_dbz;
    end // push pipe
  end

  // route ready through side back
  reg s0o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s0o_ready <= 0;
    else if(flush_i)
      s0o_ready <= 0;
    else if(adv_i)
      s0o_ready <= start_i;
  end // posedge clock


  // left-shift the dividend and divisor
  wire [23:0] s1t_fract24a_shl = s0o_fract24a << s0o_shla;
  wire [23:0] s1t_fract24b_shl = s0o_fract24b << s0o_shlb;

  // force result to zero
  wire [23:0] s1t_fract24a = s1t_fract24a_shl & {24{~s0o_opc_0}};
  wire [23:0] s1t_fract24b = s1t_fract24b_shl & {24{~s0o_opc_0}};

  // exponent
  //   div: (expA - shlA) - (expB - shlB) + 127
  //   mul: (expA - shlA) + (expB - shlB) - 127
  wire [9:0] s1t_exp10mux =
    s0o_is_div ? (s0o_exp10a - {5'd0,s0o_shla} - s0o_exp10b + {5'd0,s0o_shlb} + 10'd127) :
                 (s0o_exp10a - {5'd0,s0o_shla} + s0o_exp10b - {5'd0,s0o_shlb} - 10'd127);

  // force result to zero
  wire [9:0] s1t_exp10c = s1t_exp10mux & {10{~s0o_opc_0}};


  // Goldschmidt division iterations control
  reg [10:0] itr_state; // iteration state indicator
  // iteration characteristic points:
  //   quotient is computed
  wire itr_rndQ = itr_state[10];
  //   iteration in progress
  wire itr_Proc = |itr_state;
  // iteration control state machine:
  //   loaded with 1 when a division is ready at stage #0 output,
  //   otherwise shifted left by one position on each advance
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      itr_state <= 11'd0;
    else if(flush_i)
      itr_state <= 11'd0;
    else if(adv_i & s0o_ready & s0o_is_div)
      itr_state <= 11'd1;
    else if(adv_i)
      itr_state <= {itr_state[9:0],1'b0};
  end // posedge clock

  // Multiplication operation flag
  wire s1t_is_mul = s0o_ready & (~s0o_is_div);


  // stage #1 outputs
  //   input related
  reg s1o_inv, s1o_inf_i,
      s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign;
  //   computation related
  reg        s1o_opc_0;
  reg        s1o_signc;
  reg [9:0]  s1o_exp10c;
  reg [23:0] s1o_fract24a;
  reg [23:0] s1o_fract24b;
  // DIV additional outputs
  reg        s1o_dbz;
  //   registering
  //   (the registers are held while division iterations are in progress)
  always @(posedge clk) begin
    if(adv_i & ~itr_Proc) begin
        // input related
      s1o_inv         <= s0o_inv;
      s1o_inf_i       <= s0o_inf_i;
      s1o_snan_i      <= s0o_snan_i;
      s1o_qnan_i      <= s0o_qnan_i;
      s1o_anan_i_sign <= s0o_anan_i_sign;
        // computation related
      s1o_opc_0    <= s0o_opc_0;
      s1o_signc    <= s0o_signc;
      s1o_exp10c   <= s1t_exp10c;
      s1o_fract24a <= s1t_fract24a;
      s1o_fract24b <= s1t_fract24b;
        // DIV additional outputs
      s1o_dbz <= s0o_dbz;
    end // advance pipe
  end // posedge clock

  // ready is special case
  reg s1o_mul_ready;
  reg s1o_div_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst) begin
      s1o_mul_ready <= 1'b0;
      s1o_div_ready <= 1'b0;
    end else if(flush_i) begin
      s1o_mul_ready <= 1'b0;
      s1o_div_ready <= 1'b0;
    end else if(adv_i) begin
      s1o_mul_ready <= s1t_is_mul;
      s1o_div_ready <= itr_rndQ;
    end
  end // posedge clock


  /* Stage #2: 1st part of multiplier */


  // right shift value
  // and appropriately corrected exponent
  wire s1o_exp10c_0             = ~(|s1o_exp10c);
  wire [9:0] s2t_shr_of_neg_exp = 11'h401 - {1'b0,s1o_exp10c}; // 1024-v+1
  // variants:
  wire [9:0] s2t_shr_t;
  wire [9:0] s2t_exp10rx;
  assign {s2t_shr_t,s2t_exp10rx} =
    // force zero result
    s1o_opc_0     ? {10'd0,10'd0} :
    // negative exponent sum
    //   (!) takes 1x.xx case into account automatically
    s1o_exp10c[9] ? {s2t_shr_of_neg_exp,10'd1} :
    // (a) zero exponent sum (denorm. result potentially)
    //   (!) takes 1x.xx case into account automatically
    // (b) normal case
    //   (!) 1x.xx case is processed in next stage
                    {{9'd0,s1o_exp10c_0},(s1o_exp10c | {9'd0,s1o_exp10c_0})};
  // limited by 31 and forced result to zero
  wire [4:0] s2t_shrx = s2t_shr_t[4:0] | {5{|s2t_shr_t[9:5]}};


  // Support Goldschmidt iteration
  // initial estimation of reciprocal
  wire [8:0] itr_recip9b;
  arecip_lut u_arlut
  (
    .b_i(s1o_fract24b[22:16]),
    .r_o(itr_recip9b)
  );
  // support case: b==1
  wire b_eq_1 = s1o_fract24b[23] & (~(|s1o_fract24b[22:0]));
  // reciprocal with restored leading 01
  wire [10:0] itr_recip11b = b_eq_1 ?  11'b10000000000 :
                                      {2'b01,itr_recip9b};

  // the subsequent two stages multiplier operates with 32-bit inputs
  // 25-bits: fractionals (quotient is in range 0.5 to 1)
  //  1-bit : rounding bit
  //  6-bits: guard (due to truncations of intermediate results)

  // intermediate results:
  //   updated divisor (D) is rounded up while all other intermediate values
  //   are just truncated in accordance with directed rounding analysed in:
  //     Guy Even, Peter-M.Seidel, Warren E.Ferguson
  //     "A parametric error analysis of Goldschmidt's division algorithm"
  wire itr_rndD = itr_state[3] | itr_state[6];
  wire itr_rndDvsr; // declared but not referenced anywhere else in this module
  //   align resulting quotient to support subsequent IEEE-compliant rounding
  wire [25:0] itr_res_qtnt26; // rounded quotient
  //   Updated quotient or divisor
  wire [32:0] itr_qtnt33;
  //   'F' (2-D) or 'Remainder'
  wire [32:0] itr_rmnd33;


  // control for multiplier's input 'A'
  //   the register also contains quotient to output
  wire itr_uinA = s1t_is_mul   |
                  itr_state[0] | itr_state[3] |
                  itr_state[6] | itr_rndQ;
  // multiplexer for multiplier's input 'A'
  wire [31:0] itr_mul32a =
     s1t_is_mul   ? {s1t_fract24a,8'd0}   :
     itr_state[0] ? {itr_recip11b,21'd0}  :
     itr_rndQ     ? {itr_res_qtnt26,6'd0} : // truncate by 2^(-n-1)
                     itr_rmnd33[31:0];
  // register of multiplier's input 'A'
  reg [15:0] s1o_mul16_al;
  reg [15:0] s1o_mul16_ah;
  // registering
  always @(posedge clk) begin
    if(adv_i & itr_uinA) begin
      s1o_mul16_al <= itr_mul32a[15: 0];
      s1o_mul16_ah <= itr_mul32a[31:16];
    end
  end // posedge clock


  // control for multiplier's input 'B'
  wire itr_uinB = s1t_is_mul   |
                  itr_state[0] | itr_state[1] |
                  itr_state[3] | itr_state[4] |
                  itr_state[6] | itr_state[7] |
                  itr_rndQ;
  // multiplexer for multiplier's input 'B'
  wire [31:0] itr_mul32b =
     s1t_is_mul               ? {s1t_fract24b,8'd0} :
    (itr_state[0] | itr_rndQ) ? {s1o_fract24b,8'd0} :
     itr_state[1]             ? {s1o_fract24a,8'd0} :
                                 itr_qtnt33[31:0];
  // register of multiplier's input 'B'
  reg [15:0] s1o_mul16_bl;
  reg [15:0] s1o_mul16_bh;
  always @(posedge clk) begin
    if(adv_i & itr_uinB) begin
      s1o_mul16_bl <= itr_mul32b[15: 0];
      s1o_mul16_bh <= itr_mul32b[31:16];
    end
  end // posedge clock

  // stage #2 outputs
  //   input related
  reg s2o_inv, s2o_inf_i,
      s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign;
  // DIV additional outputs
  reg        s2o_dbz;
  reg [23:0] s2o_fract24a;
  //   computation related
  reg        s2o_opc_0;
  reg        s2o_signc;
  reg  [9:0] s2o_exp10c;
  reg  [4:0] s2o_shrx;
  reg        s2o_is_shrx;
  reg  [9:0] s2o_exp10rx;
  //   multipliers
  reg [31:0] s2o_fract32_albl;
  reg [31:0] s2o_fract32_albh;
  reg [31:0] s2o_fract32_ahbl;
  reg [31:0] s2o_fract32_ahbh;
  //   registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s2o_inv         <= s1o_inv;
      s2o_inf_i       <= s1o_inf_i;
      s2o_snan_i      <= s1o_snan_i;
      s2o_qnan_i      <= s1o_qnan_i;
      s2o_anan_i_sign <= s1o_anan_i_sign;
        // DIV additional outputs
      s2o_dbz      <= s1o_dbz;
      s2o_fract24a <= s1o_fract24a;
        // computation related
      s2o_opc_0   <= s1o_opc_0;
      s2o_signc   <= s1o_signc;
      s2o_exp10c  <= s1o_exp10c;
      s2o_shrx    <= s2t_shrx;
      s2o_is_shrx <= (|s2t_shrx);
      s2o_exp10rx <= s2t_exp10rx;
        // multipliers: four 16x16 partial products
      s2o_fract32_albl <= s1o_mul16_al * s1o_mul16_bl;
      s2o_fract32_albh <= s1o_mul16_al * s1o_mul16_bh;
      s2o_fract32_ahbl <= s1o_mul16_ah * s1o_mul16_bl;
      s2o_fract32_ahbh <= s1o_mul16_ah * s1o_mul16_bh;
    end // advance pipe
  end // posedge clock

  // ready is special case
  reg s2o_mul_ready;
  reg s2o_div_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst) begin
      s2o_mul_ready <= 1'b0;
      s2o_div_ready <= 1'b0;
    end else if(flush_i) begin
      s2o_mul_ready <= 1'b0;
      s2o_div_ready <= 1'b0;
    end else if(adv_i) begin
      s2o_mul_ready <= s1o_mul_ready;
      s2o_div_ready <= s1o_div_ready;
    end
  end // posedge clock


  /* Stage #3: 2nd part of multiplier */


  // 2nd stage of multiplier:
  //   sum of the partial products; the lower 16 bits of (al * bl)
  //   are excluded from the sum and are used for sticky bit only
  wire [47:0] s3t_fract48;
  assign s3t_fract48 = {s2o_fract32_ahbh,  16'd0} +
                       {16'd0, s2o_fract32_ahbl} +
                       {16'd0, s2o_fract32_albh} +
                       {32'd0, s2o_fract32_albl[31:16]};

  // stage #3 outputs (for division support)

  // full product
  reg [32:0] s3o_mul33o; // output
  reg        s3o_mul33s; // sticky
  //   registering
  always @(posedge clk) begin
    if(adv_i) begin
      s3o_mul33o <= s3t_fract48[47:15];
      s3o_mul33s <= (|s3t_fract48[14:0]) | (|s2o_fract32_albl[15:0]);
    end
  end // posedge clock

  // For pipelining of division final stage
  //   input related
  reg s3o_inv, s3o_inf_i,
      s3o_snan_i, s3o_qnan_i, s3o_anan_i_sign;
  //   DIV computation related
  reg        s3o_dbz;
  reg [23:0] s3o_fract24a;
  reg        s3o_opc_0;
  reg        s3o_signc;
  reg  [9:0] s3o_exp10c;
  reg  [4:0] s3o_shrx;
  reg        s3o_is_shrx;
  reg  [9:0] s3o_exp10rx;
  // registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s3o_inv         <= s2o_inv;
      s3o_inf_i       <= s2o_inf_i;
      s3o_snan_i      <= s2o_snan_i;
      s3o_qnan_i      <= s2o_qnan_i;
      s3o_anan_i_sign <= s2o_anan_i_sign;
        // DIV computation related
      s3o_dbz      <= s2o_dbz;
      s3o_fract24a <= s2o_fract24a;
      s3o_opc_0    <= s2o_opc_0;
      s3o_signc    <= s2o_signc;
      s3o_exp10c   <= s2o_exp10c;
      s3o_shrx     <= s2o_shrx;
      s3o_is_shrx  <= s2o_is_shrx;
      s3o_exp10rx  <= s2o_exp10rx;
    end // advance pipe
  end // @clock

  // stage 3 ready makes sense for division only
  reg s3o_div_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s3o_div_ready <= 1'b0;
    else if(flush_i)
      s3o_div_ready <= 1'b0;
    else if(adv_i)
      s3o_div_ready <= s2o_div_ready;
  end // posedge clock


  // Feedback from multiplier's output with various rounding techniques.
  //   +2^(-n-2) in case of rounding 1.xxx quotient
  wire itr_rndQ1xx =   s3o_mul33o[31];
  //   +2^(-n-2) in case of rounding 0.1xx quotient
  wire itr_rndQ01x = (~s3o_mul33o[31]);
  //   rounding mask:
  wire [32:0] itr_rndM33 = // bits [6],[5] ... [0]
    { 26'd0,(itr_rndQ & itr_rndQ1xx),(itr_rndQ & itr_rndQ01x), // round resulting quotient
       4'd0,(itr_rndD & s3o_mul33s) };                         // round intermediate divisor
  //   rounding
  assign itr_qtnt33 = s3o_mul33o + itr_rndM33;


  // compute 2's complement or remainder (for sticky bit detection)
  // binary point position is located just after bit [30]
  wire [32:0] itr_AorT33 =
    s3o_div_ready ? {1'b0,s3o_fract24a,8'd0} : // for remainder
                    {32'h80000000,1'b0};       // for two's complement

  // 'Remainder' / Two's complement
  assign itr_rmnd33 = itr_AorT33 - itr_qtnt33;

  // Auxiliary flags:
  //  - truncated remainder isn't zero
  wire s4t_rmnd33_n0  = |itr_rmnd33;
  //  - rounded quotient is exact
  wire s4t_qtnt_exact = ~(s4t_rmnd33_n0 | s3o_mul33s);
  //  - signum of final remainder
  wire s4t_sign_rmnd  = itr_rmnd33[32] | ((~s4t_rmnd33_n0) & s3o_mul33s);


  // Additionally store 26-bit of non-rounded (_raw_) and rounded (_res_) quotients.
  // It is used for rounding in cases of denormalized result.
  // Sticky bit is forced to be zero.
  // The values are latched while the "quotient is computed" flag (itr_rndQ) is set.
  // raw
  reg [25:0] s3o_raw_qtnt26;
  // rounded
  reg [25:0] s3o_res_qtnt26;
  assign     itr_res_qtnt26 = {itr_qtnt33[31:7],itr_qtnt33[6] & itr_rndQ01x};
  // latching
  always @(posedge clk ) begin
    if(itr_rndQ) begin
      s3o_raw_qtnt26 <= s3o_mul33o[31:6];
      s3o_res_qtnt26 <= itr_res_qtnt26;
    end
  end

  // Possible left shift computation.
  // In fact, as the dividend and divisor was normalized
  //   and the result is non-zero
  //   the '1' is maximum number of leading zeros in the quotient.
  wire s4t_nlz = ~s3o_res_qtnt26[25];
  wire [9:0] s4t_exp10_m1 = s3o_exp10c - 10'd1;
  // left shift flag and corrected exponent
  wire       s4t_shlx;
  wire [9:0] s4t_exp10lx;
  assign {s4t_shlx,s4t_exp10lx} =
      // shift isn't needed (includes zero result)
    (~s4t_nlz)            ? {1'b0,s3o_exp10c} :
      // normalization is possible
    (s3o_exp10c >  10'd1) ? {1'b1,s4t_exp10_m1} :
      // denormalized and zero cases
                            {1'b0,{9'd0,~s3o_opc_0}};

  // check if quotient is denormalized
  wire s4t_denorm = s3o_is_shrx |
                    ((~s3o_is_shrx) & (~s4t_shlx) & s4t_nlz);
  // Select quotient for subsequent align and rounding
  // The rounded (_res_) quotient is used:
  //   - for normalized result
  //   - exact result
  //   - non-exact but lesser than infinity precision result
  wire [25:0] s4t_qtnt26 =
    ( (~s4t_denorm) | s4t_qtnt_exact |
      ((~s4t_qtnt_exact) & (~s4t_sign_rmnd)) ) ? s3o_res_qtnt26 :
                                                 s3o_raw_qtnt26;


  // output:
  //   division results are taken from stage #3 registers (s3o_*),
  //   multiplication results are taken from stage #2 registers (s2o_*)
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      muldiv_inv_o       <= s3o_div_ready ? s3o_inv : s2o_inv;
      muldiv_inf_o       <= s3o_div_ready ? s3o_inf_i : s2o_inf_i;
      muldiv_snan_o      <= s3o_div_ready ? s3o_snan_i : s2o_snan_i;
      muldiv_qnan_o      <= s3o_div_ready ? s3o_qnan_i : s2o_qnan_i;
      muldiv_anan_sign_o <= s3o_div_ready ? s3o_anan_i_sign : s2o_anan_i_sign;
        // computation related
      muldiv_sign_o     <= s3o_div_ready ? s3o_signc : s2o_signc;
      muldiv_shr_o      <= s3o_div_ready ? s3o_shrx : s2o_shrx;
      muldiv_exp10shr_o <= s3o_div_ready ? s3o_exp10rx : s2o_exp10rx;
      muldiv_shl_o      <= s3o_div_ready & s4t_shlx;          // makes sense for DIV only
      muldiv_exp10shl_o <= {10{s3o_div_ready}} & s4t_exp10lx; // makes sense for DIV only
      muldiv_exp10sh0_o <= s3o_div_ready ? s3o_exp10c : s2o_exp10c;
      muldiv_fract28_o  <= s3o_div_ready ?
                           {1'b0,s4t_qtnt26,~s4t_qtnt_exact} :      // quotient
                           {s3t_fract48[47:21],|s3t_fract48[20:0]}; // product
        // DIV additional outputs
      div_op_o        <= s3o_div_ready;
      div_sign_rmnd_o <= s3o_div_ready & s4t_sign_rmnd;
      div_dbz_o       <= s3o_div_ready & s3o_dbz;
    end // advance pipe
  end // posedge clock

  // ready is special case
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      muldiv_rdy_o <= 0;
    else if(flush_i)
      muldiv_rdy_o <= 0;
    else if(adv_i)
      muldiv_rdy_o <= s2o_mul_ready | s3o_div_ready;
  end // posedge clock

endmodule // pfpu32_muldiv
665
 
666
 
667
// initial reciprocal approximation
668
//
// arecip_lut
//
//   Purely combinatorial look-up table for the initial reciprocal
//   approximation:
//     b_i - 7-bit table index
//     r_o - 9-bit table value
//
//   In pfpu32_muldiv the table is indexed by bits [22:16] of the normalized
//   divisor fractional and the value read is extended with the leading
//   bits 2'b01 to form the 11-bit reciprocal estimation.
//
//   All 128 index values are covered: 0...126 explicitly, 127 by 'default'.
//
module arecip_lut
(
  input      [6:0] b_i,
  output reg [8:0] r_o
);
  always @(b_i) begin
    case(b_i) // synopsys full_case parallel_case
      7'd0   : r_o = 9'd508;
      7'd1   : r_o = 9'd500;
      7'd2   : r_o = 9'd492;
      7'd3   : r_o = 9'd485;
      7'd4   : r_o = 9'd477;
      7'd5   : r_o = 9'd470;
      7'd6   : r_o = 9'd463;
      7'd7   : r_o = 9'd455;
      7'd8   : r_o = 9'd448;
      7'd9   : r_o = 9'd441;
      7'd10  : r_o = 9'd434;
      7'd11  : r_o = 9'd428;
      7'd12  : r_o = 9'd421;
      7'd13  : r_o = 9'd414;
      7'd14  : r_o = 9'd408;
      7'd15  : r_o = 9'd401;
      7'd16  : r_o = 9'd395;
      7'd17  : r_o = 9'd389;
      7'd18  : r_o = 9'd383;
      7'd19  : r_o = 9'd377;
      7'd20  : r_o = 9'd371;
      7'd21  : r_o = 9'd365;
      7'd22  : r_o = 9'd359;
      7'd23  : r_o = 9'd353;
      7'd24  : r_o = 9'd347;
      7'd25  : r_o = 9'd342;
      7'd26  : r_o = 9'd336;
      7'd27  : r_o = 9'd331;
      7'd28  : r_o = 9'd326;
      7'd29  : r_o = 9'd320;
      7'd30  : r_o = 9'd315;
      7'd31  : r_o = 9'd310;
      7'd32  : r_o = 9'd305;
      7'd33  : r_o = 9'd300;
      7'd34  : r_o = 9'd295;
      7'd35  : r_o = 9'd290;
      7'd36  : r_o = 9'd285;
      7'd37  : r_o = 9'd280;
      7'd38  : r_o = 9'd275;
      7'd39  : r_o = 9'd271;
      7'd40  : r_o = 9'd266;
      7'd41  : r_o = 9'd261;
      7'd42  : r_o = 9'd257;
      7'd43  : r_o = 9'd252;
      7'd44  : r_o = 9'd248;
      7'd45  : r_o = 9'd243;
      7'd46  : r_o = 9'd239;
      7'd47  : r_o = 9'd235;
      7'd48  : r_o = 9'd231;
      7'd49  : r_o = 9'd226;
      7'd50  : r_o = 9'd222;
      7'd51  : r_o = 9'd218;
      7'd52  : r_o = 9'd214;
      7'd53  : r_o = 9'd210;
      7'd54  : r_o = 9'd206;
      7'd55  : r_o = 9'd202;
      7'd56  : r_o = 9'd198;
      7'd57  : r_o = 9'd195;
      7'd58  : r_o = 9'd191;
      7'd59  : r_o = 9'd187;
      7'd60  : r_o = 9'd183;
      7'd61  : r_o = 9'd180;
      7'd62  : r_o = 9'd176;
      7'd63  : r_o = 9'd172;
      7'd64  : r_o = 9'd169;
      7'd65  : r_o = 9'd165;
      7'd66  : r_o = 9'd162;
      7'd67  : r_o = 9'd158;
      7'd68  : r_o = 9'd155;
      7'd69  : r_o = 9'd152;
      7'd70  : r_o = 9'd148;
      7'd71  : r_o = 9'd145;
      7'd72  : r_o = 9'd142;
      7'd73  : r_o = 9'd138;
      7'd74  : r_o = 9'd135;
      7'd75  : r_o = 9'd132;
      7'd76  : r_o = 9'd129;
      7'd77  : r_o = 9'd126;
      7'd78  : r_o = 9'd123;
      7'd79  : r_o = 9'd120;
      7'd80  : r_o = 9'd117;
      7'd81  : r_o = 9'd114;
      7'd82  : r_o = 9'd111;
      7'd83  : r_o = 9'd108;
      7'd84  : r_o = 9'd105;
      7'd85  : r_o = 9'd102;
      7'd86  : r_o = 9'd99;
      7'd87  : r_o = 9'd96;
      7'd88  : r_o = 9'd93;
      7'd89  : r_o = 9'd91;
      7'd90  : r_o = 9'd88;
      7'd91  : r_o = 9'd85;
      7'd92  : r_o = 9'd82;
      7'd93  : r_o = 9'd80;
      7'd94  : r_o = 9'd77;
      7'd95  : r_o = 9'd74;
      7'd96  : r_o = 9'd72;
      7'd97  : r_o = 9'd69;
      7'd98  : r_o = 9'd67;
      7'd99  : r_o = 9'd64;
      7'd100 : r_o = 9'd62;
      7'd101 : r_o = 9'd59;
      7'd102 : r_o = 9'd57;
      7'd103 : r_o = 9'd54;
      7'd104 : r_o = 9'd52;
      7'd105 : r_o = 9'd49;
      7'd106 : r_o = 9'd47;
      7'd107 : r_o = 9'd45;
      7'd108 : r_o = 9'd42;
      7'd109 : r_o = 9'd40;
      7'd110 : r_o = 9'd38;
      7'd111 : r_o = 9'd35;
      7'd112 : r_o = 9'd33;
      7'd113 : r_o = 9'd31;
      7'd114 : r_o = 9'd29;
      7'd115 : r_o = 9'd26;
      7'd116 : r_o = 9'd24;
      7'd117 : r_o = 9'd22;
      7'd118 : r_o = 9'd20;
      7'd119 : r_o = 9'd18;
      7'd120 : r_o = 9'd15;
      7'd121 : r_o = 9'd13;
      7'd122 : r_o = 9'd11;
      7'd123 : r_o = 9'd9;
      7'd124 : r_o = 9'd7;
      7'd125 : r_o = 9'd5;
      7'd126 : r_o = 9'd3;
      default: r_o = 9'd1;
    endcase // LUT for initial approximation of reciprocal
  end // always
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.