OpenCores
URL https://opencores.org/ocsvn/an-fpga-implementation-of-low-latency-noc-based-mpsoc/an-fpga-implementation-of-low-latency-noc-based-mpsoc/trunk

Subversion Repositories an-fpga-implementation-of-low-latency-noc-based-mpsoc

[/] [an-fpga-implementation-of-low-latency-noc-based-mpsoc/] [trunk/] [mpsoc/] [src_processor/] [mor1kx-5.0/] [rtl/] [verilog/] [pfpu32/] [pfpu32_addsub.v] - Blame information for rev 48

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 48 alirezamon
//////////////////////////////////////////////////////////////////////
2
//                                                                  //
3
//    pfpu32_addsub                                                 //
4
//                                                                  //
5
//    This file is part of the mor1kx project                       //
6
//    https://github.com/openrisc/mor1kx                            //
7
//                                                                  //
8
//    Description                                                   //
9
//    addition/subtraction pipeline for single precision floating   //
10
//    point numbers                                                 //
11
//                                                                  //
12
//    Author(s):                                                    //
13
//        - Original design (FPU100) -                              //
14
//          Jidan Al-eryani, jidan@gmx.net                          //
15
//        - Conv. to Verilog and inclusion in OR1200 -              //
16
//          Julius Baxter, julius@opencores.org                     //
17
//        - Update for mor1kx,                                      //
18
//          bug fixing and further development -                    //
19
//          Andrey Bacherov, avbacherov@opencores.org               //
20
//                                                                  //
21
//////////////////////////////////////////////////////////////////////
22
//                                                                  //
23
//  Copyright (C) 2006, 2010, 2014                                  //
24
//                                                                  //
25
//  This source file may be used and distributed without            //
26
//  restriction provided that this copyright statement is not       //
27
//  removed from the file and that any derivative work contains     //
28
//  the original copyright notice and the associated disclaimer.    //
29
//                                                                  //
30
//    THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY           //
31
//  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED       //
32
//  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS       //
33
//  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR          //
34
//  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,             //
35
//  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES        //
36
//  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE       //
37
//  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR            //
38
//  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF      //
39
//  LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT      //
40
//  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT      //
41
//  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE             //
42
//  POSSIBILITY OF SUCH DAMAGE.                                     //
43
//////////////////////////////////////////////////////////////////////
44
 
45
`include "mor1kx-defines.v"
46
 
47
 
48
//
// pfpu32_addsub: three-register-stage add/subtract datapath operating on
// pre-unpacked operands (sign, 10-bit exponent, 24-bit fraction).
//
//   Stage 1: selects the non-shifted / shifted operand using addsub_agtb_i
//            and computes the (saturated to 31) right-shift amount.
//   Stage 2: right-shifts one operand, collects the sticky bit and performs
//            the add or subtract of the fractions.
//   Stage 3: counts leading zeros of the result and prepares the left-shift
//            amount and the corrected exponent for a following align stage.
//
// All data registers advance only when adv_i is high. The "ready" flags
// additionally clear on rst / flush_i, so add_rdy_o reflects start_i after
// three adv_i-enabled clock edges.
//
module pfpu32_addsub
(
   input             clk,
   input             rst,
   input             flush_i,  // flush pipe
   input             adv_i,    // advance pipe
   input             start_i,  // start add/sub
   input             is_sub_i, // 1: subtraction, 0: addition
   // input 'a' related values
   input             signa_i,
   input       [9:0] exp10a_i,
   input      [23:0] fract24a_i,
   input             infa_i,
   // input 'b' related values
   input             signb_i,
   input       [9:0] exp10b_i,
   input      [23:0] fract24b_i,
   input             infb_i,
   // 'a'/'b' related
   input             snan_i,
   input             qnan_i,
   input             anan_sign_i,
   input             addsub_agtb_i,
   input             addsub_aeqb_i,
   // outputs
   output reg        add_rdy_o,       // ready
   output reg        add_sign_o,      // signum
   output reg        add_sub_0_o,     // flag that actual subtraction is performed and result is zero
   output reg  [4:0] add_shl_o,       // do left shift in align stage
   output reg  [9:0] add_exp10shl_o,  // exponent for left shift align
   output reg  [9:0] add_exp10sh0_o,  // exponent for no shift in align
   output reg [27:0] add_fract28_o,   // fractional with appended {r,s} bits
   output reg        add_inv_o,       // invalid operation flag
   output reg        add_inf_o,       // infinity output reg
   output reg        add_snan_o,      // signaling NaN output reg
   output reg        add_qnan_o,      // quiet NaN output reg
   output reg        add_anan_sign_o  // signum for output nan
);
  /*
     Any stage's output is registered.
     Definitions:
       s??o_name - "S"tage number "??", "O"utput
       s??t_name - "S"tage number "??", "T"emporary (internally)
  */

  /* Stage #1: pre addition / subtraction align */

    // detection of some exceptions
    //   inf - inf -> invalid operation; snan output
  wire s1t_inv = infa_i & infb_i &
                 (signa_i ^ (is_sub_i ^ signb_i));
    //   inf input
  wire s1t_inf_i = infa_i | infb_i;

    // signums for calculation
    // (a subtraction is handled as an addition with the sign of 'b' inverted)
  wire s1t_calc_signa = signa_i;
  wire s1t_calc_signb = (signb_i ^ is_sub_i);

    // not shifted operand and its signum
  wire [23:0] s1t_fract24_nsh =
    addsub_agtb_i ? fract24a_i : fract24b_i;

    // operand for right shift
  wire [23:0] s1t_fract24_fsh =
    addsub_agtb_i ? fract24b_i : fract24a_i;

    // shift amount
  wire [9:0] s1t_exp_diff =
    addsub_agtb_i ? (exp10a_i - exp10b_i) :
                    (exp10b_i - exp10a_i);

  // limiter by 31: any set bit in [9:5] forces all five low bits to 1
  wire [4:0] s1t_shr = s1t_exp_diff[4:0] | {5{|s1t_exp_diff[9:5]}};

  // stage #1 outputs
  //  input related
  reg s1o_inv, s1o_inf_i,
      s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign;
  //  computation related
  reg        s1o_aeqb;
  reg  [4:0] s1o_shr;
  reg        s1o_sign_nsh;
  reg        s1o_op_sub;
  reg  [9:0] s1o_exp10c;
  reg [23:0] s1o_fract24_nsh;
  reg [23:0] s1o_fract24_fsh;
  //  registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s1o_inv         <= s1t_inv;
      s1o_inf_i       <= s1t_inf_i;
      s1o_snan_i      <= snan_i;
      s1o_qnan_i      <= qnan_i;
      s1o_anan_i_sign <= anan_sign_i;
        // computation related
        // (shift amount and both fractions are forced to zero
        //  when either input is an infinity)
      s1o_aeqb        <= addsub_aeqb_i;
      s1o_shr         <= s1t_shr & {5{~s1t_inf_i}};
      s1o_sign_nsh    <= addsub_agtb_i ? s1t_calc_signa : s1t_calc_signb;
      s1o_op_sub      <= s1t_calc_signa ^ s1t_calc_signb;
      s1o_exp10c      <= addsub_agtb_i ? exp10a_i : exp10b_i;
      s1o_fract24_nsh <= s1t_fract24_nsh & {24{~s1t_inf_i}};
      s1o_fract24_fsh <= s1t_fract24_fsh & {24{~s1t_inf_i}};
    end // advance
  end // posedge clock

  // ready is special case
  reg s1o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s1o_ready  <= 0;
    else if(flush_i)
      s1o_ready  <= 0;
    else if(adv_i)
      s1o_ready <= start_i;
  end // posedge clock


  /* Stage 2: multiplex and shift */


  // shifter
  wire [25:0] s2t_fract26_fsh = {s1o_fract24_fsh,2'd0};
  wire [25:0] s2t_fract26_shr = s2t_fract26_fsh >> s1o_shr;

  // sticky: OR of all fraction bits shifted out of the 26-bit window
  reg s2t_sticky;
  always @* begin
    case(s1o_shr)
      5'd0, 5'd1, 5'd2 : s2t_sticky = 1'b0; // two added zero bits
      5'd3 : s2t_sticky = s1o_fract24_fsh[0];
      5'd4 : s2t_sticky = |s1o_fract24_fsh[1:0];
      5'd5 : s2t_sticky = |s1o_fract24_fsh[2:0];
      5'd6 : s2t_sticky = |s1o_fract24_fsh[3:0];
      5'd7 : s2t_sticky = |s1o_fract24_fsh[4:0];
      5'd8 : s2t_sticky = |s1o_fract24_fsh[5:0];
      5'd9 : s2t_sticky = |s1o_fract24_fsh[6:0];
      5'd10: s2t_sticky = |s1o_fract24_fsh[7:0];
      5'd11: s2t_sticky = |s1o_fract24_fsh[8:0];
      5'd12: s2t_sticky = |s1o_fract24_fsh[9:0];
      5'd13: s2t_sticky = |s1o_fract24_fsh[10:0];
      5'd14: s2t_sticky = |s1o_fract24_fsh[11:0];
      5'd15: s2t_sticky = |s1o_fract24_fsh[12:0];
      5'd16: s2t_sticky = |s1o_fract24_fsh[13:0];
      5'd17: s2t_sticky = |s1o_fract24_fsh[14:0];
      5'd18: s2t_sticky = |s1o_fract24_fsh[15:0];
      5'd19: s2t_sticky = |s1o_fract24_fsh[16:0];
      5'd20: s2t_sticky = |s1o_fract24_fsh[17:0];
      5'd21: s2t_sticky = |s1o_fract24_fsh[18:0];
      5'd22: s2t_sticky = |s1o_fract24_fsh[19:0];
      5'd23: s2t_sticky = |s1o_fract24_fsh[20:0];
      5'd24: s2t_sticky = |s1o_fract24_fsh[21:0];
      5'd25: s2t_sticky = |s1o_fract24_fsh[22:0];
      default: s2t_sticky = |s1o_fract24_fsh[23:0];
    endcase
  end

    // add/sub of non-shifted and shifted operands
  wire [27:0] s2t_fract28_shr = {1'b0,s2t_fract26_shr,s2t_sticky};

    // subtraction is done as two's complement addition:
    // invert the shifted operand and add 1 when s1o_op_sub is set
  wire [27:0] s2t_fract28_add = {1'b0,s1o_fract24_nsh,3'd0} +
                                (s2t_fract28_shr ^ {28{s1o_op_sub}}) +
                                {27'd0,s1o_op_sub};


  // stage #2 outputs
  //  input related
  reg s2o_inv, s2o_inf_i,
      s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign;
  //  computational related
  reg        s2o_signc;
  reg [9:0]  s2o_exp10c;
  reg [26:0] s2o_fract27;
  reg        s2o_sub_0;       // actual operation is subtraction and the result is zero
  reg        s2o_sticky;      // rounding support
  //  registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s2o_inv         <= s1o_inv;
      s2o_inf_i       <= s1o_inf_i;
      s2o_snan_i      <= s1o_snan_i;
      s2o_qnan_i      <= s1o_qnan_i;
      s2o_anan_i_sign <= s1o_anan_i_sign;
        // computation related
      s2o_signc       <= s1o_sign_nsh;
      s2o_exp10c      <= s1o_exp10c;
      s2o_fract27     <= s2t_fract28_add[27:1];
      s2o_sub_0       <= s1o_aeqb & s1o_op_sub;
      s2o_sticky      <= s2t_sticky;
    end // advance
  end // posedge clock

  // ready is special case
  reg s2o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s2o_ready  <= 0;
    else if(flush_i)
      s2o_ready  <= 0;
    else if(adv_i)
      s2o_ready <= s1o_ready;
  end // posedge clock


  /* Stage 3: update exponent */


  // for possible left shift
  // [26] bit is right shift flag
  // Purely combinational priority decode, hence blocking assignments
  // and an implicit sensitivity list.
  reg [4:0] s3t_nlz;
  always @* begin
    casez(s2o_fract27)
      27'b1??????????????????????????: s3t_nlz = 5'd0;  // [26] bit: shift right
      27'b01?????????????????????????: s3t_nlz = 5'd0;  // 1 is in place
      27'b001????????????????????????: s3t_nlz = 5'd1;
      27'b0001???????????????????????: s3t_nlz = 5'd2;
      27'b00001??????????????????????: s3t_nlz = 5'd3;
      27'b000001?????????????????????: s3t_nlz = 5'd4;
      27'b0000001????????????????????: s3t_nlz = 5'd5;
      27'b00000001???????????????????: s3t_nlz = 5'd6;
      27'b000000001??????????????????: s3t_nlz = 5'd7;
      27'b0000000001?????????????????: s3t_nlz = 5'd8;
      27'b00000000001????????????????: s3t_nlz = 5'd9;
      27'b000000000001???????????????: s3t_nlz = 5'd10;
      27'b0000000000001??????????????: s3t_nlz = 5'd11;
      27'b00000000000001?????????????: s3t_nlz = 5'd12;
      27'b000000000000001????????????: s3t_nlz = 5'd13;
      27'b0000000000000001???????????: s3t_nlz = 5'd14;
      27'b00000000000000001??????????: s3t_nlz = 5'd15;
      27'b000000000000000001?????????: s3t_nlz = 5'd16;
      27'b0000000000000000001????????: s3t_nlz = 5'd17;
      27'b00000000000000000001???????: s3t_nlz = 5'd18;
      27'b000000000000000000001??????: s3t_nlz = 5'd19;
      27'b0000000000000000000001?????: s3t_nlz = 5'd20;
      27'b00000000000000000000001????: s3t_nlz = 5'd21;
      27'b000000000000000000000001???: s3t_nlz = 5'd22;
      27'b0000000000000000000000001??: s3t_nlz = 5'd23;
      27'b00000000000000000000000001?: s3t_nlz = 5'd24;
      27'b000000000000000000000000001: s3t_nlz = 5'd25;
      // Only the all-zero value remains for known inputs (zero result).
      // Using 'default' also keeps the block latch-free when the input
      // carries X/Z in simulation.
      default:                         s3t_nlz = 5'd0;
    endcase
  end // always

  // left shift amount and corrected exponent
  wire [4:0] s3t_nlz_m1    = (s3t_nlz - 5'd1);
  wire [9:0] s3t_exp10c_m1 = s2o_exp10c - 10'd1;
  wire [9:0] s3t_exp10c_mz = s2o_exp10c - {5'd0,s3t_nlz};
  wire [4:0] s3t_shl;
  wire [9:0] s3t_exp10shl;
  assign {s3t_shl,s3t_exp10shl} =
      // shift isn't needed or impossible
    (~(|s3t_nlz) | (s2o_exp10c == 10'd1)) ?
                              {5'd0,s2o_exp10c} :
      // normalization is possible
    (s2o_exp10c >  s3t_nlz) ? {s3t_nlz,s3t_exp10c_mz} :
      // denormalized cases
    (s2o_exp10c == s3t_nlz) ? {s3t_nlz_m1,10'd1} :
                              {s3t_exp10c_m1[4:0],10'd1};


  // registering output
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      add_inv_o       <= s2o_inv;
      add_inf_o       <= s2o_inf_i;
      add_snan_o      <= s2o_snan_i;
      add_qnan_o      <= s2o_qnan_i;
      add_anan_sign_o <= s2o_anan_i_sign;
        // computation related
      add_sign_o      <= s2o_signc;
      add_sub_0_o     <= s2o_sub_0;
      add_shl_o       <= s3t_shl;
      add_exp10shl_o  <= s3t_exp10shl;
      add_exp10sh0_o  <= s2o_exp10c;
      add_fract28_o   <= {s2o_fract27,s2o_sticky};
    end // advance
  end // posedge clock

  // ready is special case
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      add_rdy_o <= 0;
    else if(flush_i)
      add_rdy_o <= 0;
    else if(adv_i)
      add_rdy_o <= s2o_ready;
  end // posedge clock

endmodule // pfpu32_addsub

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.