URL https://opencores.org/ocsvn/an-fpga-implementation-of-low-latency-noc-based-mpsoc/an-fpga-implementation-of-low-latency-noc-based-mpsoc/trunk

Subversion Repositories an-fpga-implementation-of-low-latency-noc-based-mpsoc

[/] [an-fpga-implementation-of-low-latency-noc-based-mpsoc/] [trunk/] [mpsoc/] [src_processor/] [mor1kx-5.0/] [rtl/] [verilog/] [pfpu32/] [pfpu32_addsub.v] - Blame information for rev 48

Details | Compare with Previous | View Log


//////////////////////////////////////////////////////////////////////
//                                                                  //
//    pfpu32_addsub                                                 //
//                                                                  //
//    This file is part of the mor1kx project                       //
//    https://github.com/openrisc/mor1kx                            //
//                                                                  //
//    Description                                                   //
//    addition/subtraction pipeline for single precision floating   //
//    point numbers                                                 //
//                                                                  //
//    Author(s):                                                    //
//        - Original design (FPU100) -                              //
//          Jidan Al-eryani, jidan@gmx.net                          //
//        - Conv. to Verilog and inclusion in OR1200 -              //
//          Julius Baxter, julius@opencores.org                     //
//        - Update for mor1kx,                                      //
//          bug fixing and further development -                    //
//          Andrey Bacherov, avbacherov@opencores.org               //
//                                                                  //
//////////////////////////////////////////////////////////////////////
//                                                                  //
//  Copyright (C) 2006, 2010, 2014                                  //
//                                                                  //
//  This source file may be used and distributed without            //
//  restriction provided that this copyright statement is not       //
//  removed from the file and that any derivative work contains     //
//  the original copyright notice and the associated disclaimer.    //
//                                                                  //
//    THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY           //
//  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED       //
//  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS       //
//  FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR          //
//  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,             //
//  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES        //
//  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE       //
//  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR            //
//  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF      //
//  LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT      //
//  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT      //
//  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE             //
//  POSSIBILITY OF SUCH DAMAGE.                                     //
//////////////////////////////////////////////////////////////////////
 
`include "mor1kx-defines.v"
 
 
// Three-stage registered pipeline for single precision add/sub.
//   Stage #1: select the non-shifted / to-be-shifted operands and the
//             right-shift amount, detect inf-related exceptions.
//   Stage #2: right-shift the smaller operand, compute sticky, add/sub.
//   Stage #3: count leading zeros of the sum and derive the left-shift
//             amount and corrected exponent for the following align stage.
// All data registers advance only when adv_i is asserted; the "ready"
// flags are additionally cleared by rst and flush_i.
module pfpu32_addsub
(
   input             clk,
   input             rst,
   input             flush_i,  // flush pipe
   input             adv_i,    // advance pipe
   input             start_i,  // start add/sub
   input             is_sub_i, // 1: subtraction, 0: addition
   // input 'a' related values
   input             signa_i,
   input       [9:0] exp10a_i,
   input      [23:0] fract24a_i,
   input             infa_i,
   // input 'b' related values
   input             signb_i,
   input       [9:0] exp10b_i,
   input      [23:0] fract24b_i,
   input             infb_i,
   // 'a'/'b' related
   input             snan_i,
   input             qnan_i,
   input             anan_sign_i,
   input             addsub_agtb_i, // 1: 'a' is the non-shifted operand (presumably |a| > |b|; computed by caller)
   input             addsub_aeqb_i, // presumably |a| == |b|; computed by caller
   // outputs
   output reg        add_rdy_o,       // ready
   output reg        add_sign_o,      // signum
   output reg        add_sub_0_o,     // flag that actual subtraction is performed and result is zero
   output reg  [4:0] add_shl_o,       // do left shift in align stage
   output reg  [9:0] add_exp10shl_o,  // exponent for left shift align
   output reg  [9:0] add_exp10sh0_o,  // exponent for no shift in align
   output reg [27:0] add_fract28_o,   // fractional with appended {r,s} bits
   output reg        add_inv_o,       // invalid operation flag
   output reg        add_inf_o,       // infinity output reg
   output reg        add_snan_o,      // signaling NaN output reg
   output reg        add_qnan_o,      // quiet NaN output reg
   output reg        add_anan_sign_o  // signum for output nan
);
  /*
     Any stage's output is registered.
     Definitions:
       s??o_name - "S"tage number "??", "O"utput
       s??t_name - "S"tage number "??", "T"emporary (internally)
  */

  /* Stage #1: pre addition / subtraction align */

    // detection of some exceptions
    //   inf - inf -> invalid operation; snan output
    //   (both operands are infinities and their effective signs differ)
  wire s1t_inv = infa_i & infb_i &
                 (signa_i ^ (is_sub_i ^ signb_i));
    //   inf input
  wire s1t_inf_i = infa_i | infb_i;

    // signums for calculation
    // ('b' sign is inverted for subtraction, so stage #2 only has to
    //  distinguish "same signs" from "different signs")
  wire s1t_calc_signa = signa_i;
  wire s1t_calc_signb = (signb_i ^ is_sub_i);

    // not shifted operand
  wire [23:0] s1t_fract24_nsh =
    addsub_agtb_i ? fract24a_i : fract24b_i;

    // operand for right shift
  wire [23:0] s1t_fract24_fsh =
    addsub_agtb_i ? fract24b_i : fract24a_i;

    // shift amount: exponent of the non-shifted operand minus
    // exponent of the shifted one
  wire [9:0] s1t_exp_diff =
    addsub_agtb_i ? (exp10a_i - exp10b_i) :
                    (exp10b_i - exp10a_i);

  // limiter by 31: any set bit in [9:5] saturates the 5-bit amount
  wire [4:0] s1t_shr = s1t_exp_diff[4:0] | {5{|s1t_exp_diff[9:5]}};

  // stage #1 outputs
  //  input related
  reg s1o_inv, s1o_inf_i,
      s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign;
  //  computation related
  reg        s1o_aeqb;
  reg  [4:0] s1o_shr;
  reg        s1o_sign_nsh;
  reg        s1o_op_sub;
  reg  [9:0] s1o_exp10c;
  reg [23:0] s1o_fract24_nsh;
  reg [23:0] s1o_fract24_fsh;
  //  registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s1o_inv         <= s1t_inv;
      s1o_inf_i       <= s1t_inf_i;
      s1o_snan_i      <= snan_i;
      s1o_qnan_i      <= qnan_i;
      s1o_anan_i_sign <= anan_sign_i;
        // computation related
        // (shift amount and both fractionals are forced to zero
        //  if any of the inputs is infinity)
      s1o_aeqb        <= addsub_aeqb_i;
      s1o_shr         <= s1t_shr & {5{~s1t_inf_i}};
      s1o_sign_nsh    <= addsub_agtb_i ? s1t_calc_signa : s1t_calc_signb;
      s1o_op_sub      <= s1t_calc_signa ^ s1t_calc_signb;
      s1o_exp10c      <= addsub_agtb_i ? exp10a_i : exp10b_i;
      s1o_fract24_nsh <= s1t_fract24_nsh & {24{~s1t_inf_i}};
      s1o_fract24_fsh <= s1t_fract24_fsh & {24{~s1t_inf_i}};
    end // advance
  end // posedge clock

  // ready is special case: cleared by reset and by flush
  reg s1o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s1o_ready <= 0;
    else if(flush_i)
      s1o_ready <= 0;
    else if(adv_i)
      s1o_ready <= start_i;
  end // posedge clock


  /* Stage #2: multiplex and shift */


  // shifter: two zero bits are appended below the 24-bit fractional
  // before the right shift
  wire [25:0] s2t_fract26_fsh = {s1o_fract24_fsh,2'd0};
  wire [25:0] s2t_fract26_shr = s2t_fract26_fsh >> s1o_shr;

  // sticky: OR of the fractional bits shifted out beyond
  // the two appended zero bits
  reg s2t_sticky;
  always @* begin
    case(s1o_shr)
      5'd0, 5'd1, 5'd2 : s2t_sticky = 1'b0; // two added zero bits
      5'd3 : s2t_sticky = s1o_fract24_fsh[0];
      5'd4 : s2t_sticky = |s1o_fract24_fsh[1:0];
      5'd5 : s2t_sticky = |s1o_fract24_fsh[2:0];
      5'd6 : s2t_sticky = |s1o_fract24_fsh[3:0];
      5'd7 : s2t_sticky = |s1o_fract24_fsh[4:0];
      5'd8 : s2t_sticky = |s1o_fract24_fsh[5:0];
      5'd9 : s2t_sticky = |s1o_fract24_fsh[6:0];
      5'd10: s2t_sticky = |s1o_fract24_fsh[7:0];
      5'd11: s2t_sticky = |s1o_fract24_fsh[8:0];
      5'd12: s2t_sticky = |s1o_fract24_fsh[9:0];
      5'd13: s2t_sticky = |s1o_fract24_fsh[10:0];
      5'd14: s2t_sticky = |s1o_fract24_fsh[11:0];
      5'd15: s2t_sticky = |s1o_fract24_fsh[12:0];
      5'd16: s2t_sticky = |s1o_fract24_fsh[13:0];
      5'd17: s2t_sticky = |s1o_fract24_fsh[14:0];
      5'd18: s2t_sticky = |s1o_fract24_fsh[15:0];
      5'd19: s2t_sticky = |s1o_fract24_fsh[16:0];
      5'd20: s2t_sticky = |s1o_fract24_fsh[17:0];
      5'd21: s2t_sticky = |s1o_fract24_fsh[18:0];
      5'd22: s2t_sticky = |s1o_fract24_fsh[19:0];
      5'd23: s2t_sticky = |s1o_fract24_fsh[20:0];
      5'd24: s2t_sticky = |s1o_fract24_fsh[21:0];
      5'd25: s2t_sticky = |s1o_fract24_fsh[22:0];
      default: s2t_sticky = |s1o_fract24_fsh[23:0]; // shift by 26 and more
    endcase
  end

    // add/sub of non-shifted and shifted operands
  wire [27:0] s2t_fract28_shr = {1'b0,s2t_fract26_shr,s2t_sticky};

    // for subtraction the shifted operand is bitwise inverted and
    // 1 is added, i.e. its two's complement is summed
  wire [27:0] s2t_fract28_add = {1'b0,s1o_fract24_nsh,3'd0} +
                                (s2t_fract28_shr ^ {28{s1o_op_sub}}) +
                                {27'd0,s1o_op_sub};


  // stage #2 outputs
  //  input related
  reg s2o_inv, s2o_inf_i,
      s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign;
  //  computational related
  reg        s2o_signc;
  reg [9:0]  s2o_exp10c;
  reg [26:0] s2o_fract27;
  reg        s2o_sub_0;       // actual operation is subtraction and the result is zero
  reg        s2o_sticky;      // rounding support
  //  registering
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      s2o_inv         <= s1o_inv;
      s2o_inf_i       <= s1o_inf_i;
      s2o_snan_i      <= s1o_snan_i;
      s2o_qnan_i      <= s1o_qnan_i;
      s2o_anan_i_sign <= s1o_anan_i_sign;
        // computation related
        // (bit [0] of the sum is dropped here; the sticky computed
        //  before the addition is forwarded separately)
      s2o_signc       <= s1o_sign_nsh;
      s2o_exp10c      <= s1o_exp10c;
      s2o_fract27     <= s2t_fract28_add[27:1];
      s2o_sub_0       <= s1o_aeqb & s1o_op_sub;
      s2o_sticky      <= s2t_sticky;
    end // advance
  end // posedge clock

  // ready is special case: cleared by reset and by flush
  reg s2o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s2o_ready <= 0;
    else if(flush_i)
      s2o_ready <= 0;
    else if(adv_i)
      s2o_ready <= s1o_ready;
  end // posedge clock


  /* Stage #3: update exponent */


  // for possible left shift
  // [26] bit is right shift flag
  // NOTE: this is combinational logic, so blocking assignments are used.
  //       Every 27-bit pattern of 0/1 values is matched by one of the
  //       items below (the last item covers the all-zero value).
  reg [4:0] s3t_nlz;
  always @* begin
    casez(s2o_fract27)
      27'b1??????????????????????????: s3t_nlz = 5'd0;  // [26] bit: shift right
      27'b01?????????????????????????: s3t_nlz = 5'd0;  // 1 is in place
      27'b001????????????????????????: s3t_nlz = 5'd1;
      27'b0001???????????????????????: s3t_nlz = 5'd2;
      27'b00001??????????????????????: s3t_nlz = 5'd3;
      27'b000001?????????????????????: s3t_nlz = 5'd4;
      27'b0000001????????????????????: s3t_nlz = 5'd5;
      27'b00000001???????????????????: s3t_nlz = 5'd6;
      27'b000000001??????????????????: s3t_nlz = 5'd7;
      27'b0000000001?????????????????: s3t_nlz = 5'd8;
      27'b00000000001????????????????: s3t_nlz = 5'd9;
      27'b000000000001???????????????: s3t_nlz = 5'd10;
      27'b0000000000001??????????????: s3t_nlz = 5'd11;
      27'b00000000000001?????????????: s3t_nlz = 5'd12;
      27'b000000000000001????????????: s3t_nlz = 5'd13;
      27'b0000000000000001???????????: s3t_nlz = 5'd14;
      27'b00000000000000001??????????: s3t_nlz = 5'd15;
      27'b000000000000000001?????????: s3t_nlz = 5'd16;
      27'b0000000000000000001????????: s3t_nlz = 5'd17;
      27'b00000000000000000001???????: s3t_nlz = 5'd18;
      27'b000000000000000000001??????: s3t_nlz = 5'd19;
      27'b0000000000000000000001?????: s3t_nlz = 5'd20;
      27'b00000000000000000000001????: s3t_nlz = 5'd21;
      27'b000000000000000000000001???: s3t_nlz = 5'd22;
      27'b0000000000000000000000001??: s3t_nlz = 5'd23;
      27'b00000000000000000000000001?: s3t_nlz = 5'd24;
      27'b000000000000000000000000001: s3t_nlz = 5'd25;
      27'b000000000000000000000000000: s3t_nlz = 5'd0;  // zero result
    endcase
  end // always

  // left shift amount and corrected exponent
  wire [4:0] s3t_nlz_m1    = (s3t_nlz - 5'd1);
  wire [9:0] s3t_exp10c_m1 = s2o_exp10c - 10'd1;
  wire [9:0] s3t_exp10c_mz = s2o_exp10c - {5'd0,s3t_nlz};
  wire [4:0] s3t_shl;
  wire [9:0] s3t_exp10shl;
  // NOTE(review): the last branch shifts by (exponent - 1); it looks like
  //               s2o_exp10c is expected to be >= 1 here - confirm
  //               against the producer of exp10a_i / exp10b_i.
  assign {s3t_shl,s3t_exp10shl} =
      // shift isn't needed or impossible
    (~(|s3t_nlz) | (s2o_exp10c == 10'd1)) ?
                              {5'd0,s2o_exp10c} :
      // normalization is possible
    (s2o_exp10c >  s3t_nlz) ? {s3t_nlz,s3t_exp10c_mz} :
      // denormalized cases: exponent is forced to 1
    (s2o_exp10c == s3t_nlz) ? {s3t_nlz_m1,10'd1} :
                              {s3t_exp10c_m1[4:0],10'd1};


  // registering output
  always @(posedge clk) begin
    if(adv_i) begin
        // input related
      add_inv_o       <= s2o_inv;
      add_inf_o       <= s2o_inf_i;
      add_snan_o      <= s2o_snan_i;
      add_qnan_o      <= s2o_qnan_i;
      add_anan_sign_o <= s2o_anan_i_sign;
        // computation related
      add_sign_o      <= s2o_signc;
      add_sub_0_o     <= s2o_sub_0;
      add_shl_o       <= s3t_shl;
      add_exp10shl_o  <= s3t_exp10shl;
      add_exp10sh0_o  <= s2o_exp10c;
      add_fract28_o   <= {s2o_fract27,s2o_sticky};
    end // advance
  end // posedge clock

  // ready is special case: cleared by reset and by flush
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      add_rdy_o <= 0;
    else if(flush_i)
      add_rdy_o <= 0;
    else if(adv_i)
      add_rdy_o <= s2o_ready;
  end // posedge clock

endmodule // pfpu32_addsub

Browse

Tools

Subversion Repositories an-fpga-implementation-of-low-latency-noc-based-mpsoc

[/] [an-fpga-implementation-of-low-latency-noc-based-mpsoc/] [trunk/] [mpsoc/] [src_processor/] [mor1kx-5.0/] [rtl/] [verilog/] [pfpu32/] [pfpu32_addsub.v] - Blame information for rev 48

Line No.	Rev	Author	Line
1	48	alirezamon	`//////////////////////////////////////////////////////////////////////`
2			`// //`
3			`// pfpu32_addsub //`
4			`// //`
5			`// This file is part of the mor1kx project //`
6			`// https://github.com/openrisc/mor1kx //`
7			`// //`
8			`// Description //`
9			`// addition/subtraction pipeline for single precision floating //`
10			`// point numbers //`
11			`// //`
12			`// Author(s): //`
13			`// - Original design (FPU100) - //`
14			`// Jidan Al-eryani, jidan@gmx.net //`
15			`// - Conv. to Verilog and inclusion in OR1200 - //`
16			`// Julius Baxter, julius@opencores.org //`
17			`// - Update for mor1kx, //`
18			`// bug fixing and further development - //`
19			`// Andrey Bacherov, avbacherov@opencores.org //`
20			`// //`
21			`//////////////////////////////////////////////////////////////////////`
22			`// //`
23			`// Copyright (C) 2006, 2010, 2014 //`
24			`// //`
25			`// This source file may be used and distributed without //`
26			`// restriction provided that this copyright statement is not //`
27			`// removed from the file and that any derivative work contains //`
28			`// the original copyright notice and the associated disclaimer. //`
29			`// //`
30			// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //
31			`// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //`
32			`// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //`
33			`// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //`
34			`// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //`
35			`// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //`
36			`// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //`
37			`// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //`
38			`// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //`
39			`// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //`
40			`// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //`
41			`// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //`
42			`// POSSIBILITY OF SUCH DAMAGE. //`
43			`//////////////////////////////////////////////////////////////////////`
44
45			`include "mor1kx-defines.v"
46
47
48			`module pfpu32_addsub`
49			`(`
50			`input clk,`
51			`input rst,`
52			`input flush_i, // flushe pipe`
53			`input adv_i, // advance pipe`
54			`input start_i, // start add/sub`
55			`input is_sub_i, // 1: substruction, 0: addition`
56			`// input 'a' related values`
57			`input signa_i,`
58			`input [9:0] exp10a_i,`
59			`input [23:0] fract24a_i,`
60			`input infa_i,`
61			`// input 'b' related values`
62			`input signb_i,`
63			`input [9:0] exp10b_i,`
64			`input [23:0] fract24b_i,`
65			`input infb_i,`
66			`// 'a'/'b' related`
67			`input snan_i,`
68			`input qnan_i,`
69			`input anan_sign_i,`
70			`input addsub_agtb_i,`
71			`input addsub_aeqb_i,`
72			`// outputs`
73			`output reg add_rdy_o, // ready`
74			`output reg add_sign_o, // signum`
75			`output reg add_sub_0_o, // flag that actual substruction is performed and result is zero`
76			`output reg [4:0] add_shl_o, // do left shift in align stage`
77			`output reg [9:0] add_exp10shl_o, // exponent for left shift align`
78			`output reg [9:0] add_exp10sh0_o, // exponent for no shift in align`
79			`output reg [27:0] add_fract28_o, // fractional with appended {r,s} bits`
80			`output reg add_inv_o, // invalid operation flag`
81			`output reg add_inf_o, // infinity output reg`
82			`output reg add_snan_o, // signaling NaN output reg`
83			`output reg add_qnan_o, // quiet NaN output reg`
84			`output reg add_anan_sign_o // signum for output nan`
85			`);`
86			`/*`
87			`Any stage's output is registered.`
88			`Definitions:`
89			`s??o_name - "S"tage number "??", "O"utput`
90			`s??t_name - "S"tage number "??", "T"emporary (internally)`
91			`*/`
92
93			`/* Stage #1: pre addition / substruction align */`
94
95			`// detection of some exceptions`
96			`// inf - inf -> invalid operation; snan output`
97			`wire s1t_inv = infa_i & infb_i &`
98			`(signa_i ^ (is_sub_i ^ signb_i));`
99			`// inf input`
100			`wire s1t_inf_i = infa_i \| infb_i;`
101
102			`// signums for calculation`
103			`wire s1t_calc_signa = signa_i;`
104			`wire s1t_calc_signb = (signb_i ^ is_sub_i);`
105
106			`// not shifted operand and its signum`
107			`wire [23:0] s1t_fract24_nsh =`
108			`addsub_agtb_i ? fract24a_i : fract24b_i;`
109
110			`// operand for right shift`
111			`wire [23:0] s1t_fract24_fsh =`
112			`addsub_agtb_i ? fract24b_i : fract24a_i;`
113
114			`// shift amount`
115			`wire [9:0] s1t_exp_diff =`
116			`addsub_agtb_i ? (exp10a_i - exp10b_i) :`
117			`(exp10b_i - exp10a_i);`
118
119			`// limiter by 31`
120			`wire [4:0] s1t_shr = s1t_exp_diff[4:0] \| {5{\|s1t_exp_diff[9:5]}};`
121
122			`// stage #1 outputs`
123			`// input related`
124			`reg s1o_inv, s1o_inf_i,`
125			`s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign;`
126			`// computation related`
127			`reg s1o_aeqb;`
128			`reg [4:0] s1o_shr;`
129			`reg s1o_sign_nsh;`
130			`reg s1o_op_sub;`
131			`reg [9:0] s1o_exp10c;`
132			`reg [23:0] s1o_fract24_nsh;`
133			`reg [23:0] s1o_fract24_fsh;`
134			`// registering`
135			`always @(posedge clk) begin`
136			`if(adv_i) begin`
137			`// input related`
138			`s1o_inv <= s1t_inv;`
139			`s1o_inf_i <= s1t_inf_i;`
140			`s1o_snan_i <= snan_i;`
141			`s1o_qnan_i <= qnan_i;`
142			`s1o_anan_i_sign <= anan_sign_i;`
143			`// computation related`
144			`s1o_aeqb <= addsub_aeqb_i;`
145			`s1o_shr <= s1t_shr & {5{~s1t_inf_i}};`
146			`s1o_sign_nsh <= addsub_agtb_i ? s1t_calc_signa : s1t_calc_signb;`
147			`s1o_op_sub <= s1t_calc_signa ^ s1t_calc_signb;`
148			`s1o_exp10c <= addsub_agtb_i ? exp10a_i : exp10b_i;`
149			`s1o_fract24_nsh <= s1t_fract24_nsh & {24{~s1t_inf_i}};`
150			`s1o_fract24_fsh <= s1t_fract24_fsh & {24{~s1t_inf_i}};`
151			`end // advance`
152			`end // posedge clock`
153
154			`// ready is special case`
155			`reg s1o_ready;`
156			always @(posedge clk `OR_ASYNC_RST) begin
157			`if (rst)`
158			`s1o_ready <= 0;`
159			`else if(flush_i)`
160			`s1o_ready <= 0;`
161			`else if(adv_i)`
162			`s1o_ready <= start_i;`
163			`end // posedge clock`
164
165
166			`/* Stage 2: multiplex and shift */`
167
168
169			`// shifter`
170			`wire [25:0] s2t_fract26_fsh = {s1o_fract24_fsh,2'd0};`
171			`wire [25:0] s2t_fract26_shr = s2t_fract26_fsh >> s1o_shr;`
172
173			`// sticky`
174			`reg s2t_sticky;`
175			`always @(s1o_shr or s1o_fract24_fsh) begin`
176			`case(s1o_shr)`
177			`5'd0, 5'd1, 5'd2 : s2t_sticky = 1'b0; // two added zero bits`
178			`5'd3 : s2t_sticky = s1o_fract24_fsh[0];`
179			`5'd4 : s2t_sticky = \|s1o_fract24_fsh[1:0];`
180			`5'd5 : s2t_sticky = \|s1o_fract24_fsh[2:0];`
181			`5'd6 : s2t_sticky = \|s1o_fract24_fsh[3:0];`
182			`5'd7 : s2t_sticky = \|s1o_fract24_fsh[4:0];`
183			`5'd8 : s2t_sticky = \|s1o_fract24_fsh[5:0];`
184			`5'd9 : s2t_sticky = \|s1o_fract24_fsh[6:0];`
185			`5'd10: s2t_sticky = \|s1o_fract24_fsh[7:0];`
186			`5'd11: s2t_sticky = \|s1o_fract24_fsh[8:0];`
187			`5'd12: s2t_sticky = \|s1o_fract24_fsh[9:0];`
188			`5'd13: s2t_sticky = \|s1o_fract24_fsh[10:0];`
189			`5'd14: s2t_sticky = \|s1o_fract24_fsh[11:0];`
190			`5'd15: s2t_sticky = \|s1o_fract24_fsh[12:0];`
191			`5'd16: s2t_sticky = \|s1o_fract24_fsh[13:0];`
192			`5'd17: s2t_sticky = \|s1o_fract24_fsh[14:0];`
193			`5'd18: s2t_sticky = \|s1o_fract24_fsh[15:0];`
194			`5'd19: s2t_sticky = \|s1o_fract24_fsh[16:0];`
195			`5'd20: s2t_sticky = \|s1o_fract24_fsh[17:0];`
196			`5'd21: s2t_sticky = \|s1o_fract24_fsh[18:0];`
197			`5'd22: s2t_sticky = \|s1o_fract24_fsh[19:0];`
198			`5'd23: s2t_sticky = \|s1o_fract24_fsh[20:0];`
199			`5'd24: s2t_sticky = \|s1o_fract24_fsh[21:0];`
200			`5'd25: s2t_sticky = \|s1o_fract24_fsh[22:0];`
201			`default: s2t_sticky = \|s1o_fract24_fsh[23:0];`
202			`endcase`
203			`end`
204
205			`// add/sub of non-shifted and shifted operands`
206			`wire [27:0] s2t_fract28_shr = {1'b0,s2t_fract26_shr,s2t_sticky};`
207
208			`wire [27:0] s2t_fract28_add = {1'b0,s1o_fract24_nsh,3'd0} +`
209			`(s2t_fract28_shr ^ {28{s1o_op_sub}}) +`
210			`{27'd0,s1o_op_sub};`
211
212
213			`// stage #2 outputs`
214			`// input related`
215			`reg s2o_inv, s2o_inf_i,`
216			`s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign;`
217			`// computational related`
218			`reg s2o_signc;`
219			`reg [9:0] s2o_exp10c;`
220			`reg [26:0] s2o_fract27;`
221			`reg s2o_sub_0; // actual operation is substruction and the result is zero`
222			`reg s2o_sticky; // rounding support`
223			`// registering`
224			`always @(posedge clk) begin`
225			`if(adv_i) begin`
226			`// input related`
227			`s2o_inv <= s1o_inv;`
228			`s2o_inf_i <= s1o_inf_i;`
229			`s2o_snan_i <= s1o_snan_i;`
230			`s2o_qnan_i <= s1o_qnan_i;`
231			`s2o_anan_i_sign <= s1o_anan_i_sign;`
232			`// computation related`
233			`s2o_signc <= s1o_sign_nsh;`
234			`s2o_exp10c <= s1o_exp10c;`
235			`s2o_fract27 <= s2t_fract28_add[27:1];`
236			`s2o_sub_0 <= s1o_aeqb & s1o_op_sub;`
237			`s2o_sticky <= s2t_sticky;`
238			`end // advance`
239			`end // posedge clock`
240
241			`// ready is special case`
242			`reg s2o_ready;`
243			always @(posedge clk `OR_ASYNC_RST) begin
244			`if (rst)`
245			`s2o_ready <= 0;`
246			`else if(flush_i)`
247			`s2o_ready <= 0;`
248			`else if(adv_i)`
249			`s2o_ready <= s1o_ready;`
250			`end // posedge clock`
251
252
253			`/* Stage 4: update exponent */`
254
255
256			`// for possible left shift`
257			`// [26] bit is right shift flag`
258			`reg [4:0] s3t_nlz;`
259			`always @(s2o_fract27) begin`
260			`casez(s2o_fract27)`
261			`27'b1??????????????????????????: s3t_nlz <= 0; // [26] bit: shift right`
262			`27'b01?????????????????????????: s3t_nlz <= 0; // 1 is in place`
263			`27'b001????????????????????????: s3t_nlz <= 1;`
264			`27'b0001???????????????????????: s3t_nlz <= 2;`
265			`27'b00001??????????????????????: s3t_nlz <= 3;`
266			`27'b000001?????????????????????: s3t_nlz <= 4;`
267			`27'b0000001????????????????????: s3t_nlz <= 5;`
268			`27'b00000001???????????????????: s3t_nlz <= 6;`
269			`27'b000000001??????????????????: s3t_nlz <= 7;`
270			`27'b0000000001?????????????????: s3t_nlz <= 8;`
271			`27'b00000000001????????????????: s3t_nlz <= 9;`
272			`27'b000000000001???????????????: s3t_nlz <= 10;`
273			`27'b0000000000001??????????????: s3t_nlz <= 11;`
274			`27'b00000000000001?????????????: s3t_nlz <= 12;`
275			`27'b000000000000001????????????: s3t_nlz <= 13;`
276			`27'b0000000000000001???????????: s3t_nlz <= 14;`
277			`27'b00000000000000001??????????: s3t_nlz <= 15;`
278			`27'b000000000000000001?????????: s3t_nlz <= 16;`
279			`27'b0000000000000000001????????: s3t_nlz <= 17;`
280			`27'b00000000000000000001???????: s3t_nlz <= 18;`
281			`27'b000000000000000000001??????: s3t_nlz <= 19;`
282			`27'b0000000000000000000001?????: s3t_nlz <= 20;`
283			`27'b00000000000000000000001????: s3t_nlz <= 21;`
284			`27'b000000000000000000000001???: s3t_nlz <= 22;`
285			`27'b0000000000000000000000001??: s3t_nlz <= 23;`
286			`27'b00000000000000000000000001?: s3t_nlz <= 24;`
287			`27'b000000000000000000000000001: s3t_nlz <= 25;`
288			`27'b000000000000000000000000000: s3t_nlz <= 0; // zero result`
289			`endcase`
290			`end // always`
291
292			`// left shift amount and corrected exponent`
293			`wire [4:0] s3t_nlz_m1 = (s3t_nlz - 5'd1);`
294			`wire [9:0] s3t_exp10c_m1 = s2o_exp10c - 10'd1;`
295			`wire [9:0] s3t_exp10c_mz = s2o_exp10c - {5'd0,s3t_nlz};`
296			`wire [4:0] s3t_shl;`
297			`wire [9:0] s3t_exp10shl;`
298			`assign {s3t_shl,s3t_exp10shl} =`
299			`// shift isn't needed or impossible`
300			`(~(\|s3t_nlz) \| (s2o_exp10c == 10'd1)) ?`
301			`{5'd0,s2o_exp10c} :`
302			`// normalization is possible`
303			`(s2o_exp10c > s3t_nlz) ? {s3t_nlz,s3t_exp10c_mz} :`
304			`// denormalized cases`
305			`(s2o_exp10c == s3t_nlz) ? {s3t_nlz_m1,10'd1} :`
306			`{s3t_exp10c_m1[4:0],10'd1};`
307
308
309			`// registering output`
310			`always @(posedge clk) begin`
311			`if(adv_i) begin`
312			`// input related`
313			`add_inv_o <= s2o_inv;`
314			`add_inf_o <= s2o_inf_i;`
315			`add_snan_o <= s2o_snan_i;`
316			`add_qnan_o <= s2o_qnan_i;`
317			`add_anan_sign_o <= s2o_anan_i_sign;`
318			`// computation related`
319			`add_sign_o <= s2o_signc;`
320			`add_sub_0_o <= s2o_sub_0;`
321			`add_shl_o <= s3t_shl;`
322			`add_exp10shl_o <= s3t_exp10shl;`
323			`add_exp10sh0_o <= s2o_exp10c;`
324			`add_fract28_o <= {s2o_fract27,s2o_sticky};`
325			`end // advance`
326			`end // posedge clock`
327
328			`// ready is special case`
329			always @(posedge clk `OR_ASYNC_RST) begin
330			`if (rst)`
331			`add_rdy_o <= 0;`
332			`else if(flush_i)`
333			`add_rdy_o <= 0;`
334			`else if(adv_i)`
335			`add_rdy_o <= s2o_ready;`
336			`end // posedge clock`
337
338			`endmodule // pfpu32_addsub`