OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_fpu.vhd] - Blame information for rev 74

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 52 zero_gravi
-- #################################################################################################
2 53 zero_gravi
-- # << NEORV32 - CPU Co-Processor: Single-Prec. Floating Point Unit (RISC-V "Zfinx" Extension) >> #
3 52 zero_gravi
-- # ********************************************************************************************* #
4 53 zero_gravi
-- # The Zfinx floating-point extension uses the integer register file (x) for all FP operations.  #
5
-- # See the official RISC-V specs (https://github.com/riscv/riscv-zfinx) for more information.    #
6 55 zero_gravi
-- #                                                                                               #
7
-- # Design Notes:                                                                                 #
8
-- # * This FPU is based on a multi-cycle architecture and is NOT suited for pipelined operations. #
9
-- # * The hardware design goal was SIZE (performance comes second). All shift operations are done #
10
-- #   using an iterative approach (one bit per clock cycle, no barrel shifters!).                 #
11
-- # * Multiplication (FMUL instruction) will infer DSP blocks (if available).                     #
12
-- # * Subnormal numbers are not supported yet - they are "flushed to zero" before entering the    #
13
-- #   actual FPU core.                                                                            #
14
-- # * Division and square root operations (FDIV, FSQRT) and fused multiply-accumulate operations  #
15
-- #   (F[N]MADD) are not supported yet - they will raise an illegal instruction exception.        #
16
-- # * Rounding mode <100> ("round to nearest, ties to max magnitude") is not supported yet.       #
17
-- # * Signaling NaNs (sNaN) will not be generated by the hardware at all. However, if inserted by #
18
-- #   the programmer they are handled correctly.                                                  #
19 52 zero_gravi
-- # ********************************************************************************************* #
20
-- # BSD 3-Clause License                                                                          #
21
-- #                                                                                               #
22 71 zero_gravi
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved.                                     #
23 52 zero_gravi
-- #                                                                                               #
24
-- # Redistribution and use in source and binary forms, with or without modification, are          #
25
-- # permitted provided that the following conditions are met:                                     #
26
-- #                                                                                               #
27
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
28
-- #    conditions and the following disclaimer.                                                   #
29
-- #                                                                                               #
30
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
31
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
32
-- #    provided with the distribution.                                                            #
33
-- #                                                                                               #
34
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
35
-- #    endorse or promote products derived from this software without specific prior written      #
36
-- #    permission.                                                                                #
37
-- #                                                                                               #
38
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
39
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
40
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
41
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
42
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
43
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
44
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
45
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
46
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
47
-- # ********************************************************************************************* #
48
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
49
-- #################################################################################################
50
 
51
library ieee;
52
use ieee.std_logic_1164.all;
53
use ieee.numeric_std.all;
54
 
55
library neorv32;
56
use neorv32.neorv32_package.all;
57
 
58
entity neorv32_cpu_cp_fpu is
  port (
    -- clock, reset and CPU control --
    clk_i    : in  std_ulogic;                                     -- global clock, rising edge
    rstn_i   : in  std_ulogic;                                     -- global reset, low-active, asynchronous
    ctrl_i   : in  std_ulogic_vector((ctrl_width_c - 1) downto 0); -- CPU main control bus
    start_i  : in  std_ulogic;                                     -- pulse to trigger a new operation
    -- operand inputs --
    cmp_i    : in  std_ulogic_vector(1 downto 0);                  -- comparator status
    rs1_i    : in  std_ulogic_vector((data_width_c - 1) downto 0); -- register file source 1
    rs2_i    : in  std_ulogic_vector((data_width_c - 1) downto 0); -- register file source 2
    -- result outputs --
    res_o    : out std_ulogic_vector((data_width_c - 1) downto 0); -- operation result
    fflags_o : out std_ulogic_vector(4 downto 0);                  -- exception flags
    valid_o  : out std_ulogic                                      -- result output is valid
  );
end entity neorv32_cpu_cp_fpu;
75
 
76
architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is
77
 
78 55 zero_gravi
  -- Binary operation codes of the FPU core (the value carried by cmd.funct / funct_ff) --
  constant op_class_c  : std_ulogic_vector(2 downto 0) := "000"; -- classify (also the decoder's fall-back code)
  constant op_comp_c   : std_ulogic_vector(2 downto 0) := "001"; -- compare
  constant op_i2f_c    : std_ulogic_vector(2 downto 0) := "010"; -- integer to float
  constant op_f2i_c    : std_ulogic_vector(2 downto 0) := "011"; -- float to integer
  constant op_sgnj_c   : std_ulogic_vector(2 downto 0) := "100"; -- sign injection
  constant op_minmax_c : std_ulogic_vector(2 downto 0) := "101"; -- minimum / maximum
  constant op_addsub_c : std_ulogic_vector(2 downto 0) := "110"; -- addition / subtraction
  constant op_mul_c    : std_ulogic_vector(2 downto 0) := "111"; -- multiplication
87
 
88
  -- Component: float-to-integer conversion unit --
  component neorv32_cpu_cp_fpu_f2i
    port (
      -- control interface --
      clk_i      : in  std_ulogic;                     -- global clock, rising edge
      rstn_i     : in  std_ulogic;                     -- global reset, low-active, asynchronous
      start_i    : in  std_ulogic;                     -- trigger operation
      rmode_i    : in  std_ulogic_vector(2 downto 0);  -- rounding mode
      funct_i    : in  std_ulogic;                     -- 0 = signed, 1 = unsigned
      -- decomposed floating-point operand --
      sign_i     : in  std_ulogic;                     -- sign bit
      exponent_i : in  std_ulogic_vector(7 downto 0);  -- exponent field
      mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa field
      class_i    : in  std_ulogic_vector(9 downto 0);  -- operand number class
      -- conversion result --
      result_o   : out std_ulogic_vector(31 downto 0); -- integer result
      flags_o    : out std_ulogic_vector(4 downto 0);  -- exception flags
      done_o     : out std_ulogic                      -- operation done
    );
  end component;
108
 
109
  -- Component: normalization and rounding unit (also performs integer-to-float conversion) --
  component neorv32_cpu_cp_fpu_normalizer
    port (
      -- control interface --
      clk_i      : in  std_ulogic;                     -- global clock, rising edge
      rstn_i     : in  std_ulogic;                     -- global reset, low-active, asynchronous
      start_i    : in  std_ulogic;                     -- trigger operation
      rmode_i    : in  std_ulogic_vector(2 downto 0);  -- rounding mode
      funct_i    : in  std_ulogic;                     -- operating mode: 0 = normalize & round, 1 = int-to-float
      -- operand inputs --
      sign_i     : in  std_ulogic;                     -- sign bit
      exponent_i : in  std_ulogic_vector(8 downto 0);  -- extended exponent
      mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa
      integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input
      class_i    : in  std_ulogic_vector(9 downto 0);  -- class of the input number
      flags_i    : in  std_ulogic_vector(4 downto 0);  -- incoming exception flags
      -- result outputs --
      result_o   : out std_ulogic_vector(31 downto 0); -- result (float or int)
      flags_o    : out std_ulogic_vector(4 downto 0);  -- exception flags
      done_o     : out std_ulogic                      -- operation done
    );
  end component;
131
 
132
  -- Decoded instruction: one strobe per operation class plus the binary operation code --
  type cmd_t is record
    instr_class  : std_ulogic;                    -- classify
    instr_sgnj   : std_ulogic;                    -- sign injection
    instr_comp   : std_ulogic;                    -- compare
    instr_i2f    : std_ulogic;                    -- integer to float
    instr_f2i    : std_ulogic;                    -- float to integer
    instr_minmax : std_ulogic;                    -- minimum / maximum
    instr_addsub : std_ulogic;                    -- addition / subtraction
    instr_mul    : std_ulogic;                    -- multiplication
    funct        : std_ulogic_vector(2 downto 0); -- binary re-encoding (op_*_c constants)
  end record;
  signal cmd      : cmd_t;
  signal funct_ff : std_ulogic_vector(2 downto 0); -- cmd.funct captured by the control engine
146
 
147
  -- Control engine of the co-processor: two-state FSM plus start/valid strobes --
  type ctrl_state_t is (S_IDLE, S_BUSY);
  type ctrl_engine_t is record
    state : ctrl_state_t; -- current FSM state
    start : std_ulogic;   -- start strobe for the functional units
    valid : std_ulogic;   -- drives valid_o
  end record;
  signal ctrl_engine : ctrl_engine_t;
155
 
156
  -- Floating-point operands: index 0 belongs to rs1, index 1 to rs2 --
  type op_data_t  is array (0 to 1) of std_ulogic_vector(31 downto 0);
  type op_class_t is array (0 to 1) of std_ulogic_vector(9 downto 0);
  type fpu_operands_t is record
    rs1       : std_ulogic_vector(31 downto 0); -- operand 1
    rs1_class : std_ulogic_vector(9 downto 0);  -- number class of operand 1
    rs2       : std_ulogic_vector(31 downto 0); -- operand 2
    rs2_class : std_ulogic_vector(9 downto 0);  -- number class of operand 2
    frm       : std_ulogic_vector(2 downto 0);  -- rounding mode
  end record;
  signal op_data      : op_data_t;      -- input operands after subnormal flushing
  signal op_class     : op_class_t;     -- classification of op_data
  signal fpu_operands : fpu_operands_t; -- operand set registered by the control engine
169
 
170
  -- Floating-point comparator state --
  signal cmp_ff                      : std_ulogic_vector(1 downto 0); -- registered copy of cmp_i
  signal comp_equal_ff, comp_less_ff : std_ulogic;                    -- registered "equal" / "less than" results
174
 
175
  -- Common interface record shared by the functional units --
  type fu_interface_t is record
    result : std_ulogic_vector(31 downto 0); -- unit result
    flags  : std_ulogic_vector(4 downto 0);  -- unit exception flags
    start  : std_ulogic;                     -- unit start trigger
    done   : std_ulogic;                     -- unit completion strobe
  end record;
  signal fu_classify, fu_compare, fu_sign_inject, fu_min_max : fu_interface_t;
  signal fu_conv_f2i, fu_addsub, fu_mul                      : fu_interface_t;
  signal fu_core_done : std_ulogic; -- FU operation completed
190
 
191
  -- Interface record of the integer-to-float conversion --
  type fu_i2f_interface_t is record
    result : std_ulogic_vector(31 downto 0); -- float result
    sign   : std_ulogic;                     -- sign
    start  : std_ulogic;                     -- start trigger
    done   : std_ulogic;                     -- completion strobe
  end record;
  signal fu_conv_i2f : fu_i2f_interface_t;
199
 
200
  -- State and wiring of the multiplier unit --
  type multiplier_t is record
    -- data path --
    opa       : unsigned(23 downto 0);          -- mantissa A including the hidden one
    opb       : unsigned(23 downto 0);          -- mantissa B including the hidden one
    buf_ff    : unsigned(47 downto 0);          -- product buffer
    sign      : std_ulogic;                     -- sign of the result
    product   : std_ulogic_vector(47 downto 0); -- product
    exp_sum   : std_ulogic_vector(8 downto 0);  -- exponent sum incl. 1x overflow/underflow bit
    exp_res   : std_ulogic_vector(9 downto 0);  -- resulting exponent incl. 2x overflow/underflow bit
    -- result attributes --
    res_class : std_ulogic_vector(9 downto 0);  -- class of the result
    flags     : std_ulogic_vector(4 downto 0);  -- exception flags
    -- handshake --
    start     : std_ulogic;                     -- start trigger
    latency   : std_ulogic_vector(2 downto 0);  -- unit latency
    done      : std_ulogic;                     -- completion strobe
  end record;
  signal multiplier : multiplier_t;
218
 
219
  -- State and wiring of the adder/subtractor unit --
  type addsub_t is record
    -- operand comparison --
    exp_comp  : std_ulogic_vector(1 downto 0);  -- equal & less
    small_exp : std_ulogic_vector(7 downto 0);  -- exponent of the smaller operand
    small_man : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    large_exp : std_ulogic_vector(7 downto 0);  -- exponent of the larger operand
    large_man : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    -- alignment of the smaller mantissa --
    man_sreg  : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    man_g_ext : std_ulogic;
    man_r_ext : std_ulogic;
    man_s_ext : std_ulogic;
    exp_cnt   : std_ulogic_vector(8 downto 0);
    -- adder/subtractor stage --
    man_comp  : std_ulogic;
    man_s     : std_ulogic_vector(26 downto 0); -- mantissa + hidden one + GRS
    man_l     : std_ulogic_vector(26 downto 0); -- mantissa + hidden one + GRS
    add_stage : std_ulogic_vector(27 downto 0); -- adder result incl. overflow
    -- result --
    res_sign  : std_ulogic;
    res_sum   : std_ulogic_vector(27 downto 0); -- mantissa sum (+1 bit) + GRS bits (for rounding)
    res_class : std_ulogic_vector(9 downto 0);
    flags     : std_ulogic_vector(4 downto 0);  -- exception flags
    -- handshake --
    start     : std_ulogic;
    latency   : std_ulogic_vector(4 downto 0);  -- unit latency
    done      : std_ulogic;
  end record;
  signal addsub : addsub_t;
249
 
250
  -- Wiring towards the normalizer (normalization & rounding, and int-to-float) --
  type normalizer_t is record
    start     : std_ulogic;                     -- start trigger
    mode      : std_ulogic;                     -- operating mode
    sign      : std_ulogic;                     -- sign
    xexp      : std_ulogic_vector(8 downto 0);  -- extended exponent
    xmantissa : std_ulogic_vector(47 downto 0); -- extended mantissa
    result    : std_ulogic_vector(31 downto 0); -- result word
    class     : std_ulogic_vector(9 downto 0);  -- number class
    flags_in  : std_ulogic_vector(4 downto 0);  -- exception flags in
    flags_out : std_ulogic_vector(4 downto 0);  -- exception flags out
    done      : std_ulogic;                     -- completion strobe
  end record;
  signal normalizer : normalizer_t;
264
 
265 52 zero_gravi
begin
266
 
267 55 zero_gravi
-- ****************************************************************************************************************************
268
-- Control
269
-- ****************************************************************************************************************************
270
 
271
  -- Instruction Decoding -------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Stage 1: derive one strobe per operation class from the upper funct12 bits of the
  -- instruction word that is presented on the control bus.
  instr_onehot_decode: process(ctrl_i)
    variable sel_v : std_ulogic_vector(4 downto 0);
  begin
    sel_v := ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c);

    -- default: no operation class selected --
    cmd.instr_class  <= '0';
    cmd.instr_comp   <= '0';
    cmd.instr_i2f    <= '0';
    cmd.instr_f2i    <= '0';
    cmd.instr_sgnj   <= '0';
    cmd.instr_minmax <= '0';
    cmd.instr_addsub <= '0';
    cmd.instr_mul    <= '0';

    -- operation classes identified by all five bits --
    case sel_v is
      when "11100" => cmd.instr_class  <= '1';
      when "10100" => cmd.instr_comp   <= '1';
      when "11010" => cmd.instr_i2f    <= '1';
      when "11000" => cmd.instr_f2i    <= '1';
      when "00100" => cmd.instr_sgnj   <= '1';
      when "00101" => cmd.instr_minmax <= '1';
      when "00010" => cmd.instr_mul    <= '1';
      when others  => null;
    end case;

    -- add/sub is identified by the upper four bits only --
    if (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_8_c) = "0000") then
      cmd.instr_addsub <= '1';
    end if;
  end process instr_onehot_decode;

  -- Stage 2: compress the strobes into the binary operation code. The chain has the same
  -- priority order as before; op_class_c is the fall-back when no other strobe is set.
  funct_encode: process(cmd.instr_mul, cmd.instr_addsub, cmd.instr_minmax, cmd.instr_sgnj,
                        cmd.instr_f2i, cmd.instr_i2f, cmd.instr_comp)
  begin
    if (cmd.instr_mul = '1') then
      cmd.funct <= op_mul_c;
    elsif (cmd.instr_addsub = '1') then
      cmd.funct <= op_addsub_c;
    elsif (cmd.instr_minmax = '1') then
      cmd.funct <= op_minmax_c;
    elsif (cmd.instr_sgnj = '1') then
      cmd.funct <= op_sgnj_c;
    elsif (cmd.instr_f2i = '1') then
      cmd.funct <= op_f2i_c;
    elsif (cmd.instr_i2f = '1') then
      cmd.funct <= op_i2f_c;
    elsif (cmd.instr_comp = '1') then
      cmd.funct <= op_comp_c;
    else
      cmd.funct <= op_class_c;
    end if;
  end process funct_encode;
292 52 zero_gravi
 
293 55 zero_gravi
 
294
  -- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
  -- -------------------------------------------------------------------------------------------
  -- Subnormal numbers are not supported (FIXME / TODO): whenever the exponent field of an
  -- operand is all-zero, its mantissa is replaced by zero. Sign and exponent pass unchanged.
  operand_flush: process(rs1_i, rs2_i)
  begin
    -- sign + exponent --
    op_data(0)(31 downto 23) <= rs1_i(31 downto 23);
    op_data(1)(31 downto 23) <= rs2_i(31 downto 23);

    -- rs1 mantissa --
    if (rs1_i(30 downto 23) = "00000000") then
      op_data(0)(22 downto 0) <= (others => '0'); -- flush
    else
      op_data(0)(22 downto 0) <= rs1_i(22 downto 0);
    end if;

    -- rs2 mantissa --
    if (rs2_i(30 downto 23) = "00000000") then
      op_data(1)(22 downto 0) <= (others => '0'); -- flush
    else
      op_data(1)(22 downto 0) <= rs2_i(22 downto 0);
    end if;
  end process operand_flush;
305
 
306
 
307
  -- Number Classifier ----------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Produces the ten class bits for both (already flushed) input operands.
  number_classifier: process(op_data)
    variable frac_zero_v, exp_zero_v, exp_ones_v    : std_ulogic;
    variable zero_v, inf_v, subn_v, nan_v, normal_v : std_ulogic;
    variable neg_v, pos_v                           : std_ulogic;
  begin
    for i in op_data'range loop -- 0 = rs1, 1 = rs2

      -- field tests: mantissa all-zero, exponent all-zero, exponent all-one --
      if (or_reduce_f(op_data(i)(22 downto 0)) = '0') then
        frac_zero_v := '1';
      else
        frac_zero_v := '0';
      end if;
      if (or_reduce_f(op_data(i)(30 downto 23)) = '0') then
        exp_zero_v := '1';
      else
        exp_zero_v := '0';
      end if;
      if (and_reduce_f(op_data(i)(30 downto 23)) = '1') then
        exp_ones_v := '1';
      else
        exp_ones_v := '0';
      end if;

      -- number kinds --
      zero_v   := exp_zero_v and frac_zero_v;       -- zero
      inf_v    := exp_ones_v and frac_zero_v;       -- infinity
      subn_v   := '0'; -- FIXME / TODO -- exp_zero_v and (not frac_zero_v); -- subnormal (never reported)
      nan_v    := exp_ones_v and (not frac_zero_v); -- NaN
      normal_v := (not subn_v) and (not nan_v) and (not inf_v) and (not zero_v); -- none of the above

      -- sign qualifiers --
      neg_v := op_data(i)(31);
      pos_v := not op_data(i)(31);

      -- class bits --
      op_class(i)(fp_class_neg_inf_c)    <= neg_v and inf_v;    -- negative infinity
      op_class(i)(fp_class_neg_norm_c)   <= neg_v and normal_v; -- negative normal number
      op_class(i)(fp_class_neg_denorm_c) <= neg_v and subn_v;   -- negative subnormal number
      op_class(i)(fp_class_neg_zero_c)   <= neg_v and zero_v;   -- negative zero
      op_class(i)(fp_class_pos_zero_c)   <= pos_v and zero_v;   -- positive zero
      op_class(i)(fp_class_pos_denorm_c) <= pos_v and subn_v;   -- positive subnormal number
      op_class(i)(fp_class_pos_norm_c)   <= pos_v and normal_v; -- positive normal number
      op_class(i)(fp_class_pos_inf_c)    <= pos_v and inf_v;    -- positive infinity
      op_class(i)(fp_class_snan_c)       <= nan_v and (not op_data(i)(22)); -- signaling NaN (mantissa MSB clear)
      op_class(i)(fp_class_qnan_c)       <= nan_v and op_data(i)(22);       -- quiet NaN (mantissa MSB set)
    end loop;
  end process number_classifier;
347
 
348
 
349
  -- Co-Processor Control Engine ------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- While idle, the engine tracks the operation code, the comparator status and the rounding
  -- mode every cycle. When start_i is set it also captures the operands and their classes,
  -- pulses ctrl_engine.start and becomes busy. The busy state ends, with a one-cycle
  -- ctrl_engine.valid pulse, when the functional unit is done or a trap is signaled.
  control_engine_fsm: process(rstn_i, clk_i)
    variable rm_v : std_ulogic_vector(1 downto 0); -- selected two-bit rounding mode
  begin
    if (rstn_i = '0') then
      ctrl_engine.state      <= S_IDLE;
      ctrl_engine.valid      <= '0';
      ctrl_engine.start      <= '0';
      fpu_operands.frm       <= (others => def_rst_val_c);
      fpu_operands.rs1       <= (others => def_rst_val_c);
      fpu_operands.rs1_class <= (others => def_rst_val_c);
      fpu_operands.rs2       <= (others => def_rst_val_c);
      fpu_operands.rs2_class <= (others => def_rst_val_c);
      funct_ff               <= (others => def_rst_val_c);
      cmp_ff                 <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- strobes are single-cycle unless set again below --
      ctrl_engine.valid <= '0';
      ctrl_engine.start <= '0';

      if (ctrl_engine.state = S_BUSY) then -- operation in progress (multi-cycle)
        -- processing done? abort if trap --
        if (fu_core_done = '1') or (ctrl_i(ctrl_trap_c) = '1') then
          ctrl_engine.valid <= '1';
          ctrl_engine.state <= S_IDLE;
        end if;

      else -- S_IDLE: waiting for operation trigger
        funct_ff <= cmd.funct; -- actual operation to execute
        cmp_ff   <= cmp_i;     -- main ALU comparator

        -- rounding mode: funct3 = "111" takes the mode from the frm bits of the control bus,
        -- any other funct3 value supplies the mode itself --
        -- TODO / FIXME "round to nearest, ties to max magnitude" (0b100) is not supported yet,
        -- hence the upper rounding-mode bit is always zero
        if (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "111") then
          rm_v := ctrl_i(ctrl_alu_frm1_c downto ctrl_alu_frm0_c);
        else
          rm_v := ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c);
        end if;
        fpu_operands.frm <= '0' & rm_v;

        -- launch --
        if (start_i = '1') then
          fpu_operands.rs1       <= op_data(0);
          fpu_operands.rs1_class <= op_class(0);
          fpu_operands.rs2       <= op_data(1);
          fpu_operands.rs2_class <= op_class(1);
          ctrl_engine.start      <= '1';
          ctrl_engine.state      <= S_BUSY;
        end if;
      end if;
    end if;
  end process control_engine_fsm;

  -- operation done / valid output --
  valid_o <= ctrl_engine.valid;
412
 
413
 
414
  -- Functional Unit Interface (operation-start trigger) ------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- The start pulse of the control engine is forwarded only to the unit whose instruction
  -- strobe is set.
  fu_start_dispatch: process(ctrl_engine, cmd)
  begin
    fu_classify.start    <= cmd.instr_class  and ctrl_engine.start;
    fu_compare.start     <= cmd.instr_comp   and ctrl_engine.start;
    fu_sign_inject.start <= cmd.instr_sgnj   and ctrl_engine.start;
    fu_min_max.start     <= cmd.instr_minmax and ctrl_engine.start;
    fu_conv_i2f.start    <= cmd.instr_i2f    and ctrl_engine.start;
    fu_conv_f2i.start    <= cmd.instr_f2i    and ctrl_engine.start;
    fu_addsub.start      <= cmd.instr_addsub and ctrl_engine.start;
    fu_mul.start         <= cmd.instr_mul    and ctrl_engine.start;
  end process fu_start_dispatch;
424
 
425
 
426
-- ****************************************************************************************************************************
427
-- FPU Core - Functional Units
428
-- ****************************************************************************************************************************
429
 
430
  -- Number Classifier (FCLASS) -------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- The result is the registered class vector of operand 1, zero-extended to 32 bits.
  fclass_result: process(fpu_operands)
  begin
    fu_classify.result             <= (others => '0');
    fu_classify.result(9 downto 0) <= fpu_operands.rs1_class; -- overrides the low ten default bits
  end process fclass_result;

  fu_classify.flags <= (others => '0');   -- does not generate flags at all
  fu_classify.done  <= fu_classify.start; -- done in the same cycle the unit is started
436
 
437
 
438
  -- Floating-Point Comparator --------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Registers the "equal" and "less than" relation of operand 1 versus operand 2. The raw
  -- comparison is taken from the registered main-ALU comparator status (cmp_ff) and is
  -- corrected for the floating-point special cases (same-sign infinities, +/-zero) and for
  -- the operand signs. NaN operands are NOT handled here. The process also generates the
  -- one-cycle latency (done strobes) of the compare and min/max units.
  float_comparator: process(rstn_i, clk_i)
    variable cond_v      : std_ulogic_vector(1 downto 0); -- rs1 sign & rs2 sign
    variable same_inf_v  : boolean; -- both operands are the same infinity
    variable both_zero_v : boolean; -- both operands are zero (any sign combination)
  begin
    if (rstn_i = '0') then
      comp_equal_ff   <= def_rst_val_c;
      comp_less_ff    <= def_rst_val_c;
      fu_compare.done <= def_rst_val_c;
      fu_min_max.done <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- special operand combinations --
      same_inf_v  := ((fpu_operands.rs1_class(fp_class_pos_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1')) or -- +inf, +inf
                     ((fpu_operands.rs1_class(fp_class_neg_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1'));   -- -inf, -inf
      both_zero_v := ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs1_class(fp_class_neg_zero_c) = '1')) and
                     ((fpu_operands.rs2_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1'));  -- +/-zero, +/-zero

      -- equal --
      if same_inf_v or both_zero_v or
         (cmp_ff(cmp_equal_c) = '1') then -- identical in every way (comparator result from main ALU)
        comp_equal_ff <= '1';
      else
        comp_equal_ff <= '0';
      end if;

      -- less than --
      if same_inf_v or both_zero_v then -- equal values are never "less than"
        comp_less_ff <= '0';
      else
        cond_v := fpu_operands.rs1(31) & fpu_operands.rs2(31);
        case cond_v is
          when "10"   => comp_less_ff <= '1'; -- rs1 negative, rs2 positive
          when "01"   => comp_less_ff <= '0'; -- rs1 positive, rs2 negative
          when "00"   => comp_less_ff <= cmp_ff(cmp_less_c); -- both positive (comparator result from main ALU)
          -- both negative: the order of the raw comparison is reversed. "not less" alone is
          -- also true for identical operands, so equality has to be excluded explicitly.
          when "11"   => comp_less_ff <= (not cmp_ff(cmp_less_c)) and (not cmp_ff(cmp_equal_c));
          when others => comp_less_ff <= '0'; -- undefined
        end case;
      end if;

      -- comparator latency --
      fu_compare.done <= fu_compare.start; -- for actual comparison operation
      fu_min_max.done <= fu_min_max.start; -- for min/max operations
    end if;
  end process float_comparator;
482
 
483
 
484
  -- Comparison (FEQ/FLT/FLE) ---------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Selects the requested relation from the registered comparator results. Only bit 0 of
  -- the result can be set, and it is forced to zero whenever at least one operand is a NaN.
  float_comparison: process(fpu_operands, ctrl_i, comp_equal_ff, comp_less_ff)
    variable any_snan_v : std_ulogic; -- at least one input is sNaN
    variable any_qnan_v : std_ulogic; -- at least one input is qNaN
    variable no_nan_v   : std_ulogic; -- neither input is a NaN
    variable relation_v : std_ulogic; -- selected relation before NaN masking
  begin
    -- NaN detection --
    any_snan_v := fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c);
    any_qnan_v := fpu_operands.rs1_class(fp_class_qnan_c) or fpu_operands.rs2_class(fp_class_qnan_c);
    no_nan_v   := not (any_snan_v or any_qnan_v);

    -- relation select (lower two funct3 bits) --
    if (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") then -- FLE: less than or equal
      relation_v := comp_less_ff or comp_equal_ff;
    elsif (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "01") then -- FLT: less than
      relation_v := comp_less_ff;
    elsif (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") then -- FEQ: equal
      relation_v := comp_equal_ff;
    else -- undefined
      relation_v := '0';
    end if;

    -- result word --
    fu_compare.result    <= (others => '0');
    fu_compare.result(0) <= relation_v and no_nan_v; -- result is zero if either input is NaN
  end process float_comparison;

  -- latency --
  -- -> done in "float_comparator"

  -- exceptions --
  -- NOTE(review): the flags are tied to zero, so this unit reports no exception for NaN
  -- operands - confirm that this matches the intended RISC-V flag behavior.
  fu_compare.flags <= (others => '0');
513
 
514
 
515
  -- Min/Max Select (FMIN/FMAX) -------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Forwards one of the two registered operands (or the canonical quiet NaN) as the min/max
  -- result. The lowest funct3 bit picks the operation (0 = min, 1 = max).
  -- cond_v(2) = rs1 is NaN, cond_v(1) = rs2 is NaN, cond_v(0) = operand select (0 = rs1, 1 = rs2).
  min_max_select: process(fpu_operands, comp_less_ff, ctrl_i) -- only signals that are actually read
    variable cond_v : std_ulogic_vector(2 downto 0);
  begin
    -- comparison result - check for special cases: -0 is less than +0
    if ((fpu_operands.rs1_class(fp_class_neg_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_zero_c) = '1')) then
      cond_v(0) := ctrl_i(ctrl_ir_funct3_0_c); -- rs1 = -0, rs2 = +0: min -> rs1, max -> rs2
    elsif ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1')) then
      cond_v(0) := not ctrl_i(ctrl_ir_funct3_0_c); -- rs1 = +0, rs2 = -0: min -> rs2, max -> rs1
    else -- "normal" comparison
      cond_v(0) := comp_less_ff xnor ctrl_i(ctrl_ir_funct3_0_c); -- min/max select
    end if;

    -- number NaN check --
    cond_v(2) := fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs1_class(fp_class_qnan_c);
    cond_v(1) := fpu_operands.rs2_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_qnan_c);

    -- data output --
    case cond_v is
      when "000"         => fu_min_max.result <= fpu_operands.rs1;
      when "001"         => fu_min_max.result <= fpu_operands.rs2;
      when "010" | "011" => fu_min_max.result <= fpu_operands.rs1; -- if one input is NaN output the non-NaN one
      when "100" | "101" => fu_min_max.result <= fpu_operands.rs2; -- if one input is NaN output the non-NaN one
      when others        => fu_min_max.result <= fp_single_qnan_c; -- output quiet NaN if both inputs are NaN
    end case;
  end process min_max_select;

  -- latency --
  -- -> done in "float_comparator"

  -- exceptions --
  fu_min_max.flags <= (others => '0'); -- this unit itself never raises an exception flag
548
 
549
 
550
  -- Convert: Float to [unsigned] Integer (FCVT.W.S) ----------------------------------------
551
  -- -------------------------------------------------------------------------------------------
552
  -- Float-to-integer converter instance: rs1 is split into sign / exponent / mantissa and
  -- passed together with its pre-computed class; result, flags and done are returned via fu_conv_f2i.
  neorv32_cpu_cp_fpu_f2i_inst: neorv32_cpu_cp_fpu_f2i
553
  port map (
554
    -- control --
555
    clk_i      => clk_i,                          -- global clock, rising edge
556
    rstn_i     => rstn_i,                         -- global reset, low-active, async
557
    start_i    => fu_conv_f2i.start,              -- trigger operation
558
    rmode_i    => fpu_operands.frm,               -- rounding mode
559
    funct_i    => ctrl_i(ctrl_ir_funct12_0_c),    -- 0=signed, 1=unsigned
560
    -- input --
561
    sign_i     => fpu_operands.rs1(31),           -- sign
562
    exponent_i => fpu_operands.rs1(30 downto 23), -- exponent
563
    mantissa_i => fpu_operands.rs1(22 downto 00), -- mantissa
564
    class_i    => fpu_operands.rs1_class,         -- operand class
565
    -- output --
566
    result_o   => fu_conv_f2i.result,             -- integer result
567
    flags_o    => fu_conv_f2i.flags,              -- exception flags
568
    done_o     => fu_conv_f2i.done                -- operation done
569
  );
570
 
571
 
572
  -- Sign-Injection (FSGNJ) -----------------------------------------------------------------
573
  -- -------------------------------------------------------------------------------------------
574
  -- Combinational sign injection: the result takes bits 30..0 from rs1 unchanged; only the sign bit (31)
  -- is computed, selected by the two low funct3 bits of the instruction word.
  sign_injector: process(ctrl_i, fpu_operands)
575
  begin
576
    case ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) is
577
      when "00"   => fu_sign_inject.result(31) <= fpu_operands.rs2(31); -- FSGNJ
578
      when "01"   => fu_sign_inject.result(31) <= not fpu_operands.rs2(31); -- FSGNJN
579
      when "10"   => fu_sign_inject.result(31) <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- FSGNJX
580
      when others => fu_sign_inject.result(31) <= fpu_operands.rs2(31); -- undefined
581
    end case;
582
    fu_sign_inject.result(30 downto 0) <= fpu_operands.rs1(30 downto 0);
583
    fu_sign_inject.flags <= (others => '0'); -- does not generate flags
584
  end process sign_injector;
585
 
586
  -- latency --
587
  fu_sign_inject.done <= fu_sign_inject.start;
588
 
589
 
590
  -- Convert: [unsigned] Integer to Float (FCVT.S.W) ----------------------------------------
591
  -- -------------------------------------------------------------------------------------------
592 56 zero_gravi
  -- Integer-to-float pre-processing: registers the magnitude of rs1_i and its sign.
  -- ctrl_i(ctrl_ir_funct12_0_c) = '0' selects signed interpretation (negative inputs are negated),
  -- otherwise rs1_i is passed through as unsigned value. The "done" handshake is the start pulse
  -- delayed by one clock cycle.
  convert_i2f: process(rstn_i, clk_i)
593 55 zero_gravi
  begin
594
    -- this process only computes the absolute input value
595
    -- the actual conversion is done by the normalizer
596 56 zero_gravi
    if (rstn_i = '0') then
597
      fu_conv_i2f.result <= (others => def_rst_val_c);
598
      fu_conv_i2f.sign   <= def_rst_val_c;
      fu_conv_i2f.done   <= '0'; -- control/handshake signal: needs a defined value after reset
599
    elsif rising_edge(clk_i) then
600 55 zero_gravi
      if (ctrl_i(ctrl_ir_funct12_0_c) = '0') and (rs1_i(31) = '1') then -- convert signed integer
601
        fu_conv_i2f.result <= std_ulogic_vector(0 - unsigned(rs1_i)); -- two's complement negation -> absolute value
602
        fu_conv_i2f.sign   <= rs1_i(31); -- original sign
603
      else -- convert unsigned integer
604
        fu_conv_i2f.result <= rs1_i;
605
        fu_conv_i2f.sign   <= '0';
606
      end if;
607
      fu_conv_i2f.done <= fu_conv_i2f.start; -- actual conversion is done by the normalizer unit
608
    end if;
609
  end process convert_i2f;
610
 
611
 
612
  -- Multiplier Core (FMUL) -----------------------------------------------------------------
613
  -- -------------------------------------------------------------------------------------------
614 56 zero_gravi
  -- Clocked multiplier data path for FMUL.
  -- * operands (with hidden one) are latched into opa/opb when multiplier.start is set
  -- * the product passes two further register stages (buf_ff -> product)
  -- * result sign, biased exponent, OF/UF/NV flags are registered every cycle from the current operands
  -- * a shift register fed by multiplier.start tracks the latency (its MSB is used as "done" outside this process)
  multiplier_core: process(rstn_i, clk_i)
615
  begin
616
    if (rstn_i = '0') then
617
      multiplier.opa                <= (others => '-'); -- these might be DSP regs!
618
      multiplier.opb                <= (others => '-'); -- these might be DSP regs!
619
      multiplier.buf_ff             <= (others => '-'); -- these might be DSP regs!
620
      multiplier.product            <= (others => '-'); -- these might be DSP regs!
621
      multiplier.sign               <= def_rst_val_c;
622
      multiplier.exp_res            <= (others => def_rst_val_c);
623
      multiplier.flags(fp_exc_of_c) <= def_rst_val_c;
624
      multiplier.flags(fp_exc_uf_c) <= def_rst_val_c;
625
      multiplier.flags(fp_exc_nv_c) <= def_rst_val_c;
626
      multiplier.latency            <= (others => def_rst_val_c);
627
    elsif rising_edge(clk_i) then
628 55 zero_gravi
      -- multiplier core --
629
      if (multiplier.start = '1') then -- FIXME / TODO remove buffer?
630
        multiplier.opa <= unsigned('1' & fpu_operands.rs1(22 downto 0)); -- append hidden one
631 56 zero_gravi
        multiplier.opb <= unsigned('1' & fpu_operands.rs2(22 downto 0)); -- append hidden one
632 55 zero_gravi
      end if;
633
      multiplier.buf_ff  <= multiplier.opa * multiplier.opb;
634
      multiplier.product <= std_ulogic_vector(multiplier.buf_ff(47 downto 0)); -- let the register balancing do the magic here
635
      multiplier.sign    <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- resulting sign
636
 
637
      -- exponent computation --
      -- exp_sum (computed outside this process) is the sum of both biased exponents; remove one bias (127)
638
      multiplier.exp_res <= std_ulogic_vector(unsigned('0' & multiplier.exp_sum) - 127);
      -- the checks below read the registered exp_res, i.e. the value stored in the previous clock cycle
639
      if (multiplier.exp_res(multiplier.exp_res'left) = '1') then -- underflow (exp_res is "negative")
640
        multiplier.flags(fp_exc_of_c) <= '0';
641
        multiplier.flags(fp_exc_uf_c) <= '1';
642
      elsif (multiplier.exp_res(multiplier.exp_res'left-1) = '1') then -- overflow
643
        multiplier.flags(fp_exc_of_c) <= '1';
644
        multiplier.flags(fp_exc_uf_c) <= '0';
645
      else
646
        multiplier.flags(fp_exc_of_c) <= '0';
647
        multiplier.flags(fp_exc_uf_c) <= '0';
648
      end if;
649
 
650
      -- invalid operation --
651
      multiplier.flags(fp_exc_nv_c) <=
652
        ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_neg_zero_c)) and
653
         (fpu_operands.rs2_class(fp_class_pos_inf_c)  or fpu_operands.rs2_class(fp_class_neg_inf_c))) or -- mul(+/-zero, +/-inf)
654
        ((fpu_operands.rs1_class(fp_class_pos_inf_c)  or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
655
         (fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c))); -- mul(+/-inf, +/-zero)
656
 
657
      -- latency shift register --
      -- shifts left by one each cycle, inserting the start pulse at bit 0
658
      multiplier.latency <= multiplier.latency(multiplier.latency'left-1 downto 0) & multiplier.start;
659
    end if;
660
  end process multiplier_core;
661
 
662
  -- exponent sum --
  -- both biased exponents are zero-extended by one bit before the addition
663
  multiplier.exp_sum <= std_ulogic_vector(unsigned('0' & fpu_operands.rs1(30 downto 23)) + unsigned('0' & fpu_operands.rs2(30 downto 23)));
664
 
665
  -- latency --
  -- "done" is the MSB of the latency shift register that is fed by multiplier.start
666
  multiplier.start <= fu_mul.start;
667
  multiplier.done  <= multiplier.latency(multiplier.latency'left);
668
  fu_mul.done      <= multiplier.done;
669
 
670
  -- unused exception flags --
671
  multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
672
  multiplier.flags(fp_exc_nx_c) <= '0'; -- inexact: not possible here
673
 
674
 
675
  -- result class -- 
676 56 zero_gravi
  -- Registers the number class of the multiplication result, derived only from the classes of
  -- both operands (a = rs1, b = rs2). The subnormal result classes are always cleared here
  -- ("is evaluated by the normalizer").
  multiplier_class_core: process(rstn_i, clk_i)
677 55 zero_gravi
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
678
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
679
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
680
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
681
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
682
  begin
683 56 zero_gravi
    if (rstn_i = '0') then
684 74 zero_gravi
      multiplier.res_class <= (others => def_rst_val_c);
685 56 zero_gravi
    elsif rising_edge(clk_i) then
686 55 zero_gravi
      -- minions --
      -- short aliases for the individual class bits of operand a (rs1) and operand b (rs2)
687
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
688
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
689
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
690
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
691
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
692
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
693
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
694
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
695
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
696
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);
697
 
698
      -- +normal --
699
      multiplier.res_class(fp_class_pos_norm_c) <=
700
        (a_pos_norm_v and b_pos_norm_v) or -- +norm * +norm
701
        (a_neg_norm_v and b_neg_norm_v);   -- -norm * -norm
702
      -- -normal --
703
      multiplier.res_class(fp_class_neg_norm_c) <=
704
        (a_pos_norm_v and b_neg_norm_v) or -- +norm * -norm
705
        (a_neg_norm_v and b_pos_norm_v);   -- -norm * +norm
706
 
707
      -- +infinity --
708
      multiplier.res_class(fp_class_pos_inf_c) <=
709
        (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    * +inf
710
        (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    * -inf
711
        (a_pos_norm_v and b_pos_inf_v)  or -- +norm   * +inf
712
        (a_pos_inf_v  and b_pos_norm_v) or -- +inf    * +norm
713
        (a_neg_norm_v and b_neg_inf_v)  or -- -norm   * -inf
714
        (a_neg_inf_v  and b_neg_norm_v) or -- -inf    * -norm
        (a_pos_subn_v and b_pos_inf_v)  or -- +denorm * +inf  (was missing; mirrors the -infinity equation)
        (a_pos_inf_v  and b_pos_subn_v) or -- +inf    * +denorm (was missing; mirrors the -infinity equation)
715
        (a_neg_subn_v and b_neg_inf_v)  or -- -denorm * -inf
716
        (a_neg_inf_v  and b_neg_subn_v);   -- -inf    * -denorm
717
      -- -infinity --
718
      multiplier.res_class(fp_class_neg_inf_c) <=
719
        (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    * -inf
720
        (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    * +inf
721
        (a_pos_norm_v and b_neg_inf_v)  or -- +norm   * -inf
722
        (a_neg_inf_v  and b_pos_norm_v) or -- -inf    * +norm
723
        (a_neg_norm_v and b_pos_inf_v)  or -- -norm   * +inf
724
        (a_pos_inf_v  and b_neg_norm_v) or -- +inf    * -norm
725
        (a_pos_subn_v and b_neg_inf_v)  or -- +denorm * -inf
726
        (a_neg_inf_v  and b_pos_subn_v) or -- -inf    * +de-norm
727
        (a_neg_subn_v and b_pos_inf_v)  or -- -denorm * +inf
728
        (a_pos_inf_v  and b_neg_subn_v);   -- +inf    * -de-norm
729
 
730
      -- +zero --
731
      multiplier.res_class(fp_class_pos_zero_c) <=
732
        (a_pos_zero_v and b_pos_zero_v) or -- +zero   * +zero
733
        (a_pos_zero_v and b_pos_norm_v) or -- +zero   * +norm
734
        (a_pos_zero_v and b_pos_subn_v) or -- +zero   * +denorm
735
        (a_neg_zero_v and b_neg_zero_v) or -- -zero   * -zero
736
        (a_neg_zero_v and b_neg_norm_v) or -- -zero   * -norm
737
        (a_neg_zero_v and b_neg_subn_v) or -- -zero   * -denorm
738
        (a_pos_norm_v and b_pos_zero_v) or -- +norm   * +zero
739
        (a_pos_subn_v and b_pos_zero_v) or -- +denorm * +zero
740
        (a_neg_norm_v and b_neg_zero_v) or -- -norm   * -zero
741
        (a_neg_subn_v and b_neg_zero_v);   -- -denorm * -zero
742
 
743
      -- -zero --
744
      multiplier.res_class(fp_class_neg_zero_c) <=
745
        (a_pos_zero_v and b_neg_zero_v) or -- +zero   * -zero
746
        (a_pos_zero_v and b_neg_norm_v) or -- +zero   * -norm
747
        (a_pos_zero_v and b_neg_subn_v) or -- +zero   * -denorm
748
        (a_neg_zero_v and b_pos_zero_v) or -- -zero   * +zero
749
        (a_neg_zero_v and b_pos_norm_v) or -- -zero   * +norm
750
        (a_neg_zero_v and b_pos_subn_v) or -- -zero   * +denorm
751
        (a_neg_norm_v and b_pos_zero_v) or -- -norm   * +zero
752
        (a_neg_subn_v and b_pos_zero_v) or -- -denorm * +zero
753
        (a_pos_norm_v and b_neg_zero_v) or -- +norm   * -zero
754
        (a_pos_subn_v and b_neg_zero_v);   -- +denorm * -zero
755
 
756
      -- sNaN --
757
      multiplier.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
758
      -- qNaN --
759
      multiplier.res_class(fp_class_qnan_c) <=
760
        (a_snan_v or b_snan_v) or -- any input is sNaN
761
        (a_qnan_v or b_qnan_v) or -- any input is qNaN
762
        ((a_pos_inf_v  or a_neg_inf_v)  and (b_pos_zero_v or b_neg_zero_v)) or -- +/-inf * +/-zero
763
        ((a_pos_zero_v or a_neg_zero_v) and (b_pos_inf_v  or b_neg_inf_v));    -- +/-zero * +/-inf
764 74 zero_gravi
 
765
      -- subnormal result --
766
      multiplier.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
767
      multiplier.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
768 55 zero_gravi
    end if;
769
  end process multiplier_class_core;
770
 
771
  -- unused --
772
  fu_mul.result <= (others => '0');
773
  fu_mul.flags  <= (others => '0');
774
 
775
 
776
  -- Adder/Subtractor Core (FADD, FSUB) -----------------------------------------------------
777
  -- -------------------------------------------------------------------------------------------
778 56 zero_gravi
  -- Clocked adder/subtractor data path for FADD/FSUB.
  -- Steps (tracked by addsub.latency):
  --   latency(0): start delayed by one cycle (exponent comparison result becomes valid)
  --   latency(1): mantissa of the operand with the smaller exponent is shifted right, one bit per cycle
  --   latency(2): shift done (shift counter has reached the larger exponent)
  --   latency(3): adder stage
  --   latency(4): final stage (used as "done" outside this process)
  -- ctrl_i(ctrl_ir_funct12_7_c): '0' = addition, '1' = subtraction.
  adder_subtractor_core: process(rstn_i, clk_i)
779
  begin
780
    if (rstn_i = '0') then
781
      addsub.latency   <= (others => def_rst_val_c);
782
      addsub.exp_comp  <= (others => def_rst_val_c);
783
      addsub.man_sreg  <= (others => def_rst_val_c);
784
      addsub.exp_cnt   <= (others => def_rst_val_c);
785
      addsub.man_g_ext <= def_rst_val_c;
786
      addsub.man_r_ext <= def_rst_val_c;
787
      addsub.man_s_ext <= def_rst_val_c;
788
      addsub.man_comp  <= def_rst_val_c;
789
      addsub.add_stage <= (others => def_rst_val_c);
790
      addsub.res_sign  <= def_rst_val_c;
791
      addsub.flags(fp_exc_nv_c) <= def_rst_val_c;
792
    elsif rising_edge(clk_i) then
793 55 zero_gravi
      -- arbitration / latency --
794
      if (ctrl_engine.state = S_IDLE) then -- hacky "reset"
795
        addsub.latency <= (others => '0');
796
      else
797
        addsub.latency(0) <= addsub.start; -- input comparator delay
798
        if (addsub.latency(0) = '1') then
799
          addsub.latency(1) <= '1';
800
          addsub.latency(2) <= '0';
801
        elsif (addsub.exp_cnt(7 downto 0) = addsub.large_exp) then -- radix point is aligned (shift counter reached larger exponent)
802
          addsub.latency(1) <= '0';
803
          addsub.latency(2) <= addsub.latency(1) and (not addsub.latency(0)); -- "shift done"
804
        end if;
805
        addsub.latency(3) <= addsub.latency(2); -- adder stage
806
        addsub.latency(4) <= addsub.latency(3); -- final stage
807
      end if;
808
 
809
      -- exponent check: find smaller number (radix-offset-only) --
810
      if (unsigned(fpu_operands.rs1(30 downto 23)) < unsigned(fpu_operands.rs2(30 downto 23))) then
811
        addsub.exp_comp(0) <= '1'; -- rs1 < rs2
812
      else
813
        addsub.exp_comp(0) <= '0'; -- rs1 >= rs2
814
      end if;
815
      if (unsigned(fpu_operands.rs1(30 downto 23)) = unsigned(fpu_operands.rs2(30 downto 23))) then
816
        addsub.exp_comp(1) <= '1'; -- rs1 == rs2
817
      else -- rs1 != rs2
818
        addsub.exp_comp(1) <= '0';
819
      end if;
820
 
821
      -- shift right small mantissa to align radix point --
822
      if (addsub.latency(0) = '1') then
823
        if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
824
             fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c)) = '0') then -- no input is zero
825
          addsub.man_sreg <= addsub.small_man;
826
        else
827
          addsub.man_sreg <= (others => '0');
828
        end if;
829
        addsub.exp_cnt   <= '0' & addsub.small_exp;
830
        addsub.man_g_ext <= '0';
831
        addsub.man_r_ext <= '0';
832
        addsub.man_s_ext <= '0';
833
      elsif (addsub.exp_cnt(7 downto 0) /= addsub.large_exp) then -- shift right until same magnitude
834
        addsub.man_sreg  <= '0' & addsub.man_sreg(addsub.man_sreg'left downto 1);
835
        addsub.man_g_ext <= addsub.man_sreg(0);
836
        addsub.man_r_ext <= addsub.man_g_ext;
837
        addsub.man_s_ext <= addsub.man_s_ext or addsub.man_r_ext; -- sticky bit
838
        addsub.exp_cnt   <= std_ulogic_vector(unsigned(addsub.exp_cnt) + 1);
839
      end if;
840
 
841
      -- mantissa check: find smaller number (magnitude-only) --
842
      if (unsigned(addsub.man_sreg) <= unsigned(addsub.large_man)) then
843
        addsub.man_comp <= '1';
844
      else
845
        addsub.man_comp <= '0';
846
      end if;
847
 
848
      -- actual addition/subtraction (incl. overflow) --
      -- effective operation: add if (operation xor sign(rs1) xor sign(rs2)) = '0', subtract otherwise
849
      if ((ctrl_i(ctrl_ir_funct12_7_c) xor (fpu_operands.rs1(31) xor fpu_operands.rs2(31))) = '0') then -- add
850
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) + unsigned('0' & addsub.man_s));
851
      else -- sub
852
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) - unsigned('0' & addsub.man_s));
853
      end if;
854
 
855
      -- result sign --
856
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- add
857
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
858
          addsub.res_sign <= fpu_operands.rs1(31);
859
        else -- different signs
860
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
861
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
862
          else
863
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
864
          end if;
865
        end if;
866
      else -- sub
867
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
868
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
869
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
870
          else
871
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
872
          end if;
873
        else -- different signs
874
          addsub.res_sign <= fpu_operands.rs1(31);
875
        end if;
876
      end if;
877
 
878
      -- exception flags --
      -- invalid operation: both operands are infinite AND the effective operation is a subtraction
      -- (e.g. +inf + -inf or +inf - +inf); same-direction infinities like +inf + +inf are valid
879
      addsub.flags(fp_exc_nv_c) <= ((fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
880
                                    (fpu_operands.rs2_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c))) and
                                   (ctrl_i(ctrl_ir_funct12_7_c) xor fpu_operands.rs1(31) xor fpu_operands.rs2(31)); -- inf - inf (effective subtraction only)
881
    end if;
882
  end process adder_subtractor_core;
883
 
884
  -- exceptions - unused -- 
885
  addsub.flags(fp_exc_dz_c) <= '0'; -- division by zero -> not possible
886
  addsub.flags(fp_exc_of_c) <= '0'; -- not possible here (but may occur in normalizer)
887
  addsub.flags(fp_exc_uf_c) <= '0'; -- not possible here (but may occur in normalizer)
888
  addsub.flags(fp_exc_nx_c) <= '0'; -- not possible here (but may occur in normalizer)
889
 
890
  -- exponent check: find smaller number (magnitude-only) --
  -- exp_comp(0) = '1' means exponent(rs1) < exponent(rs2): rs1 provides the "small" operand, rs2 the "large" one
  -- the mantissas are extended by the hidden one
891
  addsub.small_exp <=        fpu_operands.rs1(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs2(30 downto 23);
892
  addsub.large_exp <=        fpu_operands.rs2(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs1(30 downto 23);
893
  addsub.small_man <= ('1' & fpu_operands.rs1(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs2(22 downto 00));
894
  addsub.large_man <= ('1' & fpu_operands.rs2(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs1(22 downto 00));
895
 
896
  -- mantissa check: find smaller number (magnitude-only) --
  -- man_comp = '1' means shifted mantissa <= large mantissa; both adder inputs are extended by three
  -- low-order bits (guard / round / sticky for the shifted mantissa, "000" for the large mantissa)
897
  addsub.man_s <= (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext) when (addsub.man_comp = '1') else (addsub.large_man & "000");
898
  addsub.man_l <= (addsub.large_man & "000") when (addsub.man_comp = '1') else (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext);
899
 
900
  -- latency --
  -- "done" is the MSB of the latency register maintained by adder_subtractor_core
901
  addsub.start   <= fu_addsub.start;
902
  addsub.done    <= addsub.latency(addsub.latency'left);
903
  fu_addsub.done <= addsub.done;
904
 
905
  -- mantissa result --
906
  addsub.res_sum <= addsub.add_stage(27 downto 0);
907
 
908
 
909
  -- result class -- 
910 56 zero_gravi
  -- Registers the number class of the addition/subtraction result, derived only from the classes of
  -- both operands (a = rs1, b = rs2) and the operation select ctrl_i(ctrl_ir_funct12_7_c)
  -- ('0' = addition, '1' = subtraction). Infinity, zero and qNaN classes are operation-specific;
  -- normal, sNaN and subnormal classes are assigned independently of the operation.
  adder_subtractor_class_core: process(rstn_i, clk_i)
911 55 zero_gravi
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
912
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
913
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
914
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
915
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
916
  begin
917 56 zero_gravi
    if (rstn_i = '0') then
918 74 zero_gravi
      addsub.res_class <= (others => def_rst_val_c);
919 56 zero_gravi
    elsif rising_edge(clk_i) then
920 55 zero_gravi
      -- minions --
      -- short aliases for the individual class bits of operand a (rs1) and operand b (rs2)
921
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
922
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
923
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
924
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
925
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
926
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
927
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
928
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
929
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
930
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);
931
 
932
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- addition
933
        -- +infinity --
934
        addsub.res_class(fp_class_pos_inf_c) <=
935
          (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    + +inf
936
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    + +zero
937
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   + +inf
938
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    + -zero
939
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   + +inf
940
          --
941
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    + +norm
942
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   + +inf
943
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    + +denorm
944
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm + +inf
945
          --
946
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    + -norm
947
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   + +inf
948
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    + -denorm
949
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm + +inf
950
        -- -infinity --
951
        addsub.res_class(fp_class_neg_inf_c) <=
952
          (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    + -inf
953
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    + +zero
954
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   + -inf
955
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    + -zero
956
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   + -inf
957
          --
958
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    + +norm
959
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   + -inf
960
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    + -norm
961
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   + -inf
962
          --
963
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    + +denorm
964
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm + -inf
965
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    + -denorm
966
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm + -inf
967
 
968
        -- +zero --
969
        addsub.res_class(fp_class_pos_zero_c) <=
970
          (a_pos_zero_v and b_pos_zero_v) or -- +zero + +zero
971
          (a_pos_zero_v and b_neg_zero_v) or -- +zero + -zero
972
          (a_neg_zero_v and b_pos_zero_v);   -- -zero + +zero
973
        -- -zero --
974
        addsub.res_class(fp_class_neg_zero_c) <=
975
          (a_neg_zero_v and b_neg_zero_v);   -- -zero + -zero
976
 
977
        -- qNaN --
978
        addsub.res_class(fp_class_qnan_c) <=
979
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
980
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
981
          (a_pos_inf_v and b_neg_inf_v) or -- +inf + -inf
982
          (a_neg_inf_v and b_pos_inf_v);   -- -inf + +inf
983
 
984
      else -- subtraction
985
        -- +infinity --
986
        addsub.res_class(fp_class_pos_inf_c) <=
987
          (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    - -inf
988
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    - +zero
989
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    - -zero
990
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    - +norm
991
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    - +denorm
992
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    - -norm
993
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    - -denorm
994
          --
995
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   - -inf
996
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   - -inf
997
          --
998
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   - -inf
999
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm - -inf
1000
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   - -inf
1001
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm - -inf
1002
        -- -infinity --
1003
        addsub.res_class(fp_class_neg_inf_c) <=
1004
          (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    - +inf
1005
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    - +zero
1006
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    - -zero
1007
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    - +norm
1008
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    - +denorm
1009
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    - -norm
1010
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    - -denorm
1011
          --
1012
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   - +inf
1013
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   - +inf
1014
          --
1015
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   - +inf
1016
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm - +inf
1017
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   - +inf
1018
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm - +inf
1019
 
1020
        -- +zero --
1021
        addsub.res_class(fp_class_pos_zero_c) <=
1022
          (a_pos_zero_v and b_pos_zero_v) or -- +zero - +zero
1023
          (a_pos_zero_v and b_neg_zero_v) or -- +zero - -zero
1024
          (a_neg_zero_v and b_neg_zero_v);   -- -zero - -zero
1025
        -- -zero --
1026
        addsub.res_class(fp_class_neg_zero_c) <=
1027
          (a_neg_zero_v and b_pos_zero_v);   -- -zero - +zero
1028
 
1029
        -- qNaN --
1030
        addsub.res_class(fp_class_qnan_c) <=
1031
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
1032
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
1033
          (a_pos_inf_v and b_pos_inf_v) or -- +inf - +inf
1034
          (a_neg_inf_v and b_neg_inf_v);   -- -inf - -inf
1035
      end if;
1036
 
1037
      -- normal --
      -- both normal class bits get the same value whenever both operands are normal numbers
1038
      addsub.res_class(fp_class_pos_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +-/norm [sign is irrelevant here]
1039
      addsub.res_class(fp_class_neg_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +-/norm [sign is irrelevant here]
1040
 
1041
      -- sNaN --
1042
      addsub.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
1043 74 zero_gravi
 
1044
      -- subnormal result --
1045
      addsub.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
1046
      addsub.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
1047 55 zero_gravi
    end if;
1048
  end process adder_subtractor_class_core;
1049
 
1050
  -- unused --
1051
  fu_addsub.result <= (others => '0');
1052
  fu_addsub.flags  <= (others => '0');
1053
 
1054
 
1055
-- ****************************************************************************************************************************
1056
-- FPU Core - Normalize & Round
1057
-- ****************************************************************************************************************************
1058
 
1059
  -- Normalizer Input -----------------------------------------------------------------------
1060
  -- -------------------------------------------------------------------------------------------
1061
  -- Combinational input multiplexer of the shared normalizer, selected by funct_ff:
  -- adder/subtractor result, multiplier result, or (all other codes) the integer-to-float conversion.
  normalizer_input_select: process(funct_ff, addsub, multiplier, fu_conv_i2f)
1062
  begin
1063
    case funct_ff is
1064
      when op_addsub_c => -- addition/subtraction
1065
        normalizer.mode      <= '0'; -- normalization
1066
        normalizer.sign      <= addsub.res_sign;
1067
        normalizer.xexp      <= addsub.exp_cnt;
        -- the 28-bit sum is spread over the 48-bit extended mantissa: bits 27..3 go to the top,
        -- bits 2 and 1 directly below, bit 0 goes to the LSB; everything in between is zero
1068
        normalizer.xmantissa(47 downto 23) <= addsub.res_sum(27 downto 3);
1069
        normalizer.xmantissa(22) <= addsub.res_sum(2);
1070
        normalizer.xmantissa(21) <= addsub.res_sum(1);
1071
        normalizer.xmantissa(20 downto 01) <= (others => '0');
1072
        normalizer.xmantissa(00) <= addsub.res_sum(0);
1073
        normalizer.class     <= addsub.res_class;
1074
        normalizer.flags_in  <= addsub.flags;
1075
        normalizer.start     <= addsub.done;
1076
      when op_mul_c => -- multiplication
1077
        normalizer.mode      <= '0'; -- normalization
1078
        normalizer.sign      <= multiplier.sign;
1079
        normalizer.xexp      <= '0' & multiplier.exp_res(7 downto 0);
1080
        normalizer.xmantissa <= multiplier.product;
1081
        normalizer.class     <= multiplier.res_class;
1082
        normalizer.flags_in  <= multiplier.flags;
1083
        normalizer.start     <= multiplier.done;
1084
      when others => -- op_i2f_c
1085
        normalizer.mode      <= '1'; -- int_to_float
1086
        normalizer.sign      <= fu_conv_i2f.sign;
1087
        normalizer.xexp      <= "001111111"; -- bias = 127
1088
        normalizer.xmantissa <= (others => '0'); -- don't care
1089
        normalizer.class     <= (others => '0'); -- don't care
1090
        normalizer.flags_in  <= (others => '0'); -- no flags yet
1091
        normalizer.start     <= fu_conv_i2f.done;
1092
    end case;
1093
  end process normalizer_input_select;
1094
 
1095
 
1096
  -- Normalizer & Rounding Unit -------------------------------------------------------------
1097
  -- -------------------------------------------------------------------------------------------
1098
  -- Shared normalizer/rounding unit instance. All inputs come from the "normalizer" record that
  -- is driven by the input-select process; the integer operand for int-to-float conversions is
  -- taken directly from fu_conv_i2f.result.
  neorv32_cpu_cp_fpu_normalizer_inst: neorv32_cpu_cp_fpu_normalizer
1099
  port map (
1100
    -- control --
1101
    clk_i      => clk_i,                -- global clock, rising edge
1102
    rstn_i     => rstn_i,               -- global reset, low-active, async
1103
    start_i    => normalizer.start,     -- trigger operation
1104
    rmode_i    => fpu_operands.frm,     -- rounding mode
1105
    funct_i    => normalizer.mode,      -- operation mode (0=normalize & round, 1=int-to-float)
1106
    -- input --
1107
    sign_i     => normalizer.sign,      -- sign
1108
    exponent_i => normalizer.xexp,      -- extended exponent
1109
    mantissa_i => normalizer.xmantissa, -- extended mantissa
1110
    integer_i  => fu_conv_i2f.result,   -- integer input
1111
    class_i    => normalizer.class,     -- input number class
1112
    flags_i    => normalizer.flags_in,  -- exception flags input
1113
    -- output --
1114
    result_o   => normalizer.result,    -- result (float)
1115
    flags_o    => normalizer.flags_out, -- exception flags
1116
    done_o     => normalizer.done       -- operation done
1117
  );
1118
 
1119
 
1120
-- ****************************************************************************************************************************
1121
-- FPU Core - Result
1122
-- ****************************************************************************************************************************
1123
 
1124
  -- Result Output to CPU Pipeline ----------------------------------------------------------
1125
  -- -------------------------------------------------------------------------------------------
1126 56 zero_gravi
  -- Registered result multiplexer: while ctrl_engine.valid is set, the result and exception
  -- flags of the functional unit selected by funct_ff are captured; in every other cycle both
  -- outputs are forced to all-zero.
  output_gate: process(rstn_i, clk_i)
1127 55 zero_gravi
  begin
1128 56 zero_gravi
    if (rstn_i = '0') then -- asynchronous, low-active reset
1129
      res_o    <= (others => def_rst_val_c);
1130
      fflags_o <= (others => def_rst_val_c);
1131
    elsif rising_edge(clk_i) then
1132 55 zero_gravi
      if (ctrl_engine.valid = '1') then
1133
        case funct_ff is
1134
          when op_class_c =>
1135
            res_o    <= fu_classify.result;
1136
            fflags_o <= fu_classify.flags;
1137
          when op_comp_c =>
1138
            res_o    <= fu_compare.result;
1139
            fflags_o <= fu_compare.flags;
1140
          when op_f2i_c =>
1141
            res_o    <= fu_conv_f2i.result;
1142
            fflags_o <= fu_conv_f2i.flags;
1143
          when op_sgnj_c =>
1144
            res_o    <= fu_sign_inject.result;
1145
            fflags_o <= fu_sign_inject.flags;
1146
          when op_minmax_c =>
1147
            res_o    <= fu_min_max.result;
1148
            fflags_o <= fu_min_max.flags;
1149
          when others => -- op_mul_c, op_addsub_c, op_i2f_c, ...
1150
            -- all of these deliver their result via the shared normalizer
            res_o    <= normalizer.result;
1151
            fflags_o <= normalizer.flags_out;
1152
        end case;
1153
      else -- no valid result in this cycle: gate outputs to zero
1154
        res_o    <= (others => '0');
1155
        fflags_o <= (others => '0');
1156
      end if;
1157
    end if;
1158
  end process output_gate;
1159
 
1160
  -- operation done: OR of the done signals of all functional units that can terminate an --
  -- operation (add/sub, mul and int-to-float complete via normalizer.done) --
1161
  fu_core_done <= fu_compare.done or fu_classify.done or fu_sign_inject.done or fu_min_max.done or normalizer.done or fu_conv_f2i.done;
1162
 
1163
 
1164 52 zero_gravi
end neorv32_cpu_cp_fpu_rtl;
1165 55 zero_gravi
 
1166
-- ###########################################################################################################################################
1167
-- ###########################################################################################################################################
1168
 
1169
-- #################################################################################################
1170
-- # << NEORV32 - Single-Precision Floating-Point Unit: Normalizer and Rounding Unit >>            #
1171
-- # ********************************************************************************************* #
1172
-- # This unit also performs integer-to-float conversions.                                         #
1173
-- # ********************************************************************************************* #
1174
-- # BSD 3-Clause License                                                                          #
1175
-- #                                                                                               #
1176
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1177
-- #                                                                                               #
1178
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1179
-- # permitted provided that the following conditions are met:                                     #
1180
-- #                                                                                               #
1181
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1182
-- #    conditions and the following disclaimer.                                                   #
1183
-- #                                                                                               #
1184
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1185
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1186
-- #    provided with the distribution.                                                            #
1187
-- #                                                                                               #
1188
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1189
-- #    endorse or promote products derived from this software without specific prior written      #
1190
-- #    permission.                                                                                #
1191
-- #                                                                                               #
1192
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1193
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1194
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1195
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1196
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1197
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1198
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1199
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1200
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1201
-- # ********************************************************************************************* #
1202
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1203
-- #################################################################################################
1204
 
1205
library ieee;
1206
use ieee.std_logic_1164.all;
1207
use ieee.numeric_std.all;
1208
 
1209
library neorv32;
1210
use neorv32.neorv32_package.all;
1211
 
1212
-- Normalizer and rounding unit (also performs integer-to-float conversion).
-- An operation is started by start_i while the unit is idle; funct_i selects between
-- normalize-and-round of an extended float (sign_i/exponent_i/mantissa_i) and conversion of
-- integer_i. Completion is signaled by a done_o pulse.
entity neorv32_cpu_cp_fpu_normalizer is
1213
  port (
1214
    -- control --
1215
    clk_i      : in  std_ulogic; -- global clock, rising edge
1216
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1217
    start_i    : in  std_ulogic; -- trigger operation
1218
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
1219
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
1220
    -- input --
1221
    sign_i     : in  std_ulogic; -- sign
1222
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent
1223
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa
1224
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input
1225
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
1226
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
1227
    -- output --
1228
    result_o   : out std_ulogic_vector(31 downto 0); -- float result
1229
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags output
1230
    done_o     : out std_ulogic -- operation done (set for one cycle by the finalize state)
1231
  );
1232
end neorv32_cpu_cp_fpu_normalizer;
1233
 
1234
architecture neorv32_cpu_cp_fpu_normalizer_rtl of neorv32_cpu_cp_fpu_normalizer is
1235
 
1236
  -- controller --
1237
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_I2F, S_CHECK_I2F, S_PREPARE_NORM, S_PREPARE_SHIFT, S_NORMALIZE_BUSY, S_ROUND, S_CHECK, S_FINALIZE);
1238
  type ctrl_t is record
1239
    state   : ctrl_engine_state_t; -- current state
1240
    norm_r  : std_ulogic; -- normalization round 0 or 1
1241
    cnt     : std_ulogic_vector(08 downto 0); -- iteration counter/exponent (incl. overflow)
1242
    cnt_pre : std_ulogic_vector(08 downto 0); -- counter value of the previous cycle
1243
    cnt_of  : std_ulogic; -- counter overflow
1244
    cnt_uf  : std_ulogic; -- counter underflow
1245
    rounded : std_ulogic; -- output is rounded
1246
    res_sgn : std_ulogic; -- result sign
1247
    res_exp : std_ulogic_vector(07 downto 0); -- result exponent
1248
    res_man : std_ulogic_vector(22 downto 0); -- result mantissa
1249
    class   : std_ulogic_vector(09 downto 0); -- number class (captured from class_i)
1250
    flags   : std_ulogic_vector(04 downto 0); -- exception flags (captured from flags_i, extended here)
1251
  end record;
1252
  signal ctrl : ctrl_t;
1253
 
1254
  -- normalization shift register --
1255
  type sreg_t is record
1256
    done  : std_ulogic; -- normalized: only the hidden-one bit of "upper" is set
1257
    dir   : std_ulogic; -- shift direction: 0=right, 1=left
1258
    zero  : std_ulogic; -- "upper" is all-zero
1259
    upper : std_ulogic_vector(31 downto 0); -- integer part; bit 0 is the hidden one
1260
    lower : std_ulogic_vector(22 downto 0); -- fractional part (mantissa)
1261
    ext_g : std_ulogic; -- guard bit
1262
    ext_r : std_ulogic; -- round bit
1263
    ext_s : std_ulogic; -- sticky bit
1264
  end record;
1265
  signal sreg : sreg_t;
1266
 
1267
  -- rounding unit --
1268
  type round_t is record
1269
    en     : std_ulogic; -- enable rounding
1270
    sub    : std_ulogic; -- 0=increment, 1=decrement
1271
    output : std_ulogic_vector(24 downto 0); -- mantissa size + hidden one + 1
1272
  end record;
1273
  signal round : round_t;
1274
 
1275
begin
1276
 
1277
  -- Control Engine -------------------------------------------------------------------------
1278
  -- -------------------------------------------------------------------------------------------
1279
  -- Control FSM and iterative (one bit per cycle) normalization shifter.
  --   S_IDLE           : wait for start_i, capture exponent/sign/class/flags
  --   S_PREPARE_I2F    : load the integer operand into the shift register
  --   S_CHECK_I2F      : shortcut if the integer operand is zero
  --   S_PREPARE_NORM   : load the extended mantissa, shortcut for special classes/flags
  --   S_PREPARE_SHIFT  : select the shift direction
  --   S_NORMALIZE_BUSY : shift until normalized (or exponent counter under-/overflow)
  --   S_ROUND          : apply the rounding unit result, then normalize a second time
  --   S_CHECK          : derive overflow/underflow flags from the exponent counter
  --   S_FINALIZE       : assemble result word and flags, pulse done_o
  ctrl_engine: process(rstn_i, clk_i)
1280
  begin
1281
    if (rstn_i = '0') then
1282
      ctrl.state   <= S_IDLE;
1283 56 zero_gravi
      ctrl.norm_r  <= def_rst_val_c;
1284
      ctrl.cnt     <= (others => def_rst_val_c);
1285
      ctrl.cnt_pre <= (others => def_rst_val_c);
1286
      ctrl.cnt_of  <= def_rst_val_c;
1287
      ctrl.cnt_uf  <= def_rst_val_c;
1288
      ctrl.rounded <= def_rst_val_c;
1289
      ctrl.res_exp <= (others => def_rst_val_c);
1290
      ctrl.res_man <= (others => def_rst_val_c);
1291
      ctrl.res_sgn <= def_rst_val_c;
1292
      ctrl.class   <= (others => def_rst_val_c);
1293
      ctrl.flags   <= (others => def_rst_val_c);
1294 55 zero_gravi
      --
1295 56 zero_gravi
      sreg.upper   <= (others => def_rst_val_c);
1296
      sreg.lower   <= (others => def_rst_val_c);
1297
      sreg.dir     <= def_rst_val_c;
1298
      sreg.ext_g   <= def_rst_val_c;
1299
      sreg.ext_r   <= def_rst_val_c;
1300
      sreg.ext_s   <= def_rst_val_c;
1301 55 zero_gravi
      --
1302
      done_o       <= '0';
1303
    elsif rising_edge(clk_i) then
1304
      -- defaults --
1305
      ctrl.cnt_pre <= ctrl.cnt; -- keep previous counter value for wrap detection
1306
      done_o       <= '0';
1307
 
1308
      -- exponent counter underflow/overflow --
      -- detected by comparing the two MSBs of the previous and the current 9-bit counter value:
      -- "01" -> "10" means the counter left the 8-bit range upwards, "00" -> "11" means it
      -- wrapped below zero; both flags are sticky until the FSM returns to S_IDLE
1309
      if ((ctrl.cnt_pre(8 downto 7) = "01") and (ctrl.cnt(8 downto 7) = "10")) then -- overflow
1310
        ctrl.cnt_of <= '1';
1311
      elsif (ctrl.cnt_pre(8 downto 7) = "00") and (ctrl.cnt(8 downto 7) = "11") then -- underflow
1312
        ctrl.cnt_uf <= '1';
1313
      end if;
1314
 
1315
      -- fsm --
1316
      case ctrl.state is
1317
 
1318
        when S_IDLE => -- wait for operation trigger
1319
        -- ------------------------------------------------------------
1320
          ctrl.norm_r  <= '0'; -- start with first normalization
1321
          ctrl.rounded <= '0'; -- not rounded yet
1322
          ctrl.cnt_of  <= '0';
1323
          ctrl.cnt_uf  <= '0';
1324
          --
1325
          if (start_i = '1') then
1326
            ctrl.cnt     <= exponent_i;
1327
            ctrl.res_sgn <= sign_i;
1328
            ctrl.class   <= class_i;
1329
            ctrl.flags   <= flags_i;
1330
            if (funct_i = '0') then -- float -> float
1331
              ctrl.state <= S_PREPARE_NORM;
1332
            else -- integer -> float
1333
              ctrl.state <= S_PREPARE_I2F;
1334
            end if;
1335
          end if;
1336
 
1337
        when S_PREPARE_I2F => -- prepare integer-to-float conversion
1338
        -- ------------------------------------------------------------
1339
          sreg.upper <= integer_i;
1340
          sreg.lower <= (others => '0');
1341
          sreg.ext_g <= '0';
1342
          sreg.ext_r <= '0';
1343
          sreg.ext_s <= '0';
1344
          sreg.dir   <= '0'; -- shift right
1345
          ctrl.state <= S_CHECK_I2F;
1346
 
1347
        when S_CHECK_I2F => -- check if converting zero
1348
        -- ------------------------------------------------------------
1349
          if (sreg.zero = '1') then -- all zero
1350
            ctrl.class(fp_class_pos_zero_c) <= '1';
1351
            ctrl.state <= S_FINALIZE;
1352
          else
1353
            ctrl.state <= S_NORMALIZE_BUSY;
1354
          end if;
1355
 
1356
        when S_PREPARE_NORM => -- prepare "normal" normalization & rounding
1357
        -- ------------------------------------------------------------
1358
          sreg.upper(31 downto 02) <= (others => '0');
1359
          sreg.upper(01 downto 00) <= mantissa_i(47 downto 46);
1360
          sreg.lower <= mantissa_i(45 downto 23);
1361
          sreg.ext_g <= mantissa_i(22);
1362
          sreg.ext_r <= mantissa_i(21);
1363 74 zero_gravi
          if (or_reduce_f(mantissa_i(20 downto 0)) = '1') then
1364
            sreg.ext_s <= '1';
1365
          else
1366
            sreg.ext_s <= '0';
1367
          end if;
1368 55 zero_gravi
          -- check for special cases --
1369
          if ((ctrl.class(fp_class_snan_c)       or ctrl.class(fp_class_qnan_c)       or -- NaN
1370
               ctrl.class(fp_class_neg_zero_c)   or ctrl.class(fp_class_pos_zero_c)   or -- zero
1371
               ctrl.class(fp_class_neg_denorm_c) or ctrl.class(fp_class_pos_denorm_c) or -- subnormal
1372
               ctrl.class(fp_class_neg_inf_c)    or ctrl.class(fp_class_pos_inf_c)    or -- infinity
1373
               ctrl.flags(fp_exc_uf_c) or -- underflow
1374
               ctrl.flags(fp_exc_of_c) or -- overflow
1375
               ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
1376
            ctrl.state <= S_FINALIZE;
1377
          else
1378
            ctrl.state <= S_PREPARE_SHIFT;
1379
          end if;
1380
 
1381
        when S_PREPARE_SHIFT => -- prepare shift direction (for "normal" normalization only)
1382
        -- ------------------------------------------------------------
1383
          if (sreg.zero = '0') then -- integer part is non-zero: number >= 1.0
1384
            sreg.dir <= '0'; -- shift right
1385
          else -- integer part is all-zero: number < 1.0
1386
            sreg.dir <= '1'; -- shift left
1387
          end if;
1388
          ctrl.state <= S_NORMALIZE_BUSY;
1389
 
1390
        when S_NORMALIZE_BUSY => -- running normalization cycle
1391
        -- ------------------------------------------------------------
1392
          -- shift until normalized or exception --
1393
          if (sreg.done = '1') or (ctrl.cnt_uf = '1') or (ctrl.cnt_of = '1') then
1394
            -- normalization control --
1395
            ctrl.norm_r <= '1';
1396
            if (ctrl.norm_r = '0') then -- first normalization cycle done
1397
              ctrl.state <= S_ROUND;
1398
            else -- second normalization cycle done
1399
              ctrl.state <= S_CHECK;
1400
            end if;
1401
          else
1402
            if (sreg.dir = '0') then -- shift right
1403
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) + 1);
1404
              sreg.upper <= '0' & sreg.upper(sreg.upper'left downto 1);
1405
              sreg.lower <= sreg.upper(0) & sreg.lower(sreg.lower'left downto 1);
1406
              sreg.ext_g <= sreg.lower(0);
1407
              sreg.ext_r <= sreg.ext_g;
1408
              sreg.ext_s <= sreg.ext_r or sreg.ext_s; -- sticky bit
1409
            else -- shift left
1410
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
1411
              sreg.upper <= sreg.upper(sreg.upper'left-1 downto 0) & sreg.lower(sreg.lower'left);
1412
              sreg.lower <= sreg.lower(sreg.lower'left-1 downto 0) & sreg.ext_g;
1413
              sreg.ext_g <= sreg.ext_r;
1414
              sreg.ext_r <= sreg.ext_s;
1415
              sreg.ext_s <= sreg.ext_s; -- sticky bit
1416
            end if;
1417
          end if;
1418
 
1419
        when S_ROUND => -- rounding cycle (after first normalization)
1420
        -- ------------------------------------------------------------
          -- the result is inexact whenever non-zero guard/round/sticky bits are discarded here,
          -- no matter whether the mantissa is modified by the rounding unit or just truncated
1421
          ctrl.rounded <= ctrl.rounded or sreg.ext_g or sreg.ext_r or sreg.ext_s;
1422
          sreg.upper(31 downto 02) <= (others => '0');
1423
          sreg.upper(01 downto 00) <= round.output(24 downto 23);
1424
          sreg.lower <= round.output(22 downto 00);
1425
          sreg.ext_g <= '0';
1426
          sreg.ext_r <= '0';
1427
          sreg.ext_s <= '0';
1428
          ctrl.state <= S_PREPARE_SHIFT;
1429
 
1430
        when S_CHECK => -- check for overflow/underflow
1431
        -- ------------------------------------------------------------
1432
          if (ctrl.cnt_uf = '1') then -- underflow
1433
            ctrl.flags(fp_exc_uf_c) <= '1';
1434
          elsif (ctrl.cnt_of = '1') then -- overflow
1435
            ctrl.flags(fp_exc_of_c) <= '1';
1436
          elsif (ctrl.cnt(7 downto 0) = x"00") then -- subnormal
1437
            ctrl.flags(fp_exc_uf_c) <= '1';
1438
          elsif (ctrl.cnt(7 downto 0) = x"FF") then -- infinity
1439
            ctrl.flags(fp_exc_of_c) <= '1';
1440
          end if;
1441
          ctrl.state  <= S_FINALIZE;
1442
 
1443
        when S_FINALIZE => -- result finalization
1444
        -- ------------------------------------------------------------
1445
          -- generate result word (the ORDER of checks is important here!) --
1446
          if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') then -- sNaN / qNaN
1447
            ctrl.res_sgn <= fp_single_qnan_c(31);
1448
            ctrl.res_exp <= fp_single_qnan_c(30 downto 23);
1449
            ctrl.res_man <= fp_single_qnan_c(22 downto 00);
1450
          elsif (ctrl.class(fp_class_neg_inf_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- infinity
1451
                (ctrl.flags(fp_exc_of_c) = '1') then -- overflow
1452
            ctrl.res_exp <= fp_single_pos_inf_c(30 downto 23); -- keep original sign
1453
            ctrl.res_man <= fp_single_pos_inf_c(22 downto 00);
1454
          elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') then -- zero
1455
            ctrl.res_sgn <= ctrl.class(fp_class_neg_zero_c);
1456
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23);
1457
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
1458
          elsif (ctrl.flags(fp_exc_uf_c) = '1') or -- underflow
1459
                (sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
1460
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
1461
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
1462
          else -- result is ok
1463
            ctrl.res_exp <= ctrl.cnt(7 downto 0);
1464
            ctrl.res_man <= sreg.lower;
1465
          end if;
1466
          -- generate exception flags --
1467
          ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
1468
          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if precision was lost while rounding
1469
          --
1470
          done_o     <= '1';
1471
          ctrl.state <= S_IDLE;
1472
 
1473
        when others => -- undefined
1474
        -- ------------------------------------------------------------
1475
          ctrl.state <= S_IDLE;
1476
 
1477
      end case;
1478
    end if;
1479
  end process ctrl_engine;
1480
 
1481
  -- stop shifting when normalized --
1482 74 zero_gravi
  sreg.done <= '1' when (or_reduce_f(sreg.upper(sreg.upper'left downto 1)) = '0') and (sreg.upper(0) = '1') else '0'; -- all bits above the hidden one are zero, hidden one is set
1483 55 zero_gravi
 
1484
  -- all-zero including hidden bit --
1485 74 zero_gravi
  sreg.zero <= '1' when (or_reduce_f(sreg.upper) = '0') else '0';
1486 55 zero_gravi
 
1487
  -- result (assembled from the registered sign, exponent and mantissa) --
1488
  result_o(31)           <= ctrl.res_sgn;
1489
  result_o(30 downto 23) <= ctrl.res_exp;
1490
  result_o(22 downto  0) <= ctrl.res_man;
1491
 
1492
  -- exception flags --
1493
  flags_o(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c); -- invalid operation
1494
  flags_o(fp_exc_dz_c) <= ctrl.flags(fp_exc_dz_c); -- divide by zero
1495
  flags_o(fp_exc_of_c) <= ctrl.flags(fp_exc_of_c); -- overflow
1496
  flags_o(fp_exc_uf_c) <= ctrl.flags(fp_exc_uf_c); -- underflow
1497
  flags_o(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c); -- inexact
1498
 
1499
 
1500
  -- Rounding -------------------------------------------------------------------------------
1501
  -- -------------------------------------------------------------------------------------------
1502
  -- Rounding decision logic (combinational).
  -- The shift register holds the MAGNITUDE of the result; the sign is kept separately in
  -- ctrl.res_sgn. Hence the directed rounding modes must take the sign into account:
  --   * towards -infinity: negative values grow in magnitude (increment), positive values are
  --     truncated
  --   * towards +infinity: positive values grow in magnitude (increment), negative values are
  --     truncated
  -- A decrement of the magnitude is never required, so round.sub is always '0' (increment).
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl)
1503
  begin
1504
    -- defaults --
1505
    round.en  <= '0';
1506
    round.sub <= '0';
1507
    -- rounding mode --
1508
    case rmode_i(2 downto 0) is
1509
      when "000" => -- round to nearest, ties to even
1510
        if (sreg.ext_g = '0') then
1511
          round.en <= '0'; -- round down (do nothing)
1512
        else
1513
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
1514
            round.en <= sreg.lower(0); -- round up if LSB of mantissa is set
1515
          else
1516
            round.en <= '1'; -- round up
1517
          end if;
1518
        end if;
1519
        round.sub <= '0'; -- increment
1520
      when "001" => -- round towards zero
1521
        round.en <= '0'; -- no rounding -> just truncate
1522
      when "010" => -- round down (towards -infinity)
1523
        round.en  <= (sreg.ext_g or sreg.ext_r or sreg.ext_s) and ctrl.res_sgn; -- only negative values move away from zero
1524
        round.sub <= '0'; -- increment (of the magnitude)
1525
      when "011" => -- round up (towards +infinity)
1526
        round.en  <= (sreg.ext_g or sreg.ext_r or sreg.ext_s) and (not ctrl.res_sgn); -- only positive values move away from zero
1527
        round.sub <= '0'; -- increment (of the magnitude)
1528
      when "100" => -- round to nearest, ties to max magnitude
1529
        round.en <= '0'; -- FIXME / TODO
1530
      when others => -- undefined
1531
        round.en <= '0';
1532
    end case;
1533
  end process rounding_unit_ctrl;
1534
 
1535
 
1536
  -- incrementer/decrementer --
1537
  -- Applies the rounding decision to the mantissa (combinational): the hidden bit and the
  -- 23-bit mantissa are extended by one leading zero so that a carry out of the increment is
  -- preserved in round.output(24).
  rounding_unit_add: process(round, sreg)
1538
    variable tmp_v : std_ulogic_vector(24 downto 0);
1539
  begin
1540
    tmp_v := '0' & sreg.upper(0) & sreg.lower; -- carry bit & hidden bit & mantissa
1541
    if (round.en = '1') then
1542
      if (round.sub = '0') then -- increment
1543
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
1544
      else -- decrement
1545
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
1546
      end if;
1547
    else -- do nothing
1548
      round.output <= tmp_v;
1549
    end if;
1550
  end process rounding_unit_add;
1551
 
1552
 
1553
end neorv32_cpu_cp_fpu_normalizer_rtl;
1554
 
1555
-- ###########################################################################################################################################
1556
-- ###########################################################################################################################################
1557
 
1558
-- #################################################################################################
1559
-- # << NEORV32 - Single-Precision Floating-Point Unit: Float-To-Int Converter >>                  #
1560
-- # ********************************************************************************************* #
1561
-- # BSD 3-Clause License                                                                          #
1562
-- #                                                                                               #
1563
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1564
-- #                                                                                               #
1565
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1566
-- # permitted provided that the following conditions are met:                                     #
1567
-- #                                                                                               #
1568
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1569
-- #    conditions and the following disclaimer.                                                   #
1570
-- #                                                                                               #
1571
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1572
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1573
-- #    provided with the distribution.                                                            #
1574
-- #                                                                                               #
1575
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1576
-- #    endorse or promote products derived from this software without specific prior written      #
1577
-- #    permission.                                                                                #
1578
-- #                                                                                               #
1579
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1580
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1581
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1582
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1583
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1584
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1585
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1586
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1587
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1588
-- # ********************************************************************************************* #
1589
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1590
-- #################################################################################################
1591
 
1592
library ieee;
1593
use ieee.std_logic_1164.all;
1594
use ieee.numeric_std.all;
1595
 
1596
library neorv32;
1597
use neorv32.neorv32_package.all;
1598
 
1599
entity neorv32_cpu_cp_fpu_f2i is
1600
  port (
1601
    -- control --
1602
    clk_i      : in  std_ulogic; -- global clock, rising edge
1603
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1604
    start_i    : in  std_ulogic; -- trigger operation (only evaluated while the converter is idle)
1605
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode (read combinatorially by the rounding unit, not latched)
1606
    funct_i    : in  std_ulogic; -- 0=signed, 1=unsigned integer result
1607
    -- input --
1608
    sign_i     : in  std_ulogic; -- sign of the float operand
1609
    exponent_i : in  std_ulogic_vector(07 downto 0); -- biased exponent (bias of 127 is removed internally)
1610
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa (fraction bits, without hidden one)
1611
    class_i    : in  std_ulogic_vector(09 downto 0); -- operand class (indexed by the fp_class_*_c constants)
1612
    -- output --
1613
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result
1614
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags (indexed by the fp_exc_*_c constants)
1615
    done_o     : out std_ulogic -- operation done (high for one clock cycle)
1616
  );
1617
end neorv32_cpu_cp_fpu_f2i;
1618
 
1619
architecture neorv32_cpu_cp_fpu_f2i_rtl of neorv32_cpu_cp_fpu_f2i is
1620
 
1621
  -- controller --
1622
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_F2I, S_NORMALIZE_BUSY, S_ROUND, S_FINALIZE);
1623
  type ctrl_t is record
1624
    state      : ctrl_engine_state_t; -- current state
1625
    unsign     : std_ulogic; -- 1 = unsigned conversion (copy of funct_i)
1626
    cnt        : std_ulogic_vector(07 downto 0); -- iteration counter/exponent
1627
    sign       : std_ulogic; -- latched operand sign
1628
    class      : std_ulogic_vector(09 downto 0); -- latched operand class
1629
    rounded    : std_ulogic; -- output is rounded
1630
    over       : std_ulogic; -- output is overflowing
1631
    under      : std_ulogic; -- output is underflowing
1632
    result_tmp : std_ulogic_vector(31 downto 0); -- rounding unit output (without overflow bit)
1633
    result     : std_ulogic_vector(31 downto 0); -- final result, drives result_o
1634
  end record;
1635
  signal ctrl : ctrl_t;
1636
 
1637
  -- conversion shift register --
1638
  type sreg_t is record
1639
    int   : std_ulogic_vector(31 downto 0); -- integer part (the hidden one is inserted at bit 0)
1640
    mant  : std_ulogic_vector(22 downto 0); -- mantissa bits that are shifted MSB-first into int
1641
    ext_g : std_ulogic; -- guard bit (= mant MSB)
1642
    ext_r : std_ulogic; -- round bit (= mant MSB-1)
1643
    ext_s : std_ulogic; -- sticky bit (set once any mant bit below the round bit is 1)
1644
  end record;
1645
  signal sreg : sreg_t;
1646
 
1647
  -- rounding unit --
1648
  type round_t is record
1649
    en     : std_ulogic; -- enable rounding
1650
    sub    : std_ulogic; -- 0=increment, 1=decrement
1651
    output : std_ulogic_vector(32 downto 0); -- result + overflow
1652
  end record;
1653
  signal round : round_t;
1654
 
1655
begin
1656
 
1657
  -- Control Engine -------------------------------------------------------------------------
1658
  -- -------------------------------------------------------------------------------------------
1659
  ctrl_engine: process(rstn_i, clk_i) -- FSM: IDLE -> PREPARE_F2I -> NORMALIZE_BUSY -> ROUND -> FINALIZE (special operand classes skip directly to FINALIZE)
1660
  begin
1661
    if (rstn_i = '0') then
1662
      ctrl.state      <= S_IDLE;
1663 56 zero_gravi
      ctrl.cnt        <= (others => def_rst_val_c);
1664
      ctrl.sign       <= def_rst_val_c;
1665
      ctrl.class      <= (others => def_rst_val_c);
1666
      ctrl.rounded    <= def_rst_val_c;
1667
      ctrl.over       <= def_rst_val_c;
1668
      ctrl.under      <= def_rst_val_c;
1669
      ctrl.unsign     <= def_rst_val_c;
1670
      ctrl.result     <= (others => def_rst_val_c);
1671
      ctrl.result_tmp <= (others => def_rst_val_c);
1672
      sreg.int        <= (others => def_rst_val_c);
1673
      sreg.mant       <= (others => def_rst_val_c);
1674
      sreg.ext_s      <= def_rst_val_c;
1675 55 zero_gravi
      done_o          <= '0';
1676
    elsif rising_edge(clk_i) then
1677
      -- defaults --
1678
      done_o <= '0';
1679
 
1680
      -- fsm --
1681
      case ctrl.state is
1682
 
1683
        when S_IDLE => -- wait for operation trigger
1684
        -- ------------------------------------------------------------
1685
          ctrl.rounded <= '0'; -- not rounded yet
1686
          ctrl.over    <= '0'; -- not overflowing yet
1687
          ctrl.under   <= '0'; -- not underflowing yet
1688
          ctrl.unsign  <= funct_i; -- sampled in every idle cycle (not gated by start_i)
1689
          sreg.ext_s   <= '0'; -- init
1690
          if (start_i = '1') then
1691
            ctrl.cnt    <= exponent_i;
1692
            ctrl.sign   <= sign_i;
1693
            ctrl.class  <= class_i;
1694
            sreg.mant   <= mantissa_i;
1695
            ctrl.state  <= S_PREPARE_F2I;
1696
          end if;
1697
 
1698
        when S_PREPARE_F2I => -- prepare float-to-integer conversion
1699
        -- ------------------------------------------------------------
1700
          if (unsigned(ctrl.cnt) < 126) then -- less than 0.5
1701
            sreg.int    <= (others => '0');
1702
            ctrl.under  <= '1'; -- this is an underflow!
1703
            ctrl.cnt    <= (others => '0');
1704
          elsif (unsigned(ctrl.cnt) = 126) then -- num < 1.0 but num >= 0.5
1705
            sreg.int    <= (others => '0');
1706
            sreg.mant   <= '1' & sreg.mant(sreg.mant'left downto 1); -- move the hidden one into the mantissa MSB (= guard bit)
1707
            ctrl.cnt    <= (others => '0');
1708
          else
1709
            sreg.int    <= (others => '0');
1710
            sreg.int(0) <= '1'; -- hidden one
1711
            ctrl.cnt    <= std_ulogic_vector(unsigned(ctrl.cnt) - 127); -- remove bias to get raw number of left shifts
1712
          end if;
1713
          -- check terminal cases --
1714
          if ((ctrl.class(fp_class_neg_inf_c)  or ctrl.class(fp_class_pos_inf_c) or
1715
               ctrl.class(fp_class_neg_zero_c) or ctrl.class(fp_class_pos_zero_c) or
1716
               ctrl.class(fp_class_snan_c)     or ctrl.class(fp_class_qnan_c)) = '1') then
1717
            ctrl.state <= S_FINALIZE;
1718
          else
1719
            ctrl.state <= S_NORMALIZE_BUSY;
1720
          end if;
1721
 
1722
        when S_NORMALIZE_BUSY => -- running normalization cycle
1723
        -- ------------------------------------------------------------
1724 74 zero_gravi
          if (or_reduce_f(sreg.mant(sreg.mant'left-2 downto 0)) = '1') then
1725
            sreg.ext_s <= '1'; -- sticky bit
1726
          end if;
1727 60 zero_gravi
          if (or_reduce_f(ctrl.cnt(ctrl.cnt'left-1 downto 0)) = '0') then -- all shifts done (MSB of cnt is not evaluated)
1728 55 zero_gravi
            if (ctrl.unsign = '0') then -- signed conversion
1729
              ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- update overrun flag again to check for numerical overflow into sign bit
1730
            end if;
1731
            ctrl.state <= S_ROUND;
1732
          else -- shift left
1733
            ctrl.cnt  <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
1734
            sreg.int  <= sreg.int(sreg.int'left-1 downto 0) & sreg.mant(sreg.mant'left);
1735
            sreg.mant <= sreg.mant(sreg.mant'left-1 downto 0) & '0';
1736
            ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- a set MSB is about to be shifted out
1737
          end if;
1738
 
1739
        when S_ROUND => -- rounding cycle
1740
        -- ------------------------------------------------------------
1741
          ctrl.rounded    <= ctrl.rounded or round.en;
1742
          ctrl.over       <= ctrl.over or round.output(round.output'left); -- overflow after rounding
1743
          ctrl.result_tmp <= round.output(round.output'left-1 downto 0);
1744
          ctrl.state      <= S_FINALIZE;
1745
 
1746
        when S_FINALIZE => -- check for corner cases and finalize result
1747
        -- ------------------------------------------------------------
1748
          if (ctrl.unsign = '1') then -- unsigned conversion
1749
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- NaN or +inf
1750
               ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
1751
              ctrl.result <= x"ffffffff";
1752
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.class(fp_class_neg_inf_c) = '1') or -- subnormal zero or -inf
1753
               (ctrl.sign = '1') or (ctrl.under = '1') then -- negative out-of-range or underflow
1754
              ctrl.result <= x"00000000";
1755
            else
1756
              ctrl.result <= ctrl.result_tmp;
1757
            end if;
1758
 
1759
          else -- signed conversion
1760
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or  -- NaN or +inf
1761
                  ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
1762
              ctrl.result <= x"7fffffff";
1763
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.under = '1') then -- subnormal zero or underflow
1764
              ctrl.result <= x"00000000";
1765
            elsif (ctrl.class(fp_class_neg_inf_c) = '1') or ((ctrl.sign = '1') and (ctrl.over = '1')) then -- -inf or negative out-of-range
1766
              ctrl.result <= x"80000000";
1767
            else -- result is ok, make sign adaption
1768
              if (ctrl.sign = '1') then
1769
                ctrl.result <= std_ulogic_vector(0 - unsigned(ctrl.result_tmp)); -- negate the magnitude (two's complement)
1770
              else
1771
                ctrl.result <= ctrl.result_tmp;
1772
              end if;
1773
            end if;
1774
          end if;
1775
          done_o     <= '1'; -- single-cycle done pulse (cleared again by the default assignment)
1776
          ctrl.state <= S_IDLE;
1777
 
1778
        when others => -- undefined
1779
        -- ------------------------------------------------------------
1780
          ctrl.state <= S_IDLE;
1781
 
1782
      end case;
1783
    end if;
1784
  end process ctrl_engine;
1785
 
1786
  -- result --
1787
  result_o <= ctrl.result;
1788
 
1789
  -- exception flags --
1790
  flags_o(fp_exc_nv_c) <= ctrl.class(fp_class_snan_c) or ctrl.class(fp_class_qnan_c); -- invalid operation
1791
  flags_o(fp_exc_dz_c) <= '0'; -- divide by zero - not possible here
1792
  flags_o(fp_exc_of_c) <= ctrl.over or ctrl.class(fp_class_pos_inf_c) or ctrl.class(fp_class_neg_inf_c); -- overflow
1793
  flags_o(fp_exc_uf_c) <= ctrl.under; -- underflow
1794
  flags_o(fp_exc_nx_c) <= ctrl.rounded; -- inexact if result was rounded
1795
 
1796
 
1797
  -- Rounding -------------------------------------------------------------------------------
1798
  -- -------------------------------------------------------------------------------------------
1799
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl.sign) -- decides if (round.en) and how (round.sub) the truncated magnitude in sreg.int is adjusted
1800
  begin
1801
    -- defaults (round.en also feeds ctrl.rounded, so a pure truncation is not reported as "rounded") --
1802
    round.en  <= '0';
1803
    round.sub <= '0';
1804
    -- rounding mode --
1805
    case rmode_i(2 downto 0) is
1806
      when "000" => -- round to nearest, ties to even
1807
        if (sreg.ext_g = '0') then
1808
          round.en <= '0'; -- round down (do nothing)
1809
        else
1810
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
1811
            round.en <= sreg.int(0); -- round up if LSB of integer is set
1812
          else
1813
            round.en <= '1'; -- round up
1814
          end if;
1815
        end if;
1816
        round.sub <= '0'; -- increment
1817
      when "001" => -- round towards zero
1818
        round.en <= '0'; -- no rounding -> just truncate
1819
      when "010" => -- round down (towards -infinity)
1820
        round.en  <= ctrl.sign and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- sreg.int is a magnitude: only inexact NEGATIVE values move away from zero, positive ones are truncated
1821
        round.sub <= '0'; -- increment the magnitude (the sign is applied afterwards in S_FINALIZE)
1822
      when "011" => -- round up (towards +infinity)
1823
        round.en  <= (not ctrl.sign) and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- only inexact POSITIVE values move away from zero, negative ones are truncated
1824
        round.sub <= '0'; -- increment
1825
      when "100" => -- round to nearest, ties to max magnitude
1826
        round.en <= '0'; -- FIXME / TODO
1827
      when others => -- undefined
1828
        round.en <= '0';
1829
    end case;
1830
  end process rounding_unit_ctrl;
1831
 
1832
  -- rounding: guard and round bits --
1833
  sreg.ext_g <= sreg.mant(sreg.mant'left);
1834
  sreg.ext_r <= sreg.mant(sreg.mant'left-1);
1835
 
1836
 
1837
  -- incrementer/decrementer --
1838
  rounding_unit_add: process(round, sreg) -- combinatorial +1/-1 stage applied to sreg.int, result goes to round.output
1839
    variable tmp_v : std_ulogic_vector(32 downto 0); -- including overflow
1840
  begin
1841
    tmp_v := '0' & sreg.int; -- zero-extend by one bit so that bit 32 captures the carry/borrow
1842
    if (round.en = '1') then
1843
      if (round.sub = '0') then -- increment
1844
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
1845
      else -- decrement
1846
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
1847
      end if;
1848
    else -- do nothing
1849
      round.output <= tmp_v; -- pass the truncated value through unchanged
1850
    end if;
1851
  end process rounding_unit_add;
1852
 
1853
 
1854
end neorv32_cpu_cp_fpu_f2i_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.