OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_fpu.vhd] - Blame information for rev 61

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 52 zero_gravi
-- #################################################################################################
2 53 zero_gravi
-- # << NEORV32 - CPU Co-Processor: Single-Prec. Floating Point Unit (RISC-V "Zfinx" Extension) >> #
3 52 zero_gravi
-- # ********************************************************************************************* #
4 53 zero_gravi
-- # The Zfinx floating-point extension uses the integer register file (x) for all FP operations.  #
5
-- # See the official RISC-V specs (https://github.com/riscv/riscv-zfinx) for more information.    #
6 55 zero_gravi
-- #                                                                                               #
7
-- # Design Notes:                                                                                 #
8
-- # * This FPU is based on a multi-cycle architecture and is NOT suited for pipelined operations. #
9
-- # * The hardware design goal was SIZE (performance comes second). All shift operations are done #
10
-- #   using an iterative approach (one bit per clock cycle, no barrel shifters!).                 #
11
-- # * Multiplication (FMUL instruction) will infer DSP blocks (if available).                     #
12
-- # * Subnormal numbers are not supported yet - they are "flushed to zero" before entering the    #
13
-- #   actual FPU core.                                                                            #
14
-- # * Division and square root operations (FDIV, FSQRT) and fused multiply-accumulate operations  #
15
-- #   (F[N]MADD) are not supported yet - they will raise an illegal instruction exception.        #
16
-- # * Rounding mode <100> ("round to nearest, ties to max magnitude") is not supported yet.       #
17
-- # * Signaling NaNs (sNaN) will not be generated by the hardware at all. However, if inserted by #
18
-- #   the programmer they are handled correctly.                                                  #
19 52 zero_gravi
-- # ********************************************************************************************* #
20
-- # BSD 3-Clause License                                                                          #
21
-- #                                                                                               #
22
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
23
-- #                                                                                               #
24
-- # Redistribution and use in source and binary forms, with or without modification, are          #
25
-- # permitted provided that the following conditions are met:                                     #
26
-- #                                                                                               #
27
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
28
-- #    conditions and the following disclaimer.                                                   #
29
-- #                                                                                               #
30
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
31
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
32
-- #    provided with the distribution.                                                            #
33
-- #                                                                                               #
34
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
35
-- #    endorse or promote products derived from this software without specific prior written      #
36
-- #    permission.                                                                                #
37
-- #                                                                                               #
38
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
39
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
40
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
41
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
42
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
43
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
44
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
45
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
46
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
47
-- # ********************************************************************************************* #
48
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
49
-- #################################################################################################
50
 
51
library ieee;
52
use ieee.std_logic_1164.all;
53
use ieee.numeric_std.all;
54
 
55
library neorv32;
56
use neorv32.neorv32_package.all;
57
 
58
entity neorv32_cpu_cp_fpu is
59
  port (
60
    -- global control --
61 53 zero_gravi
    clk_i    : in  std_ulogic; -- global clock, rising edge
62
    rstn_i   : in  std_ulogic; -- global reset, low-active, async
63
    ctrl_i   : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
64
    start_i  : in  std_ulogic; -- trigger operation
65 52 zero_gravi
    -- data input --
66 56 zero_gravi
    cmp_i    : in  std_ulogic_vector(1 downto 0); -- comparator status
67 53 zero_gravi
    rs1_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
68
    rs2_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
69 52 zero_gravi
    -- result and status --
70 53 zero_gravi
    res_o    : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
71
    fflags_o : out std_ulogic_vector(4 downto 0); -- exception flags
72
    valid_o  : out std_ulogic -- data output valid
73 52 zero_gravi
  );
74
end neorv32_cpu_cp_fpu;
75
 
76
architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is
77
 
78 55 zero_gravi
  -- FPU core functions --
79
  constant op_class_c  : std_ulogic_vector(2 downto 0) := "000";
80
  constant op_comp_c   : std_ulogic_vector(2 downto 0) := "001";
81
  constant op_i2f_c    : std_ulogic_vector(2 downto 0) := "010";
82
  constant op_f2i_c    : std_ulogic_vector(2 downto 0) := "011";
83
  constant op_sgnj_c   : std_ulogic_vector(2 downto 0) := "100";
84
  constant op_minmax_c : std_ulogic_vector(2 downto 0) := "101";
85
  constant op_addsub_c : std_ulogic_vector(2 downto 0) := "110";
86
  constant op_mul_c    : std_ulogic_vector(2 downto 0) := "111";
87
 
88
  -- float-to-integer unit --
89
  component neorv32_cpu_cp_fpu_f2i
90
  port (
91
    -- control --
92
    clk_i      : in  std_ulogic; -- global clock, rising edge
93
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
94
    start_i    : in  std_ulogic; -- trigger operation
95
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
96
    funct_i    : in  std_ulogic; -- 0=signed, 1=unsigned
97
    -- input --
98
    sign_i     : in  std_ulogic; -- sign
99
    exponent_i : in  std_ulogic_vector(07 downto 0); -- exponent
100
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa
101
    class_i    : in  std_ulogic_vector(09 downto 0); -- operand class
102
    -- output --
103
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result
104
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags
105
    done_o     : out std_ulogic -- operation done
106
  );
107
  end component;
108
 
109
  -- normalizer + rounding unit --
110
  component neorv32_cpu_cp_fpu_normalizer
111
  port (
112
    -- control --
113
    clk_i      : in  std_ulogic; -- global clock, rising edge
114
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
115
    start_i    : in  std_ulogic; -- trigger operation
116
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
117
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
118
    -- input --
119
    sign_i     : in  std_ulogic; -- sign
120
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent
121
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa
122
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input
123
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
124
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
125
    -- output --
126
    result_o   : out std_ulogic_vector(31 downto 0); -- result (float or int)
127
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags
128
    done_o     : out std_ulogic -- operation done
129
  );
130
  end component;
131
 
132
  -- commands (one-hot) --
133
  type cmd_t is record
134
    instr_class  : std_ulogic;
135
    instr_sgnj   : std_ulogic;
136
    instr_comp   : std_ulogic;
137
    instr_i2f    : std_ulogic;
138
    instr_f2i    : std_ulogic;
139
    instr_minmax : std_ulogic;
140
    instr_addsub : std_ulogic;
141
    instr_mul    : std_ulogic;
142
    funct        : std_ulogic_vector(2 downto 0);
143
  end record;
144
  signal cmd : cmd_t;
145
  signal funct_ff : std_ulogic_vector(2 downto 0);
146
 
147
  -- co-processor control engine --
148
  type ctrl_state_t is (S_IDLE, S_BUSY);
149
  type ctrl_engine_t is record
150
    state : ctrl_state_t;
151
    start : std_ulogic;
152
    valid : std_ulogic;
153
  end record;
154
  signal ctrl_engine : ctrl_engine_t;
155
 
156
  -- floating-point operands --
157
  type op_data_t  is array (0 to 1) of std_ulogic_vector(31 downto 0);
158
  type op_class_t is array (0 to 1) of std_ulogic_vector(09 downto 0);
159
  type fpu_operands_t is record
160
    rs1       : std_ulogic_vector(31 downto 0); -- operand 1
161
    rs1_class : std_ulogic_vector(09 downto 0); -- operand 1 number class
162
    rs2       : std_ulogic_vector(31 downto 0); -- operand 2
163
    rs2_class : std_ulogic_vector(09 downto 0); -- operand 2 number class
164
    frm       : std_ulogic_vector(02 downto 0); -- rounding mode
165
  end record;
166
  signal op_data      : op_data_t;
167
  signal op_class     : op_class_t;
168
  signal fpu_operands : fpu_operands_t;
169
 
170
  -- floating-point comparator --
171 56 zero_gravi
  signal cmp_ff        : std_ulogic_vector(01 downto 0);
172 55 zero_gravi
  signal comp_equal_ff : std_ulogic;
173
  signal comp_less_ff  : std_ulogic;
174
 
175
  -- functional units interface --
176
  type fu_interface_t is record
177
    result : std_ulogic_vector(31 downto 0);
178
    flags  : std_ulogic_vector(04 downto 0);
179
    start  : std_ulogic;
180
    done   : std_ulogic;
181
  end record;
182
  signal fu_classify    : fu_interface_t;
183
  signal fu_compare     : fu_interface_t;
184
  signal fu_sign_inject : fu_interface_t;
185
  signal fu_min_max     : fu_interface_t;
186
  signal fu_conv_f2i    : fu_interface_t;
187
  signal fu_addsub      : fu_interface_t;
188
  signal fu_mul         : fu_interface_t;
189
  signal fu_core_done   : std_ulogic; -- FU operation completed
190
 
191
  -- integer-to-float --
192
  type fu_i2f_interface_t is record
193
    result : std_ulogic_vector(31 downto 0);
194
    sign   : std_ulogic;
195
    start  : std_ulogic;
196
    done   : std_ulogic;
197
  end record;
198
  signal fu_conv_i2f : fu_i2f_interface_t; -- float result
199
 
200
  -- multiplier unit --
201
  type multiplier_t is record
202
    opa       : unsigned(23 downto 0); -- mantissa A plus hidden one
203
    opb       : unsigned(23 downto 0); -- mantissa B plus hidden one
204
    buf_ff    : unsigned(47 downto 0); -- product buffer
205
    sign      : std_ulogic; -- resulting sign
206
    product   : std_ulogic_vector(47 downto 0); -- product
207
    exp_sum   : std_ulogic_vector(08 downto 0); -- incl 1x overflow/underflow bit
208
    exp_res   : std_ulogic_vector(09 downto 0); -- resulting exponent incl 2x overflow/underflow bit
209
    --
210
    res_class : std_ulogic_vector(09 downto 0);
211
    flags     : std_ulogic_vector(04 downto 0); -- exception flags
212
    --
213
    start     : std_ulogic;
214
    latency   : std_ulogic_vector(02 downto 0); -- unit latency
215
    done      : std_ulogic;
216
  end record;
217
  signal multiplier : multiplier_t;
218
 
219
  -- adder/subtractor unit --
220
  type addsub_t is record
221
    -- input comparison --
222
    exp_comp  : std_ulogic_vector(01 downto 0); -- equal & less
223
    small_exp : std_ulogic_vector(07 downto 0);
224
    small_man : std_ulogic_vector(23 downto 0); -- mantissa + hiden one
225
    large_exp : std_ulogic_vector(07 downto 0);
226
    large_man : std_ulogic_vector(23 downto 0); -- mantissa + hiden one
227
    -- smaller mantissa alignment --
228
    man_sreg  : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
229
    man_g_ext : std_ulogic;
230
    man_r_ext : std_ulogic;
231
    man_s_ext : std_ulogic;
232
    exp_cnt   : std_ulogic_vector(08 downto 0);
233
    -- adder/subtractor stage --
234
    man_comp  : std_ulogic;
235
    man_s     : std_ulogic_vector(26 downto 0); -- mantissa + hiden one + GRS
236
    man_l     : std_ulogic_vector(26 downto 0); -- mantissa + hiden one + GRS
237
    add_stage : std_ulogic_vector(27 downto 0); -- adder result incl. overflow
238
    -- result --
239
    res_sign  : std_ulogic;
240
    res_sum   : std_ulogic_vector(27 downto 0); -- mantissa sum (+1 bit) + GRS bits (for rounding)
241
    res_class : std_ulogic_vector(09 downto 0);
242
    flags     : std_ulogic_vector(04 downto 0); -- exception flags
243
    -- arbitration --
244
    start     : std_ulogic;
245
    latency   : std_ulogic_vector(04 downto 0); -- unit latency
246
    done      : std_ulogic;
247
  end record;
248
  signal addsub : addsub_t;
249
 
250
  -- normalizer interface (normalization & rounding and int-to-float) --
251
  type normalizer_t is record
252
    start     : std_ulogic;
253
    mode      : std_ulogic;
254
    sign      : std_ulogic;
255
    xexp      : std_ulogic_vector(08 downto 0);
256
    xmantissa : std_ulogic_vector(47 downto 0);
257
    result    : std_ulogic_vector(31 downto 0);
258
    class     : std_ulogic_vector(09 downto 0);
259
    flags_in  : std_ulogic_vector(04 downto 0);
260
    flags_out : std_ulogic_vector(04 downto 0);
261
    done      : std_ulogic;
262
  end record;
263
  signal normalizer : normalizer_t;
264
 
265 52 zero_gravi
begin
266
 
267 55 zero_gravi
-- ****************************************************************************************************************************
268
-- Control
269
-- ****************************************************************************************************************************
270
 
271
  -- Instruction Decoding -------------------------------------------------------------------
272 52 zero_gravi
  -- -------------------------------------------------------------------------------------------
273 55 zero_gravi
  -- one-hot re-encoding --
274
  cmd.instr_class  <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11100") else '0';
275
  cmd.instr_comp   <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "10100") else '0';
276
  cmd.instr_i2f    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11010") else '0';
277
  cmd.instr_f2i    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11000") else '0';
278
  cmd.instr_sgnj   <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00100") else '0';
279
  cmd.instr_minmax <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00101") else '0';
280
  cmd.instr_addsub <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_8_c) = "0000")  else '0';
281
  cmd.instr_mul    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00010") else '0';
282 52 zero_gravi
 
283 55 zero_gravi
  -- binary re-encoding --
284
  cmd.funct <= op_mul_c     when (cmd.instr_mul    = '1') else
285
               op_addsub_c  when (cmd.instr_addsub = '1') else
286
               op_minmax_c  when (cmd.instr_minmax = '1') else
287
               op_sgnj_c    when (cmd.instr_sgnj   = '1') else
288
               op_f2i_c     when (cmd.instr_f2i    = '1') else
289
               op_i2f_c     when (cmd.instr_i2f    = '1') else
290
               op_comp_c    when (cmd.instr_comp   = '1') else
291
               op_class_c;--when (cmd.instr_class  = '1') else (others => '-');
292 52 zero_gravi
 
293 55 zero_gravi
 
294
  -- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
295
  -- -------------------------------------------------------------------------------------------
296
  -- Subnormal numbers are not supported and are "flushed to zero"! FIXME / TODO
297
  -- rs1 --
298
  op_data(0)(31)           <= rs1_i(31);
299
  op_data(0)(30 downto 23) <= rs1_i(30 downto 23);
300
  op_data(0)(22 downto 00) <= (others => '0') when (rs1_i(30 downto 23) = "00000000") else rs1_i(22 downto 0); -- flush mantissa to zero if subnormal
301
  -- rs2 --
302
  op_data(1)(31)           <= rs2_i(31);
303
  op_data(1)(30 downto 23) <= rs2_i(30 downto 23);
304
  op_data(1)(22 downto 00) <= (others => '0') when (rs2_i(30 downto 23) = "00000000") else rs2_i(22 downto 0); -- flush mantissa to zero if subnormal
305
 
306
 
307
  -- Number Classifier ----------------------------------------------------------------------
308
  -- -------------------------------------------------------------------------------------------
309
  -- Combinational classifier: derives the 10-bit class vector (indexed by the fp_class_*_c
  -- constants) for both pre-processed operands op_data(0) and op_data(1).
  number_classifier: process(op_data)
    variable sign_v     : std_ulogic; -- operand sign bit (bit 31)
    variable man_zero_v : std_ulogic; -- mantissa field (22:0) is all-zero
    variable exp_zero_v : std_ulogic; -- exponent field (30:23) is all-zero
    variable exp_ones_v : std_ulogic; -- exponent field (30:23) is all-one
    variable zero_v     : std_ulogic; -- operand is +/- zero
    variable inf_v      : std_ulogic; -- operand is +/- infinity
    variable denorm_v   : std_ulogic; -- operand is subnormal (never set, see below)
    variable nan_v      : std_ulogic; -- operand is NaN (signaling or quiet)
    variable norm_v     : std_ulogic; -- operand is a normal number
  begin
    for i in op_data'range loop -- both operands (rs1 = 0, rs2 = 1)
      sign_v := op_data(i)(31);

      -- field reductions --
      man_zero_v := not or_reduce_f(op_data(i)(22 downto 00));
      exp_zero_v := not or_reduce_f(op_data(i)(30 downto 23));
      exp_ones_v := and_reduce_f(op_data(i)(30 downto 23));

      -- number categories --
      zero_v   := exp_zero_v and man_zero_v;
      inf_v    := exp_ones_v and man_zero_v;
      denorm_v := '0'; -- FIXME / TODO: subnormals are not detected; intended term: exp_zero_v and (not man_zero_v)
      nan_v    := exp_ones_v and (not man_zero_v);
      norm_v   := not (denorm_v or nan_v or inf_v or zero_v); -- everything that is no special case

      -- sign-dependent classes --
      op_class(i)(fp_class_neg_inf_c)    <= sign_v and inf_v;          -- negative infinity
      op_class(i)(fp_class_neg_norm_c)   <= sign_v and norm_v;         -- negative normal number
      op_class(i)(fp_class_neg_denorm_c) <= sign_v and denorm_v;       -- negative subnormal number
      op_class(i)(fp_class_neg_zero_c)   <= sign_v and zero_v;         -- negative zero
      op_class(i)(fp_class_pos_zero_c)   <= (not sign_v) and zero_v;   -- positive zero
      op_class(i)(fp_class_pos_denorm_c) <= (not sign_v) and denorm_v; -- positive subnormal number
      op_class(i)(fp_class_pos_norm_c)   <= (not sign_v) and norm_v;   -- positive normal number
      op_class(i)(fp_class_pos_inf_c)    <= (not sign_v) and inf_v;    -- positive infinity

      -- NaN classes: mantissa MSB (bit 22) separates quiet ('1') from signaling ('0') --
      op_class(i)(fp_class_snan_c) <= nan_v and (not op_data(i)(22)); -- signaling NaN
      op_class(i)(fp_class_qnan_c) <= nan_v and op_data(i)(22);       -- quiet NaN
    end loop; -- i
  end process number_classifier;
338
 
339
 
340
  -- Co-Processor Control Engine ------------------------------------------------------------
341
  -- -------------------------------------------------------------------------------------------
342
  -- Two-state control FSM of the FPU co-processor.
  -- S_IDLE: samples the decoded operation (cmd.funct), the main ALU comparator status (cmp_i)
  --         and the rounding mode on every clock edge; when start_i is set, the flushed operands
  --         and their classes are latched and ctrl_engine.start is pulsed for one cycle.
  -- S_BUSY: waits for fu_core_done, then pulses ctrl_engine.valid (-> valid_o) for one cycle.
  control_engine_fsm: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl_engine.state      <= S_IDLE;
      ctrl_engine.start      <= '0';
      ctrl_engine.valid      <= '0'; -- drives valid_o; must be defined during/after reset like the other arbiter signals
      fpu_operands.frm       <= (others => def_rst_val_c);
      fpu_operands.rs1       <= (others => def_rst_val_c);
      fpu_operands.rs1_class <= (others => def_rst_val_c);
      fpu_operands.rs2       <= (others => def_rst_val_c);
      fpu_operands.rs2_class <= (others => def_rst_val_c);
      funct_ff               <= (others => def_rst_val_c);
      cmp_ff                 <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- arbiter defaults: both strobes are single-cycle pulses --
      ctrl_engine.valid <= '0';
      ctrl_engine.start <= '0';

      -- state machine --
      case ctrl_engine.state is

        when S_IDLE => -- waiting for operation trigger
        -- ------------------------------------------------------------
          funct_ff <= cmd.funct; -- actual operation to execute
          cmp_ff   <= cmp_i; -- main ALU comparator
          -- rounding mode --
          -- TODO / FIXME "round to nearest, ties to max magnitude" (0b100) is not supported yet
          -- -> the MSB of the latched rounding mode is therefore always forced to zero
          if (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "111") then
            -- funct3 = "111": take the mode from the ctrl_alu_frm* control bus bits
            -- (presumably the dynamic rounding mode from the frm CSR - confirm against the control unit)
            fpu_operands.frm <= '0' & ctrl_i(ctrl_alu_frm1_c downto ctrl_alu_frm0_c);
          else
            -- otherwise the mode is taken from the instruction word's funct3 bits
            fpu_operands.frm <= '0' & ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c);
          end if;
          --
          if (start_i = '1') then
            -- operand data --
            fpu_operands.rs1       <= op_data(0);
            fpu_operands.rs1_class <= op_class(0);
            fpu_operands.rs2       <= op_data(1);
            fpu_operands.rs2_class <= op_class(1);
            -- execute! --
            ctrl_engine.start <= '1';
            ctrl_engine.state <= S_BUSY;
          end if;

        when S_BUSY => -- operation in progress (multi-cycle)
        -- -----------------------------------------------------------
          if (fu_core_done = '1') then -- processing done?
            ctrl_engine.valid <= '1';
            ctrl_engine.state <= S_IDLE;
          end if;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl_engine.state <= S_IDLE;

      end case;
    end if;
  end process control_engine_fsm;
399
 
400
  -- operation done / valid output --
401
  valid_o <= ctrl_engine.valid;
402
 
403
 
404
  -- Functional Unit Interface (operation-start trigger) ------------------------------------
405
  -- -------------------------------------------------------------------------------------------
406
  fu_classify.start    <= ctrl_engine.start and cmd.instr_class;
407
  fu_compare.start     <= ctrl_engine.start and cmd.instr_comp;
408
  fu_sign_inject.start <= ctrl_engine.start and cmd.instr_sgnj;
409
  fu_min_max.start     <= ctrl_engine.start and cmd.instr_minmax;
410
  fu_conv_i2f.start    <= ctrl_engine.start and cmd.instr_i2f;
411
  fu_conv_f2i.start    <= ctrl_engine.start and cmd.instr_f2i;
412
  fu_addsub.start      <= ctrl_engine.start and cmd.instr_addsub;
413
  fu_mul.start         <= ctrl_engine.start and cmd.instr_mul;
414
 
415
 
416
-- ****************************************************************************************************************************
417
-- FPU Core - Functional Units
418
-- ****************************************************************************************************************************
419
 
420
  -- Number Classifier (FCLASS) -------------------------------------------------------------
421
  -- -------------------------------------------------------------------------------------------
422
  fu_classify.flags <= (others => '0'); -- does not generate flags at all
423
  fu_classify.result(31 downto 10) <= (others => '0');
424
  fu_classify.result(09 downto 00) <= fpu_operands.rs1_class;
425
  fu_classify.done <= fu_classify.start;
426
 
427
 
428
  -- Floating-Point Comparator --------------------------------------------------------------
429
  -- -------------------------------------------------------------------------------------------
430 56 zero_gravi
  -- Registered floating-point comparator shared by FEQ/FLT/FLE and FMIN/FMAX.
  -- Produces comp_equal_ff / comp_less_ff from the latched operand classes, the operand signs
  -- and the latched main-ALU comparator status (cmp_ff). NaN operands are NOT handled here;
  -- the consumers of comp_equal_ff / comp_less_ff have to mask them.
  -- Also generates the one-cycle-delayed "done" strobes of the compare and min/max units.
  float_comparator: process(rstn_i, clk_i)
    variable cond_v : std_ulogic_vector(1 downto 0); -- sign(rs1) & sign(rs2)
  begin
    if (rstn_i = '0') then
      comp_equal_ff   <= def_rst_val_c;
      comp_less_ff    <= def_rst_val_c;
      fu_compare.done <= def_rst_val_c;
      fu_min_max.done <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- equal --
      if ((fpu_operands.rs1_class(fp_class_pos_inf_c)   = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1')) or -- +inf == +inf
         ((fpu_operands.rs1_class(fp_class_neg_inf_c)   = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1')) or -- -inf == -inf
         (((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs1_class(fp_class_neg_zero_c) = '1')) and
          ((fpu_operands.rs2_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1'))) or  -- +/-zero == +/-zero
         (cmp_ff(cmp_equal_c) = '1') then -- identical in every way (comparator result from main ALU)
        comp_equal_ff <= '1';
      else
        comp_equal_ff <= '0';
      end if;

      -- less than --
      if ((fpu_operands.rs1_class(fp_class_pos_inf_c)  = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1')) or -- +inf !< +inf
         ((fpu_operands.rs1_class(fp_class_neg_inf_c)  = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1')) or -- -inf !< -inf
         (((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs1_class(fp_class_neg_zero_c) = '1')) and
          ((fpu_operands.rs2_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1'))) then  -- +/-zero !< +/-zero
        comp_less_ff <= '0';
      else
        cond_v := fpu_operands.rs1(31) & fpu_operands.rs2(31);
        case cond_v is
          when "10"   => comp_less_ff <= '1'; -- rs1 negative, rs2 positive
          when "01"   => comp_less_ff <= '0'; -- rs1 positive, rs2 negative
          when "00"   => comp_less_ff <= cmp_ff(cmp_less_c); -- both positive (comparator result from main ALU)
          -- both negative: sign-magnitude ordering is the reverse of the ALU's integer ordering, so
          -- rs1 < rs2 holds only if the ALU reports neither "less" nor "equal". A plain "not less"
          -- would also be true for identical operands and make FLT(-x, -x) return 1.
          -- (assumes cmp_ff(cmp_less_c) / cmp_ff(cmp_equal_c) are the ALU's rs1 < rs2 / rs1 = rs2 results)
          when "11"   => comp_less_ff <= (not cmp_ff(cmp_less_c)) and (not cmp_ff(cmp_equal_c));
          when others => comp_less_ff <= '0'; -- undefined
        end case;
      end if;

      -- comparator latency: results are valid one cycle after the start strobe --
      fu_compare.done <= fu_compare.start; -- for actual comparison operation
      fu_min_max.done <= fu_min_max.start; -- for min/max operations
    end if;
  end process float_comparator;
472
 
473
 
474
  -- Comparison (FEQ/FLT/FLE) ---------------------------------------------------------------
475
  -- -------------------------------------------------------------------------------------------
476
  -- Combinational result stage for FEQ/FLT/FLE: selects the requested relation from the
  -- registered comparator outputs and forces the result to zero if any operand is a NaN.
  -- Only bit 0 of the result can be set; all upper bits are always zero.
  float_comparison: process(fpu_operands, ctrl_i, comp_equal_ff, comp_less_ff)
    variable any_nan_v  : std_ulogic; -- at least one input is a NaN (signaling or quiet)
    variable relation_v : std_ulogic; -- selected relation before NaN masking
  begin
    -- NaN detection over both operands --
    any_nan_v := (fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c)) or
                 (fpu_operands.rs1_class(fp_class_qnan_c) or fpu_operands.rs2_class(fp_class_qnan_c));

    -- relation select via the two low funct3 bits --
    case ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) is
      when "00"   => relation_v := comp_less_ff or comp_equal_ff; -- FLE: less than or equal
      when "01"   => relation_v := comp_less_ff;                  -- FLT: less than
      when "10"   => relation_v := comp_equal_ff;                 -- FEQ: equal
      when others => relation_v := '0';                           -- undefined
    end case;

    -- result assembly: a comparison involving a NaN always yields zero --
    fu_compare.result    <= (others => '0');
    fu_compare.result(0) <= relation_v and (not any_nan_v);
  end process float_comparison;
497
 
498
  -- latency --
499
  -- -> done in "float_comparator"
500
 
501
  -- exceptions --
502
  fu_compare.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
503
 
504
 
505
  -- Min/Max Select (FMIN/FMAX) -------------------------------------------------------------
506
  -- -------------------------------------------------------------------------------------------
507 61 zero_gravi
  -- Combinational operand select for FMIN/FMAX. ctrl_i(ctrl_ir_funct3_0_c) = '0' picks the
  -- smaller operand, '1' the larger one. If exactly one operand is a NaN the other operand
  -- is returned; if both are NaN the canonical quiet NaN (fp_single_qnan_c) is returned.
  min_max_select: process(fpu_operands, comp_less_ff, fu_compare, ctrl_i)
    variable take_rs2_v : std_ulogic; -- ordering decision: '0' = rs1, '1' = rs2
    variable rs1_nan_v  : std_ulogic; -- rs1 is a NaN (signaling or quiet)
    variable rs2_nan_v  : std_ulogic; -- rs2 is a NaN (signaling or quiet)
    variable sel_v      : std_ulogic_vector(2 downto 0); -- rs1_nan & rs2_nan & take_rs2
  begin
    -- ordering decision; signed zeros are a special case: -0 is treated as less than +0 --
    if ((fpu_operands.rs1_class(fp_class_neg_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_zero_c) = '1')) then
      take_rs2_v := ctrl_i(ctrl_ir_funct3_0_c); -- rs1 = -0 is the smaller one
    elsif ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1')) then
      take_rs2_v := not ctrl_i(ctrl_ir_funct3_0_c); -- rs2 = -0 is the smaller one
    else -- regular comparison using the registered comparator result
      take_rs2_v := comp_less_ff xnor ctrl_i(ctrl_ir_funct3_0_c);
    end if;

    -- NaN check for each operand --
    rs1_nan_v := fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs1_class(fp_class_qnan_c);
    rs2_nan_v := fpu_operands.rs2_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_qnan_c);

    -- data output --
    sel_v := rs1_nan_v & rs2_nan_v & take_rs2_v;
    case sel_v is
      when "000" | "010" | "011" => fu_min_max.result <= fpu_operands.rs1; -- rs1 wins the comparison or rs2 is NaN
      when "001" | "100" | "101" => fu_min_max.result <= fpu_operands.rs2; -- rs2 wins the comparison or rs1 is NaN
      when others                => fu_min_max.result <= fp_single_qnan_c; -- both inputs are NaN
    end case;
  end process min_max_select;
532
 
533
  -- latency --
534
  -- -> done in "float_comparator"
535
 
536
  -- exceptions --
537
  fu_min_max.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
538
 
539
 
540
  -- Convert: Float to [unsigned] Integer (FCVT.W.S) ----------------------------------------
541
  -- -------------------------------------------------------------------------------------------
542
  neorv32_cpu_cp_fpu_f2i_inst: neorv32_cpu_cp_fpu_f2i
543
  port map (
544
    -- control --
545
    clk_i      => clk_i,                          -- global clock, rising edge
546
    rstn_i     => rstn_i,                         -- global reset, low-active, async
547
    start_i    => fu_conv_f2i.start,              -- trigger operation
548
    rmode_i    => fpu_operands.frm,               -- rounding mode
549
    funct_i    => ctrl_i(ctrl_ir_funct12_0_c),    -- 0=signed, 1=unsigned
550
    -- input --
551
    sign_i     => fpu_operands.rs1(31),           -- sign
552
    exponent_i => fpu_operands.rs1(30 downto 23), -- exponent
553
    mantissa_i => fpu_operands.rs1(22 downto 00), -- mantissa
554
    class_i    => fpu_operands.rs1_class,         -- operand class
555
    -- output --
556
    result_o   => fu_conv_f2i.result,             -- integer result
557
    flags_o    => fu_conv_f2i.flags,              -- exception flags
558
    done_o     => fu_conv_f2i.done                -- operation done
559
  );
560
 
561
 
562
  -- Sign-Injection (FSGNJ) -----------------------------------------------------------------
563
  -- -------------------------------------------------------------------------------------------
564
  sign_injector: process(ctrl_i, fpu_operands)
  begin
    -- Combinational sign-injection unit: the magnitude (exponent + mantissa) is always
    -- taken from rs1, only the sign bit depends on the selected FSGNJ* variant.
    fu_sign_inject.flags               <= (others => '0'); -- this unit never raises flags
    fu_sign_inject.result(30 downto 0) <= fpu_operands.rs1(30 downto 0);

    -- sign bit: selected by funct3(1:0)
    if (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "01") then -- FSGNJN
      fu_sign_inject.result(31) <= not fpu_operands.rs2(31); -- inverted sign of rs2
    elsif (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") then -- FSGNJX
      fu_sign_inject.result(31) <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- xor of both signs
    else -- FSGNJ ("00") and the undefined encoding ("11")
      fu_sign_inject.result(31) <= fpu_operands.rs2(31); -- plain sign of rs2
    end if;
  end process sign_injector;
575
 
576
  -- latency --
577
  fu_sign_inject.done <= fu_sign_inject.start; -- "done" is a direct copy of "start" (no additional latency)
578
 
579
 
580
  -- Convert: [unsigned] Integer to Float (FCVT.S.W / FCVT.S.WU) -----------------------------
581
  -- -------------------------------------------------------------------------------------------
582 56 zero_gravi
  convert_i2f: process(rstn_i, clk_i)
  begin
    -- Integer-to-float pre-processing: this process only registers the absolute value
    -- and the sign of the integer operand (rs1_i); the actual conversion is done by
    -- the normalizer.
    if (rstn_i = '0') then
      fu_conv_i2f.result <= (others => def_rst_val_c);
      fu_conv_i2f.sign   <= def_rst_val_c;
      -- "done" is written in the clocked branch below, so it has to appear in the reset
      -- branch as well; otherwise the register is undefined after reset and the async
      -- reset is turned into an implicit clock-enable for this flip-flop
      fu_conv_i2f.done   <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      if (ctrl_i(ctrl_ir_funct12_0_c) = '0') and (rs1_i(31) = '1') then -- signed conversion of a negative input
        fu_conv_i2f.result <= std_ulogic_vector(0 - unsigned(rs1_i)); -- two's complement -> magnitude
        fu_conv_i2f.sign   <= rs1_i(31); -- original sign
      else -- unsigned conversion or non-negative signed input
        fu_conv_i2f.result <= rs1_i;
        fu_conv_i2f.sign   <= '0';
      end if;
      fu_conv_i2f.done <= fu_conv_i2f.start; -- start delayed by one cycle; actual conversion is done by the normalizer unit
    end if;
  end process convert_i2f;
600
 
601
 
602
  -- Multiplier Core (FMUL) -----------------------------------------------------------------
603
  -- -------------------------------------------------------------------------------------------
604 56 zero_gravi
  multiplier_core: process(rstn_i, clk_i)
    -- sign-agnostic operand class shortcuts (a = rs1, b = rs2)
    variable a_zero_v, a_inf_v, b_zero_v, b_inf_v : std_ulogic;
  begin
    if (rstn_i = '0') then
      -- mantissa data path: no defined reset value (these might be DSP regs!)
      multiplier.opa     <= (others => '-');
      multiplier.opb     <= (others => '-');
      multiplier.buf_ff  <= (others => '-');
      multiplier.product <= (others => '-');
      -- sign / exponent
      multiplier.sign    <= def_rst_val_c;
      multiplier.exp_res <= (others => def_rst_val_c);
      -- exception flags generated by this process
      multiplier.flags(fp_exc_nv_c) <= def_rst_val_c;
      multiplier.flags(fp_exc_uf_c) <= def_rst_val_c;
      multiplier.flags(fp_exc_of_c) <= def_rst_val_c;
      -- latency shift register
      multiplier.latency <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- operand class shortcuts --
      a_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_neg_zero_c);
      a_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c)  or fpu_operands.rs1_class(fp_class_neg_inf_c);
      b_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c);
      b_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c)  or fpu_operands.rs2_class(fp_class_neg_inf_c);

      -- operand buffer: capture both mantissas when a new operation starts --
      if (multiplier.start = '1') then -- FIXME / TODO remove buffer?
        multiplier.opa <= unsigned('1' & fpu_operands.rs1(22 downto 0)); -- prepend hidden one
        multiplier.opb <= unsigned('1' & fpu_operands.rs2(22 downto 0)); -- prepend hidden one
      end if;

      -- mantissa product pipeline (register balancing may move these stages) --
      multiplier.buf_ff  <= multiplier.opa * multiplier.opb;
      multiplier.product <= std_ulogic_vector(multiplier.buf_ff(47 downto 0));

      -- resulting sign --
      multiplier.sign <= fpu_operands.rs1(31) xor fpu_operands.rs2(31);

      -- resulting exponent: remove one bias from the exponent sum --
      multiplier.exp_res <= std_ulogic_vector(unsigned('0' & multiplier.exp_sum) - 127);

      -- overflow / underflow: default is "none", overridden below --
      multiplier.flags(fp_exc_of_c) <= '0';
      multiplier.flags(fp_exc_uf_c) <= '0';
      if (multiplier.exp_res(multiplier.exp_res'left) = '1') then -- exp_res is "negative" -> underflow
        multiplier.flags(fp_exc_uf_c) <= '1';
      elsif (multiplier.exp_res(multiplier.exp_res'left-1) = '1') then -- overflow
        multiplier.flags(fp_exc_of_c) <= '1';
      end if;

      -- invalid operation: mul(+/-zero, +/-inf) or mul(+/-inf, +/-zero) --
      multiplier.flags(fp_exc_nv_c) <= (a_zero_v and b_inf_v) or (a_inf_v and b_zero_v);

      -- latency shift register: shift in the start strobe --
      multiplier.latency <= multiplier.latency(multiplier.latency'left-1 downto 0) & multiplier.start;
    end if;
  end process multiplier_core;
651
 
652
  -- exponent sum: both 8-bit exponent fields are zero-extended by one bit before the addition --
653
  multiplier.exp_sum <= std_ulogic_vector(unsigned('0' & fpu_operands.rs1(30 downto 23)) + unsigned('0' & fpu_operands.rs2(30 downto 23)));
654
 
655
  -- latency: "done" is the most significant bit of multiplier.latency --
656
  multiplier.start <= fu_mul.start;
657
  multiplier.done  <= multiplier.latency(multiplier.latency'left);
658
  fu_mul.done      <= multiplier.done;
659
 
660
  -- unused exception flags --
661
  multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
662
  multiplier.flags(fp_exc_nx_c) <= '0'; -- inexact: not possible here
663
 
664
 
665
  -- result class -- 
666 56 zero_gravi
  -- Registered class of the multiplication result, derived only from the classes of
  -- both operands (a = rs1, b = rs2). The subnormal result classes are not driven here.
  multiplier_class_core: process(rstn_i, clk_i)
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
  begin
    if (rstn_i = '0') then
      multiplier.res_class(fp_class_pos_norm_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_norm_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_pos_inf_c)  <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_inf_c)  <= def_rst_val_c;
      multiplier.res_class(fp_class_pos_zero_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_zero_c) <= def_rst_val_c;
      -- the NaN classes are clocked below, so they need a reset assignment as well
      -- (otherwise the async reset becomes an implicit clock-enable for these registers)
      multiplier.res_class(fp_class_snan_c)     <= def_rst_val_c;
      multiplier.res_class(fp_class_qnan_c)     <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- minions --
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);

      -- +normal --
      multiplier.res_class(fp_class_pos_norm_c) <=
        (a_pos_norm_v and b_pos_norm_v) or -- +norm * +norm
        (a_neg_norm_v and b_neg_norm_v);   -- -norm * -norm
      -- -normal --
      multiplier.res_class(fp_class_neg_norm_c) <=
        (a_pos_norm_v and b_neg_norm_v) or -- +norm * -norm
        (a_neg_norm_v and b_pos_norm_v);   -- -norm * +norm

      -- +infinity --
      multiplier.res_class(fp_class_pos_inf_c) <=
        (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    * +inf
        (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    * -inf
        (a_pos_norm_v and b_pos_inf_v)  or -- +norm   * +inf
        (a_pos_inf_v  and b_pos_norm_v) or -- +inf    * +norm
        (a_neg_norm_v and b_neg_inf_v)  or -- -norm   * -inf
        (a_neg_inf_v  and b_neg_norm_v) or -- -inf    * -norm
        (a_pos_subn_v and b_pos_inf_v)  or -- +denorm * +inf  (was missing; all other denorm*inf sign combinations are covered)
        (a_pos_inf_v  and b_pos_subn_v) or -- +inf    * +denorm (was missing)
        (a_neg_subn_v and b_neg_inf_v)  or -- -denorm * -inf
        (a_neg_inf_v  and b_neg_subn_v);   -- -inf    * -denorm
      -- -infinity --
      multiplier.res_class(fp_class_neg_inf_c) <=
        (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    * -inf
        (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    * +inf
        (a_pos_norm_v and b_neg_inf_v)  or -- +norm   * -inf
        (a_neg_inf_v  and b_pos_norm_v) or -- -inf    * +norm
        (a_neg_norm_v and b_pos_inf_v)  or -- -norm   * +inf
        (a_pos_inf_v  and b_neg_norm_v) or -- +inf    * -norm
        (a_pos_subn_v and b_neg_inf_v)  or -- +denorm * -inf
        (a_neg_inf_v  and b_pos_subn_v) or -- -inf    * +denorm
        (a_neg_subn_v and b_pos_inf_v)  or -- -denorm * +inf
        (a_pos_inf_v  and b_neg_subn_v);   -- +inf    * -denorm

      -- +zero --
      multiplier.res_class(fp_class_pos_zero_c) <=
        (a_pos_zero_v and b_pos_zero_v) or -- +zero   * +zero
        (a_pos_zero_v and b_pos_norm_v) or -- +zero   * +norm
        (a_pos_zero_v and b_pos_subn_v) or -- +zero   * +denorm
        (a_neg_zero_v and b_neg_zero_v) or -- -zero   * -zero
        (a_neg_zero_v and b_neg_norm_v) or -- -zero   * -norm
        (a_neg_zero_v and b_neg_subn_v) or -- -zero   * -denorm
        (a_pos_norm_v and b_pos_zero_v) or -- +norm   * +zero
        (a_pos_subn_v and b_pos_zero_v) or -- +denorm * +zero
        (a_neg_norm_v and b_neg_zero_v) or -- -norm   * -zero
        (a_neg_subn_v and b_neg_zero_v);   -- -denorm * -zero

      -- -zero --
      multiplier.res_class(fp_class_neg_zero_c) <=
        (a_pos_zero_v and b_neg_zero_v) or -- +zero   * -zero
        (a_pos_zero_v and b_neg_norm_v) or -- +zero   * -norm
        (a_pos_zero_v and b_neg_subn_v) or -- +zero   * -denorm
        (a_neg_zero_v and b_pos_zero_v) or -- -zero   * +zero
        (a_neg_zero_v and b_pos_norm_v) or -- -zero   * +norm
        (a_neg_zero_v and b_pos_subn_v) or -- -zero   * +denorm
        (a_neg_norm_v and b_pos_zero_v) or -- -norm   * +zero
        (a_neg_subn_v and b_pos_zero_v) or -- -denorm * +zero
        (a_pos_norm_v and b_neg_zero_v) or -- +norm   * -zero
        (a_pos_subn_v and b_neg_zero_v);   -- +denorm * -zero

      -- sNaN --
      multiplier.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
      -- qNaN --
      multiplier.res_class(fp_class_qnan_c) <=
        (a_snan_v or b_snan_v) or -- any input is sNaN
        (a_qnan_v or b_qnan_v) or -- any input is qNaN
        ((a_pos_inf_v  or a_neg_inf_v)  and (b_pos_zero_v or b_neg_zero_v)) or -- +/-inf * +/-zero
        ((a_pos_zero_v or a_neg_zero_v) and (b_pos_inf_v  or b_neg_inf_v));    -- +/-zero * +/-inf
    end if;
  end process multiplier_class_core;
761
 
762
  -- subnormal result: both subnormal class bits are tied to '0' here --
763
  multiplier.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
764
  multiplier.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
765
 
766
  -- unused: result and flags of the fu_mul interface are tied to zero --
767
  fu_mul.result <= (others => '0');
768
  fu_mul.flags  <= (others => '0');
769
 
770
 
771
  -- Adder/Subtractor Core (FADD, FSUB) -----------------------------------------------------
772
  -- -------------------------------------------------------------------------------------------
773 56 zero_gravi
  -- Clocked core of the adder/subtractor: compares the exponents, shifts the mantissa of
  -- the operand with the smaller exponent right (one bit per cycle, with guard/round/sticky
  -- extension) until exp_cnt equals the larger exponent, then adds/subtracts the mantissas
  -- and determines the result sign and the invalid-operation flag.
  adder_subtractor_core: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      addsub.latency   <= (others => def_rst_val_c);
      addsub.exp_comp  <= (others => def_rst_val_c);
      addsub.man_sreg  <= (others => def_rst_val_c);
      addsub.exp_cnt   <= (others => def_rst_val_c);
      addsub.man_g_ext <= def_rst_val_c;
      addsub.man_r_ext <= def_rst_val_c;
      addsub.man_s_ext <= def_rst_val_c;
      addsub.man_comp  <= def_rst_val_c;
      addsub.add_stage <= (others => def_rst_val_c);
      addsub.res_sign  <= def_rst_val_c;
      addsub.flags(fp_exc_nv_c) <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- arbitration / latency --
      if (ctrl_engine.state = S_IDLE) then -- hacky "reset"
        addsub.latency <= (others => '0');
      else
        addsub.latency(0) <= addsub.start; -- input comparator delay
        if (addsub.latency(0) = '1') then
          addsub.latency(1) <= '1';
          addsub.latency(2) <= '0';
        elsif (addsub.exp_cnt(7 downto 0) = addsub.large_exp) then -- radix point is aligned (shifting finished)
          addsub.latency(1) <= '0';
          addsub.latency(2) <= addsub.latency(1) and (not addsub.latency(0)); -- "shift done"
        end if;
        addsub.latency(3) <= addsub.latency(2); -- adder stage
        addsub.latency(4) <= addsub.latency(3); -- final stage
      end if;

      -- exponent check: find smaller number (radix-offset-only) --
      if (unsigned(fpu_operands.rs1(30 downto 23)) < unsigned(fpu_operands.rs2(30 downto 23))) then
        addsub.exp_comp(0) <= '1'; -- rs1 < rs2
      else
        addsub.exp_comp(0) <= '0'; -- rs1 >= rs2
      end if;
      if (unsigned(fpu_operands.rs1(30 downto 23)) = unsigned(fpu_operands.rs2(30 downto 23))) then
        addsub.exp_comp(1) <= '1'; -- rs1 == rs2
      else -- rs1 != rs2
        addsub.exp_comp(1) <= '0';
      end if;

      -- shift right small mantissa to align radix point --
      if (addsub.latency(0) = '1') then -- load phase
        if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
             fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c)) = '0') then -- no input is zero
          addsub.man_sreg <= addsub.small_man;
        else
          addsub.man_sreg <= (others => '0');
        end if;
        addsub.exp_cnt   <= '0' & addsub.small_exp;
        addsub.man_g_ext <= '0';
        addsub.man_r_ext <= '0';
        addsub.man_s_ext <= '0';
      elsif (addsub.exp_cnt(7 downto 0) /= addsub.large_exp) then -- shift right until same magnitude
        addsub.man_sreg  <= '0' & addsub.man_sreg(addsub.man_sreg'left downto 1);
        addsub.man_g_ext <= addsub.man_sreg(0);
        addsub.man_r_ext <= addsub.man_g_ext;
        addsub.man_s_ext <= addsub.man_s_ext or addsub.man_r_ext; -- sticky bit
        addsub.exp_cnt   <= std_ulogic_vector(unsigned(addsub.exp_cnt) + 1);
      end if;

      -- mantissa check: find smaller number (magnitude-only) --
      if (unsigned(addsub.man_sreg) <= unsigned(addsub.large_man)) then
        addsub.man_comp <= '1';
      else
        addsub.man_comp <= '0';
      end if;

      -- actual addition/subtraction (incl. overflow) --
      -- effective operation = instruction (add/sub) xor sign difference of the operands
      if ((ctrl_i(ctrl_ir_funct12_7_c) xor (fpu_operands.rs1(31) xor fpu_operands.rs2(31))) = '0') then -- add
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) + unsigned('0' & addsub.man_s));
      else -- sub
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) - unsigned('0' & addsub.man_s));
      end if;

      -- result sign --
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- add
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
          addsub.res_sign <= fpu_operands.rs1(31);
        else -- different signs
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
          else
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
          end if;
        end if;
      else -- sub
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
          else
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
          end if;
        else -- different signs
          addsub.res_sign <= fpu_operands.rs1(31);
        end if;
      end if;

      -- exception flags --
      -- invalid operation: only the *effective subtraction* of two infinities is invalid
      -- (e.g. +inf + -inf or +inf - +inf); +inf + +inf is a valid operation and must not set NV
      addsub.flags(fp_exc_nv_c) <=
        (fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
        (fpu_operands.rs2_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c)) and
        (ctrl_i(ctrl_ir_funct12_7_c) xor fpu_operands.rs1(31) xor fpu_operands.rs2(31));
    end if;
  end process adder_subtractor_core;
878
 
879
  -- exceptions - unused (tied to '0' here) --
880
  addsub.flags(fp_exc_dz_c) <= '0'; -- division by zero -> not possible
881
  addsub.flags(fp_exc_of_c) <= '0'; -- not possible here (but may occur in normalizer)
882
  addsub.flags(fp_exc_uf_c) <= '0'; -- not possible here (but may occur in normalizer)
883
  addsub.flags(fp_exc_nx_c) <= '0'; -- not possible here (but may occur in normalizer)
884
 
885
  -- exponent check: exp_comp(0) = '1' selects rs1 as the "small" and rs2 as the "large" operand --
  -- (mantissas get the hidden one prepended)
886
  addsub.small_exp <=        fpu_operands.rs1(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs2(30 downto 23);
887
  addsub.large_exp <=        fpu_operands.rs2(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs1(30 downto 23);
888
  addsub.small_man <= ('1' & fpu_operands.rs1(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs2(22 downto 00));
889
  addsub.large_man <= ('1' & fpu_operands.rs2(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs1(22 downto 00));
890
 
891
  -- mantissa check: man_comp = '1' makes the shifted mantissa (with guard/round/sticky bits) the small operand --
  -- (large_man is extended by three zero bits to the same width)
892
  addsub.man_s <= (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext) when (addsub.man_comp = '1') else (addsub.large_man & "000");
893
  addsub.man_l <= (addsub.large_man & "000") when (addsub.man_comp = '1') else (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext);
894
 
895
  -- latency: "done" is the most significant bit of addsub.latency --
896
  addsub.start   <= fu_addsub.start;
897
  addsub.done    <= addsub.latency(addsub.latency'left);
898
  fu_addsub.done <= addsub.done;
899
 
900
  -- mantissa result: lower 28 bits of the adder stage --
901
  addsub.res_sum <= addsub.add_stage(27 downto 0);
902
 
903
 
904
  -- result class -- 
905 56 zero_gravi
  -- Registered class of the addition/subtraction result, derived only from the classes of
  -- both operands (a = rs1, b = rs2) and the add/sub select bit. The subnormal result
  -- classes are not driven here.
  adder_subtractor_class_core: process(rstn_i, clk_i)
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
  begin
    if (rstn_i = '0') then
      addsub.res_class(fp_class_pos_inf_c)  <= def_rst_val_c;
      addsub.res_class(fp_class_neg_inf_c)  <= def_rst_val_c;
      addsub.res_class(fp_class_pos_zero_c) <= def_rst_val_c;
      addsub.res_class(fp_class_neg_zero_c) <= def_rst_val_c;
      addsub.res_class(fp_class_qnan_c)     <= def_rst_val_c;
      -- these class bits are clocked below, so they need a reset assignment as well
      -- (otherwise the async reset becomes an implicit clock-enable for these registers)
      addsub.res_class(fp_class_pos_norm_c) <= def_rst_val_c;
      addsub.res_class(fp_class_neg_norm_c) <= def_rst_val_c;
      addsub.res_class(fp_class_snan_c)     <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- minions --
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);

      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- addition
        -- +infinity --
        addsub.res_class(fp_class_pos_inf_c) <=
          (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    + +inf
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    + +zero
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   + +inf
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    + -zero
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   + +inf
          --
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    + +norm
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   + +inf
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    + +denorm
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm + +inf
          --
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    + -norm
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   + +inf
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    + -denorm
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm + +inf
        -- -infinity --
        addsub.res_class(fp_class_neg_inf_c) <=
          (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    + -inf
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    + +zero
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   + -inf
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    + -zero
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   + -inf
          --
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    + +norm
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   + -inf
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    + -norm
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   + -inf
          --
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    + +denorm
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm + -inf
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    + -denorm
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm + -inf

        -- +zero --
        addsub.res_class(fp_class_pos_zero_c) <=
          (a_pos_zero_v and b_pos_zero_v) or -- +zero + +zero
          (a_pos_zero_v and b_neg_zero_v) or -- +zero + -zero
          (a_neg_zero_v and b_pos_zero_v);   -- -zero + +zero
        -- -zero --
        addsub.res_class(fp_class_neg_zero_c) <=
          (a_neg_zero_v and b_neg_zero_v);   -- -zero + -zero

        -- qNaN --
        addsub.res_class(fp_class_qnan_c) <=
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
          (a_pos_inf_v and b_neg_inf_v) or -- +inf + -inf
          (a_neg_inf_v and b_pos_inf_v);   -- -inf + +inf

      else -- subtraction
        -- +infinity --
        addsub.res_class(fp_class_pos_inf_c) <=
          (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    - -inf
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    - +zero
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    - -zero
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    - +norm
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    - +denorm
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    - -norm
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    - -denorm
          --
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   - -inf
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   - -inf
          --
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   - -inf
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm - -inf
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   - -inf
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm - -inf
        -- -infinity --
        addsub.res_class(fp_class_neg_inf_c) <=
          (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    - +inf
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    - +zero
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    - -zero
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    - +norm
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    - +denorm
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    - -norm
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    - -denorm
          --
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   - +inf
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   - +inf
          --
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   - +inf
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm - +inf
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   - +inf
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm - +inf

        -- +zero --
        addsub.res_class(fp_class_pos_zero_c) <=
          (a_pos_zero_v and b_pos_zero_v) or -- +zero - +zero
          (a_pos_zero_v and b_neg_zero_v) or -- +zero - -zero
          (a_neg_zero_v and b_neg_zero_v);   -- -zero - -zero
        -- -zero --
        addsub.res_class(fp_class_neg_zero_c) <=
          (a_neg_zero_v and b_pos_zero_v);   -- -zero - +zero

        -- qNaN --
        addsub.res_class(fp_class_qnan_c) <=
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
          (a_pos_inf_v and b_pos_inf_v) or -- +inf - +inf
          (a_neg_inf_v and b_neg_inf_v);   -- -inf - -inf
      end if;

      -- normal: both class bits are set whenever both operands are normal --
      addsub.res_class(fp_class_pos_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +/-norm [sign is irrelevant here]
      addsub.res_class(fp_class_neg_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +/-norm [sign is irrelevant here]

      -- sNaN --
      addsub.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
    end if;
  end process adder_subtractor_class_core;
1044
 
1045
  -- subnormal result: both subnormal class bits are tied to '0' here --
1046
  addsub.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
1047
  addsub.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
1048
 
1049
  -- unused: result and flags of the fu_addsub interface are tied to zero --
1050
  fu_addsub.result <= (others => '0');
1051
  fu_addsub.flags  <= (others => '0');
1052
 
1053
 
1054
-- ****************************************************************************************************************************
1055
-- FPU Core - Normalize & Round
1056
-- ****************************************************************************************************************************
1057
 
1058
  -- Normalizer Input -----------------------------------------------------------------------
1059
  -- -------------------------------------------------------------------------------------------
1060
  normalizer_input_select: process(funct_ff, addsub, multiplier, fu_conv_i2f)
  begin
    -- Input multiplexer of the normalizer. The defaults below describe the
    -- integer-to-float path (op_i2f_c and every other function code); the add/sub and
    -- multiply paths override them inside the case statement.
    normalizer.start     <= fu_conv_i2f.done;
    normalizer.mode      <= '1'; -- int_to_float
    normalizer.sign      <= fu_conv_i2f.sign;
    normalizer.xexp      <= "001111111"; -- bias = 127
    normalizer.xmantissa <= (others => '0'); -- don't care
    normalizer.class     <= (others => '0'); -- don't care
    normalizer.flags_in  <= (others => '0'); -- no flags yet

    case funct_ff is

      when op_addsub_c => -- addition/subtraction
        normalizer.start    <= addsub.done;
        normalizer.mode     <= '0'; -- normalization
        normalizer.sign     <= addsub.res_sign;
        normalizer.xexp     <= addsub.exp_cnt;
        normalizer.class    <= addsub.res_class;
        normalizer.flags_in <= addsub.flags;
        -- upper 27 sum bits go to the top of the extended mantissa, the sum's LSB goes to
        -- bit 0; bits 20 downto 1 keep the all-zero default from above
        normalizer.xmantissa(47 downto 21) <= addsub.res_sum(27 downto 1);
        normalizer.xmantissa(0)            <= addsub.res_sum(0);

      when op_mul_c => -- multiplication
        normalizer.start     <= multiplier.done;
        normalizer.mode      <= '0'; -- normalization
        normalizer.sign      <= multiplier.sign;
        normalizer.xexp      <= '0' & multiplier.exp_res(7 downto 0);
        normalizer.xmantissa <= multiplier.product;
        normalizer.class     <= multiplier.res_class;
        normalizer.flags_in  <= multiplier.flags;

      when others => -- op_i2f_c: keep the defaults
        null;

    end case;
  end process normalizer_input_select;
1093
 
1094
 
1095
  -- Normalizer & Rounding Unit -------------------------------------------------------------
1096
  -- -------------------------------------------------------------------------------------------
1097
  -- Single shared normalizer/rounding instance; its operand inputs come from the
  -- normalizer_input_select multiplexer, the integer input directly from the i2f unit.
  neorv32_cpu_cp_fpu_normalizer_inst: neorv32_cpu_cp_fpu_normalizer
1098
  port map (
1099
    -- control --
1100
    clk_i      => clk_i,                -- global clock, rising edge
1101
    rstn_i     => rstn_i,               -- global reset, low-active, async
1102
    start_i    => normalizer.start,     -- trigger operation
1103
    rmode_i    => fpu_operands.frm,     -- rounding mode
1104
    funct_i    => normalizer.mode,      -- operation mode (0=norm&round, 1=int-to-float)
1105
    -- input --
1106
    sign_i     => normalizer.sign,      -- sign
1107
    exponent_i => normalizer.xexp,      -- extended exponent (9 bit)
1108
    mantissa_i => normalizer.xmantissa, -- extended mantissa (48 bit)
1109
    integer_i  => fu_conv_i2f.result,   -- integer input (used in int-to-float mode)
1110
    class_i    => normalizer.class,     -- input number class
1111
    flags_i    => normalizer.flags_in,  -- exception flags input
1112
    -- output --
1113
    result_o   => normalizer.result,    -- result (sign & exponent & mantissa float word)
1114
    flags_o    => normalizer.flags_out, -- exception flags
1115
    done_o     => normalizer.done       -- operation done
1116
  );
1117
 
1118
 
1119
-- ****************************************************************************************************************************
1120
-- FPU Core - Result
1121
-- ****************************************************************************************************************************
1122
 
1123
  -- Result Output to CPU Pipeline ----------------------------------------------------------
1124
  -- -------------------------------------------------------------------------------------------
1125 56 zero_gravi
  -- Registered result multiplexer: while ctrl_engine.valid is set, the result and flags of
  -- the functional unit selected by funct_ff are captured; in every other cycle both outputs
  -- are forced to all-zero.
  output_gate: process(rstn_i, clk_i)
1126 55 zero_gravi
  begin
1127 56 zero_gravi
    if (rstn_i = '0') then -- asynchronous, low-active reset
1128
      res_o    <= (others => def_rst_val_c);
1129
      fflags_o <= (others => def_rst_val_c);
1130
    elsif rising_edge(clk_i) then
1131 55 zero_gravi
      if (ctrl_engine.valid = '1') then
1132
        case funct_ff is
1133
          when op_class_c => -- classify
1134
            res_o    <= fu_classify.result;
1135
            fflags_o <= fu_classify.flags;
1136
          when op_comp_c => -- compare
1137
            res_o    <= fu_compare.result;
1138
            fflags_o <= fu_compare.flags;
1139
          when op_f2i_c => -- float-to-integer conversion
1140
            res_o    <= fu_conv_f2i.result;
1141
            fflags_o <= fu_conv_f2i.flags;
1142
          when op_sgnj_c => -- sign injection
1143
            res_o    <= fu_sign_inject.result;
1144
            fflags_o <= fu_sign_inject.flags;
1145
          when op_minmax_c => -- min/max
1146
            res_o    <= fu_min_max.result;
1147
            fflags_o <= fu_min_max.flags;
1148
          when others => -- op_mul_c, op_addsub_c, op_i2f_c, ... (everything that passes through the normalizer)
1149
            res_o    <= normalizer.result;
1150
            fflags_o <= normalizer.flags_out;
1151
        end case;
1152
      else -- no valid result in this cycle: drive zeros
1153
        res_o    <= (others => '0');
1154
        fflags_o <= (others => '0');
1155
      end if;
1156
    end if;
1157
  end process output_gate;
1158
 
1159
  -- operation done --
  -- OR of the done strobes of all functional units; add/sub, multiply and int-to-float do not
  -- appear here directly because they complete through the normalizer (normalizer.done)
1160
  fu_core_done <= fu_compare.done or fu_classify.done or fu_sign_inject.done or fu_min_max.done or normalizer.done or fu_conv_f2i.done;
1161
 
1162
 
1163 52 zero_gravi
end neorv32_cpu_cp_fpu_rtl;
1164 55 zero_gravi
 
1165
-- ###########################################################################################################################################
1166
-- ###########################################################################################################################################
1167
 
1168
-- #################################################################################################
1169
-- # << NEORV32 - Single-Precision Floating-Point Unit: Normalizer and Rounding Unit >>            #
1170
-- # ********************************************************************************************* #
1171
-- # This unit also performs integer-to-float conversions.                                         #
1172
-- # ********************************************************************************************* #
1173
-- # BSD 3-Clause License                                                                          #
1174
-- #                                                                                               #
1175
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1176
-- #                                                                                               #
1177
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1178
-- # permitted provided that the following conditions are met:                                     #
1179
-- #                                                                                               #
1180
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1181
-- #    conditions and the following disclaimer.                                                   #
1182
-- #                                                                                               #
1183
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1184
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1185
-- #    provided with the distribution.                                                            #
1186
-- #                                                                                               #
1187
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1188
-- #    endorse or promote products derived from this software without specific prior written      #
1189
-- #    permission.                                                                                #
1190
-- #                                                                                               #
1191
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1192
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1193
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1194
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1195
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1196
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1197
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1198
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1199
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1200
-- # ********************************************************************************************* #
1201
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1202
-- #################################################################################################
1203
 
1204
library ieee;
1205
use ieee.std_logic_1164.all;
1206
use ieee.numeric_std.all;
1207
 
1208
library neorv32;
1209
use neorv32.neorv32_package.all;
1210
 
1211
-- Normalizer & rounding unit interface. A start_i strobe latches sign, exponent, class and
-- flags; the unit then runs its multi-cycle FSM and pulses done_o for one cycle when
-- result_o / flags_o are being updated.
entity neorv32_cpu_cp_fpu_normalizer is
1212
  port (
1213
    -- control --
1214
    clk_i      : in  std_ulogic; -- global clock, rising edge
1215
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1216
    start_i    : in  std_ulogic; -- trigger operation
1217
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
1218
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
1219
    -- input --
1220
    sign_i     : in  std_ulogic; -- sign
1221
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent (bit 8 is the overflow/underflow extension)
1222
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa (used in norm&round mode)
1223
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input (used in int-to-float mode)
1224
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
1225
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
1226
    -- output --
1227
    result_o   : out std_ulogic_vector(31 downto 0); -- float result (sign & 8-bit exponent & 23-bit mantissa)
1228
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags output
1229
    done_o     : out std_ulogic -- operation done (single-cycle pulse)
1230
  );
1231
end neorv32_cpu_cp_fpu_normalizer;
1232
 
1233
architecture neorv32_cpu_cp_fpu_normalizer_rtl of neorv32_cpu_cp_fpu_normalizer is
1234
 
1235
  -- controller --
1236
  -- FSM states and the register set of the normalizer control engine
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_I2F, S_CHECK_I2F, S_PREPARE_NORM, S_PREPARE_SHIFT, S_NORMALIZE_BUSY, S_ROUND, S_CHECK, S_FINALIZE);
1237
  type ctrl_t is record
1238
    state   : ctrl_engine_state_t; -- current state
1239
    norm_r  : std_ulogic; -- normalization round 0 or 1 (0 = before rounding, 1 = after rounding)
1240
    cnt     : std_ulogic_vector(08 downto 0); -- iteration counter/exponent (incl. overflow)
1241
    cnt_pre : std_ulogic_vector(08 downto 0); -- counter value of the previous cycle (for wrap-around detection)
1242
    cnt_of  : std_ulogic; -- counter overflow
1243
    cnt_uf  : std_ulogic; -- counter underflow
1244
    rounded : std_ulogic; -- output is rounded
1245
    res_sgn : std_ulogic; -- result sign
1246
    res_exp : std_ulogic_vector(07 downto 0); -- result exponent
1247
    res_man : std_ulogic_vector(22 downto 0); -- result mantissa
1248
    class   : std_ulogic_vector(09 downto 0); -- number class latched from class_i
1249
    flags   : std_ulogic_vector(04 downto 0); -- exception flags (latched from flags_i, extended by the FSM)
1250
  end record;
1251
  signal ctrl : ctrl_t;
1252
 
1253
  -- normalization shift register --
1254
  -- shift register used for normalization: upper & lower & guard/round/sticky form one
  -- bit-serial shift chain (shifted by one position per clock cycle)
  type sreg_t is record
1255
    done  : std_ulogic; -- upper = 1 (only bit 0 set): value is normalized (combinational)
1256
    dir   : std_ulogic; -- shift direction: 0=right, 1=left
1257
    zero  : std_ulogic; -- upper is all-zero (combinational)
1258
    upper : std_ulogic_vector(31 downto 0); -- integer part; bit 0 is the hidden-one position
1259
    lower : std_ulogic_vector(22 downto 0); -- fraction bits (become the result mantissa)
1260
    ext_g : std_ulogic; -- guard bit
1261
    ext_r : std_ulogic; -- round bit
1262
    ext_s : std_ulogic; -- sticky bit
1263
  end record;
1264
  signal sreg : sreg_t;
1265
 
1266
  -- rounding unit --
1267
  -- control and result signals of the combinational rounding unit
  type round_t is record
1268
    en     : std_ulogic; -- enable rounding
1269
    sub    : std_ulogic; -- 0=increment, 1=decrement (as implemented by the rounding adder)
1270
    output : std_ulogic_vector(24 downto 0); -- mantissa size + hidden one + 1
1271
  end record;
1272
  signal round : round_t;
1273
 
1274
begin
1275
 
1276
  -- Control Engine -------------------------------------------------------------------------
1277
  -- -------------------------------------------------------------------------------------------
1278
  -- Normalizer control FSM including the normalization shift register.
  -- Clocked on the rising edge of clk_i, asynchronous low-active reset via rstn_i.
  --
  -- Sequence: S_IDLE latches the request; the shift register is loaded (S_PREPARE_I2F or
  -- S_PREPARE_NORM) and shifted one bit per cycle until only the hidden one is left in
  -- sreg.upper (S_NORMALIZE_BUSY). The mantissa is rounded once (S_ROUND), normalized a
  -- second time (rounding may have carried into bit 1), checked for exponent over-/underflow
  -- (S_CHECK) and the result word is assembled (S_FINALIZE, done_o pulses for one cycle).
  --
  -- The inexact (NX) flag is derived from the discarded guard/round/sticky bits, so it is
  -- also raised when the mantissa is merely truncated (e.g. round-towards-zero) and not only
  -- when the rounding adder was enabled.
  ctrl_engine: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl.state   <= S_IDLE;
      ctrl.norm_r  <= def_rst_val_c;
      ctrl.cnt     <= (others => def_rst_val_c);
      ctrl.cnt_pre <= (others => def_rst_val_c);
      ctrl.cnt_of  <= def_rst_val_c;
      ctrl.cnt_uf  <= def_rst_val_c;
      ctrl.rounded <= def_rst_val_c;
      ctrl.res_exp <= (others => def_rst_val_c);
      ctrl.res_man <= (others => def_rst_val_c);
      ctrl.res_sgn <= def_rst_val_c;
      ctrl.class   <= (others => def_rst_val_c);
      ctrl.flags   <= (others => def_rst_val_c);
      --
      sreg.upper   <= (others => def_rst_val_c);
      sreg.lower   <= (others => def_rst_val_c);
      sreg.dir     <= def_rst_val_c;
      sreg.ext_g   <= def_rst_val_c;
      sreg.ext_r   <= def_rst_val_c;
      sreg.ext_s   <= def_rst_val_c;
      --
      done_o       <= '0';
    elsif rising_edge(clk_i) then
      -- defaults --
      ctrl.cnt_pre <= ctrl.cnt; -- remember previous counter value for wrap-around detection
      done_o       <= '0'; -- single-cycle pulse, only set in S_FINALIZE

      -- exponent counter underflow/overflow --
      -- (these assignments are overridden by the clear in S_IDLE, which comes later in this process)
      if ((ctrl.cnt_pre(8 downto 7) = "01") and (ctrl.cnt(8 downto 7) = "10")) then -- overflow
        ctrl.cnt_of <= '1';
      elsif (ctrl.cnt_pre(8 downto 7) = "00") and (ctrl.cnt(8 downto 7) = "11") then -- underflow
        ctrl.cnt_uf <= '1';
      end if;

      -- fsm --
      case ctrl.state is

        when S_IDLE => -- wait for operation trigger
        -- ------------------------------------------------------------
          ctrl.norm_r  <= '0'; -- start with first normalization
          ctrl.rounded <= '0'; -- not rounded yet
          ctrl.cnt_of  <= '0';
          ctrl.cnt_uf  <= '0';
          --
          if (start_i = '1') then
            ctrl.cnt     <= exponent_i;
            ctrl.res_sgn <= sign_i;
            ctrl.class   <= class_i;
            ctrl.flags   <= flags_i;
            if (funct_i = '0') then -- float -> float
              ctrl.state <= S_PREPARE_NORM;
            else -- integer -> float
              ctrl.state <= S_PREPARE_I2F;
            end if;
          end if;

        when S_PREPARE_I2F => -- prepare integer-to-float conversion
        -- ------------------------------------------------------------
          sreg.upper <= integer_i;
          sreg.lower <= (others => '0');
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          sreg.dir   <= '0'; -- shift right
          ctrl.state <= S_CHECK_I2F;

        when S_CHECK_I2F => -- check if converting zero
        -- ------------------------------------------------------------
          if (sreg.zero = '1') then -- all zero
            ctrl.class(fp_class_pos_zero_c) <= '1';
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_NORMALIZE_BUSY;
          end if;

        when S_PREPARE_NORM => -- prepare "normal" normalization & rounding
        -- ------------------------------------------------------------
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= mantissa_i(47 downto 46);
          sreg.lower <= mantissa_i(45 downto 23);
          sreg.ext_g <= mantissa_i(22);
          sreg.ext_r <= mantissa_i(21);
          sreg.ext_s <= or_reduce_f(mantissa_i(20 downto 0));
          -- check for special cases --
          if ((ctrl.class(fp_class_snan_c)       or ctrl.class(fp_class_qnan_c)       or -- NaN
               ctrl.class(fp_class_neg_zero_c)   or ctrl.class(fp_class_pos_zero_c)   or -- zero
               ctrl.class(fp_class_neg_denorm_c) or ctrl.class(fp_class_pos_denorm_c) or -- subnormal
               ctrl.class(fp_class_neg_inf_c)    or ctrl.class(fp_class_pos_inf_c)    or -- infinity
               ctrl.flags(fp_exc_uf_c) or -- underflow
               ctrl.flags(fp_exc_of_c) or -- overflow
               ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
            ctrl.state <= S_FINALIZE; -- special result: skip normalization and rounding
          else
            ctrl.state <= S_PREPARE_SHIFT;
          end if;

        when S_PREPARE_SHIFT => -- prepare shift direction (for "normal" normalization only)
        -- ------------------------------------------------------------
          if (sreg.zero = '0') then -- integer part is non-zero: number >= 1.0
            sreg.dir <= '0'; -- shift right
          else -- integer part is zero: number < 1.0
            sreg.dir <= '1'; -- shift left
          end if;
          ctrl.state <= S_NORMALIZE_BUSY;

        when S_NORMALIZE_BUSY => -- running normalization cycle
        -- ------------------------------------------------------------
          -- shift until normalized or exception --
          if (sreg.done = '1') or (ctrl.cnt_uf = '1') or (ctrl.cnt_of = '1') then
            -- normalization control --
            ctrl.norm_r <= '1';
            if (ctrl.norm_r = '0') then -- first normalization cycle done
              ctrl.state <= S_ROUND;
            else -- second normalization cycle done
              ctrl.state <= S_CHECK;
            end if;
          else
            if (sreg.dir = '0') then -- shift right
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) + 1);
              sreg.upper <= '0' & sreg.upper(sreg.upper'left downto 1);
              sreg.lower <= sreg.upper(0) & sreg.lower(sreg.lower'left downto 1);
              sreg.ext_g <= sreg.lower(0);
              sreg.ext_r <= sreg.ext_g;
              sreg.ext_s <= sreg.ext_r or sreg.ext_s; -- sticky bit
            else -- shift left
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
              sreg.upper <= sreg.upper(sreg.upper'left-1 downto 0) & sreg.lower(sreg.lower'left);
              sreg.lower <= sreg.lower(sreg.lower'left-1 downto 0) & sreg.ext_g;
              sreg.ext_g <= sreg.ext_r;
              sreg.ext_r <= sreg.ext_s;
              sreg.ext_s <= sreg.ext_s; -- sticky bit
            end if;
          end if;

        when S_ROUND => -- rounding cycle (after first normalization)
        -- ------------------------------------------------------------
          -- the result is inexact as soon as any discarded bit is set - also if the mantissa
          -- is only truncated (round.en = '0'), e.g. in round-towards-zero mode
          ctrl.rounded <= ctrl.rounded or round.en or sreg.ext_g or sreg.ext_r or sreg.ext_s;
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= round.output(24 downto 23);
          sreg.lower <= round.output(22 downto 00);
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          ctrl.state <= S_PREPARE_SHIFT; -- normalize again: rounding may have carried into bit 1

        when S_CHECK => -- check for overflow/underflow
        -- ------------------------------------------------------------
          if (ctrl.cnt_uf = '1') then -- underflow
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt_of = '1') then -- overflow
            ctrl.flags(fp_exc_of_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"00") then -- subnormal
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"FF") then -- infinity
            ctrl.flags(fp_exc_of_c) <= '1';
          end if;
          ctrl.state  <= S_FINALIZE;

        when S_FINALIZE => -- result finalization
        -- ------------------------------------------------------------
          -- generate result word (the ORDER of checks is important here!) --
          if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') then -- sNaN / qNaN
            ctrl.res_sgn <= fp_single_qnan_c(31);
            ctrl.res_exp <= fp_single_qnan_c(30 downto 23);
            ctrl.res_man <= fp_single_qnan_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_inf_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- infinity
                (ctrl.flags(fp_exc_of_c) = '1') then -- overflow
            ctrl.res_exp <= fp_single_pos_inf_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_inf_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') then -- zero
            ctrl.res_sgn <= ctrl.class(fp_class_neg_zero_c);
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23);
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          elsif (ctrl.flags(fp_exc_uf_c) = '1') or -- underflow
                (sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          else -- result is ok
            ctrl.res_exp <= ctrl.cnt(7 downto 0);
            ctrl.res_man <= sreg.lower;
          end if;
          -- generate exception flags --
          ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if result is rounded or truncated
          --
          done_o     <= '1';
          ctrl.state <= S_IDLE;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl.state <= S_IDLE;

      end case;
    end if;
  end process ctrl_engine;
1475
 
1476
  -- stop shifting when normalized --
1477 60 zero_gravi
  sreg.done <= (not or_reduce_f(sreg.upper(sreg.upper'left downto 1))) and sreg.upper(0); -- upper(31 downto 1) is all-zero and the hidden one (bit 0) is set
1478 55 zero_gravi
 
1479
  -- upper register is all-zero, including the hidden bit (sreg.lower is NOT part of this check) --
1480 60 zero_gravi
  sreg.zero <= not or_reduce_f(sreg.upper);
1481 55 zero_gravi
 
1482
  -- result: assemble the 32-bit float word from the registered sign / exponent / mantissa --
1483
  result_o(31)           <= ctrl.res_sgn;
1484
  result_o(30 downto 23) <= ctrl.res_exp;
1485
  result_o(22 downto  0) <= ctrl.res_man;
1486
 
1487
  -- exception flags: 1:1 pass-through of the registered flag bits --
1488
  flags_o(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c); -- invalid operation
1489
  flags_o(fp_exc_dz_c) <= ctrl.flags(fp_exc_dz_c); -- divide by zero (never set by this unit, only forwarded from flags_i)
1490
  flags_o(fp_exc_of_c) <= ctrl.flags(fp_exc_of_c); -- overflow
1491
  flags_o(fp_exc_uf_c) <= ctrl.flags(fp_exc_uf_c); -- underflow
1492
  flags_o(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c); -- inexact
1493
 
1494
 
1495
  -- Rounding -------------------------------------------------------------------------------
1496
  -- -------------------------------------------------------------------------------------------
1497
  -- Rounding decision logic (combinational).
  -- The datapath is sign-magnitude: the mantissa in the shift register is a magnitude and the
  -- sign is kept separately in ctrl.res_sgn. Dropping the guard/round/sticky bits therefore
  -- always rounds TOWARDS ZERO, and enabling the incrementer moves the result AWAY from zero.
  -- The directed rounding modes consequently have to take the result sign into account:
  --   round down (-inf): negative inexact values get a larger magnitude, positive ones are truncated
  --   round up   (+inf): positive inexact values get a larger magnitude, negative ones are truncated
  -- The magnitude is never decremented (round.sub stays '0').
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl)
    variable inexact_v : std_ulogic; -- at least one discarded bit is set
  begin
    inexact_v := sreg.ext_g or sreg.ext_r or sreg.ext_s;
    -- defaults --
    round.en  <= '0';
    round.sub <= '0';
    -- rounding mode --
    case rmode_i(2 downto 0) is
      when "000" => -- round to nearest, ties to even
        if (sreg.ext_g = '0') then
          round.en <= '0'; -- less than half an LSB discarded: truncate
        else
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
            round.en <= sreg.lower(0); -- round up if LSB of mantissa is set
          else
            round.en <= '1'; -- round up
          end if;
        end if;
        round.sub <= '0'; -- increment
      when "001" => -- round towards zero
        round.en <= '0'; -- no rounding -> just truncate
      when "010" => -- round down (towards -infinity)
        round.en  <= ctrl.res_sgn and inexact_v; -- only negative values grow in magnitude
        round.sub <= '0'; -- increment magnitude
      when "011" => -- round up (towards +infinity)
        round.en  <= (not ctrl.res_sgn) and inexact_v; -- only positive values grow in magnitude
        round.sub <= '0'; -- increment magnitude
      when "100" => -- round to nearest, ties to max magnitude
        round.en <= '0'; -- FIXME / TODO: not supported yet, behaves like truncation
      when others => -- undefined
        round.en <= '0';
    end case;
  end process rounding_unit_ctrl;
1529
 
1530
 
1531
  -- incrementer/decrementer --
1532
  -- Combinational +1/-1 adder on the mantissa magnitude. The operand is extended by one
  -- leading zero so that a carry out of the hidden-one position is kept in round.output(24).
  rounding_unit_add: process(round, sreg)
1533
    variable tmp_v : std_ulogic_vector(24 downto 0);
1534
  begin
1535
    tmp_v := '0' & sreg.upper(0) & sreg.lower; -- carry bit & hidden one & 23-bit fraction
1536
    if (round.en = '1') then
1537
      if (round.sub = '0') then -- increment
1538
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
1539
      else -- decrement
1540
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
1541
      end if;
1542
    else -- do nothing
1543
      round.output <= tmp_v;
1544
    end if;
1545
  end process rounding_unit_add;
1546
 
1547
 
1548
end neorv32_cpu_cp_fpu_normalizer_rtl;
1549
 
1550
-- ###########################################################################################################################################
1551
-- ###########################################################################################################################################
1552
 
1553
-- #################################################################################################
1554
-- # << NEORV32 - Single-Precision Floating-Point Unit: Float-To-Int Converter >>                  #
1555
-- # ********************************************************************************************* #
1556
-- # BSD 3-Clause License                                                                          #
1557
-- #                                                                                               #
1558
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1559
-- #                                                                                               #
1560
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1561
-- # permitted provided that the following conditions are met:                                     #
1562
-- #                                                                                               #
1563
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1564
-- #    conditions and the following disclaimer.                                                   #
1565
-- #                                                                                               #
1566
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1567
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1568
-- #    provided with the distribution.                                                            #
1569
-- #                                                                                               #
1570
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1571
-- #    endorse or promote products derived from this software without specific prior written      #
1572
-- #    permission.                                                                                #
1573
-- #                                                                                               #
1574
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1575
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1576
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1577
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1578
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1579
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1580
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1581
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1582
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1583
-- # ********************************************************************************************* #
1584
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1585
-- #################################################################################################
1586
 
1587
library ieee;
1588
use ieee.std_logic_1164.all;
1589
use ieee.numeric_std.all;
1590
 
1591
library neorv32;
1592
use neorv32.neorv32_package.all;
1593
 
1594
-- Float-to-integer conversion unit: converts a decomposed single-precision operand
-- (sign / exponent / mantissa / class) into a 32-bit signed or unsigned integer.
-- Multi-cycle: the mantissa is shifted one bit per clock, see the ctrl_engine process.
entity neorv32_cpu_cp_fpu_f2i is
1595
  port (
1596
    -- control --
1597
    clk_i      : in  std_ulogic; -- global clock, rising edge
1598
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1599
    start_i    : in  std_ulogic; -- trigger operation (evaluated in idle state only; operand inputs are latched on trigger)
1600
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode (read combinationally by the rounding unit, not latched here)
1601
    funct_i    : in  std_ulogic; -- 0=signed, 1=unsigned (registered while the unit is idle)
1602
    -- input --
1603
    sign_i     : in  std_ulogic; -- sign
1604
    exponent_i : in  std_ulogic_vector(07 downto 0); -- exponent (biased; a bias of 127 is removed internally)
1605
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa (fraction bits only; the hidden one is inserted internally)
1606
    class_i    : in  std_ulogic_vector(09 downto 0); -- operand class (bits indexed by the fp_class_*_c constants)
1607
    -- output --
1608
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result (registered; updated together with done_o)
1609
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags (bits indexed by the fp_exc_*_c constants)
1610
    done_o     : out std_ulogic -- operation done (high for one clock cycle)
1611
  );
1612
end neorv32_cpu_cp_fpu_f2i;
1613
 
1614
architecture neorv32_cpu_cp_fpu_f2i_rtl of neorv32_cpu_cp_fpu_f2i is
1615
 
1616
  -- controller --
  -- FSM state type and the register set owned by the ctrl_engine process.
1617
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_F2I, S_NORMALIZE_BUSY, S_ROUND, S_FINALIZE);
1618
  type ctrl_t is record
1619
    state      : ctrl_engine_state_t; -- current state
1620
    unsign     : std_ulogic; -- registered funct_i: 0=signed, 1=unsigned conversion
1621
    cnt        : std_ulogic_vector(07 downto 0); -- iteration counter/exponent (biased exponent first, then remaining left shifts)
1622
    sign       : std_ulogic; -- latched operand sign
1623
    class      : std_ulogic_vector(09 downto 0); -- latched operand class
1624
    rounded    : std_ulogic; -- output is rounded (set when the rounding unit was enabled)
1625
    over       : std_ulogic; -- output is overflowing
1626
    under      : std_ulogic; -- output is underflowing
1627
    result_tmp : std_ulogic_vector(31 downto 0); -- rounded magnitude, before sign adaption / corner-case selection
1628
    result     : std_ulogic_vector(31 downto 0); -- final result, drives result_o
1629
  end record;
1630
  signal ctrl : ctrl_t;
1631
 
1632
  -- conversion shift register --
  -- int & mant form one left-shifting register: each normalization step moves the
  -- MSB of mant into the LSB of int.
1633
  type sreg_t is record
1634
    int   : std_ulogic_vector(31 downto 0); -- integer part; bit 0 is preloaded with the hidden one (or zero if exponent <= 126)
1635
    mant  : std_ulogic_vector(22 downto 0); -- fraction bits not (yet) shifted into the integer part
1636
    ext_g : std_ulogic; -- guard bit (combinational: MSB of mant)
1637
    ext_r : std_ulogic; -- round bit (combinational: MSB-1 of mant)
1638
    ext_s : std_ulogic; -- sticky bit (registered, OR-accumulated during normalization)
1639
  end record;
1640
  signal sreg : sreg_t;
1641
 
1642
  -- rounding unit --
  -- Purely combinational signals produced by rounding_unit_ctrl / rounding_unit_add.
1643
  type round_t is record
1644
    en     : std_ulogic; -- enable rounding
1645
    sub    : std_ulogic; -- 0=increment, 1=decrement (matches the usage in rounding_unit_add)
1646
    output : std_ulogic_vector(32 downto 0); -- result + overflow (bit 32 is the carry/borrow out)
1647
  end record;
1648
  signal round : round_t;
1649
 
1650
begin
1651
 
1652
  -- Control Engine -------------------------------------------------------------------------
1653
  -- -------------------------------------------------------------------------------------------
1654
  -- Sequential control FSM of the converter.
  --   S_IDLE           : wait for start_i, latch operand and clear status flags
  --   S_PREPARE_F2I    : remove exponent bias, preload hidden one, detect terminal classes
  --   S_NORMALIZE_BUSY : shift mantissa into integer register, one bit per clock
  --   S_ROUND          : register the output of the combinational rounding unit
  --   S_FINALIZE       : select saturated / zero / sign-adapted result, pulse done_o
  ctrl_engine: process(rstn_i, clk_i)
1655
  begin
1656
    if (rstn_i = '0') then -- asynchronous, low-active reset
1657
      ctrl.state      <= S_IDLE;
1658 56 zero_gravi
      ctrl.cnt        <= (others => def_rst_val_c);
1659
      ctrl.sign       <= def_rst_val_c;
1660
      ctrl.class      <= (others => def_rst_val_c);
1661
      ctrl.rounded    <= def_rst_val_c;
1662
      ctrl.over       <= def_rst_val_c;
1663
      ctrl.under      <= def_rst_val_c;
1664
      ctrl.unsign     <= def_rst_val_c;
1665
      ctrl.result     <= (others => def_rst_val_c);
1666
      ctrl.result_tmp <= (others => def_rst_val_c);
1667
      sreg.int        <= (others => def_rst_val_c);
1668
      sreg.mant       <= (others => def_rst_val_c);
1669
      sreg.ext_s      <= def_rst_val_c;
1670 55 zero_gravi
      done_o          <= '0';
1671
    elsif rising_edge(clk_i) then
1672
      -- defaults --
1673
      done_o <= '0'; -- overridden in S_FINALIZE only, so done_o is a single-cycle pulse
1674
 
1675
      -- fsm --
1676
      case ctrl.state is
1677
 
1678
        when S_IDLE => -- wait for operation trigger
1679
        -- ------------------------------------------------------------
1680
          ctrl.rounded <= '0'; -- not rounded yet
1681
          ctrl.over    <= '0'; -- not overflowing yet
1682
          ctrl.under   <= '0'; -- not underflowing yet
1683
          ctrl.unsign  <= funct_i; -- sampled in every idle cycle (not gated by start_i)
1684
          sreg.ext_s   <= '0'; -- init
1685
          if (start_i = '1') then -- latch operand and start conversion
1686
            ctrl.cnt    <= exponent_i;
1687
            ctrl.sign   <= sign_i;
1688
            ctrl.class  <= class_i;
1689
            sreg.mant   <= mantissa_i;
1690
            ctrl.state  <= S_PREPARE_F2I;
1691
          end if;
1692
 
1693
        when S_PREPARE_F2I => -- prepare float-to-integer conversion
1694
        -- ------------------------------------------------------------
1695
          if (unsigned(ctrl.cnt) < 126) then -- less than 0.5
1696
            sreg.int    <= (others => '0');
1697
            ctrl.under  <= '1'; -- this is an underflow!
1698
            ctrl.cnt    <= (others => '0'); -- no shifts required
1699
          elsif (unsigned(ctrl.cnt) = 126) then -- num < 1.0 but num >= 0.5
1700
            sreg.int    <= (others => '0');
1701
            sreg.mant   <= '1' & sreg.mant(sreg.mant'left downto 1); -- hidden one becomes the fraction MSB (guard bit position)
1702
            ctrl.cnt    <= (others => '0'); -- no shifts required
1703
          else
1704
            sreg.int    <= (others => '0');
1705
            sreg.int(0) <= '1'; -- hidden one
1706
            ctrl.cnt    <= std_ulogic_vector(unsigned(ctrl.cnt) - 127); -- remove bias to get raw number of left shifts
1707
          end if;
1708
          -- check terminal cases --
          -- infinity, zero and NaN operands skip normalization and rounding
1709
          if ((ctrl.class(fp_class_neg_inf_c)  or ctrl.class(fp_class_pos_inf_c) or
1710
               ctrl.class(fp_class_neg_zero_c) or ctrl.class(fp_class_pos_zero_c) or
1711
               ctrl.class(fp_class_snan_c)     or ctrl.class(fp_class_qnan_c)) = '1') then
1712
            ctrl.state <= S_FINALIZE;
1713
          else
1714
            ctrl.state <= S_NORMALIZE_BUSY;
1715
          end if;
1716
 
1717
        when S_NORMALIZE_BUSY => -- running normalization cycle
1718
        -- ------------------------------------------------------------
          -- NOTE(review): the sticky bit is OR-accumulated in every cycle of this state, i.e. also
          -- from mantissa bits that are shifted into the integer part later on - confirm this is intended.
1719 60 zero_gravi
          sreg.ext_s <= sreg.ext_s or or_reduce_f(sreg.mant(sreg.mant'left-2 downto 0)); -- sticky bit
1720
          if (or_reduce_f(ctrl.cnt(ctrl.cnt'left-1 downto 0)) = '0') then -- no shifts left (only the lower 7 bits of cnt are checked)
1721 55 zero_gravi
            if (ctrl.unsign = '0') then -- signed conversion
1722
              ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- update overrun flag again to check for numerical overflow into sign bit
1723
            end if;
1724
            ctrl.state <= S_ROUND;
1725
          else -- shift left
1726
            ctrl.cnt  <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
1727
            sreg.int  <= sreg.int(sreg.int'left-1 downto 0) & sreg.mant(sreg.mant'left); -- mantissa MSB enters integer LSB
1728
            sreg.mant <= sreg.mant(sreg.mant'left-1 downto 0) & '0';
1729
            ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- a set bit is shifted out of the integer register
1730
          end if;
1731
 
1732
        when S_ROUND => -- rounding cycle
1733
        -- ------------------------------------------------------------
1734
          ctrl.rounded    <= ctrl.rounded or round.en; -- drives the inexact flag
1735
          ctrl.over       <= ctrl.over or round.output(round.output'left); -- overflow after rounding
1736
          ctrl.result_tmp <= round.output(round.output'left-1 downto 0);
1737
          ctrl.state      <= S_FINALIZE;
1738
 
1739
        when S_FINALIZE => -- check for corner cases and finalize result
1740
        -- ------------------------------------------------------------
1741
          if (ctrl.unsign = '1') then -- unsigned conversion
1742
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- NaN or +inf
1743
               ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
1744
              ctrl.result <= x"ffffffff"; -- saturate to largest unsigned value
1745
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.class(fp_class_neg_inf_c) = '1') or -- subnormal zero or -inf
1746
               (ctrl.sign = '1') or (ctrl.under = '1') then -- negative out-of-range or underflow
1747
              ctrl.result <= x"00000000";
1748
            else
1749
              ctrl.result <= ctrl.result_tmp;
1750
            end if;
1751
 
1752
          else -- signed conversion
1753
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or  -- NaN or +inf
1754
                  ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
1755
              ctrl.result <= x"7fffffff"; -- saturate to largest positive signed value
1756
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.under = '1') then -- subnormal zero or underflow
1757
              ctrl.result <= x"00000000";
1758
            elsif (ctrl.class(fp_class_neg_inf_c) = '1') or ((ctrl.sign = '1') and (ctrl.over = '1')) then -- -inf or negative out-of-range
1759
              ctrl.result <= x"80000000"; -- saturate to most negative signed value
1760
            else -- result is ok, make sign adaption
1761
              if (ctrl.sign = '1') then
1762
                ctrl.result <= std_ulogic_vector(0 - unsigned(ctrl.result_tmp)); -- two's complement negation of the magnitude
1763
              else
1764
                ctrl.result <= ctrl.result_tmp;
1765
              end if;
1766
            end if;
1767
          end if;
1768
          done_o     <= '1'; -- result is written in this very cycle
1769
          ctrl.state <= S_IDLE;
1770
 
1771
        when others => -- undefined
1772
        -- ------------------------------------------------------------
1773
          ctrl.state <= S_IDLE;
1774
 
1775
      end case;
1776
    end if;
1777
  end process ctrl_engine;
1778
 
1779
  -- result --
  -- result and flags are driven continuously from the ctrl registers
1780
  result_o <= ctrl.result;
1781
 
1782
  -- exception flags --
  -- NOTE(review): out-of-range and infinity inputs are reported via the overflow flag here; the RISC-V
  -- F specification describes the invalid flag for out-of-range conversions - confirm intended mapping.
1783
  flags_o(fp_exc_nv_c) <= ctrl.class(fp_class_snan_c) or ctrl.class(fp_class_qnan_c); -- invalid operation
1784
  flags_o(fp_exc_dz_c) <= '0'; -- divide by zero - not possible here
1785
  flags_o(fp_exc_of_c) <= ctrl.over or ctrl.class(fp_class_pos_inf_c) or ctrl.class(fp_class_neg_inf_c); -- overflow
1786
  flags_o(fp_exc_uf_c) <= ctrl.under; -- underflow
1787
  flags_o(fp_exc_nx_c) <= ctrl.rounded; -- inexact if result was rounded
1788
 
1789
 
1790
  -- Rounding -------------------------------------------------------------------------------
1791
  -- -------------------------------------------------------------------------------------------
1792
  -- Combinational rounding decision: derives round.en (modify the integer part) and
  -- round.sub (0=increment, 1=decrement) from the rounding mode and the guard/round/sticky bits.
  -- NOTE(review): the operand sign (ctrl.sign) is not evaluated here although sreg.int is negated
  -- afterwards for negative signed results - confirm the directed modes "010"/"011" for negative operands.
  -- NOTE(review): modes "001" and "100" never assert round.en, so ctrl.rounded (inexact flag)
  -- stays clear for them even if fraction bits are discarded - confirm this is intended.
  rounding_unit_ctrl: process(rmode_i, sreg)
1793
  begin
1794
    -- defaults --
1795
    round.en  <= '0';
1796
    round.sub <= '0';
1797
    -- rounding mode --
1798
    case rmode_i(2 downto 0) is
1799
      when "000" => -- round to nearest, ties to even
1800
        if (sreg.ext_g = '0') then
1801
          round.en <= '0'; -- round down (do nothing)
1802
        else
1803
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
1804
            round.en <= sreg.int(0); -- round up if LSB of integer is set
1805
          else
1806
            round.en <= '1'; -- round up
1807
          end if;
1808
        end if;
1809
        round.sub <= '0'; -- increment
1810
      when "001" => -- round towards zero
1811
        round.en <= '0'; -- no rounding -> just truncate
1812
      when "010" => -- round down (towards -infinity)
1813
        round.en  <= sreg.ext_g or sreg.ext_r or sreg.ext_s; -- any discarded fraction bit set
1814
        round.sub <= '1'; -- decrement
1815
      when "011" => -- round up (towards +infinity)
1816
        round.en  <= sreg.ext_g or sreg.ext_r or sreg.ext_s; -- any discarded fraction bit set
1817
        round.sub <= '0'; -- increment
1818
      when "100" => -- round to nearest, ties to max magnitude
1819
        round.en <= '0'; -- FIXME / TODO (not implemented: behaves like truncation)
1820
      when others => -- undefined
1821
        round.en <= '0';
1822
    end case;
1823
  end process rounding_unit_ctrl;
1824
 
1825
  -- rounding: guard and round bits --
  -- taken directly from the two uppermost bits of the remaining fraction (no extra registers)
1826
  sreg.ext_g <= sreg.mant(sreg.mant'left); -- next bit that would be shifted into the integer LSB
1827
  sreg.ext_r <= sreg.mant(sreg.mant'left-1); -- bit right below the guard bit
1828
 
1829
 
1830
  -- incrementer/decrementer --
  -- Combinational: applies the rounding decision (round.en / round.sub) to the integer part.
  -- round.output is one bit wider than sreg.int; bit 32 is the carry/borrow out.
  -- NOTE(review): decrementing an all-zero sreg.int wraps around and sets bit 32, which is
  -- registered as overflow in S_ROUND - confirm this case cannot occur or is handled.
1831
  rounding_unit_add: process(round, sreg)
1832
    variable tmp_v : std_ulogic_vector(32 downto 0); -- including overflow
1833
  begin
1834
    tmp_v := '0' & sreg.int; -- zero-extend by one bit
1835
    if (round.en = '1') then
1836
      if (round.sub = '0') then -- increment
1837
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
1838
      else -- decrement
1839
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
1840
      end if;
1841
    else -- do nothing
1842
      round.output <= tmp_v;
1843
    end if;
1844
  end process rounding_unit_add;
1845
 
1846
 
1847
end neorv32_cpu_cp_fpu_f2i_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.