OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_fpu.vhd] - Blame information for rev 60

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 52 zero_gravi
-- #################################################################################################
2 53 zero_gravi
-- # << NEORV32 - CPU Co-Processor: Single-Prec. Floating Point Unit (RISC-V "Zfinx" Extension) >> #
3 52 zero_gravi
-- # ********************************************************************************************* #
4 53 zero_gravi
-- # The Zfinx floating-point extension uses the integer register file (x) for all FP operations.  #
5
-- # See the official RISC-V specs (https://github.com/riscv/riscv-zfinx) for more information.    #
6 55 zero_gravi
-- #                                                                                               #
7
-- # Design Notes:                                                                                 #
8
-- # * This FPU is based on a multi-cycle architecture and is NOT suited for pipelined operations. #
9
-- # * The hardware design goal was SIZE (performance comes second). All shift operations are done #
10
-- #   using an iterative approach (one bit per clock cycle, no barrel shifters!).                 #
11
-- # * Multiplication (FMUL instruction) will infer DSP blocks (if available).                     #
12
-- # * Subnormal numbers are not supported yet - they are "flushed to zero" before entering the    #
13
-- #   actual FPU core.                                                                            #
14
-- # * Division and square root operations (FDIV, FSQRT) and fused multiply-accumulate operations  #
15
-- #   (F[N]MADD) are not supported yet - they will raise an illegal instruction exception.        #
16
-- # * Rounding mode <100> ("round to nearest, ties to max magnitude") is not supported yet.       #
17
-- # * Signaling NaNs (sNaN) will not be generated by the hardware at all. However, if inserted by #
18
-- #   the programmer they are handled correctly.                                                  #
19 52 zero_gravi
-- # ********************************************************************************************* #
20
-- # BSD 3-Clause License                                                                          #
21
-- #                                                                                               #
22
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
23
-- #                                                                                               #
24
-- # Redistribution and use in source and binary forms, with or without modification, are          #
25
-- # permitted provided that the following conditions are met:                                     #
26
-- #                                                                                               #
27
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
28
-- #    conditions and the following disclaimer.                                                   #
29
-- #                                                                                               #
30
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
31
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
32
-- #    provided with the distribution.                                                            #
33
-- #                                                                                               #
34
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
35
-- #    endorse or promote products derived from this software without specific prior written      #
36
-- #    permission.                                                                                #
37
-- #                                                                                               #
38
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
39
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
40
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
41
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
42
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
43
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
44
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
45
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
46
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
47
-- # ********************************************************************************************* #
48
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
49
-- #################################################################################################
50
 
51
library ieee;
52
use ieee.std_logic_1164.all;
53
use ieee.numeric_std.all;
54
 
55
library neorv32;
56
use neorv32.neorv32_package.all;
57
 
58
entity neorv32_cpu_cp_fpu is
59
  port (
60
    -- global control --
61 53 zero_gravi
    clk_i    : in  std_ulogic; -- global clock, rising edge
62
    rstn_i   : in  std_ulogic; -- global reset, low-active, async
63
    ctrl_i   : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
64
    start_i  : in  std_ulogic; -- trigger operation
65 52 zero_gravi
    -- data input --
66 53 zero_gravi
    frm_i    : in  std_ulogic_vector(2 downto 0); -- rounding mode
67 56 zero_gravi
    cmp_i    : in  std_ulogic_vector(1 downto 0); -- comparator status
68 53 zero_gravi
    rs1_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
69
    rs2_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
70 52 zero_gravi
    -- result and status --
71 53 zero_gravi
    res_o    : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
72
    fflags_o : out std_ulogic_vector(4 downto 0); -- exception flags
73
    valid_o  : out std_ulogic -- data output valid
74 52 zero_gravi
  );
75
end neorv32_cpu_cp_fpu;
76
 
77
architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is
78
 
79 55 zero_gravi
  -- FPU core functions --
80
  constant op_class_c  : std_ulogic_vector(2 downto 0) := "000";
81
  constant op_comp_c   : std_ulogic_vector(2 downto 0) := "001";
82
  constant op_i2f_c    : std_ulogic_vector(2 downto 0) := "010";
83
  constant op_f2i_c    : std_ulogic_vector(2 downto 0) := "011";
84
  constant op_sgnj_c   : std_ulogic_vector(2 downto 0) := "100";
85
  constant op_minmax_c : std_ulogic_vector(2 downto 0) := "101";
86
  constant op_addsub_c : std_ulogic_vector(2 downto 0) := "110";
87
  constant op_mul_c    : std_ulogic_vector(2 downto 0) := "111";
88
 
89
  -- float-to-integer unit --
90
  component neorv32_cpu_cp_fpu_f2i
91
  port (
92
    -- control --
93
    clk_i      : in  std_ulogic; -- global clock, rising edge
94
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
95
    start_i    : in  std_ulogic; -- trigger operation
96
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
97
    funct_i    : in  std_ulogic; -- 0=signed, 1=unsigned
98
    -- input --
99
    sign_i     : in  std_ulogic; -- sign
100
    exponent_i : in  std_ulogic_vector(07 downto 0); -- exponent
101
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa
102
    class_i    : in  std_ulogic_vector(09 downto 0); -- operand class
103
    -- output --
104
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result
105
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags
106
    done_o     : out std_ulogic -- operation done
107
  );
108
  end component;
109
 
110
  -- normalizer + rounding unit --
111
  component neorv32_cpu_cp_fpu_normalizer
112
  port (
113
    -- control --
114
    clk_i      : in  std_ulogic; -- global clock, rising edge
115
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
116
    start_i    : in  std_ulogic; -- trigger operation
117
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
118
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
119
    -- input --
120
    sign_i     : in  std_ulogic; -- sign
121
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent
122
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa
123
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input
124
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
125
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
126
    -- output --
127
    result_o   : out std_ulogic_vector(31 downto 0); -- result (float or int)
128
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags
129
    done_o     : out std_ulogic -- operation done
130
  );
131
  end component;
132
 
133
  -- commands (one-hot) --
134
  type cmd_t is record
135
    instr_class  : std_ulogic;
136
    instr_sgnj   : std_ulogic;
137
    instr_comp   : std_ulogic;
138
    instr_i2f    : std_ulogic;
139
    instr_f2i    : std_ulogic;
140
    instr_minmax : std_ulogic;
141
    instr_addsub : std_ulogic;
142
    instr_mul    : std_ulogic;
143
    funct        : std_ulogic_vector(2 downto 0);
144
  end record;
145
  signal cmd : cmd_t;
146
  signal funct_ff : std_ulogic_vector(2 downto 0);
147
 
148
  -- co-processor control engine --
149
  type ctrl_state_t is (S_IDLE, S_BUSY);
150
  type ctrl_engine_t is record
151
    state : ctrl_state_t;
152
    start : std_ulogic;
153
    valid : std_ulogic;
154
  end record;
155
  signal ctrl_engine : ctrl_engine_t;
156
 
157
  -- floating-point operands --
158
  type op_data_t  is array (0 to 1) of std_ulogic_vector(31 downto 0);
159
  type op_class_t is array (0 to 1) of std_ulogic_vector(09 downto 0);
160
  type fpu_operands_t is record
161
    rs1       : std_ulogic_vector(31 downto 0); -- operand 1
162
    rs1_class : std_ulogic_vector(09 downto 0); -- operand 1 number class
163
    rs2       : std_ulogic_vector(31 downto 0); -- operand 2
164
    rs2_class : std_ulogic_vector(09 downto 0); -- operand 2 number class
165
    frm       : std_ulogic_vector(02 downto 0); -- rounding mode
166
  end record;
167
  signal op_data      : op_data_t;
168
  signal op_class     : op_class_t;
169
  signal fpu_operands : fpu_operands_t;
170
 
171
  -- floating-point comparator --
172 56 zero_gravi
  signal cmp_ff        : std_ulogic_vector(01 downto 0);
173 55 zero_gravi
  signal comp_equal_ff : std_ulogic;
174
  signal comp_less_ff  : std_ulogic;
175
 
176
  -- functional units interface --
177
  type fu_interface_t is record
178
    result : std_ulogic_vector(31 downto 0);
179
    flags  : std_ulogic_vector(04 downto 0);
180
    start  : std_ulogic;
181
    done   : std_ulogic;
182
  end record;
183
  signal fu_classify    : fu_interface_t;
184
  signal fu_compare     : fu_interface_t;
185
  signal fu_sign_inject : fu_interface_t;
186
  signal fu_min_max     : fu_interface_t;
187
  signal fu_conv_f2i    : fu_interface_t;
188
  signal fu_addsub      : fu_interface_t;
189
  signal fu_mul         : fu_interface_t;
190
  signal fu_core_done   : std_ulogic; -- FU operation completed
191
 
192
  -- integer-to-float --
193
  type fu_i2f_interface_t is record
194
    result : std_ulogic_vector(31 downto 0);
195
    sign   : std_ulogic;
196
    start  : std_ulogic;
197
    done   : std_ulogic;
198
  end record;
199
  signal fu_conv_i2f : fu_i2f_interface_t; -- float result
200
 
201
  -- multiplier unit --
202
  type multiplier_t is record
203
    opa       : unsigned(23 downto 0); -- mantissa A plus hidden one
204
    opb       : unsigned(23 downto 0); -- mantissa B plus hidden one
205
    buf_ff    : unsigned(47 downto 0); -- product buffer
206
    sign      : std_ulogic; -- resulting sign
207
    product   : std_ulogic_vector(47 downto 0); -- product
208
    exp_sum   : std_ulogic_vector(08 downto 0); -- incl 1x overflow/underflow bit
209
    exp_res   : std_ulogic_vector(09 downto 0); -- resulting exponent incl 2x overflow/underflow bit
210
    --
211
    res_class : std_ulogic_vector(09 downto 0);
212
    flags     : std_ulogic_vector(04 downto 0); -- exception flags
213
    --
214
    start     : std_ulogic;
215
    latency   : std_ulogic_vector(02 downto 0); -- unit latency
216
    done      : std_ulogic;
217
  end record;
218
  signal multiplier : multiplier_t;
219
 
220
  -- adder/subtractor unit --
221
  type addsub_t is record
222
    -- input comparison --
223
    exp_comp  : std_ulogic_vector(01 downto 0); -- equal & less
224
    small_exp : std_ulogic_vector(07 downto 0);
225
    small_man : std_ulogic_vector(23 downto 0); -- mantissa + hiden one
226
    large_exp : std_ulogic_vector(07 downto 0);
227
    large_man : std_ulogic_vector(23 downto 0); -- mantissa + hiden one
228
    -- smaller mantissa alignment --
229
    man_sreg  : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
230
    man_g_ext : std_ulogic;
231
    man_r_ext : std_ulogic;
232
    man_s_ext : std_ulogic;
233
    exp_cnt   : std_ulogic_vector(08 downto 0);
234
    -- adder/subtractor stage --
235
    man_comp  : std_ulogic;
236
    man_s     : std_ulogic_vector(26 downto 0); -- mantissa + hiden one + GRS
237
    man_l     : std_ulogic_vector(26 downto 0); -- mantissa + hiden one + GRS
238
    add_stage : std_ulogic_vector(27 downto 0); -- adder result incl. overflow
239
    -- result --
240
    res_sign  : std_ulogic;
241
    res_sum   : std_ulogic_vector(27 downto 0); -- mantissa sum (+1 bit) + GRS bits (for rounding)
242
    res_class : std_ulogic_vector(09 downto 0);
243
    flags     : std_ulogic_vector(04 downto 0); -- exception flags
244
    -- arbitration --
245
    start     : std_ulogic;
246
    latency   : std_ulogic_vector(04 downto 0); -- unit latency
247
    done      : std_ulogic;
248
  end record;
249
  signal addsub : addsub_t;
250
 
251
  -- normalizer interface (normalization & rounding and int-to-float) --
252
  type normalizer_t is record
253
    start     : std_ulogic;
254
    mode      : std_ulogic;
255
    sign      : std_ulogic;
256
    xexp      : std_ulogic_vector(08 downto 0);
257
    xmantissa : std_ulogic_vector(47 downto 0);
258
    result    : std_ulogic_vector(31 downto 0);
259
    class     : std_ulogic_vector(09 downto 0);
260
    flags_in  : std_ulogic_vector(04 downto 0);
261
    flags_out : std_ulogic_vector(04 downto 0);
262
    done      : std_ulogic;
263
  end record;
264
  signal normalizer : normalizer_t;
265
 
266 52 zero_gravi
begin
267
 
268 55 zero_gravi
-- ****************************************************************************************************************************
269
-- Control
270
-- ****************************************************************************************************************************
271
 
272
  -- Instruction Decoding -------------------------------------------------------------------
273 52 zero_gravi
  -- -------------------------------------------------------------------------------------------
274 55 zero_gravi
  -- one-hot re-encoding --
275
  cmd.instr_class  <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11100") else '0';
276
  cmd.instr_comp   <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "10100") else '0';
277
  cmd.instr_i2f    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11010") else '0';
278
  cmd.instr_f2i    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "11000") else '0';
279
  cmd.instr_sgnj   <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00100") else '0';
280
  cmd.instr_minmax <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00101") else '0';
281
  cmd.instr_addsub <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_8_c) = "0000")  else '0';
282
  cmd.instr_mul    <= '1' when (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c) = "00010") else '0';
283 52 zero_gravi
 
284 55 zero_gravi
  -- binary re-encoding --
285
  cmd.funct <= op_mul_c     when (cmd.instr_mul    = '1') else
286
               op_addsub_c  when (cmd.instr_addsub = '1') else
287
               op_minmax_c  when (cmd.instr_minmax = '1') else
288
               op_sgnj_c    when (cmd.instr_sgnj   = '1') else
289
               op_f2i_c     when (cmd.instr_f2i    = '1') else
290
               op_i2f_c     when (cmd.instr_i2f    = '1') else
291
               op_comp_c    when (cmd.instr_comp   = '1') else
292
               op_class_c;--when (cmd.instr_class  = '1') else (others => '-');
293 52 zero_gravi
 
294 55 zero_gravi
 
295
  -- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
296
  -- -------------------------------------------------------------------------------------------
297
  -- Subnormal numbers are not supported and are "flushed to zero"! FIXME / TODO
298
  -- rs1 --
299
  op_data(0)(31)           <= rs1_i(31);
300
  op_data(0)(30 downto 23) <= rs1_i(30 downto 23);
301
  op_data(0)(22 downto 00) <= (others => '0') when (rs1_i(30 downto 23) = "00000000") else rs1_i(22 downto 0); -- flush mantissa to zero if subnormal
302
  -- rs2 --
303
  op_data(1)(31)           <= rs2_i(31);
304
  op_data(1)(30 downto 23) <= rs2_i(30 downto 23);
305
  op_data(1)(22 downto 00) <= (others => '0') when (rs2_i(30 downto 23) = "00000000") else rs2_i(22 downto 0); -- flush mantissa to zero if subnormal
306
 
307
 
308
  -- Number Classifier ----------------------------------------------------------------------
309
  -- -------------------------------------------------------------------------------------------
310
  number_classifier: process(op_data)
    -- Combinational classifier: derives the 10-bit FCLASS-style class vector
    -- for each (already flushed-to-zero) input operand in op_data.
    variable sign_v      : std_ulogic; -- sign bit of the current operand
    variable man_zero_v  : std_ulogic; -- mantissa field is all-zero
    variable exp_zero_v  : std_ulogic; -- exponent field is all-zero
    variable exp_ones_v  : std_ulogic; -- exponent field is all-one
    variable is_zero_v   : std_ulogic;
    variable is_inf_v    : std_ulogic;
    variable is_nan_v    : std_ulogic;
    variable is_denorm_v : std_ulogic;
    variable is_norm_v   : std_ulogic;
  begin
    for k in op_data'range loop -- index 0 = rs1, index 1 = rs2
      -- field inspection --
      sign_v     := op_data(k)(31);
      man_zero_v := not or_reduce_f(op_data(k)(22 downto 0));
      exp_zero_v := not or_reduce_f(op_data(k)(30 downto 23));
      exp_ones_v := and_reduce_f(op_data(k)(30 downto 23));

      -- number categories --
      is_zero_v   := exp_zero_v and man_zero_v;       -- zero
      is_inf_v    := exp_ones_v and man_zero_v;       -- infinity
      is_nan_v    := exp_ones_v and (not man_zero_v); -- NaN
      is_denorm_v := '0'; -- FIXME / TODO -- exp_zero_v and (not man_zero_v); -- subnormal
      is_norm_v   := not (is_denorm_v or is_nan_v or is_inf_v or is_zero_v); -- none of the special categories

      -- negative classes --
      op_class(k)(fp_class_neg_inf_c)    <= sign_v and is_inf_v;    -- negative infinity
      op_class(k)(fp_class_neg_norm_c)   <= sign_v and is_norm_v;   -- negative normal number
      op_class(k)(fp_class_neg_denorm_c) <= sign_v and is_denorm_v; -- negative subnormal number
      op_class(k)(fp_class_neg_zero_c)   <= sign_v and is_zero_v;   -- negative zero

      -- positive classes --
      op_class(k)(fp_class_pos_zero_c)   <= (not sign_v) and is_zero_v;   -- positive zero
      op_class(k)(fp_class_pos_denorm_c) <= (not sign_v) and is_denorm_v; -- positive subnormal number
      op_class(k)(fp_class_pos_norm_c)   <= (not sign_v) and is_norm_v;   -- positive normal number
      op_class(k)(fp_class_pos_inf_c)    <= (not sign_v) and is_inf_v;    -- positive infinity

      -- NaN classes: mantissa MSB (bit 22) distinguishes quiet from signaling --
      op_class(k)(fp_class_snan_c)       <= is_nan_v and (not op_data(k)(22)); -- signaling NaN
      op_class(k)(fp_class_qnan_c)       <= is_nan_v and op_data(k)(22);       -- quiet NaN
    end loop; -- k
  end process number_classifier;
339
 
340
 
341
  -- Co-Processor Control Engine ------------------------------------------------------------
342
  -- -------------------------------------------------------------------------------------------
343
  -- Two-state arbiter (S_IDLE / S_BUSY) of the FPU co-processor.
  -- * S_IDLE: continuously samples the decoded operation, the main ALU comparator
  --   status and the rounding mode; on start_i the (flushed) operands and their
  --   classes are latched, a one-cycle start pulse is issued and the FSM goes busy.
  -- * S_BUSY: waits for fu_core_done, then issues a one-cycle valid pulse.
  control_engine_fsm: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl_engine.state      <= S_IDLE;
      ctrl_engine.start      <= '0';
      -- valid is written in the clocked branch below, so it needs a defined reset
      -- value as well; otherwise valid_o is undefined after reset and the reset
      -- would act as an implied clock enable for this register.
      ctrl_engine.valid      <= '0';
      fpu_operands.frm       <= (others => def_rst_val_c);
      fpu_operands.rs1       <= (others => def_rst_val_c);
      fpu_operands.rs1_class <= (others => def_rst_val_c);
      fpu_operands.rs2       <= (others => def_rst_val_c);
      fpu_operands.rs2_class <= (others => def_rst_val_c);
      funct_ff               <= (others => def_rst_val_c);
      cmp_ff                 <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- arbiter defaults: start and valid are single-cycle pulses --
      ctrl_engine.valid <= '0';
      ctrl_engine.start <= '0';

      -- state machine --
      case ctrl_engine.state is

        when S_IDLE => -- waiting for operation trigger
        -- ------------------------------------------------------------
          funct_ff <= cmd.funct; -- actual operation to execute
          cmp_ff   <= cmp_i; -- main ALU comparator
          -- rounding mode --
          -- TODO / FIXME "round to nearest, ties to max magnitude" (0b100) is not supported yet
          -- (the MSB of the latched rounding mode is therefore always forced to zero)
          if (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "111") then
            fpu_operands.frm <= '0' & frm_i(1 downto 0); -- instruction field is "111": use frm_i input
          else
            fpu_operands.frm <= '0' & ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c); -- use instruction's funct3 field
          end if;
          --
          if (start_i = '1') then
            -- operand data --
            fpu_operands.rs1       <= op_data(0);
            fpu_operands.rs1_class <= op_class(0);
            fpu_operands.rs2       <= op_data(1);
            fpu_operands.rs2_class <= op_class(1);
            -- execute! --
            ctrl_engine.start <= '1';
            ctrl_engine.state <= S_BUSY;
          end if;

        when S_BUSY => -- operation in progress (multi-cycle)
        -- -----------------------------------------------------------
          if (fu_core_done = '1') then -- processing done?
            ctrl_engine.valid <= '1';
            ctrl_engine.state <= S_IDLE;
          end if;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl_engine.state <= S_IDLE;

      end case;
    end if;
  end process control_engine_fsm;
400
 
401
  -- operation done / valid output --
402
  valid_o <= ctrl_engine.valid;
403
 
404
 
405
  -- Functional Unit Interface (operation-start trigger) ------------------------------------
406
  -- -------------------------------------------------------------------------------------------
407
  fu_classify.start    <= ctrl_engine.start and cmd.instr_class;
408
  fu_compare.start     <= ctrl_engine.start and cmd.instr_comp;
409
  fu_sign_inject.start <= ctrl_engine.start and cmd.instr_sgnj;
410
  fu_min_max.start     <= ctrl_engine.start and cmd.instr_minmax;
411
  fu_conv_i2f.start    <= ctrl_engine.start and cmd.instr_i2f;
412
  fu_conv_f2i.start    <= ctrl_engine.start and cmd.instr_f2i;
413
  fu_addsub.start      <= ctrl_engine.start and cmd.instr_addsub;
414
  fu_mul.start         <= ctrl_engine.start and cmd.instr_mul;
415
 
416
 
417
-- ****************************************************************************************************************************
418
-- FPU Core - Functional Units
419
-- ****************************************************************************************************************************
420
 
421
  -- Number Classifier (FCLASS) -------------------------------------------------------------
422
  -- -------------------------------------------------------------------------------------------
423
  fu_classify.flags <= (others => '0'); -- does not generate flags at all
424
  fu_classify.result(31 downto 10) <= (others => '0');
425
  fu_classify.result(09 downto 00) <= fpu_operands.rs1_class;
426
  fu_classify.done <= fu_classify.start;
427
 
428
 
429
  -- Floating-Point Comparator --------------------------------------------------------------
430
  -- -------------------------------------------------------------------------------------------
431 56 zero_gravi
  -- Registered "equal" / "less than" evaluation of the latched operands, shared by
  -- the compare (FEQ/FLT/FLE) and min/max (FMIN/FMAX) units. NaN inputs are NOT
  -- handled here. Also generates the one-cycle latency "done" signals for both units.
  float_comparator: process(rstn_i, clk_i)
    variable cond_v      : std_ulogic_vector(1 downto 0); -- sign(rs1) & sign(rs2)
    variable same_inf_v  : boolean; -- both operands are the same infinity
    variable both_zero_v : boolean; -- both operands are zero (any sign combination)
  begin
    if (rstn_i = '0') then
      comp_equal_ff   <= def_rst_val_c;
      comp_less_ff    <= def_rst_val_c;
      fu_compare.done <= def_rst_val_c;
      fu_min_max.done <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- special operand combinations that are "equal" and never "less than" --
      same_inf_v  := ((fpu_operands.rs1_class(fp_class_pos_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1')) or -- +inf vs. +inf
                     ((fpu_operands.rs1_class(fp_class_neg_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1'));   -- -inf vs. -inf
      both_zero_v := ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs1_class(fp_class_neg_zero_c) = '1')) and
                     ((fpu_operands.rs2_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1')); -- +/-zero vs. +/-zero

      -- equal --
      if same_inf_v or both_zero_v or
         (cmp_ff(cmp_equal_c) = '1') then -- identical in every way (comparator result from main ALU)
        comp_equal_ff <= '1';
      else
        comp_equal_ff <= '0';
      end if;

      -- less than --
      if same_inf_v or both_zero_v then -- +inf !< +inf, -inf !< -inf, +/-zero !< +/-zero
        comp_less_ff <= '0';
      else
        cond_v := fpu_operands.rs1(31) & fpu_operands.rs2(31);
        case cond_v is
          when "10"   => comp_less_ff <= '1'; -- rs1 negative, rs2 positive
          when "01"   => comp_less_ff <= '0'; -- rs1 positive, rs2 negative
          when "00"   => comp_less_ff <= cmp_ff(cmp_less_c); -- both positive (comparator result from main ALU)
          -- both negative: the magnitude ordering is reversed, but identical operands
          -- must not be reported as "less than" (otherwise FLT(x,x) = 1 for negative x)
          when "11"   => comp_less_ff <= (not cmp_ff(cmp_less_c)) and (not cmp_ff(cmp_equal_c));
          when others => comp_less_ff <= '0'; -- undefined
        end case;
      end if;

      -- comparator latency --
      fu_compare.done <= fu_compare.start; -- for actual comparison operation
      fu_min_max.done <= fu_min_max.start; -- for min/max operations
    end if;
  end process float_comparator;
473
 
474
 
475
  -- Comparison (FEQ/FLT/FLE) ---------------------------------------------------------------
476
  -- -------------------------------------------------------------------------------------------
477
  float_comparison: process(fpu_operands, ctrl_i, comp_equal_ff, comp_less_ff)
    -- Builds the FEQ/FLT/FLE result word: bit 0 carries the selected condition,
    -- all other bits are zero. Any NaN operand forces the result to zero.
    variable any_nan_v : std_ulogic; -- at least one input is a NaN (signaling or quiet)
    variable cond_v    : std_ulogic; -- raw condition before NaN masking
  begin
    -- NaN detection over both operands --
    any_nan_v := (fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c)) or
                 (fpu_operands.rs1_class(fp_class_qnan_c) or fpu_operands.rs2_class(fp_class_qnan_c));

    -- pick the raw condition according to the instruction's funct3 field --
    case ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) is
      when "00"   => cond_v := comp_less_ff or comp_equal_ff; -- FLE: less than or equal
      when "01"   => cond_v := comp_less_ff;                  -- FLT: less than
      when "10"   => cond_v := comp_equal_ff;                 -- FEQ: equal
      when others => cond_v := '0';                           -- undefined
    end case;

    -- result word: only the LSB can be set --
    fu_compare.result    <= (others => '0');
    fu_compare.result(0) <= cond_v and (not any_nan_v); -- result is zero if either input is NaN
  end process float_comparison;
498
 
499
  -- latency --
500
  -- -> done in "float_comparator"
501
 
502
  -- exceptions --
503
  fu_compare.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
504
 
505
 
506
  -- Min/Max Select (FMIN/FMAX) -------------------------------------------------------------
507
  -- -------------------------------------------------------------------------------------------
508
  -- Combinational result selection for FMIN/FMAX based on the registered
  -- "less than" flag, with dedicated handling of signed zeros and NaN operands.
  -- Sensitivity list only names the signals that are actually read.
  min_max_select: process(fpu_operands, comp_less_ff, ctrl_i)
    variable cond_v : std_ulogic_vector(2 downto 0); -- rs1-is-NaN & rs2-is-NaN & select-rs2
  begin
    -- comparison result - check for special cases: -0 is less than +0
    if ((fpu_operands.rs1_class(fp_class_neg_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_zero_c) = '1')) then
      cond_v(0) := ctrl_i(ctrl_ir_funct3_0_c);
    elsif ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1')) then
      cond_v(0) := not ctrl_i(ctrl_ir_funct3_0_c);
    else -- "normal" comparison
      cond_v(0) := comp_less_ff xnor ctrl_i(ctrl_ir_funct3_0_c); -- min/max select
    end if;

    -- NaN check --
    cond_v(2) := fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs1_class(fp_class_qnan_c); -- rs1 is NaN
    cond_v(1) := fpu_operands.rs2_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_qnan_c); -- rs2 is NaN

    -- data output --
    case cond_v is
      when "000"         => fu_min_max.result <= fpu_operands.rs1;
      when "001"         => fu_min_max.result <= fpu_operands.rs2;
      when "010" | "011" => fu_min_max.result <= fpu_operands.rs1; -- if one input is NaN output the non-NaN one
      when "100" | "101" => fu_min_max.result <= fpu_operands.rs2; -- if one input is NaN output the non-NaN one
      when others        => fu_min_max.result <= fp_single_qnan_c; -- output quiet NaN if both inputs are NaN
    end case;
  end process min_max_select;
533
 
534
  -- latency --
535
  -- -> done in "float_comparator"
536
 
537
  -- exceptions --
538
  fu_min_max.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
539
 
540
 
541
  -- Convert: Float to [unsigned] Integer (FCVT.W[U].S) -------------------------------------
542
  -- -------------------------------------------------------------------------------------------
543
  neorv32_cpu_cp_fpu_f2i_inst: neorv32_cpu_cp_fpu_f2i
  port map (
    -- clock, reset and handshake --
    clk_i      => clk_i,                          -- global clock, rising edge
    rstn_i     => rstn_i,                         -- global reset, low-active, async
    start_i    => fu_conv_f2i.start,              -- trigger operation
    done_o     => fu_conv_f2i.done,               -- operation done
    -- operation configuration --
    rmode_i    => fpu_operands.frm,               -- rounding mode
    funct_i    => ctrl_i(ctrl_ir_funct12_0_c),    -- 0=signed, 1=unsigned
    -- floating-point operand: rs1 split into its three fields plus its class --
    sign_i     => fpu_operands.rs1(31),           -- sign bit
    exponent_i => fpu_operands.rs1(30 downto 23), -- exponent field
    mantissa_i => fpu_operands.rs1(22 downto 00), -- mantissa field
    class_i    => fpu_operands.rs1_class,         -- operand class
    -- conversion result --
    result_o   => fu_conv_f2i.result,             -- integer result
    flags_o    => fu_conv_f2i.flags               -- exception flags
  );
561
 
562
 
563
  -- Sign-Injection (FSGNJ) -----------------------------------------------------------------
564
  -- -------------------------------------------------------------------------------------------
565
  sign_injector: process(ctrl_i, fpu_operands)
    variable sign_v : std_ulogic; -- sign bit that is injected into the result
  begin
    -- The two low funct3 bits select the sign source; magnitude always comes from rs1.
    sign_v := fpu_operands.rs2(31); -- FSGNJ (also used for the undefined "11" encoding)
    if (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "01") then
      sign_v := not fpu_operands.rs2(31); -- FSGNJN
    elsif (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") then
      sign_v := fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- FSGNJX
    end if;

    fu_sign_inject.result(31)          <= sign_v;
    fu_sign_inject.result(30 downto 0) <= fpu_operands.rs1(30 downto 0);
    fu_sign_inject.flags               <= (others => '0'); -- does not generate flags
  end process sign_injector;

  -- latency: purely combinational, so "done" simply mirrors "start" --
  fu_sign_inject.done <= fu_sign_inject.start;
579
 
580
 
581
  -- Convert: [unsigned] Integer to Float (FCVT.S.W) ----------------------------------------
582
  -- -------------------------------------------------------------------------------------------
583 56 zero_gravi
  convert_i2f: process(rstn_i, clk_i)
  begin
    -- This process only registers the absolute value and the sign of the integer operand;
    -- the actual integer-to-float conversion is done by the normalizer.
    if (rstn_i = '0') then
      fu_conv_i2f.result <= (others => def_rst_val_c);
      fu_conv_i2f.sign   <= def_rst_val_c;
      -- "done" is a handshake strobe that starts the normalizer. It is a register of this
      -- process and therefore needs a reset value as well; without one the flop would hold
      -- its (undefined) power-up value for as long as reset is asserted.
      fu_conv_i2f.done   <= '0';
    elsif rising_edge(clk_i) then
      if (ctrl_i(ctrl_ir_funct12_0_c) = '0') and (rs1_i(31) = '1') then -- signed conversion of a negative value
        fu_conv_i2f.result <= std_ulogic_vector(0 - unsigned(rs1_i)); -- two's complement -> magnitude
        fu_conv_i2f.sign   <= rs1_i(31); -- original sign
      else -- unsigned conversion or non-negative signed value: magnitude is the input itself
        fu_conv_i2f.result <= rs1_i;
        fu_conv_i2f.sign   <= '0';
      end if;
      fu_conv_i2f.done <= fu_conv_i2f.start; -- actual conversion is done by the normalizer unit
    end if;
  end process convert_i2f;
601
 
602
 
603
  -- Multiplier Core (FMUL) -----------------------------------------------------------------
604
  -- -------------------------------------------------------------------------------------------
605 56 zero_gravi
  multiplier_core: process(rstn_i, clk_i)
    variable a_zero_v, a_inf_v : std_ulogic; -- rs1 is +/-zero / +/-infinity
    variable b_zero_v, b_inf_v : std_ulogic; -- rs2 is +/-zero / +/-infinity
  begin
    if (rstn_i = '0') then
      -- control and status registers --
      multiplier.latency            <= (others => def_rst_val_c);
      multiplier.sign               <= def_rst_val_c;
      multiplier.exp_res            <= (others => def_rst_val_c);
      multiplier.flags(fp_exc_nv_c) <= def_rst_val_c;
      multiplier.flags(fp_exc_of_c) <= def_rst_val_c;
      multiplier.flags(fp_exc_uf_c) <= def_rst_val_c;
      -- data path registers: don't-care reset value, these might be DSP regs! --
      multiplier.opa                <= (others => '-');
      multiplier.opb                <= (others => '-');
      multiplier.buf_ff             <= (others => '-');
      multiplier.product            <= (others => '-');
    elsif rising_edge(clk_i) then
      -- latency shift register: "start" travels towards the MSB --
      multiplier.latency <= multiplier.latency(multiplier.latency'left-1 downto 0) & multiplier.start;

      -- operand capture (FIXME / TODO remove buffer?) --
      if (multiplier.start = '1') then
        multiplier.opa <= unsigned('1' & fpu_operands.rs1(22 downto 0)); -- append hidden one
        multiplier.opb <= unsigned('1' & fpu_operands.rs2(22 downto 0)); -- append hidden one
      end if;

      -- mantissa product, two register stages (let the register balancing do the magic here) --
      multiplier.buf_ff  <= multiplier.opa * multiplier.opb;
      multiplier.product <= std_ulogic_vector(multiplier.buf_ff(47 downto 0));

      -- resulting sign --
      multiplier.sign <= fpu_operands.rs1(31) xor fpu_operands.rs2(31);

      -- exponent computation: remove one bias from the sum of the two biased exponents --
      multiplier.exp_res <= std_ulogic_vector(unsigned('0' & multiplier.exp_sum) - 127);

      -- overflow/underflow are derived from the previously registered exponent result --
      multiplier.flags(fp_exc_of_c) <= '0';
      multiplier.flags(fp_exc_uf_c) <= '0';
      if (multiplier.exp_res(multiplier.exp_res'left) = '1') then -- underflow (exp_res is "negative")
        multiplier.flags(fp_exc_uf_c) <= '1';
      elsif (multiplier.exp_res(multiplier.exp_res'left-1) = '1') then -- overflow
        multiplier.flags(fp_exc_of_c) <= '1';
      end if;

      -- invalid operation: mul(+/-zero, +/-inf) or mul(+/-inf, +/-zero) --
      a_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_neg_zero_c);
      b_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c);
      a_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c)  or fpu_operands.rs1_class(fp_class_neg_inf_c);
      b_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c)  or fpu_operands.rs2_class(fp_class_neg_inf_c);
      multiplier.flags(fp_exc_nv_c) <= (a_zero_v and b_inf_v) or (a_inf_v and b_zero_v);
    end if;
  end process multiplier_core;
652
 
653
  -- start/done handshake: "done" is the MSB of the latency shift register --
  multiplier.start <= fu_mul.start;
  multiplier.done  <= multiplier.latency(multiplier.latency'left);
  fu_mul.done      <= multiplier.done;

  -- sum of both biased exponents; each one is zero-extended by one bit so the carry is kept --
  multiplier.exp_sum <= std_ulogic_vector(unsigned('0' & fpu_operands.rs1(30 downto 23)) + unsigned('0' & fpu_operands.rs2(30 downto 23)));

  -- exception flags that are never raised by the multiplier core itself --
  multiplier.flags(fp_exc_nx_c) <= '0'; -- inexact: not possible here
  multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
664
 
665
 
666
  -- result class -- 
667 56 zero_gravi
  multiplier_class_core: process(rstn_i, clk_i)
    -- Registers the class of the FMUL result, derived only from the classes of both operands.
    -- Prefix a_ = rs1, b_ = rs2. The two subnormal class bits of the result are not driven
    -- here (they are tied off outside this process), so every bit is handled individually.
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
  begin
    if (rstn_i = '0') then
      multiplier.res_class(fp_class_pos_norm_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_norm_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_pos_inf_c)  <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_inf_c)  <= def_rst_val_c;
      multiplier.res_class(fp_class_pos_zero_c) <= def_rst_val_c;
      multiplier.res_class(fp_class_neg_zero_c) <= def_rst_val_c;
      -- the NaN class bits are registers of this process too: give them the same reset
      -- treatment as all other class bits
      multiplier.res_class(fp_class_snan_c)     <= def_rst_val_c;
      multiplier.res_class(fp_class_qnan_c)     <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- minions --
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);

      -- +normal --
      multiplier.res_class(fp_class_pos_norm_c) <=
        (a_pos_norm_v and b_pos_norm_v) or -- +norm * +norm
        (a_neg_norm_v and b_neg_norm_v);   -- -norm * -norm
      -- -normal --
      multiplier.res_class(fp_class_neg_norm_c) <=
        (a_pos_norm_v and b_neg_norm_v) or -- +norm * -norm
        (a_neg_norm_v and b_pos_norm_v);   -- -norm * +norm

      -- +infinity --
      multiplier.res_class(fp_class_pos_inf_c) <=
        (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    * +inf
        (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    * -inf
        (a_pos_norm_v and b_pos_inf_v)  or -- +norm   * +inf
        (a_pos_inf_v  and b_pos_norm_v) or -- +inf    * +norm
        (a_neg_norm_v and b_neg_inf_v)  or -- -norm   * -inf
        (a_neg_inf_v  and b_neg_norm_v) or -- -inf    * -norm
        (a_pos_subn_v and b_pos_inf_v)  or -- +denorm * +inf (was missing; mirrors the -denorm terms)
        (a_pos_inf_v  and b_pos_subn_v) or -- +inf    * +denorm (was missing; mirrors the -denorm terms)
        (a_neg_subn_v and b_neg_inf_v)  or -- -denorm * -inf
        (a_neg_inf_v  and b_neg_subn_v);   -- -inf    * -denorm
      -- -infinity --
      multiplier.res_class(fp_class_neg_inf_c) <=
        (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    * -inf
        (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    * +inf
        (a_pos_norm_v and b_neg_inf_v)  or -- +norm   * -inf
        (a_neg_inf_v  and b_pos_norm_v) or -- -inf    * +norm
        (a_neg_norm_v and b_pos_inf_v)  or -- -norm   * +inf
        (a_pos_inf_v  and b_neg_norm_v) or -- +inf    * -norm
        (a_pos_subn_v and b_neg_inf_v)  or -- +denorm * -inf
        (a_neg_inf_v  and b_pos_subn_v) or -- -inf    * +denorm
        (a_neg_subn_v and b_pos_inf_v)  or -- -denorm * +inf
        (a_pos_inf_v  and b_neg_subn_v);   -- +inf    * -denorm

      -- +zero --
      multiplier.res_class(fp_class_pos_zero_c) <=
        (a_pos_zero_v and b_pos_zero_v) or -- +zero   * +zero
        (a_pos_zero_v and b_pos_norm_v) or -- +zero   * +norm
        (a_pos_zero_v and b_pos_subn_v) or -- +zero   * +denorm
        (a_neg_zero_v and b_neg_zero_v) or -- -zero   * -zero
        (a_neg_zero_v and b_neg_norm_v) or -- -zero   * -norm
        (a_neg_zero_v and b_neg_subn_v) or -- -zero   * -denorm
        (a_pos_norm_v and b_pos_zero_v) or -- +norm   * +zero
        (a_pos_subn_v and b_pos_zero_v) or -- +denorm * +zero
        (a_neg_norm_v and b_neg_zero_v) or -- -norm   * -zero
        (a_neg_subn_v and b_neg_zero_v);   -- -denorm * -zero

      -- -zero --
      multiplier.res_class(fp_class_neg_zero_c) <=
        (a_pos_zero_v and b_neg_zero_v) or -- +zero   * -zero
        (a_pos_zero_v and b_neg_norm_v) or -- +zero   * -norm
        (a_pos_zero_v and b_neg_subn_v) or -- +zero   * -denorm
        (a_neg_zero_v and b_pos_zero_v) or -- -zero   * +zero
        (a_neg_zero_v and b_pos_norm_v) or -- -zero   * +norm
        (a_neg_zero_v and b_pos_subn_v) or -- -zero   * +denorm
        (a_neg_norm_v and b_pos_zero_v) or -- -norm   * +zero
        (a_neg_subn_v and b_pos_zero_v) or -- -denorm * +zero
        (a_pos_norm_v and b_neg_zero_v) or -- +norm   * -zero
        (a_pos_subn_v and b_neg_zero_v);   -- +denorm * -zero

      -- sNaN --
      multiplier.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
      -- qNaN --
      multiplier.res_class(fp_class_qnan_c) <=
        (a_snan_v or b_snan_v) or -- any input is sNaN
        (a_qnan_v or b_qnan_v) or -- any input is qNaN
        ((a_pos_inf_v  or a_neg_inf_v)  and (b_pos_zero_v or b_neg_zero_v)) or -- +/-inf * +/-zero
        ((a_pos_zero_v or a_neg_zero_v) and (b_pos_inf_v  or b_neg_inf_v));    -- +/-zero * +/-inf
    end if;
  end process multiplier_class_core;
762
 
763
  -- the direct result/flag outputs of the multiplier unit are tied to zero (unused) --
  fu_mul.flags  <= (others => '0');
  fu_mul.result <= (others => '0');

  -- subnormal result classes are never set here: they are evaluated by the normalizer --
  multiplier.res_class(fp_class_neg_denorm_c) <= '0';
  multiplier.res_class(fp_class_pos_denorm_c) <= '0';
770
 
771
 
772
  -- Adder/Subtractor Core (FADD, FSUB) -----------------------------------------------------
773
  -- -------------------------------------------------------------------------------------------
774 56 zero_gravi
  adder_subtractor_core: process(rstn_i, clk_i)
  begin
    -- Multi-cycle FADD/FSUB data path: compare the exponents, shift the mantissa of the
    -- operand with the smaller exponent right by one bit per cycle until both exponents
    -- match, then add/subtract the aligned mantissas and determine the result sign.
    if (rstn_i = '0') then
      addsub.latency   <= (others => def_rst_val_c);
      addsub.exp_comp  <= (others => def_rst_val_c);
      addsub.man_sreg  <= (others => def_rst_val_c);
      addsub.exp_cnt   <= (others => def_rst_val_c);
      addsub.man_g_ext <= def_rst_val_c;
      addsub.man_r_ext <= def_rst_val_c;
      addsub.man_s_ext <= def_rst_val_c;
      addsub.man_comp  <= def_rst_val_c;
      addsub.add_stage <= (others => def_rst_val_c);
      addsub.res_sign  <= def_rst_val_c;
      addsub.flags(fp_exc_nv_c) <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- arbitration / latency --
      if (ctrl_engine.state = S_IDLE) then -- hacky "reset"
        addsub.latency <= (others => '0');
      else
        addsub.latency(0) <= addsub.start; -- input comparator delay
        if (addsub.latency(0) = '1') then
          addsub.latency(1) <= '1';
          addsub.latency(2) <= '0';
        elsif (addsub.exp_cnt(7 downto 0) = addsub.large_exp) then -- exponents match: radix point is aligned
          addsub.latency(1) <= '0';
          addsub.latency(2) <= addsub.latency(1) and (not addsub.latency(0)); -- "shift done"
        end if;
        addsub.latency(3) <= addsub.latency(2); -- adder stage
        addsub.latency(4) <= addsub.latency(3); -- final stage
      end if;

      -- exponent check: find smaller number (radix-offset-only) --
      if (unsigned(fpu_operands.rs1(30 downto 23)) < unsigned(fpu_operands.rs2(30 downto 23))) then
        addsub.exp_comp(0) <= '1'; -- rs1 < rs2
      else
        addsub.exp_comp(0) <= '0'; -- rs1 >= rs2
      end if;
      if (unsigned(fpu_operands.rs1(30 downto 23)) = unsigned(fpu_operands.rs2(30 downto 23))) then
        addsub.exp_comp(1) <= '1'; -- rs1 == rs2
      else -- rs1 != rs2
        addsub.exp_comp(1) <= '0';
      end if;

      -- shift right small mantissa to align radix point --
      if (addsub.latency(0) = '1') then -- load stage
        if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
             fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c)) = '0') then -- no input is zero
          addsub.man_sreg <= addsub.small_man;
        else
          addsub.man_sreg <= (others => '0');
        end if;
        addsub.exp_cnt   <= '0' & addsub.small_exp;
        addsub.man_g_ext <= '0';
        addsub.man_r_ext <= '0';
        addsub.man_s_ext <= '0';
      elsif (addsub.exp_cnt(7 downto 0) /= addsub.large_exp) then -- shift right until same magnitude
        addsub.man_sreg  <= '0' & addsub.man_sreg(addsub.man_sreg'left downto 1);
        addsub.man_g_ext <= addsub.man_sreg(0); -- guard bit
        addsub.man_r_ext <= addsub.man_g_ext; -- round bit
        addsub.man_s_ext <= addsub.man_s_ext or addsub.man_r_ext; -- sticky bit
        addsub.exp_cnt   <= std_ulogic_vector(unsigned(addsub.exp_cnt) + 1);
      end if;

      -- mantissa check: find smaller number (magnitude-only) --
      if (unsigned(addsub.man_sreg) <= unsigned(addsub.large_man)) then
        addsub.man_comp <= '1';
      else
        addsub.man_comp <= '0';
      end if;

      -- actual addition/subtraction (incl. overflow) --
      -- the effective operation is the requested one (funct12 bit 7) flipped by differing operand signs
      if ((ctrl_i(ctrl_ir_funct12_7_c) xor (fpu_operands.rs1(31) xor fpu_operands.rs2(31))) = '0') then -- add
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) + unsigned('0' & addsub.man_s));
      else -- sub
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) - unsigned('0' & addsub.man_s));
      end if;

      -- result sign --
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- add
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
          addsub.res_sign <= fpu_operands.rs1(31);
        else -- different signs
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
          else
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
          end if;
        end if;
      else -- sub
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
          else
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
          end if;
        else -- different signs
          addsub.res_sign <= fpu_operands.rs1(31);
        end if;
      end if;

      -- exception flags --
      -- Invalid operation: only the magnitude subtraction of two infinities is invalid
      -- (+inf + -inf, -inf + +inf, +inf - +inf, -inf - -inf). Adding infinities of equal
      -- effective sign is exact and must not raise NV. The last factor is '1' exactly
      -- when the effective operation is a subtraction (same term that selects "sub" above).
      addsub.flags(fp_exc_nv_c) <= (fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
                                   (fpu_operands.rs2_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c)) and
                                   (ctrl_i(ctrl_ir_funct12_7_c) xor fpu_operands.rs1(31) xor fpu_operands.rs2(31));
    end if;
  end process adder_subtractor_core;
879
 
880
  -- start/done handshake: "done" is the MSB of the latency register --
  addsub.start   <= fu_addsub.start;
  addsub.done    <= addsub.latency(addsub.latency'left);
  fu_addsub.done <= addsub.done;

  -- exception flags that are never raised by the adder core itself --
  addsub.flags(fp_exc_nx_c) <= '0'; -- not possible here (but may occur in normalizer)
  addsub.flags(fp_exc_uf_c) <= '0'; -- not possible here (but may occur in normalizer)
  addsub.flags(fp_exc_of_c) <= '0'; -- not possible here (but may occur in normalizer)
  addsub.flags(fp_exc_dz_c) <= '0'; -- division by zero -> not possible

  -- operand ordering by exponent: exp_comp(0) = '1' means the exponent of rs1 is the smaller one --
  addsub.small_exp <= fpu_operands.rs1(30 downto 23) when (addsub.exp_comp(0) = '1') else
                      fpu_operands.rs2(30 downto 23);
  addsub.large_exp <= fpu_operands.rs2(30 downto 23) when (addsub.exp_comp(0) = '1') else
                      fpu_operands.rs1(30 downto 23);

  -- matching mantissas, each one extended by the hidden one --
  addsub.small_man <= ('1' & fpu_operands.rs1(22 downto 00)) when (addsub.exp_comp(0) = '1') else
                      ('1' & fpu_operands.rs2(22 downto 00));
  addsub.large_man <= ('1' & fpu_operands.rs2(22 downto 00)) when (addsub.exp_comp(0) = '1') else
                      ('1' & fpu_operands.rs1(22 downto 00));

  -- operand ordering by aligned mantissa: man_comp = '1' means the shifted mantissa is not the larger one --
  -- (the shifted mantissa carries its guard/round/sticky bits, the unshifted one is padded with zeros)
  addsub.man_l <= (addsub.large_man & "000") when (addsub.man_comp = '1') else
                  (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext);
  addsub.man_s <= (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext) when (addsub.man_comp = '1') else
                  (addsub.large_man & "000");

  -- mantissa result: lower 28 bits of the adder stage --
  addsub.res_sum <= addsub.add_stage(27 downto 0);
903
 
904
 
905
  -- result class -- 
906 56 zero_gravi
  adder_subtractor_class_core: process(rstn_i, clk_i)
    -- Registers the class of the FADD/FSUB result, derived only from the classes of both
    -- operands. Prefix a_ = rs1, b_ = rs2 (for subtraction: the NEGATED rs2, see below).
    -- The two subnormal class bits of the result are not driven here (they are tied off
    -- outside this process), so every bit is handled individually.
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
    variable a_finite_v,   b_finite_v                               : std_ulogic; -- zero, normal or subnormal (any sign)
  begin
    if (rstn_i = '0') then
      addsub.res_class(fp_class_pos_inf_c)  <= def_rst_val_c;
      addsub.res_class(fp_class_neg_inf_c)  <= def_rst_val_c;
      addsub.res_class(fp_class_pos_zero_c) <= def_rst_val_c;
      addsub.res_class(fp_class_neg_zero_c) <= def_rst_val_c;
      addsub.res_class(fp_class_qnan_c)     <= def_rst_val_c;
      -- the remaining class bits are registers of this process too: give them the same
      -- reset treatment as all other class bits
      addsub.res_class(fp_class_pos_norm_c) <= def_rst_val_c;
      addsub.res_class(fp_class_neg_norm_c) <= def_rst_val_c;
      addsub.res_class(fp_class_snan_c)     <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- minions: operand A (rs1) --
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);

      -- minions: operand B (rs2) --
      -- a - b is classified as a + (-b): for subtraction the positive and negative
      -- classes of rs2 are swapped, so one single table (the addition one) is sufficient.
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- addition: use rs2 as it is
        b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
        b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
        b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
        b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
      else -- subtraction: use the negated rs2
        b_pos_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
        b_pos_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
        b_pos_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
        b_pos_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
      end if;
      b_snan_v := fpu_operands.rs2_class(fp_class_snan_c); -- NaN classes carry no sign
      b_qnan_v := fpu_operands.rs2_class(fp_class_qnan_c);

      -- any finite operand: +/-zero, +/-normal or +/-subnormal --
      a_finite_v := a_pos_zero_v or a_neg_zero_v or a_pos_norm_v or a_neg_norm_v or a_pos_subn_v or a_neg_subn_v;
      b_finite_v := b_pos_zero_v or b_neg_zero_v or b_pos_norm_v or b_neg_norm_v or b_pos_subn_v or b_neg_subn_v;

      -- +infinity --
      addsub.res_class(fp_class_pos_inf_c) <=
        (a_pos_inf_v and b_pos_inf_v) or -- +inf   + +inf
        (a_pos_inf_v and b_finite_v)  or -- +inf   + finite
        (a_finite_v  and b_pos_inf_v);   -- finite + +inf
      -- -infinity --
      addsub.res_class(fp_class_neg_inf_c) <=
        (a_neg_inf_v and b_neg_inf_v) or -- -inf   + -inf
        (a_neg_inf_v and b_finite_v)  or -- -inf   + finite
        (a_finite_v  and b_neg_inf_v);   -- finite + -inf

      -- +zero --
      addsub.res_class(fp_class_pos_zero_c) <=
        (a_pos_zero_v and b_pos_zero_v) or -- +zero + +zero
        (a_pos_zero_v and b_neg_zero_v) or -- +zero + -zero
        (a_neg_zero_v and b_pos_zero_v);   -- -zero + +zero
      -- -zero --
      addsub.res_class(fp_class_neg_zero_c) <=
        (a_neg_zero_v and b_neg_zero_v);   -- -zero + -zero

      -- qNaN --
      addsub.res_class(fp_class_qnan_c) <=
        (a_snan_v    or  b_snan_v)    or -- any input is sNaN
        (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
        (a_pos_inf_v and b_neg_inf_v) or -- +inf + -inf
        (a_neg_inf_v and b_pos_inf_v);   -- -inf + +inf

      -- normal --
      addsub.res_class(fp_class_pos_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +-/norm [sign is irrelevant here]
      addsub.res_class(fp_class_neg_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +-/norm [sign is irrelevant here]

      -- sNaN --
      addsub.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
    end if;
  end process adder_subtractor_class_core;
1045
 
1046
  -- the direct result/flag outputs of the adder/subtractor unit are tied to zero (unused) --
  fu_addsub.flags  <= (others => '0');
  fu_addsub.result <= (others => '0');

  -- subnormal result classes are never set here: they are evaluated by the normalizer --
  addsub.res_class(fp_class_neg_denorm_c) <= '0';
  addsub.res_class(fp_class_pos_denorm_c) <= '0';
1053
 
1054
 
1055
-- ****************************************************************************************************************************
1056
-- FPU Core - Normalize & Round
1057
-- ****************************************************************************************************************************
1058
 
1059
  -- Normalizer Input -----------------------------------------------------------------------
1060
  -- -------------------------------------------------------------------------------------------
1061
  normalizer_input_select: process(funct_ff, addsub, multiplier, fu_conv_i2f)
  begin
    -- Input multiplexer of the shared normalizer. The defaults below are the settings for
    -- the integer-to-float conversion (op_i2f_c and any other function code); the
    -- adder/subtractor and the multiplier override all of them in the case statement.
    normalizer.mode      <= '1'; -- int_to_float
    normalizer.sign      <= fu_conv_i2f.sign;
    normalizer.xexp      <= "001111111"; -- bias = 127
    normalizer.xmantissa <= (others => '0'); -- don't care
    normalizer.class     <= (others => '0'); -- don't care
    normalizer.flags_in  <= (others => '0'); -- no flags yet
    normalizer.start     <= fu_conv_i2f.done;

    case funct_ff is

      when op_addsub_c => -- addition/subtraction
        normalizer.mode     <= '0'; -- normalization
        normalizer.sign     <= addsub.res_sign;
        normalizer.xexp     <= addsub.exp_cnt;
        -- sum bits 27..1 go to the top of the extended mantissa, sum bit 0 goes to the
        -- very last position, everything in between is zero
        normalizer.xmantissa(47 downto 21) <= addsub.res_sum(27 downto 1);
        normalizer.xmantissa(20 downto 01) <= (others => '0');
        normalizer.xmantissa(00)           <= addsub.res_sum(0);
        normalizer.class    <= addsub.res_class;
        normalizer.flags_in <= addsub.flags;
        normalizer.start    <= addsub.done;

      when op_mul_c => -- multiplication
        normalizer.mode      <= '0'; -- normalization
        normalizer.sign      <= multiplier.sign;
        normalizer.xexp      <= '0' & multiplier.exp_res(7 downto 0);
        normalizer.xmantissa <= multiplier.product;
        normalizer.class     <= multiplier.res_class;
        normalizer.flags_in  <= multiplier.flags;
        normalizer.start     <= multiplier.done;

      when others => -- op_i2f_c: keep the defaults
        null;

    end case;
  end process normalizer_input_select;
1094
 
1095
 
1096
  -- Normalizer & Rounding Unit -------------------------------------------------------------
1097
  -- -------------------------------------------------------------------------------------------
1098
  neorv32_cpu_cp_fpu_normalizer_inst: neorv32_cpu_cp_fpu_normalizer
  port map (
    -- clock and reset --
    rstn_i     => rstn_i,               -- global reset, low-active, async
    clk_i      => clk_i,                -- global clock, rising edge
    -- operation control --
    funct_i    => normalizer.mode,      -- operation mode
    rmode_i    => fpu_operands.frm,     -- rounding mode
    start_i    => normalizer.start,     -- trigger operation
    -- operand (selected by the normalizer input multiplexer) --
    sign_i     => normalizer.sign,      -- sign
    exponent_i => normalizer.xexp,      -- extended exponent
    mantissa_i => normalizer.xmantissa, -- extended mantissa
    class_i    => normalizer.class,     -- input number class
    flags_i    => normalizer.flags_in,  -- exception flags input
    integer_i  => fu_conv_i2f.result,   -- integer input
    -- result --
    done_o     => normalizer.done,      -- operation done
    result_o   => normalizer.result,    -- result (float or int)
    flags_o    => normalizer.flags_out  -- exception flags
  );
1118
 
1119
 
1120
-- ****************************************************************************************************************************
1121
-- FPU Core - Result
1122
-- ****************************************************************************************************************************
1123
 
1124
  -- Result Output to CPU Pipeline ----------------------------------------------------------
1125
  -- -------------------------------------------------------------------------------------------
1126 56 zero_gravi
  output_gate: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      res_o    <= (others => def_rst_val_c);
      fflags_o <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- outputs are gated to zero unless the control engine flags a valid result
      res_o    <= (others => '0');
      fflags_o <= (others => '0');
      if (ctrl_engine.valid = '1') then
        -- forward result and flags of the functional unit selected by funct_ff
        case funct_ff is
          when op_class_c => -- classify
            res_o    <= fu_classify.result;
            fflags_o <= fu_classify.flags;
          when op_comp_c => -- compare
            res_o    <= fu_compare.result;
            fflags_o <= fu_compare.flags;
          when op_f2i_c => -- float-to-integer
            res_o    <= fu_conv_f2i.result;
            fflags_o <= fu_conv_f2i.flags;
          when op_sgnj_c => -- sign injection
            res_o    <= fu_sign_inject.result;
            fflags_o <= fu_sign_inject.flags;
          when op_minmax_c => -- minimum/maximum
            res_o    <= fu_min_max.result;
            fflags_o <= fu_min_max.flags;
          when others => -- op_mul_c, op_addsub_c, op_i2f_c, ... (all go through the normalizer)
            res_o    <= normalizer.result;
            fflags_o <= normalizer.flags_out;
        end case;
      end if;
    end if;
  end process output_gate;
1159
 
1160
  -- operation done: asserted as soon as any functional unit (or the shared normalizer) reports completion --
  fu_core_done <= normalizer.done     or fu_conv_f2i.done    or
                  fu_classify.done    or fu_compare.done     or
                  fu_sign_inject.done or fu_min_max.done;
1162
 
1163
 
1164 52 zero_gravi
end neorv32_cpu_cp_fpu_rtl;
1165 55 zero_gravi
 
1166
-- ###########################################################################################################################################
1167
-- ###########################################################################################################################################
1168
 
1169
-- #################################################################################################
1170
-- # << NEORV32 - Single-Precision Floating-Point Unit: Normalizer and Rounding Unit >>            #
1171
-- # ********************************************************************************************* #
1172
-- # This unit also performs integer-to-float conversions.                                         #
1173
-- # ********************************************************************************************* #
1174
-- # BSD 3-Clause License                                                                          #
1175
-- #                                                                                               #
1176
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1177
-- #                                                                                               #
1178
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1179
-- # permitted provided that the following conditions are met:                                     #
1180
-- #                                                                                               #
1181
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1182
-- #    conditions and the following disclaimer.                                                   #
1183
-- #                                                                                               #
1184
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1185
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1186
-- #    provided with the distribution.                                                            #
1187
-- #                                                                                               #
1188
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1189
-- #    endorse or promote products derived from this software without specific prior written      #
1190
-- #    permission.                                                                                #
1191
-- #                                                                                               #
1192
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1193
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1194
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1195
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1196
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1197
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1198
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1199
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1200
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1201
-- # ********************************************************************************************* #
1202
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1203
-- #################################################################################################
1204
 
1205
library ieee;
1206
use ieee.std_logic_1164.all;
1207
use ieee.numeric_std.all;
1208
 
1209
library neorv32;
1210
use neorv32.neorv32_package.all;
1211
 
1212
-- Port interface of the normalizer & rounding unit (also performs integer-to-float conversion).
-- Handshake: start_i is sampled while the unit is idle; done_o is high for one clock cycle
-- in which result_o and flags_o carry the final values.
entity neorv32_cpu_cp_fpu_normalizer is
1213
  port (
1214
    -- control --
1215
    clk_i      : in  std_ulogic; -- global clock, rising edge
1216
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1217
    start_i    : in  std_ulogic; -- trigger operation (latches sign, exponent, class and flags inputs)
1218
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode (read combinationally by the rounding unit)
1219
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
1220
    -- input --
1221
    sign_i     : in  std_ulogic; -- sign
1222
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent (9 bit; loaded into the exponent counter)
1223
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa (read in the cycle AFTER start_i, so it has to stay stable)
1224
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input (int-to-float mode only; read in the cycle AFTER start_i)
1225
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
1226
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
1227
    -- output --
1228
    result_o   : out std_ulogic_vector(31 downto 0); -- float result (sign & 8-bit exponent & 23-bit mantissa)
1229
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags output
1230
    done_o     : out std_ulogic -- operation done (single-cycle pulse)
1231
  );
1232
end neorv32_cpu_cp_fpu_normalizer;
1233
 
1234
architecture neorv32_cpu_cp_fpu_normalizer_rtl of neorv32_cpu_cp_fpu_normalizer is
1235
 
1236
  -- controller --
1237
  -- FSM states of the normalizer; registers of the control engine are bundled in ctrl_t
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_I2F, S_CHECK_I2F, S_PREPARE_NORM, S_PREPARE_SHIFT, S_NORMALIZE_BUSY, S_ROUND, S_CHECK, S_FINALIZE);
1238
  type ctrl_t is record
1239
    state   : ctrl_engine_state_t; -- current state
1240
    norm_r  : std_ulogic; -- normalization round 0 or 1
1241
    cnt     : std_ulogic_vector(08 downto 0); -- iteration counter/exponent (incl. overflow)
1242
    cnt_pre : std_ulogic_vector(08 downto 0); -- value of cnt from the previous clock cycle (for over-/underflow detection)
1243
    cnt_of  : std_ulogic; -- counter overflow
1244
    cnt_uf  : std_ulogic; -- counter underflow
1245
    rounded : std_ulogic; -- output is rounded
1246
    res_sgn : std_ulogic; -- result sign (drives result_o(31))
1247
    res_exp : std_ulogic_vector(07 downto 0); -- result exponent (drives result_o(30 downto 23))
1248
    res_man : std_ulogic_vector(22 downto 0); -- result mantissa (drives result_o(22 downto 0))
1249
    class   : std_ulogic_vector(09 downto 0); -- number class, latched from class_i at operation start
1250
    flags   : std_ulogic_vector(04 downto 0); -- exception flags, latched from flags_i and extended during processing
1251
  end record;
1252
  signal ctrl : ctrl_t;
1253
 
1254
  -- normalization shift register --
1255
  -- shift register layout (MSB to LSB): upper & lower & ext_g & ext_r & ext_s
  type sreg_t is record
1255
    done  : std_ulogic; -- normalized: upper(31 downto 1) is all-zero and upper(0) (hidden one) is set
1256
    dir   : std_ulogic; -- shift direction: 0=right, 1=left
1257
    zero  : std_ulogic; -- upper is all-zero (including the hidden-one bit upper(0))
1258
    upper : std_ulogic_vector(31 downto 0); -- integer part; bit 0 is the hidden-one position
1259
    lower : std_ulogic_vector(22 downto 0); -- 23 fraction bits (become the result mantissa)
1260
    ext_g : std_ulogic; -- guard bit
1261
    ext_r : std_ulogic; -- round bit
1262
    ext_s : std_ulogic; -- sticky bit
1263
  end record;
1264
  signal sreg : sreg_t;
1266
 
1267
  -- rounding unit --
1268
  -- control and result signals of the combinational rounding unit
  type round_t is record
1268
    en     : std_ulogic; -- enable rounding
1269
    sub    : std_ulogic; -- 0=increment, 1=decrement
1270
    output : std_ulogic_vector(24 downto 0); -- mantissa size + hidden one + 1
1271
  end record;
1272
  signal round : round_t;
1274
 
1275
begin
1276
 
1277
  -- Control Engine -------------------------------------------------------------------------
1278
  -- -------------------------------------------------------------------------------------------
1279
  -- Sequential control engine of the normalizer:
  --  * latches the operand on start_i,
  --  * shifts the mantissa one bit per clock cycle until the hidden one sits in sreg.upper(0)
  --    while counting the exponent up (right shift) or down (left shift),
  --  * applies the rounding unit's output once and normalizes a second time,
  --  * checks for exponent over-/underflow and builds the final result word and flags.
  -- done_o is high for exactly one cycle together with the final result.
  ctrl_engine: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl.state   <= S_IDLE;
      ctrl.norm_r  <= def_rst_val_c;
      ctrl.cnt     <= (others => def_rst_val_c);
      ctrl.cnt_pre <= (others => def_rst_val_c);
      ctrl.cnt_of  <= def_rst_val_c;
      ctrl.cnt_uf  <= def_rst_val_c;
      ctrl.rounded <= def_rst_val_c;
      ctrl.res_exp <= (others => def_rst_val_c);
      ctrl.res_man <= (others => def_rst_val_c);
      ctrl.res_sgn <= def_rst_val_c;
      ctrl.class   <= (others => def_rst_val_c);
      ctrl.flags   <= (others => def_rst_val_c);
      --
      sreg.upper   <= (others => def_rst_val_c);
      sreg.lower   <= (others => def_rst_val_c);
      sreg.dir     <= def_rst_val_c;
      sreg.ext_g   <= def_rst_val_c;
      sreg.ext_r   <= def_rst_val_c;
      sreg.ext_s   <= def_rst_val_c;
      --
      done_o       <= '0';
    elsif rising_edge(clk_i) then
      -- defaults --
      ctrl.cnt_pre <= ctrl.cnt; -- keep last counter value for wrap-around detection
      done_o       <= '0';

      -- exponent counter underflow/overflow --
      -- (detected by comparing the two MSBs of the 9-bit counter before/after a step; flags are sticky until S_IDLE)
      if ((ctrl.cnt_pre(8 downto 7) = "01") and (ctrl.cnt(8 downto 7) = "10")) then -- overflow
        ctrl.cnt_of <= '1';
      elsif (ctrl.cnt_pre(8 downto 7) = "00") and (ctrl.cnt(8 downto 7) = "11") then -- underflow
        ctrl.cnt_uf <= '1';
      end if;

      -- fsm --
      case ctrl.state is

        when S_IDLE => -- wait for operation trigger
        -- ------------------------------------------------------------
          ctrl.norm_r  <= '0'; -- start with first normalization
          ctrl.rounded <= '0'; -- not rounded yet
          ctrl.cnt_of  <= '0';
          ctrl.cnt_uf  <= '0';
          --
          if (start_i = '1') then
            ctrl.cnt     <= exponent_i;
            ctrl.res_sgn <= sign_i;
            ctrl.class   <= class_i;
            ctrl.flags   <= flags_i;
            if (funct_i = '0') then -- float -> float
              ctrl.state <= S_PREPARE_NORM;
            else -- integer -> float
              ctrl.state <= S_PREPARE_I2F;
            end if;
          end if;

        when S_PREPARE_I2F => -- prepare integer-to-float conversion
        -- ------------------------------------------------------------
          sreg.upper <= integer_i;
          sreg.lower <= (others => '0');
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          sreg.dir   <= '0'; -- shift right
          ctrl.state <= S_CHECK_I2F;

        when S_CHECK_I2F => -- check if converting zero
        -- ------------------------------------------------------------
          if (sreg.zero = '1') then -- all zero
            ctrl.class(fp_class_pos_zero_c) <= '1';
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_NORMALIZE_BUSY;
          end if;

        when S_PREPARE_NORM => -- prepare "normal" normalization & rounding
        -- ------------------------------------------------------------
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= mantissa_i(47 downto 46);
          sreg.lower <= mantissa_i(45 downto 23);
          sreg.ext_g <= mantissa_i(22);
          sreg.ext_r <= mantissa_i(21);
          sreg.ext_s <= or_reduce_f(mantissa_i(20 downto 0));
          -- check for special cases --
          if ((ctrl.class(fp_class_snan_c)       or ctrl.class(fp_class_qnan_c)       or -- NaN
               ctrl.class(fp_class_neg_zero_c)   or ctrl.class(fp_class_pos_zero_c)   or -- zero
               ctrl.class(fp_class_neg_denorm_c) or ctrl.class(fp_class_pos_denorm_c) or -- subnormal
               ctrl.class(fp_class_neg_inf_c)    or ctrl.class(fp_class_pos_inf_c)    or -- infinity
               ctrl.flags(fp_exc_uf_c) or -- underflow
               ctrl.flags(fp_exc_of_c) or -- overflow
               ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_PREPARE_SHIFT;
          end if;

        when S_PREPARE_SHIFT => -- prepare shift direction (for "normal" normalization only)
        -- ------------------------------------------------------------
          if (sreg.zero = '0') then -- integer part is non-zero: number >= 1.0
            sreg.dir <= '0'; -- shift right
          else -- integer part (incl. hidden one) is zero: number < 1.0
            sreg.dir <= '1'; -- shift left
          end if;
          ctrl.state <= S_NORMALIZE_BUSY;

        when S_NORMALIZE_BUSY => -- running normalization cycle
        -- ------------------------------------------------------------
          -- shift until normalized or exception --
          if (sreg.done = '1') or (ctrl.cnt_uf = '1') or (ctrl.cnt_of = '1') then
            -- normalization control --
            ctrl.norm_r <= '1';
            if (ctrl.norm_r = '0') then -- first normalization cycle done
              ctrl.state <= S_ROUND;
            else -- second normalization cycle done
              ctrl.state <= S_CHECK;
            end if;
          else
            if (sreg.dir = '0') then -- shift right
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) + 1);
              sreg.upper <= '0' & sreg.upper(sreg.upper'left downto 1);
              sreg.lower <= sreg.upper(0) & sreg.lower(sreg.lower'left downto 1);
              sreg.ext_g <= sreg.lower(0);
              sreg.ext_r <= sreg.ext_g;
              sreg.ext_s <= sreg.ext_r or sreg.ext_s; -- sticky bit
            else -- shift left
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
              sreg.upper <= sreg.upper(sreg.upper'left-1 downto 0) & sreg.lower(sreg.lower'left);
              sreg.lower <= sreg.lower(sreg.lower'left-1 downto 0) & sreg.ext_g;
              sreg.ext_g <= sreg.ext_r;
              sreg.ext_r <= sreg.ext_s;
              sreg.ext_s <= sreg.ext_s; -- sticky bit
            end if;
          end if;

        when S_ROUND => -- rounding cycle (after first normalization)
        -- ------------------------------------------------------------
          -- The result is inexact whenever any bit below the mantissa LSB is set, no matter
          -- whether the rounding unit increments the mantissa or just truncates. Deriving this
          -- from round.en alone would miss truncated results (e.g. round-towards-zero or
          -- round-to-nearest with a cleared guard bit).
          ctrl.rounded <= ctrl.rounded or sreg.ext_g or sreg.ext_r or sreg.ext_s;
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= round.output(24 downto 23);
          sreg.lower <= round.output(22 downto 00);
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          ctrl.state <= S_PREPARE_SHIFT; -- re-normalize: rounding may have carried into upper(1)

        when S_CHECK => -- check for overflow/underflow
        -- ------------------------------------------------------------
          if (ctrl.cnt_uf = '1') then -- underflow
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt_of = '1') then -- overflow
            ctrl.flags(fp_exc_of_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"00") then -- subnormal
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"FF") then -- infinity
            ctrl.flags(fp_exc_of_c) <= '1';
          end if;
          ctrl.state  <= S_FINALIZE;

        when S_FINALIZE => -- result finalization
        -- ------------------------------------------------------------
          -- generate result word (the ORDER of checks is important here!) --
          if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') then -- sNaN / qNaN
            ctrl.res_sgn <= fp_single_qnan_c(31);
            ctrl.res_exp <= fp_single_qnan_c(30 downto 23);
            ctrl.res_man <= fp_single_qnan_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_inf_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- infinity
                (ctrl.flags(fp_exc_of_c) = '1') then -- overflow
            ctrl.res_exp <= fp_single_pos_inf_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_inf_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') then -- zero
            ctrl.res_sgn <= ctrl.class(fp_class_neg_zero_c);
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23);
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          elsif (ctrl.flags(fp_exc_uf_c) = '1') or -- underflow
                (sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          else -- result is ok
            ctrl.res_exp <= ctrl.cnt(7 downto 0);
            ctrl.res_man <= sreg.lower;
          end if;
          -- generate exception flags --
          ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if result is rounded
          --
          done_o     <= '1';
          ctrl.state <= S_IDLE;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl.state <= S_IDLE;

      end case;
    end if;
  end process ctrl_engine;
1476
 
1477
  -- normalization finished: only the hidden-one bit of the integer part is set --
  sreg.done <= sreg.upper(0) and (not or_reduce_f(sreg.upper(sreg.upper'left downto 1)));

  -- integer part is completely zero (hidden-one bit included) --
  sreg.zero <= not or_reduce_f(sreg.upper);

  -- result word: sign & exponent & mantissa --
  result_o <= ctrl.res_sgn & ctrl.res_exp & ctrl.res_man;

  -- exception flags (forwarded bit by bit from the flag register) --
  flags_o(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c); -- inexact
  flags_o(fp_exc_uf_c) <= ctrl.flags(fp_exc_uf_c); -- underflow
  flags_o(fp_exc_of_c) <= ctrl.flags(fp_exc_of_c); -- overflow
  flags_o(fp_exc_dz_c) <= ctrl.flags(fp_exc_dz_c); -- divide by zero
  flags_o(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c); -- invalid operation
1494
 
1495
 
1496
  -- Rounding -------------------------------------------------------------------------------
1497
  -- -------------------------------------------------------------------------------------------
1498
  -- Rounding decision logic (combinational).
  -- The mantissa in sreg is a MAGNITUDE (the sign is kept separately in ctrl.res_sgn), so the
  -- directed rounding modes have to take the sign into account:
  --  * towards -infinity: negative results grow in magnitude, positive results are truncated
  --  * towards +infinity: positive results grow in magnitude, negative results are truncated
  -- A truncated magnitude is never decremented, so round.sub stays '0' in all supported modes.
  -- round.en = '0' means the guard/round/sticky bits are simply dropped (truncation).
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl)
    variable inexact_v : std_ulogic; -- any bit below the mantissa LSB is set
  begin
    inexact_v := sreg.ext_g or sreg.ext_r or sreg.ext_s;
    -- defaults --
    round.en  <= '0';
    round.sub <= '0';
    -- rounding mode --
    case rmode_i(2 downto 0) is
      when "000" => -- round to nearest, ties to even
        if (sreg.ext_g = '0') then
          round.en <= '0'; -- round down (do nothing)
        else
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
            round.en <= sreg.lower(0); -- round up if LSB of mantissa is set
          else
            round.en <= '1'; -- round up
          end if;
        end if;
        round.sub <= '0'; -- increment
      when "001" => -- round towards zero
        round.en <= '0'; -- no rounding -> just truncate
      when "010" => -- round down (towards -infinity)
        round.en  <= inexact_v and ctrl.res_sgn; -- only negative results move away from zero
        round.sub <= '0'; -- increment magnitude
      when "011" => -- round up (towards +infinity)
        round.en  <= inexact_v and (not ctrl.res_sgn); -- only positive results move away from zero
        round.sub <= '0'; -- increment magnitude
      when "100" => -- round to nearest, ties to max magnitude
        round.en <= '0'; -- FIXME / TODO: not implemented, result is truncated
      when others => -- undefined
        round.en <= '0';
    end case;
  end process rounding_unit_ctrl;
1530
 
1531
 
1532
  -- incrementer/decrementer --
1533
  -- Applies the rounding decision to the normalized mantissa: adds or subtracts one LSB when
  -- round.en is set, otherwise passes the value through. The extra MSB catches the carry-out.
  rounding_unit_add: process(round, sreg)
    variable mant_v : std_ulogic_vector(24 downto 0); -- carry bit & hidden one & fraction
    variable calc_v : unsigned(24 downto 0);
  begin
    mant_v := '0' & sreg.upper(0) & sreg.lower;
    calc_v := unsigned(mant_v);
    if (round.en = '1') then
      if (round.sub = '0') then
        calc_v := calc_v + 1; -- increment
      else
        calc_v := calc_v - 1; -- decrement
      end if;
    end if;
    round.output <= std_ulogic_vector(calc_v);
  end process rounding_unit_add;
1547
 
1548
 
1549
end neorv32_cpu_cp_fpu_normalizer_rtl;
1550
 
1551
-- ###########################################################################################################################################
1552
-- ###########################################################################################################################################
1553
 
1554
-- #################################################################################################
1555
-- # << NEORV32 - Single-Precision Floating-Point Unit: Float-To-Int Converter >>                  #
1556
-- # ********************************************************************************************* #
1557
-- # BSD 3-Clause License                                                                          #
1558
-- #                                                                                               #
1559
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1560
-- #                                                                                               #
1561
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1562
-- # permitted provided that the following conditions are met:                                     #
1563
-- #                                                                                               #
1564
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1565
-- #    conditions and the following disclaimer.                                                   #
1566
-- #                                                                                               #
1567
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1568
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1569
-- #    provided with the distribution.                                                            #
1570
-- #                                                                                               #
1571
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1572
-- #    endorse or promote products derived from this software without specific prior written      #
1573
-- #    permission.                                                                                #
1574
-- #                                                                                               #
1575
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1576
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1577
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1578
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1579
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1580
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1581
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1582
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1583
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1584
-- # ********************************************************************************************* #
1585
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1586
-- #################################################################################################
1587
 
1588
library ieee;
1589
use ieee.std_logic_1164.all;
1590
use ieee.numeric_std.all;
1591
 
1592
library neorv32;
1593
use neorv32.neorv32_package.all;
1594
 
1595
entity neorv32_cpu_cp_fpu_f2i is
  port (
    -- control signals --
    clk_i      : in  std_ulogic;                    -- global clock (rising edge)
    rstn_i     : in  std_ulogic;                    -- global reset (low-active, asynchronous)
    start_i    : in  std_ulogic;                    -- start a new conversion
    rmode_i    : in  std_ulogic_vector(2 downto 0); -- rounding mode select
    funct_i    : in  std_ulogic;                    -- conversion type: 0=signed, 1=unsigned
    -- floating-point operand (already split into its fields) --
    sign_i     : in  std_ulogic;                    -- operand sign
    exponent_i : in  std_ulogic_vector(7 downto 0); -- operand exponent
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- operand mantissa
    class_i    : in  std_ulogic_vector(9 downto 0); -- operand class
    -- conversion result --
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result
    flags_o    : out std_ulogic_vector(4 downto 0); -- exception flags
    done_o     : out std_ulogic                     -- conversion finished
  );
end entity neorv32_cpu_cp_fpu_f2i;
1614
 
1615
architecture neorv32_cpu_cp_fpu_f2i_rtl of neorv32_cpu_cp_fpu_f2i is

  -- Float-to-integer converter (multi-cycle).
  -- The operand magnitude is built in sreg.int by shifting the mantissa left one bit per
  -- cycle; the bits remaining in sreg.mant afterwards are the discarded fraction and are
  -- summarized as guard/round/sticky for the rounding unit. Sign handling and saturation
  -- of special/out-of-range operands is done in the final FSM state.

  -- controller --
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_F2I, S_NORMALIZE_BUSY, S_ROUND, S_FINALIZE);
  type ctrl_t is record
    state      : ctrl_engine_state_t; -- current state
    unsign     : std_ulogic; -- latched funct_i (0=signed, 1=unsigned)
    cnt        : std_ulogic_vector(07 downto 0); -- iteration counter/exponent
    sign       : std_ulogic; -- latched operand sign
    class      : std_ulogic_vector(09 downto 0); -- latched operand class
    rounded    : std_ulogic; -- output is rounded (inexact)
    over       : std_ulogic; -- output is overflowing
    under      : std_ulogic; -- output is underflowing
    result_tmp : std_ulogic_vector(31 downto 0); -- magnitude after rounding
    result     : std_ulogic_vector(31 downto 0); -- final result
  end record;
  signal ctrl : ctrl_t;

  -- conversion shift register --
  type sreg_t is record
    int   : std_ulogic_vector(31 downto 0); -- integer magnitude (receives the hidden one)
    mant  : std_ulogic_vector(22 downto 0); -- remaining fraction bits
    ext_g : std_ulogic; -- guard bit
    ext_r : std_ulogic; -- round bit
    ext_s : std_ulogic; -- sticky bit
  end record;
  signal sreg : sreg_t;

  -- rounding unit --
  type round_t is record
    en     : std_ulogic; -- enable rounding
    sub    : std_ulogic; -- 0=increment, 1=decrement
    output : std_ulogic_vector(32 downto 0); -- result + overflow
  end record;
  signal round : round_t;

begin

  -- Control Engine -------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  ctrl_engine: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl.state      <= S_IDLE;
      ctrl.cnt        <= (others => def_rst_val_c);
      ctrl.sign       <= def_rst_val_c;
      ctrl.class      <= (others => def_rst_val_c);
      ctrl.rounded    <= def_rst_val_c;
      ctrl.over       <= def_rst_val_c;
      ctrl.under      <= def_rst_val_c;
      ctrl.unsign     <= def_rst_val_c;
      ctrl.result     <= (others => def_rst_val_c);
      ctrl.result_tmp <= (others => def_rst_val_c);
      sreg.int        <= (others => def_rst_val_c);
      sreg.mant       <= (others => def_rst_val_c);
      sreg.ext_s      <= def_rst_val_c;
      done_o          <= '0';
    elsif rising_edge(clk_i) then
      -- defaults --
      done_o <= '0';

      -- fsm --
      case ctrl.state is

        when S_IDLE => -- wait for operation trigger
        -- ------------------------------------------------------------
          ctrl.rounded <= '0'; -- not rounded yet
          ctrl.over    <= '0'; -- not overflowing yet
          ctrl.under   <= '0'; -- not underflowing yet
          ctrl.unsign  <= funct_i;
          sreg.ext_s   <= '0'; -- init
          if (start_i = '1') then
            ctrl.cnt    <= exponent_i;
            ctrl.sign   <= sign_i;
            ctrl.class  <= class_i;
            sreg.mant   <= mantissa_i;
            ctrl.state  <= S_PREPARE_F2I;
          end if;

        when S_PREPARE_F2I => -- prepare float-to-integer conversion
        -- ------------------------------------------------------------
          if (unsigned(ctrl.cnt) < 126) then -- less than 0.5
            sreg.int    <= (others => '0');
            -- magnitude is below one half: guard and round are zero, everything is "sticky"
            sreg.mant   <= (others => '0');
            sreg.ext_s  <= '1';
            -- NOTE(review): a zero-class operand whose exponent is < 126 also sets ctrl.under
            -- here (it skips S_ROUND but the flag stays set) - confirm that UF is intended.
            ctrl.under  <= '1'; -- this is an underflow!
            ctrl.cnt    <= (others => '0');
          elsif (unsigned(ctrl.cnt) = 126) then -- num < 1.0 but num >= 0.5
            sreg.int    <= (others => '0');
            sreg.mant   <= '1' & sreg.mant(sreg.mant'left downto 1); -- hidden one becomes the guard bit
            sreg.ext_s  <= sreg.mant(0); -- keep the bit that is shifted out as sticky information
            ctrl.cnt    <= (others => '0');
          else
            sreg.int    <= (others => '0');
            sreg.int(0) <= '1'; -- hidden one
            ctrl.cnt    <= std_ulogic_vector(unsigned(ctrl.cnt) - 127); -- remove bias to get raw number of left shifts
          end if;
          -- check terminal cases --
          if ((ctrl.class(fp_class_neg_inf_c)  or ctrl.class(fp_class_pos_inf_c) or
               ctrl.class(fp_class_neg_zero_c) or ctrl.class(fp_class_pos_zero_c) or
               ctrl.class(fp_class_snan_c)     or ctrl.class(fp_class_qnan_c)) = '1') then
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_NORMALIZE_BUSY;
          end if;

        when S_NORMALIZE_BUSY => -- running normalization cycle
        -- ------------------------------------------------------------
          if (or_reduce_f(ctrl.cnt(ctrl.cnt'left-1 downto 0)) = '0') then -- all shifts done
            -- Collect the sticky bit only now: while shifting, the lower mantissa bits still
            -- move up into the guard/round/integer positions and must not be counted as sticky.
            sreg.ext_s <= sreg.ext_s or or_reduce_f(sreg.mant(sreg.mant'left-2 downto 0));
            if (ctrl.unsign = '0') then -- signed conversion
              ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- update overrun flag again to check for numerical overflow into sign bit
            end if;
            ctrl.state <= S_ROUND;
          else -- shift left
            ctrl.cnt  <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
            sreg.int  <= sreg.int(sreg.int'left-1 downto 0) & sreg.mant(sreg.mant'left);
            sreg.mant <= sreg.mant(sreg.mant'left-1 downto 0) & '0';
            ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- a set MSB is shifted out
          end if;

        when S_ROUND => -- rounding cycle
        -- ------------------------------------------------------------
          -- inexact whenever any discarded fraction bit is set (also when just truncating)
          ctrl.rounded    <= ctrl.rounded or sreg.ext_g or sreg.ext_r or sreg.ext_s;
          ctrl.over       <= ctrl.over or round.output(round.output'left); -- overflow after rounding
          ctrl.result_tmp <= round.output(round.output'left-1 downto 0);
          ctrl.state      <= S_FINALIZE;

        when S_FINALIZE => -- check for corner cases and finalize result
        -- ------------------------------------------------------------
          -- NOTE(review): operands with a magnitude below 0.5 (ctrl.under) are forced to zero
          -- for all rounding modes, i.e. the directed modes do not yield +/-1 here.
          if (ctrl.unsign = '1') then -- unsigned conversion
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- NaN or +inf
               ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
              ctrl.result <= x"ffffffff";
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.class(fp_class_neg_inf_c) = '1') or -- subnormal zero or -inf
               (ctrl.sign = '1') or (ctrl.under = '1') then -- negative out-of-range or underflow
              ctrl.result <= x"00000000";
            else
              ctrl.result <= ctrl.result_tmp;
            end if;

          else -- signed conversion
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or  -- NaN or +inf
                  ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
              ctrl.result <= x"7fffffff";
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.under = '1') then -- subnormal zero or underflow
              ctrl.result <= x"00000000";
            elsif (ctrl.class(fp_class_neg_inf_c) = '1') or ((ctrl.sign = '1') and (ctrl.over = '1')) then -- -inf or negative out-of-range
              ctrl.result <= x"80000000";
            else -- result is ok, make sign adaption
              if (ctrl.sign = '1') then
                ctrl.result <= std_ulogic_vector(0 - unsigned(ctrl.result_tmp)); -- two's complement negation of the magnitude
              else
                ctrl.result <= ctrl.result_tmp;
              end if;
            end if;
          end if;
          done_o     <= '1';
          ctrl.state <= S_IDLE;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl.state <= S_IDLE;

      end case;
    end if;
  end process ctrl_engine;

  -- result --
  result_o <= ctrl.result;

  -- exception flags --
  flags_o(fp_exc_nv_c) <= ctrl.class(fp_class_snan_c) or ctrl.class(fp_class_qnan_c); -- invalid operation
  flags_o(fp_exc_dz_c) <= '0'; -- divide by zero - not possible here
  flags_o(fp_exc_of_c) <= ctrl.over or ctrl.class(fp_class_pos_inf_c) or ctrl.class(fp_class_neg_inf_c); -- overflow
  flags_o(fp_exc_uf_c) <= ctrl.under; -- underflow
  flags_o(fp_exc_nx_c) <= ctrl.rounded; -- inexact if fraction bits were discarded


  -- Rounding -------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- sreg.int holds the MAGNITUDE of the result, so the directed rounding modes have to take
  -- the operand's sign into account: rounding towards -infinity enlarges the magnitude of
  -- negative values only, rounding towards +infinity enlarges the magnitude of positive
  -- values only. In all other cases the fraction is simply truncated.
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl)
    variable inexact_v : std_ulogic; -- any discarded fraction bit is set
  begin
    -- defaults --
    round.en  <= '0';
    round.sub <= '0'; -- increment (the magnitude is never decremented)
    inexact_v := sreg.ext_g or sreg.ext_r or sreg.ext_s;
    -- rounding mode --
    case rmode_i(2 downto 0) is
      when "000" => -- round to nearest, ties to even
        if (sreg.ext_g = '0') then
          round.en <= '0'; -- round down (do nothing)
        else
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
            round.en <= sreg.int(0); -- round up if LSB of integer is set
          else
            round.en <= '1'; -- round up
          end if;
        end if;
      when "001" => -- round towards zero
        round.en <= '0'; -- no rounding -> just truncate
      when "010" => -- round down (towards -infinity)
        round.en <= ctrl.sign and inexact_v; -- negative: increment magnitude; positive: truncate
      when "011" => -- round up (towards +infinity)
        round.en <= (not ctrl.sign) and inexact_v; -- positive: increment magnitude; negative: truncate
      when "100" => -- round to nearest, ties to max magnitude
        round.en <= '0'; -- FIXME / TODO
      when others => -- undefined
        round.en <= '0';
    end case;
  end process rounding_unit_ctrl;

  -- rounding: guard and round bits --
  sreg.ext_g <= sreg.mant(sreg.mant'left);
  sreg.ext_r <= sreg.mant(sreg.mant'left-1);


  -- incrementer/decrementer --
  rounding_unit_add: process(round, sreg)
    variable tmp_v : std_ulogic_vector(32 downto 0); -- including overflow
  begin
    tmp_v := '0' & sreg.int;
    if (round.en = '1') then
      if (round.sub = '0') then -- increment
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
      else -- decrement
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
      end if;
    else -- do nothing
      round.output <= tmp_v;
    end if;
  end process rounding_unit_add;


end neorv32_cpu_cp_fpu_f2i_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.