OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_fpu.vhd] - Blame information for rev 55

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 52 zero_gravi
-- #################################################################################################
2 53 zero_gravi
-- # << NEORV32 - CPU Co-Processor: Single-Prec. Floating Point Unit (RISC-V "Zfinx" Extension) >> #
3 52 zero_gravi
-- # ********************************************************************************************* #
4 53 zero_gravi
-- # The Zfinx floating-point extension uses the integer register file (x) for all FP operations.  #
5
-- # See the official RISC-V specs (https://github.com/riscv/riscv-zfinx) for more information.    #
6 55 zero_gravi
-- #                                                                                               #
7
-- # Design Notes:                                                                                 #
8
-- # * This FPU is based on a multi-cycle architecture and is NOT suited for pipelined operations. #
9
-- # * The hardware design goal was SIZE (performance comes second). All shift operations are done #
10
-- #   using an iterative approach (one bit per clock cycle, no barrel shifters!).                 #
11
-- # * Multiplication (FMUL instruction) will infer DSP blocks (if available).                     #
12
-- # * Subnormal numbers are not supported yet - they are "flushed to zero" before entering the    #
13
-- #   actual FPU core.                                                                            #
14
-- # * Division and square root operations (FDIV, FSQRT) and fused multiply-accumulate operations  #
15
-- #   (F[N]MADD) are not supported yet - they will raise an illegal instruction exception.        #
16
-- # * Rounding mode <100> ("round to nearest, ties to max magnitude") is not supported yet.       #
17
-- # * Signaling NaNs (sNaN) will not be generated by the hardware at all. However, if inserted by #
18
-- #   the programmer they are handled correctly.                                                  #
19 52 zero_gravi
-- # ********************************************************************************************* #
20
-- # BSD 3-Clause License                                                                          #
21
-- #                                                                                               #
22
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
23
-- #                                                                                               #
24
-- # Redistribution and use in source and binary forms, with or without modification, are          #
25
-- # permitted provided that the following conditions are met:                                     #
26
-- #                                                                                               #
27
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
28
-- #    conditions and the following disclaimer.                                                   #
29
-- #                                                                                               #
30
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
31
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
32
-- #    provided with the distribution.                                                            #
33
-- #                                                                                               #
34
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
35
-- #    endorse or promote products derived from this software without specific prior written      #
36
-- #    permission.                                                                                #
37
-- #                                                                                               #
38
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
39
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
40
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
41
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
42
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
43
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
44
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
45
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
46
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
47
-- # ********************************************************************************************* #
48
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
49
-- #################################################################################################
50
 
51
library ieee;
52
use ieee.std_logic_1164.all;
53
use ieee.numeric_std.all;
54
 
55
library neorv32;
56
use neorv32.neorv32_package.all;
57
 
58
-- Single-precision FPU co-processor: operands arrive via rs1_i/rs2_i, an operation is triggered
-- with start_i and completion is signalled by valid_o together with res_o/fflags_o.
entity neorv32_cpu_cp_fpu is
  port (
    -- clock, reset and control --
    clk_i    : in  std_ulogic;                                 -- global clock, rising edge
    rstn_i   : in  std_ulogic;                                 -- global reset, low-active, async
    ctrl_i   : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
    start_i  : in  std_ulogic;                                 -- trigger operation
    -- operand inputs --
    frm_i    : in  std_ulogic_vector(2 downto 0);              -- rounding mode
    rs1_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
    rs2_i    : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
    -- result and status outputs --
    res_o    : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
    fflags_o : out std_ulogic_vector(4 downto 0);              -- exception flags
    valid_o  : out std_ulogic                                  -- data output valid
  );
end entity neorv32_cpu_cp_fpu;
75
 
76
architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is
77
 
78 55 zero_gravi
  -- FPU core functions --
  -- 3-bit binary function codes. The instruction decoder re-encodes its one-hot instruction
  -- flags into one of these values (cmd.funct); the control engine latches that value into
  -- funct_ff while it is idle.
79
  constant op_class_c  : std_ulogic_vector(2 downto 0) := "000"; -- classify; also the decoder's fall-through code
80
  constant op_comp_c   : std_ulogic_vector(2 downto 0) := "001"; -- comparison
81
  constant op_i2f_c    : std_ulogic_vector(2 downto 0) := "010"; -- integer to float conversion
82
  constant op_f2i_c    : std_ulogic_vector(2 downto 0) := "011"; -- float to integer conversion
83
  constant op_sgnj_c   : std_ulogic_vector(2 downto 0) := "100"; -- sign injection
84
  constant op_minmax_c : std_ulogic_vector(2 downto 0) := "101"; -- minimum / maximum select
85
  constant op_addsub_c : std_ulogic_vector(2 downto 0) := "110"; -- addition / subtraction
86
  constant op_mul_c    : std_ulogic_vector(2 downto 0) := "111"; -- multiplication
87
 
88
  -- component declaration: float-to-integer conversion unit --
  component neorv32_cpu_cp_fpu_f2i
    port (
      -- control --
      clk_i      : in  std_ulogic;                     -- global clock, rising edge
      rstn_i     : in  std_ulogic;                     -- global reset, low-active, async
      start_i    : in  std_ulogic;                     -- trigger operation
      rmode_i    : in  std_ulogic_vector(2 downto 0);  -- rounding mode
      funct_i    : in  std_ulogic;                     -- 0=signed, 1=unsigned
      -- operand (already split into its fields) --
      sign_i     : in  std_ulogic;                     -- sign
      exponent_i : in  std_ulogic_vector(7 downto 0);  -- exponent
      mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa
      class_i    : in  std_ulogic_vector(9 downto 0);  -- operand class
      -- result --
      result_o   : out std_ulogic_vector(31 downto 0); -- integer result
      flags_o    : out std_ulogic_vector(4 downto 0);  -- exception flags
      done_o     : out std_ulogic                      -- operation done
    );
  end component;
108
 
109
  -- component declaration: normalizer + rounding unit (also performs int-to-float) --
  component neorv32_cpu_cp_fpu_normalizer
    port (
      -- control --
      clk_i      : in  std_ulogic;                     -- global clock, rising edge
      rstn_i     : in  std_ulogic;                     -- global reset, low-active, async
      start_i    : in  std_ulogic;                     -- trigger operation
      rmode_i    : in  std_ulogic_vector(2 downto 0);  -- rounding mode
      funct_i    : in  std_ulogic;                     -- operating mode (0=norm&round, 1=int-to-float)
      -- operand --
      sign_i     : in  std_ulogic;                     -- sign
      exponent_i : in  std_ulogic_vector(8 downto 0);  -- extended exponent
      mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa
      integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input
      class_i    : in  std_ulogic_vector(9 downto 0);  -- input number class
      flags_i    : in  std_ulogic_vector(4 downto 0);  -- exception flags input
      -- result --
      result_o   : out std_ulogic_vector(31 downto 0); -- result (float or int)
      flags_o    : out std_ulogic_vector(4 downto 0);  -- exception flags
      done_o     : out std_ulogic                      -- operation done
    );
  end component;
131
 
132
  -- decoded command: one-hot instruction flags plus their binary re-encoding --
  type cmd_t is record
    instr_class, instr_sgnj, instr_comp, instr_i2f,
    instr_f2i, instr_minmax, instr_addsub, instr_mul : std_ulogic; -- one flag per instruction group
    funct : std_ulogic_vector(2 downto 0); -- binary function code (one of the op_*_c constants)
  end record;
  signal cmd      : cmd_t;
  signal funct_ff : std_ulogic_vector(2 downto 0); -- function code latched by the control engine
146
 
147
  -- co-processor control engine: FSM state plus start/valid strobes --
  type ctrl_state_t is (S_IDLE, S_BUSY);
  type ctrl_engine_t is record
    state        : ctrl_state_t; -- current FSM state
    start, valid : std_ulogic;   -- start strobe to the functional units / result-valid strobe
  end record;
  signal ctrl_engine : ctrl_engine_t;
155
 
156
  -- floating-point operands --
  -- index 0 = rs1, index 1 = rs2 (combinational, after the subnormal flush)
  type op_data_t  is array (0 to 1) of std_ulogic_vector(31 downto 0);
  type op_class_t is array (0 to 1) of std_ulogic_vector(9 downto 0);
  -- registered operand set captured by the control engine
  type fpu_operands_t is record
    rs1       : std_ulogic_vector(31 downto 0); -- operand 1
    rs1_class : std_ulogic_vector(9 downto 0);  -- operand 1 number class
    rs2       : std_ulogic_vector(31 downto 0); -- operand 2
    rs2_class : std_ulogic_vector(9 downto 0);  -- operand 2 number class
    frm       : std_ulogic_vector(2 downto 0);  -- rounding mode
  end record;
  signal op_data      : op_data_t;
  signal op_class     : op_class_t;
  signal fpu_operands : fpu_operands_t;
169
 
170
  -- floating-point comparator: registered equal/less results and combinational magnitude compare --
  signal comp_equal_ff, comp_less_ff, comp_less : std_ulogic;
174
 
175
  -- common interface bundle of the functional units --
  type fu_interface_t is record
    result      : std_ulogic_vector(31 downto 0); -- unit result word
    flags       : std_ulogic_vector(4 downto 0);  -- unit exception flags
    start, done : std_ulogic;                     -- start trigger / completion indicator
  end record;
  signal fu_classify, fu_compare, fu_sign_inject, fu_min_max : fu_interface_t;
  signal fu_conv_f2i, fu_addsub, fu_mul                      : fu_interface_t;
  signal fu_core_done : std_ulogic; -- FU operation completed
190
 
191
  -- integer-to-float interface: absolute integer value and sign handed on for conversion --
  type fu_i2f_interface_t is record
    result : std_ulogic_vector(31 downto 0); -- absolute value of the integer input
    sign   : std_ulogic;                     -- sign of the original integer
    start  : std_ulogic;                     -- start trigger
    done   : std_ulogic;                     -- completion indicator
  end record;
  signal fu_conv_i2f : fu_i2f_interface_t; -- float result
199
 
200
  -- multiplier unit --
  type multiplier_t is record
    -- data path --
    opa, opb  : unsigned(23 downto 0);          -- mantissa A / mantissa B, each plus hidden one
    buf_ff    : unsigned(47 downto 0);          -- product buffer
    sign      : std_ulogic;                     -- resulting sign
    product   : std_ulogic_vector(47 downto 0); -- product
    exp_sum   : std_ulogic_vector(8 downto 0);  -- incl 1x overflow/underflow bit
    exp_res   : std_ulogic_vector(9 downto 0);  -- resulting exponent incl 2x overflow/underflow bit
    -- result classification --
    res_class : std_ulogic_vector(9 downto 0);
    flags     : std_ulogic_vector(4 downto 0);  -- exception flags
    -- arbitration --
    start     : std_ulogic;
    latency   : std_ulogic_vector(2 downto 0);  -- unit latency
    done      : std_ulogic;
  end record;
  signal multiplier : multiplier_t;
218
 
219
  -- adder/subtractor unit --
  type addsub_t is record
    -- input comparison --
    exp_comp  : std_ulogic_vector(1 downto 0);  -- equal & less
    small_exp : std_ulogic_vector(7 downto 0);
    small_man : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    large_exp : std_ulogic_vector(7 downto 0);
    large_man : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    -- smaller mantissa alignment --
    man_sreg  : std_ulogic_vector(23 downto 0); -- mantissa + hidden one
    man_g_ext, man_r_ext, man_s_ext : std_ulogic; -- presumably the guard/round/sticky (GRS) extension bits - confirm against the unit
    exp_cnt   : std_ulogic_vector(8 downto 0);
    -- adder/subtractor stage --
    man_comp  : std_ulogic;
    man_s, man_l : std_ulogic_vector(26 downto 0); -- mantissa + hidden one + GRS
    add_stage : std_ulogic_vector(27 downto 0); -- adder result incl. overflow
    -- result --
    res_sign  : std_ulogic;
    res_sum   : std_ulogic_vector(27 downto 0); -- mantissa sum (+1 bit) + GRS bits (for rounding)
    res_class : std_ulogic_vector(9 downto 0);
    flags     : std_ulogic_vector(4 downto 0);  -- exception flags
    -- arbitration --
    start     : std_ulogic;
    latency   : std_ulogic_vector(4 downto 0);  -- unit latency
    done      : std_ulogic;
  end record;
  signal addsub : addsub_t;
249
 
250
  -- normalizer interface (normalization & rounding and int-to-float) --
  -- field widths mirror the ports of the neorv32_cpu_cp_fpu_normalizer component
  type normalizer_t is record
    start, mode, sign   : std_ulogic;                     -- trigger / operating mode / operand sign
    xexp                : std_ulogic_vector(8 downto 0);  -- extended exponent
    xmantissa           : std_ulogic_vector(47 downto 0); -- extended mantissa
    result              : std_ulogic_vector(31 downto 0); -- normalizer result
    class               : std_ulogic_vector(9 downto 0);  -- number class
    flags_in, flags_out : std_ulogic_vector(4 downto 0);  -- exception flags in / out
    done                : std_ulogic;                     -- operation done
  end record;
  signal normalizer : normalizer_t;
264
 
265 52 zero_gravi
begin
266
 
267 55 zero_gravi
-- ****************************************************************************************************************************
268
-- Control
269
-- ****************************************************************************************************************************
270
 
271
  -- Instruction Decoding -------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Decodes the upper funct12 bits of the control bus into one-hot instruction flags and then
  -- re-encodes them (with fixed priority) into the 3-bit binary function code.
  instruction_decoder: process(ctrl_i)
    -- map a boolean decode hit onto a logic level --
    function to_sl_f(hit : boolean) return std_ulogic is
    begin
      if hit then
        return '1';
      else
        return '0';
      end if;
    end function to_sl_f;
    variable funct5_v : std_ulogic_vector(4 downto 0); -- funct12 bits 11 downto 7
    variable is_class_v, is_comp_v, is_i2f_v, is_f2i_v      : boolean;
    variable is_sgnj_v, is_minmax_v, is_addsub_v, is_mul_v : boolean;
  begin
    funct5_v := ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_7_c);

    -- pattern match --
    is_class_v  := (funct5_v = "11100");
    is_comp_v   := (funct5_v = "10100");
    is_i2f_v    := (funct5_v = "11010");
    is_f2i_v    := (funct5_v = "11000");
    is_sgnj_v   := (funct5_v = "00100");
    is_minmax_v := (funct5_v = "00101");
    is_addsub_v := (ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_8_c) = "0000"); -- only four bits: covers both add and sub
    is_mul_v    := (funct5_v = "00010");

    -- one-hot re-encoding --
    cmd.instr_class  <= to_sl_f(is_class_v);
    cmd.instr_comp   <= to_sl_f(is_comp_v);
    cmd.instr_i2f    <= to_sl_f(is_i2f_v);
    cmd.instr_f2i    <= to_sl_f(is_f2i_v);
    cmd.instr_sgnj   <= to_sl_f(is_sgnj_v);
    cmd.instr_minmax <= to_sl_f(is_minmax_v);
    cmd.instr_addsub <= to_sl_f(is_addsub_v);
    cmd.instr_mul    <= to_sl_f(is_mul_v);

    -- binary re-encoding (priority order: mul, addsub, minmax, sgnj, f2i, i2f, comp, class) --
    if is_mul_v then
      cmd.funct <= op_mul_c;
    elsif is_addsub_v then
      cmd.funct <= op_addsub_c;
    elsif is_minmax_v then
      cmd.funct <= op_minmax_c;
    elsif is_sgnj_v then
      cmd.funct <= op_sgnj_c;
    elsif is_f2i_v then
      cmd.funct <= op_f2i_c;
    elsif is_i2f_v then
      cmd.funct <= op_i2f_c;
    elsif is_comp_v then
      cmd.funct <= op_comp_c;
    else
      cmd.funct <= op_class_c; -- fall-through: classify code, whether or not instr_class is set
    end if;
  end process instruction_decoder;
292 52 zero_gravi
 
293 55 zero_gravi
 
294
  -- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
  -- -------------------------------------------------------------------------------------------
  -- Subnormal numbers are not supported and are "flushed to zero"! FIXME / TODO
  subnormal_flush: process(rs1_i, rs2_i)
  begin
    -- default: pass sign, exponent and mantissa through unchanged --
    op_data(0) <= rs1_i(31 downto 0);
    op_data(1) <= rs2_i(31 downto 0);

    -- all-zero exponent (zero or subnormal): force the mantissa to zero --
    if (rs1_i(30 downto 23) = "00000000") then
      op_data(0)(22 downto 0) <= (others => '0');
    end if;
    if (rs2_i(30 downto 23) = "00000000") then
      op_data(1)(22 downto 0) <= (others => '0');
    end if;
  end process subnormal_flush;
305
 
306
 
307
  -- Number Classifier ----------------------------------------------------------------------
308
  -- -------------------------------------------------------------------------------------------
309
  -- Derives the 10-bit number class of both (flushed) input operands.
  number_classifier: process(op_data)
    variable sign_v                             : std_ulogic;
    variable man_zero_v, exp_zero_v, exp_ones_v : std_ulogic;
    variable zero_v, inf_v, denorm_v, nan_v     : std_ulogic;
    variable normal_v                           : std_ulogic;
  begin
    for idx in 0 to 1 loop -- 0 = rs1, 1 = rs2
      sign_v := op_data(idx)(31);

      -- field patterns --
      man_zero_v := not or_all_f(op_data(idx)(22 downto 0));  -- mantissa is all-zero
      exp_zero_v := not or_all_f(op_data(idx)(30 downto 23)); -- exponent is all-zero
      exp_ones_v := and_all_f(op_data(idx)(30 downto 23));    -- exponent is all-one

      -- number categories --
      zero_v   := exp_zero_v and man_zero_v;
      inf_v    := exp_ones_v and man_zero_v;
      denorm_v := '0'; -- FIXME / TODO exp_zero_v and (not man_zero_v); -- subnormals are never reported
      nan_v    := exp_ones_v and (not man_zero_v);
      normal_v := not (denorm_v or nan_v or inf_v or zero_v); -- none of the special categories

      -- class bits: negative side --
      op_class(idx)(fp_class_neg_inf_c)    <= sign_v and inf_v;
      op_class(idx)(fp_class_neg_norm_c)   <= sign_v and normal_v;
      op_class(idx)(fp_class_neg_denorm_c) <= sign_v and denorm_v;
      op_class(idx)(fp_class_neg_zero_c)   <= sign_v and zero_v;
      -- class bits: positive side --
      op_class(idx)(fp_class_pos_zero_c)   <= (not sign_v) and zero_v;
      op_class(idx)(fp_class_pos_denorm_c) <= (not sign_v) and denorm_v;
      op_class(idx)(fp_class_pos_norm_c)   <= (not sign_v) and normal_v;
      op_class(idx)(fp_class_pos_inf_c)    <= (not sign_v) and inf_v;
      -- class bits: NaNs, told apart by the mantissa MSB --
      op_class(idx)(fp_class_snan_c)       <= nan_v and (not op_data(idx)(22)); -- signaling NaN
      op_class(idx)(fp_class_qnan_c)       <= nan_v and op_data(idx)(22);       -- quiet NaN
    end loop;
  end process number_classifier;
338
 
339
 
340
  -- Co-Processor Control Engine ------------------------------------------------------------
341
  -- -------------------------------------------------------------------------------------------
342
  -- Two-state control engine (S_IDLE / S_BUSY).
  -- While idle it tracks the decoded function code and the effective rounding mode; on start_i it
  -- latches both operands and their classes, pulses ctrl_engine.start for one cycle and goes busy.
  -- While busy it waits for fu_core_done, then pulses ctrl_engine.valid for one cycle and returns
  -- to idle.
  control_engine_fsm: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl_engine.state      <= S_IDLE;
      ctrl_engine.start      <= '0';
      ctrl_engine.valid      <= '0'; -- was missing: valid_o stayed undefined until the first clock edge
      fpu_operands.frm       <= (others => '0');
      fpu_operands.rs1       <= (others => '0');
      fpu_operands.rs1_class <= (others => '0');
      fpu_operands.rs2       <= (others => '0');
      fpu_operands.rs2_class <= (others => '0');
      funct_ff               <= (others => '0');
    elsif rising_edge(clk_i) then
      -- arbiter defaults: both strobes are single-cycle pulses --
      ctrl_engine.valid <= '0';
      ctrl_engine.start <= '0';

      -- state machine --
      case ctrl_engine.state is

        when S_IDLE => -- waiting for operation trigger
        -- ------------------------------------------------------------
          funct_ff <= cmd.funct; -- actual operation to execute
          -- rounding mode --
          -- TODO / FIXME "round to nearest, ties to max magnitude" (0b100) is not supported yet
          -- funct3 = "111" selects the rounding mode from frm_i, otherwise funct3 itself is used;
          -- in both cases the MSB is forced to zero
          if (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "111") then
            fpu_operands.frm <= '0' & frm_i(1 downto 0);
          else
            fpu_operands.frm <= '0' & ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c);
          end if;
          --
          if (start_i = '1') then
            -- operand data (after subnormal flush) and operand classes --
            fpu_operands.rs1       <= op_data(0);
            fpu_operands.rs1_class <= op_class(0);
            fpu_operands.rs2       <= op_data(1);
            fpu_operands.rs2_class <= op_class(1);
            -- execute! --
            ctrl_engine.start <= '1';
            ctrl_engine.state <= S_BUSY;
          end if;

        when S_BUSY => -- operation in progress (multi-cycle)
        -- -----------------------------------------------------------
          if (fu_core_done = '1') then -- processing done?
            ctrl_engine.valid <= '1';
            ctrl_engine.state <= S_IDLE;
          end if;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl_engine.state <= S_IDLE;

      end case;
    end if;
  end process control_engine_fsm;

  -- operation done / valid output --
  valid_o <= ctrl_engine.valid;
400
 
401
 
402
  -- Functional Unit Interface (operation-start trigger) ------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- The engine's start strobe is forwarded only to the unit selected by the one-hot command.
  -- single-cycle / comparator-based units --
  fu_classify.start    <= cmd.instr_class  and ctrl_engine.start;
  fu_sign_inject.start <= cmd.instr_sgnj   and ctrl_engine.start;
  fu_compare.start     <= cmd.instr_comp   and ctrl_engine.start;
  fu_min_max.start     <= cmd.instr_minmax and ctrl_engine.start;
  -- conversion units --
  fu_conv_f2i.start    <= cmd.instr_f2i    and ctrl_engine.start;
  fu_conv_i2f.start    <= cmd.instr_i2f    and ctrl_engine.start;
  -- arithmetic units --
  fu_addsub.start      <= cmd.instr_addsub and ctrl_engine.start;
  fu_mul.start         <= cmd.instr_mul    and ctrl_engine.start;
412
 
413
 
414
-- ****************************************************************************************************************************
415
-- FPU Core - Functional Units
416
-- ****************************************************************************************************************************
417
 
418
  -- Number Classifier (FCLASS) -------------------------------------------------------------
419
  -- -------------------------------------------------------------------------------------------
420
  -- FCLASS: the result is the 10-bit class of operand 1, zero-extended to 32 bits --
  fu_classify.result <= std_ulogic_vector(resize(unsigned(fpu_operands.rs1_class), 32));
  fu_classify.flags  <= (others => '0'); -- does not generate flags at all
  fu_classify.done   <= fu_classify.start; -- done follows start combinationally
424
 
425
 
426
  -- Floating-Point Comparator --------------------------------------------------------------
427
  -- -------------------------------------------------------------------------------------------
428
  -- Registered comparator shared by the compare and min/max units.
  -- Produces comp_equal_ff (rs1 == rs2) and comp_less_ff (rs1 < rs2) from the latched operands
  -- and delays the start strobes of both units by one cycle to form their done flags.
  -- NaN operands are NOT handled here; the consuming processes mask/override the results.
  float_comparator: process(clk_i)
    variable cond_v         : std_ulogic_vector(1 downto 0);
    variable both_pos_inf_v : boolean; -- +inf vs. +inf
    variable both_neg_inf_v : boolean; -- -inf vs. -inf
    variable both_zero_v    : boolean; -- +/-zero vs. +/-zero
    variable identical_v    : boolean; -- bit-wise identical operands
  begin
    if rising_edge(clk_i) then
      -- special operand combinations --
      both_pos_inf_v := (fpu_operands.rs1_class(fp_class_pos_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1');
      both_neg_inf_v := (fpu_operands.rs1_class(fp_class_neg_inf_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1');
      both_zero_v    := ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs1_class(fp_class_neg_zero_c) = '1')) and
                        ((fpu_operands.rs2_class(fp_class_pos_zero_c) = '1') or (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1'));
      identical_v    := (fpu_operands.rs1 = fpu_operands.rs2);

      -- equal --
      if both_pos_inf_v or both_neg_inf_v or both_zero_v or identical_v then
        comp_equal_ff <= '1';
      else
        comp_equal_ff <= '0';
      end if;

      -- less than --
      -- Identical operands are never "less". Without this check two equal NEGATIVE numbers
      -- fell into the "11" branch below and reported rs1 < rs2 (not comp_less = '1').
      if both_pos_inf_v or both_neg_inf_v or both_zero_v or identical_v then
        comp_less_ff <= '0';
      else
        cond_v := fpu_operands.rs1(31) & fpu_operands.rs2(31);
        case cond_v is
          when "10"   => comp_less_ff <= '1'; -- rs1 negative, rs2 positive
          when "01"   => comp_less_ff <= '0'; -- rs1 positive, rs2 negative
          when "00"   => comp_less_ff <= comp_less; -- both positive
          when "11"   => comp_less_ff <= not comp_less; -- both negative (magnitudes differ here)
          when others => comp_less_ff <= '0'; -- undefined
        end case;
      end if;

      -- comparator latency --
      fu_compare.done <= fu_compare.start; -- for actual comparison operation
      fu_min_max.done <= fu_min_max.start; -- for min/max operations
    end if;
  end process float_comparator;

  -- less than - only compare the "magnitude" part - sign bit has to be handled separately --
  comp_less <= '1' when (unsigned(fpu_operands.rs1(30 downto 0)) < unsigned(fpu_operands.rs2(30 downto 0))) else '0';
468
 
469
 
470
  -- Comparison (FEQ/FLT/FLE) ---------------------------------------------------------------
471
  -- -------------------------------------------------------------------------------------------
472
  -- Selects the comparison result according to funct3(1:0) and forces it to zero whenever at
  -- least one operand is a NaN. Only result bit 0 can be set.
  -- NOTE(review): no exception flag is raised for NaN inputs here - confirm this is intended.
  float_comparison: process(fpu_operands, ctrl_i, comp_equal_ff, comp_less_ff)
    variable any_nan_v : std_ulogic; -- at least one input is a NaN (signaling or quiet)
    variable match_v   : std_ulogic; -- comparison outcome before NaN masking
  begin
    -- check for NaN --
    any_nan_v := (fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c)) or
                 (fpu_operands.rs1_class(fp_class_qnan_c) or fpu_operands.rs2_class(fp_class_qnan_c));

    -- condition select --
    case ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) is
      when "00"   => match_v := comp_less_ff or comp_equal_ff; -- FLE: less than or equal
      when "01"   => match_v := comp_less_ff;                  -- FLT: less than
      when "10"   => match_v := comp_equal_ff;                 -- FEQ: equal
      when others => match_v := '0';                           -- undefined
    end case;

    -- result word: upper bits are always zero, bit 0 is zero if either input is NaN --
    fu_compare.result(31 downto 1) <= (others => '0');
    fu_compare.result(0)           <= match_v and (not any_nan_v);
  end process float_comparison;
493
 
494
  -- latency --
495
  -- -> done in "float_comparator"
496
 
497
  -- exceptions --
498
  fu_compare.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
499
 
500
 
501
  -- Min/Max Select (FMIN/FMAX) -------------------------------------------------------------
502
  -- -------------------------------------------------------------------------------------------
503
  -- Selects the min/max result from the latched operands.
  -- cond_v(0): operand select without NaNs ('0' = rs1, '1' = rs2); funct3 bit 0 = '0' picks the
  --            smaller operand, '1' the larger one.
  -- cond_v(2): rs1 is a NaN, cond_v(1): rs2 is a NaN.
  -- Sensitivity list cleaned up: the duplicate "fpu_operands" entry and the unread "fu_compare"
  -- signal were removed.
  min_max_select: process(fpu_operands, comp_less_ff, ctrl_i)
    variable cond_v : std_ulogic_vector(2 downto 0);
  begin
    -- comparison result - check for special cases: -0 is less than +0
    if ((fpu_operands.rs1_class(fp_class_neg_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_pos_zero_c) = '1')) then
      cond_v(0) := ctrl_i(ctrl_ir_funct3_0_c);
    elsif ((fpu_operands.rs1_class(fp_class_pos_zero_c) = '1') and (fpu_operands.rs2_class(fp_class_neg_zero_c) = '1')) then
      cond_v(0) := not ctrl_i(ctrl_ir_funct3_0_c);
    else -- "normal" comparison
      cond_v(0) := comp_less_ff xnor ctrl_i(ctrl_ir_funct3_0_c); -- min/max select
    end if;

    -- number NaN check --
    cond_v(2) := fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs1_class(fp_class_qnan_c);
    cond_v(1) := fpu_operands.rs2_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_qnan_c);

    -- data output --
    case cond_v is
      when "000"         => fu_min_max.result <= fpu_operands.rs1;
      when "001"         => fu_min_max.result <= fpu_operands.rs2;
      when "010" | "011" => fu_min_max.result <= fpu_operands.rs1; -- rs2 is NaN: output the non-NaN one
      when "100" | "101" => fu_min_max.result <= fpu_operands.rs2; -- rs1 is NaN: output the non-NaN one
      when others        => fu_min_max.result <= fp_single_qnan_c; -- output quiet NaN if both inputs are NaN
    end case;
  end process min_max_select;
528
 
529
  -- latency --
530
  -- -> done in "float_comparator"
531
 
532
  -- exceptions --
533
  fu_min_max.flags <= (others => '0'); -- does not generate exceptions here, but normalizer can generate exceptions
534
 
535
 
536
  -- Convert: Float to [unsigned] Integer (FCVT.W.S) ----------------------------------------
537
  -- -------------------------------------------------------------------------------------------
538
  -- Float-to-integer converter instance. It works on the latched operand 1 (sign, exponent,
  -- mantissa and class taken from fpu_operands) and the latched rounding mode; its result,
  -- flags and done outputs feed the fu_conv_f2i interface record.
  neorv32_cpu_cp_fpu_f2i_inst: neorv32_cpu_cp_fpu_f2i
539
  port map (
540
    -- control --
541
    clk_i      => clk_i,                          -- global clock, rising edge
542
    rstn_i     => rstn_i,                         -- global reset, low-active, async
543
    start_i    => fu_conv_f2i.start,              -- trigger operation
544
    rmode_i    => fpu_operands.frm,               -- rounding mode
545
    funct_i    => ctrl_i(ctrl_ir_funct12_0_c),    -- 0=signed, 1=unsigned
546
    -- input --
547
    sign_i     => fpu_operands.rs1(31),           -- sign
548
    exponent_i => fpu_operands.rs1(30 downto 23), -- exponent
549
    mantissa_i => fpu_operands.rs1(22 downto 00), -- mantissa
550
    class_i    => fpu_operands.rs1_class,         -- operand class
551
    -- output --
552
    result_o   => fu_conv_f2i.result,             -- integer result
553
    flags_o    => fu_conv_f2i.flags,              -- exception flags
554
    done_o     => fu_conv_f2i.done                -- operation done
555
  );
556
 
557
 
558
  -- Sign-Injection (FSGNJ) -----------------------------------------------------------------
559
  -- -------------------------------------------------------------------------------------------
560
  -- Combinational sign injection: the result keeps bits 30..0 (exponent and mantissa) of rs1;
  -- only the sign bit (31) is selected by the two low funct3 bits of the instruction word.
  sign_injector: process(ctrl_i, fpu_operands)
561
  begin
562
    case ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) is
563
      when "00"   => fu_sign_inject.result(31) <= fpu_operands.rs2(31); -- FSGNJ: sign of rs2
564
      when "01"   => fu_sign_inject.result(31) <= not fpu_operands.rs2(31); -- FSGNJN: inverted sign of rs2
565
      when "10"   => fu_sign_inject.result(31) <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- FSGNJX: XOR of both signs
566
      when others => fu_sign_inject.result(31) <= fpu_operands.rs2(31); -- undefined encoding: same output as FSGNJ
567
    end case;
568
    fu_sign_inject.result(30 downto 0) <= fpu_operands.rs1(30 downto 0);
569
    fu_sign_inject.flags <= (others => '0'); -- does not generate flags
570
  end process sign_injector;
571
 
572
  -- latency --
573
  fu_sign_inject.done <= fu_sign_inject.start; -- no added delay: done directly follows start
574
 
575
 
576
  -- Convert: [unsigned] Integer to Float (FCVT.S.W) ----------------------------------------
577
  -- -------------------------------------------------------------------------------------------
578
  -- Clocked pre-processing for integer-to-float conversion: registers the magnitude and the
  -- sign of the raw rs1_i input. fu_conv_i2f.done is fu_conv_i2f.start delayed by one clock.
  convert_i2f: process(clk_i)
579
  begin
580
    -- this process only computes the absolute input value
581
    -- the actual conversion is done by the normalizer
582
    if rising_edge(clk_i) then
583
      if (ctrl_i(ctrl_ir_funct12_0_c) = '0') and (rs1_i(31) = '1') then -- signed conversion of a negative input
584
        fu_conv_i2f.result <= std_ulogic_vector(0 - unsigned(rs1_i)); -- two's complement -> magnitude
585
        fu_conv_i2f.sign   <= rs1_i(31); -- original sign
586
      else -- unsigned conversion or non-negative signed input: pass through unchanged
587
        fu_conv_i2f.result <= rs1_i;
588
        fu_conv_i2f.sign   <= '0';
589
      end if;
590
      fu_conv_i2f.done <= fu_conv_i2f.start; -- actual conversion is done by the normalizer unit
591
    end if;
592
  end process convert_i2f;
593
 
594
 
595
  -- Multiplier Core (FMUL) -----------------------------------------------------------------
596
  -- -------------------------------------------------------------------------------------------
597
  -- Clocked multiplier datapath: buffers the two 24-bit mantissas (hidden one included) when
  -- multiplier.start is set, registers their product, the result sign, the bias-corrected
  -- exponent and the overflow/underflow/invalid flags, and shifts multiplier.start through
  -- the latency shift register.
  multiplier_core: process(clk_i)
598
  begin
599
    if rising_edge(clk_i) then
600
      -- multiplier core --
601
      if (multiplier.start = '1') then -- FIXME / TODO remove buffer?
602
        multiplier.opa <= unsigned('1' & fpu_operands.rs1(22 downto 0)); -- prepend hidden one
603
        multiplier.opb <= unsigned('1' & fpu_operands.rs2(22 downto 0)); -- prepend hidden one
604
      end if;
605
      multiplier.buf_ff  <= multiplier.opa * multiplier.opb;
606
      multiplier.product <= std_ulogic_vector(multiplier.buf_ff(47 downto 0)); -- let the register balancing do the magic here
607
      multiplier.sign    <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- resulting sign
608
 
609
      -- exponent computation --
610
      multiplier.exp_res <= std_ulogic_vector(unsigned('0' & multiplier.exp_sum) - 127); -- remove one bias from the exponent sum
611
      -- the flags below are derived from the registered exp_res value (i.e. one clock after exp_res is updated)
      if (multiplier.exp_res(multiplier.exp_res'left) = '1') then -- underflow (exp_res is "negative")
612
        multiplier.flags(fp_exc_of_c) <= '0';
613
        multiplier.flags(fp_exc_uf_c) <= '1';
614
      elsif (multiplier.exp_res(multiplier.exp_res'left-1) = '1') then -- overflow
615
        multiplier.flags(fp_exc_of_c) <= '1';
616
        multiplier.flags(fp_exc_uf_c) <= '0';
617
      else
618
        multiplier.flags(fp_exc_of_c) <= '0';
619
        multiplier.flags(fp_exc_uf_c) <= '0';
620
      end if;
621
 
622
      -- invalid operation --
623
      multiplier.flags(fp_exc_nv_c) <=
624
        ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_neg_zero_c)) and
625
         (fpu_operands.rs2_class(fp_class_pos_inf_c)  or fpu_operands.rs2_class(fp_class_neg_inf_c))) or -- mul(+/-zero, +/-inf)
626
        ((fpu_operands.rs1_class(fp_class_pos_inf_c)  or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
627
         (fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c))); -- mul(+/-inf, +/-zero)
628
 
629
      -- latency shift register --
630
      multiplier.latency <= multiplier.latency(multiplier.latency'left-1 downto 0) & multiplier.start; -- MSB is used as "done"
631
    end if;
632
  end process multiplier_core;
633
 
634
  -- exponent sum --
635
  -- combinational sum of both biased exponents, zero-extended by one bit to keep the carry
  multiplier.exp_sum <= std_ulogic_vector(unsigned('0' & fpu_operands.rs1(30 downto 23)) + unsigned('0' & fpu_operands.rs2(30 downto 23)));
636
 
637
  -- latency --
638
  multiplier.start <= fu_mul.start;
639
  multiplier.done  <= multiplier.latency(multiplier.latency'left); -- MSB of the latency shift register
640
  fu_mul.done      <= multiplier.done;
641
 
642
  -- unused exception flags --
643
  multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
644
  multiplier.flags(fp_exc_nx_c) <= '0'; -- inexact: not possible here
645
 
646
 
647
  -- result class -- 
648
  -- Clocked pre-classification of the multiplication result. The class of the product is
  -- derived from the one-hot class vectors of both operands only (no arithmetic involved).
  -- The subnormal result classes are not driven here.
  -- Fix: the +infinity class now also covers "+denorm * +inf" and "+inf * +denorm"; these two
  -- terms were missing although all other subnormal/infinity sign combinations were listed.
  multiplier_class_core: process(clk_i)
649
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
650
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
651
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
652
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
653
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
654
  begin
655
    if rising_edge(clk_i) then
656
      -- minions (short aliases: a = rs1 class bits, b = rs2 class bits) --
657
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
658
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
659
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
660
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
661
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
662
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
663
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
664
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
665
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
666
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);
667
 
668
      -- +normal --
669
      multiplier.res_class(fp_class_pos_norm_c) <=
670
        (a_pos_norm_v and b_pos_norm_v) or -- +norm * +norm
671
        (a_neg_norm_v and b_neg_norm_v);   -- -norm * -norm
672
      -- -normal --
673
      multiplier.res_class(fp_class_neg_norm_c) <=
674
        (a_pos_norm_v and b_neg_norm_v) or -- +norm * -norm
675
        (a_neg_norm_v and b_pos_norm_v);   -- -norm * +norm
676
 
677
      -- +infinity --
678
      multiplier.res_class(fp_class_pos_inf_c) <=
679
        (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    * +inf
680
        (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    * -inf
681
        (a_pos_norm_v and b_pos_inf_v)  or -- +norm   * +inf
682
        (a_pos_inf_v  and b_pos_norm_v) or -- +inf    * +norm
        (a_pos_subn_v and b_pos_inf_v)  or -- +denorm * +inf (term was missing)
        (a_pos_inf_v  and b_pos_subn_v) or -- +inf    * +denorm (term was missing)
683
        (a_neg_norm_v and b_neg_inf_v)  or -- -norm   * -inf
684
        (a_neg_inf_v  and b_neg_norm_v) or -- -inf    * -norm
685
        (a_neg_subn_v and b_neg_inf_v)  or -- -denorm * -inf
686
        (a_neg_inf_v  and b_neg_subn_v);   -- -inf    * -denorm
687
      -- -infinity --
688
      multiplier.res_class(fp_class_neg_inf_c) <=
689
        (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    * -inf
690
        (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    * +inf
691
        (a_pos_norm_v and b_neg_inf_v)  or -- +norm   * -inf
692
        (a_neg_inf_v  and b_pos_norm_v) or -- -inf    * +norm
693
        (a_neg_norm_v and b_pos_inf_v)  or -- -norm   * +inf
694
        (a_pos_inf_v  and b_neg_norm_v) or -- +inf    * -norm
695
        (a_pos_subn_v and b_neg_inf_v)  or -- +denorm * -inf
696
        (a_neg_inf_v  and b_pos_subn_v) or -- -inf    * +denorm
697
        (a_neg_subn_v and b_pos_inf_v)  or -- -denorm * +inf
698
        (a_pos_inf_v  and b_neg_subn_v);   -- +inf    * -denorm
699
 
700
      -- +zero --
701
      multiplier.res_class(fp_class_pos_zero_c) <=
702
        (a_pos_zero_v and b_pos_zero_v) or -- +zero   * +zero
703
        (a_pos_zero_v and b_pos_norm_v) or -- +zero   * +norm
704
        (a_pos_zero_v and b_pos_subn_v) or -- +zero   * +denorm
705
        (a_neg_zero_v and b_neg_zero_v) or -- -zero   * -zero
706
        (a_neg_zero_v and b_neg_norm_v) or -- -zero   * -norm
707
        (a_neg_zero_v and b_neg_subn_v) or -- -zero   * -denorm
708
        (a_pos_norm_v and b_pos_zero_v) or -- +norm   * +zero
709
        (a_pos_subn_v and b_pos_zero_v) or -- +denorm * +zero
710
        (a_neg_norm_v and b_neg_zero_v) or -- -norm   * -zero
711
        (a_neg_subn_v and b_neg_zero_v);   -- -denorm * -zero
712
 
713
      -- -zero --
714
      multiplier.res_class(fp_class_neg_zero_c) <=
715
        (a_pos_zero_v and b_neg_zero_v) or -- +zero   * -zero
716
        (a_pos_zero_v and b_neg_norm_v) or -- +zero   * -norm
717
        (a_pos_zero_v and b_neg_subn_v) or -- +zero   * -denorm
718
        (a_neg_zero_v and b_pos_zero_v) or -- -zero   * +zero
719
        (a_neg_zero_v and b_pos_norm_v) or -- -zero   * +norm
720
        (a_neg_zero_v and b_pos_subn_v) or -- -zero   * +denorm
721
        (a_neg_norm_v and b_pos_zero_v) or -- -norm   * +zero
722
        (a_neg_subn_v and b_pos_zero_v) or -- -denorm * +zero
723
        (a_pos_norm_v and b_neg_zero_v) or -- +norm   * -zero
724
        (a_pos_subn_v and b_neg_zero_v);   -- +denorm * -zero
725
 
726
      -- sNaN --
727
      multiplier.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
728
      -- qNaN --
729
      multiplier.res_class(fp_class_qnan_c) <=
730
        (a_snan_v or b_snan_v) or -- any input is sNaN
731
        (a_qnan_v or b_qnan_v) or -- any input is qNaN
732
        ((a_pos_inf_v  or a_neg_inf_v)  and (b_pos_zero_v or b_neg_zero_v)) or -- +/-inf * +/-zero
733
        ((a_pos_zero_v or a_neg_zero_v) and (b_pos_inf_v  or b_neg_inf_v));    -- +/-zero * +/-inf
734
    end if;
735
  end process multiplier_class_core;
736
 
737
  -- subnormal result --
738
  multiplier.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
739
  multiplier.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
740
 
741
  -- unused (the multiplier result and flags are passed on via the "multiplier" record, see normalizer_input_select) --
742
  fu_mul.result <= (others => '0');
743
  fu_mul.flags  <= (others => '0');
744
 
745
 
746
  -- Adder/Subtractor Core (FADD, FSUB) -----------------------------------------------------
747
  -- -------------------------------------------------------------------------------------------
748
  -- Clocked adder/subtractor datapath:
  --  * compares the operand exponents and iteratively right-shifts the mantissa of the operand
  --    with the smaller exponent (one bit per clock) until both exponents match,
  --  * adds or subtracts the aligned mantissas (incl. guard/round/sticky extension bits),
  --  * determines the result sign and the invalid-operation flag.
  -- Fix: the invalid flag (NV) is now only raised for an *effective subtraction* of two
  -- infinities (e.g. +inf + -inf, +inf - +inf). Before, it was raised for every inf/inf operand
  -- pair, including +inf + +inf, which is a valid operation and is classified as +/-infinity
  -- (not qNaN) by adder_subtractor_class_core.
  adder_subtractor_core: process(clk_i)
749
  begin
750
    if rising_edge(clk_i) then
751
      -- arbitration / latency --
752
      if (ctrl_engine.state = S_IDLE) then -- hacky "reset"
753
        addsub.latency <= (others => '0');
754
      else
755
        addsub.latency(0) <= addsub.start; -- input comparator delay
756
        if (addsub.latency(0) = '1') then -- shift register gets loaded in this cycle
757
          addsub.latency(1) <= '1'; -- "shift in progress"
758
          addsub.latency(2) <= '0';
759
        elsif (addsub.exp_cnt(7 downto 0) = addsub.large_exp) then -- radix point aligned (exponent counter has reached the larger exponent)
760
          addsub.latency(1) <= '0';
761
          addsub.latency(2) <= addsub.latency(1) and (not addsub.latency(0)); -- "shift done"
762
        end if;
763
        addsub.latency(3) <= addsub.latency(2); -- adder stage
764
        addsub.latency(4) <= addsub.latency(3); -- final stage
765
      end if;
766
 
767
      -- exponent check: find smaller number (radix-offset-only) --
768
      if (unsigned(fpu_operands.rs1(30 downto 23)) < unsigned(fpu_operands.rs2(30 downto 23))) then
769
        addsub.exp_comp(0) <= '1'; -- rs1 < rs2
770
      else
771
        addsub.exp_comp(0) <= '0'; -- rs1 >= rs2
772
      end if;
773
      if (unsigned(fpu_operands.rs1(30 downto 23)) = unsigned(fpu_operands.rs2(30 downto 23))) then
774
        addsub.exp_comp(1) <= '1'; -- rs1 == rs2
775
      else -- rs1 != rs2
776
        addsub.exp_comp(1) <= '0';
777
      end if;
778
 
779
      -- shift right small mantissa to align radix point --
780
      if (addsub.latency(0) = '1') then -- load shift register, exponent counter and clear extension bits
781
        if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
782
             fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c)) = '0') then -- no input is zero
783
          addsub.man_sreg <= addsub.small_man;
784
        else
785
          addsub.man_sreg <= (others => '0');
786
        end if;
787
        addsub.exp_cnt   <= '0' & addsub.small_exp;
788
        addsub.man_g_ext <= '0';
789
        addsub.man_r_ext <= '0';
790
        addsub.man_s_ext <= '0';
791
      elsif (addsub.exp_cnt(7 downto 0) /= addsub.large_exp) then -- shift right until same magnitude
792
        addsub.man_sreg  <= '0' & addsub.man_sreg(addsub.man_sreg'left downto 1);
793
        addsub.man_g_ext <= addsub.man_sreg(0);
794
        addsub.man_r_ext <= addsub.man_g_ext;
795
        addsub.man_s_ext <= addsub.man_s_ext or addsub.man_r_ext; -- sticky bit
796
        addsub.exp_cnt   <= std_ulogic_vector(unsigned(addsub.exp_cnt) + 1);
797
      end if;
798
 
799
      -- mantissa check: find smaller number (magnitude-only) --
800
      if (unsigned(addsub.man_sreg) <= unsigned(addsub.large_man)) then
801
        addsub.man_comp <= '1'; -- shifted mantissa <= mantissa of the operand with the larger exponent
802
      else
803
        addsub.man_comp <= '0';
804
      end if;
805
 
806
      -- actual addition/subtraction (incl. overflow) --
807
      if ((ctrl_i(ctrl_ir_funct12_7_c) xor (fpu_operands.rs1(31) xor fpu_operands.rs2(31))) = '0') then -- add
808
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) + unsigned('0' & addsub.man_s));
809
      else -- sub
810
        addsub.add_stage <= std_ulogic_vector(unsigned('0' & addsub.man_l) - unsigned('0' & addsub.man_s));
811
      end if;
812
 
813
      -- result sign --
814
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- add
815
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
816
          addsub.res_sign <= fpu_operands.rs1(31);
817
        else -- different signs
818
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
819
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
820
          else
821
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
822
          end if;
823
        end if;
824
      else -- sub
825
        if (fpu_operands.rs1(31) = fpu_operands.rs2(31)) then -- identical signs
826
          if (addsub.exp_comp(1) = '1') then -- exp are equal (also check relation of mantissas)
827
            addsub.res_sign <= fpu_operands.rs1(31) xor (not addsub.man_comp);
828
          else
829
            addsub.res_sign <= fpu_operands.rs1(31) xor addsub.exp_comp(0);
830
          end if;
831
        else -- different signs
832
          addsub.res_sign <= fpu_operands.rs1(31);
833
        end if;
834
      end if;
835
 
836
      -- exception flags --
837
      addsub.flags(fp_exc_nv_c) <= ((fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
838
                                    (fpu_operands.rs2_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c))) and -- both operands are +/-inf ...
                                   (ctrl_i(ctrl_ir_funct12_7_c) xor (fpu_operands.rs1(31) xor fpu_operands.rs2(31))); -- ... and the operation is an effective subtraction (inf - inf)
839
    end if;
840
  end process adder_subtractor_core;
841
 
842
  -- exceptions - unused -- 
843
  addsub.flags(fp_exc_dz_c) <= '0'; -- division by zero -> not possible
844
  addsub.flags(fp_exc_of_c) <= '0'; -- not possible here (but may occur in normalizer)
845
  addsub.flags(fp_exc_uf_c) <= '0'; -- not possible here (but may occur in normalizer)
846
  addsub.flags(fp_exc_nx_c) <= '0'; -- not possible here (but may occur in normalizer)
847
 
848
  -- exponent check: select operand with smaller/larger exponent (exp_comp(0) = '1': exponent of rs1 < exponent of rs2) --
  -- the hidden one is prepended to both mantissas
849
  addsub.small_exp <=        fpu_operands.rs1(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs2(30 downto 23);
850
  addsub.large_exp <=        fpu_operands.rs2(30 downto 23)  when (addsub.exp_comp(0) = '1') else        fpu_operands.rs1(30 downto 23);
851
  addsub.small_man <= ('1' & fpu_operands.rs1(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs2(22 downto 00));
852
  addsub.large_man <= ('1' & fpu_operands.rs2(22 downto 00)) when (addsub.exp_comp(0) = '1') else ('1' & fpu_operands.rs1(22 downto 00));
853
 
854
  -- mantissa check: order the aligned mantissas (man_comp = '1': shifted mantissa <= large_man) --
  -- the shifted mantissa carries its guard/round/sticky extension bits, the other one is extended with "000"
855
  addsub.man_s <= (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext) when (addsub.man_comp = '1') else (addsub.large_man & "000");
856
  addsub.man_l <= (addsub.large_man & "000") when (addsub.man_comp = '1') else (addsub.man_sreg & addsub.man_g_ext & addsub.man_r_ext & addsub.man_s_ext);
857
 
858
  -- latency --
859
  addsub.start   <= fu_addsub.start;
860
  addsub.done    <= addsub.latency(addsub.latency'left); -- MSB of the latency register
861
  fu_addsub.done <= addsub.done;
862
 
863
  -- mantissa result --
864
  addsub.res_sum <= addsub.add_stage(27 downto 0);
865
 
866
 
867
  -- result class -- 
868
  -- Clocked pre-classification of the addition/subtraction result. The result class is derived
  -- from the one-hot class vectors of both operands and from the add/sub select bit
  -- (ctrl_i(ctrl_ir_funct12_7_c): '0' = addition, '1' = subtraction) only; no arithmetic is involved.
  -- The infinity, zero and qNaN classes depend on the operation; the normal and sNaN classes do not.
  adder_subtractor_class_core: process(clk_i)
869
    variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
870
    variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
871
    variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
872
    variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
873
    variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
874
  begin
875
    if rising_edge(clk_i) then
876
      -- minions (short aliases: a = rs1 class bits, b = rs2 class bits) --
877
      a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
878
      a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
879
      a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c);  b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
880
      a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c);  b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
881
      a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c);    b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
882
      a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c);    b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
883
      a_pos_inf_v  := fpu_operands.rs1_class(fp_class_pos_inf_c);     b_pos_inf_v  := fpu_operands.rs2_class(fp_class_pos_inf_c);
884
      a_neg_inf_v  := fpu_operands.rs1_class(fp_class_neg_inf_c);     b_neg_inf_v  := fpu_operands.rs2_class(fp_class_neg_inf_c);
885
      a_snan_v     := fpu_operands.rs1_class(fp_class_snan_c);        b_snan_v     := fpu_operands.rs2_class(fp_class_snan_c);
886
      a_qnan_v     := fpu_operands.rs1_class(fp_class_qnan_c);        b_qnan_v     := fpu_operands.rs2_class(fp_class_qnan_c);
887
 
888
      if (ctrl_i(ctrl_ir_funct12_7_c) = '0') then -- addition
889
        -- +infinity --
890
        addsub.res_class(fp_class_pos_inf_c) <=
891
          (a_pos_inf_v  and b_pos_inf_v)  or -- +inf    + +inf
892
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    + +zero
893
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   + +inf
894
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    + -zero
895
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   + +inf
896
          --
897
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    + +norm
898
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   + +inf
899
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    + +denorm
900
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm + +inf
901
          --
902
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    + -norm
903
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   + +inf
904
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    + -denorm
905
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm + +inf
906
        -- -infinity --
907
        addsub.res_class(fp_class_neg_inf_c) <=
908
          (a_neg_inf_v  and b_neg_inf_v)  or -- -inf    + -inf
909
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    + +zero
910
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   + -inf
911
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    + -zero
912
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   + -inf
913
          --
914
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    + +norm
915
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   + -inf
916
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    + -norm
917
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   + -inf
918
          --
919
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    + +denorm
920
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm + -inf
921
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    + -denorm
922
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm + -inf
923
 
924
        -- +zero --
925
        addsub.res_class(fp_class_pos_zero_c) <=
926
          (a_pos_zero_v and b_pos_zero_v) or -- +zero + +zero
927
          (a_pos_zero_v and b_neg_zero_v) or -- +zero + -zero
928
          (a_neg_zero_v and b_pos_zero_v);   -- -zero + +zero
929
        -- -zero --
930
        addsub.res_class(fp_class_neg_zero_c) <=
931
          (a_neg_zero_v and b_neg_zero_v);   -- -zero + -zero
932
 
933
        -- qNaN --
934
        addsub.res_class(fp_class_qnan_c) <=
935
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
936
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
937
          (a_pos_inf_v and b_neg_inf_v) or -- +inf + -inf
938
          (a_neg_inf_v and b_pos_inf_v);   -- -inf + +inf
939
 
940
      else -- subtraction
941
        -- +infinity --
942
        addsub.res_class(fp_class_pos_inf_c) <=
943
          (a_pos_inf_v  and b_neg_inf_v)  or -- +inf    - -inf
944
          (a_pos_inf_v  and b_pos_zero_v) or -- +inf    - +zero
945
          (a_pos_inf_v  and b_neg_zero_v) or -- +inf    - -zero
946
          (a_pos_inf_v  and b_pos_norm_v) or -- +inf    - +norm
947
          (a_pos_inf_v  and b_pos_subn_v) or -- +inf    - +denorm
948
          (a_pos_inf_v  and b_neg_norm_v) or -- +inf    - -norm
949
          (a_pos_inf_v  and b_neg_subn_v) or -- +inf    - -denorm
950
          --
951
          (a_pos_zero_v and b_neg_inf_v)  or -- +zero   - -inf
952
          (a_neg_zero_v and b_neg_inf_v)  or -- -zero   - -inf
953
          --
954
          (a_pos_norm_v and b_neg_inf_v)  or -- +norm   - -inf
955
          (a_pos_subn_v and b_neg_inf_v)  or -- +denorm - -inf
956
          (a_neg_norm_v and b_neg_inf_v)  or -- -norm   - -inf
957
          (a_neg_subn_v and b_neg_inf_v);    -- -denorm - -inf
958
        -- -infinity --
959
        addsub.res_class(fp_class_neg_inf_c) <=
960
          (a_neg_inf_v  and b_pos_inf_v)  or -- -inf    - +inf
961
          (a_neg_inf_v  and b_pos_zero_v) or -- -inf    - +zero
962
          (a_neg_inf_v  and b_neg_zero_v) or -- -inf    - -zero
963
          (a_neg_inf_v  and b_pos_norm_v) or -- -inf    - +norm
964
          (a_neg_inf_v  and b_pos_subn_v) or -- -inf    - +denorm
965
          (a_neg_inf_v  and b_neg_norm_v) or -- -inf    - -norm
966
          (a_neg_inf_v  and b_neg_subn_v) or -- -inf    - -denorm
967
          --
968
          (a_pos_zero_v and b_pos_inf_v)  or -- +zero   - +inf
969
          (a_neg_zero_v and b_pos_inf_v)  or -- -zero   - +inf
970
          --
971
          (a_pos_norm_v and b_pos_inf_v)  or -- +norm   - +inf
972
          (a_pos_subn_v and b_pos_inf_v)  or -- +denorm - +inf
973
          (a_neg_norm_v and b_pos_inf_v)  or -- -norm   - +inf
974
          (a_neg_subn_v and b_pos_inf_v);    -- -denorm - +inf
975
 
976
        -- +zero --
977
        addsub.res_class(fp_class_pos_zero_c) <=
978
          (a_pos_zero_v and b_pos_zero_v) or -- +zero - +zero
979
          (a_pos_zero_v and b_neg_zero_v) or -- +zero - -zero
980
          (a_neg_zero_v and b_neg_zero_v);   -- -zero - -zero
981
        -- -zero --
982
        addsub.res_class(fp_class_neg_zero_c) <=
983
          (a_neg_zero_v and b_pos_zero_v);   -- -zero - +zero
984
 
985
        -- qNaN --
986
        addsub.res_class(fp_class_qnan_c) <=
987
          (a_snan_v    or  b_snan_v)    or -- any input is sNaN
988
          (a_qnan_v    or  b_qnan_v)    or -- any input is qNaN
989
          (a_pos_inf_v and b_pos_inf_v) or -- +inf - +inf
990
          (a_neg_inf_v and b_neg_inf_v);   -- -inf - -inf
991
      end if;
992
 
993
      -- normal (both class bits are set identically whenever both operands are normal numbers) --
994
      addsub.res_class(fp_class_pos_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +/-norm [sign is irrelevant here]
995
      addsub.res_class(fp_class_neg_norm_c) <= (a_pos_norm_v or a_neg_norm_v) and (b_pos_norm_v or b_neg_norm_v); -- +/-norm +/- +/-norm [sign is irrelevant here]
996
 
997
      -- sNaN --
998
      addsub.res_class(fp_class_snan_c) <= (a_snan_v or b_snan_v); -- any input is sNaN
999
    end if;
1000
  end process adder_subtractor_class_core;
1001
 
1002
  -- subnormal result --
1003
  addsub.res_class(fp_class_pos_denorm_c) <= '0'; -- is evaluated by the normalizer
1004
  addsub.res_class(fp_class_neg_denorm_c) <= '0'; -- is evaluated by the normalizer
1005
 
1006
  -- unused (the adder/subtractor result and flags are passed on via the "addsub" record, see normalizer_input_select) --
1007
  fu_addsub.result <= (others => '0');
1008
  fu_addsub.flags  <= (others => '0');
1009
 
1010
 
1011
-- ****************************************************************************************************************************
1012
-- FPU Core - Normalize & Round
1013
-- ****************************************************************************************************************************
1014
 
1015
  -- Normalizer Input -----------------------------------------------------------------------
1016
  -- -------------------------------------------------------------------------------------------
1017
  -- Combinational input multiplexer of the normalizer: depending on the buffered operation
  -- (funct_ff) the sign, extended exponent, extended mantissa, number class, input flags and
  -- the start trigger are taken from the adder/subtractor, the multiplier or the
  -- integer-to-float pre-processing stage.
  normalizer_input_select: process(funct_ff, addsub, multiplier, fu_conv_i2f)
1018
  begin
1019
    case funct_ff is
1020
      when op_addsub_c => -- addition/subtraction
1021
        normalizer.mode      <= '0'; -- normalization
1022
        normalizer.sign      <= addsub.res_sign;
1023
        normalizer.xexp      <= addsub.exp_cnt;
1024
        normalizer.xmantissa(47 downto 23) <= addsub.res_sum(27 downto 3); -- upper 25 bits of the sum
1025
        normalizer.xmantissa(22) <= addsub.res_sum(2); -- bit position of the guard extension (man_g_ext)
1026
        normalizer.xmantissa(21) <= addsub.res_sum(1); -- bit position of the round extension (man_r_ext)
1027
        normalizer.xmantissa(20 downto 01) <= (others => '0');
1028
        normalizer.xmantissa(00) <= addsub.res_sum(0); -- bit position of the sticky extension (man_s_ext)
1029
        normalizer.class     <= addsub.res_class;
1030
        normalizer.flags_in  <= addsub.flags;
1031
        normalizer.start     <= addsub.done;
1032
      when op_mul_c => -- multiplication
1033
        normalizer.mode      <= '0'; -- normalization
1034
        normalizer.sign      <= multiplier.sign;
1035
        normalizer.xexp      <= '0' & multiplier.exp_res(7 downto 0); -- only the low 8 exponent bits are forwarded
1036
        normalizer.xmantissa <= multiplier.product;
1037
        normalizer.class     <= multiplier.res_class;
1038
        normalizer.flags_in  <= multiplier.flags;
1039
        normalizer.start     <= multiplier.done;
1040
      when others => -- op_i2f_c
1041
        normalizer.mode      <= '1'; -- int_to_float (the integer itself is applied via the normalizer's integer_i port)
1042
        normalizer.sign      <= fu_conv_i2f.sign;
1043
        normalizer.xexp      <= "001111111"; -- bias = 127
1044
        normalizer.xmantissa <= (others => '0'); -- don't care
1045
        normalizer.class     <= (others => '0'); -- don't care
1046
        normalizer.flags_in  <= (others => '0'); -- no flags yet
1047
        normalizer.start     <= fu_conv_i2f.done;
1048
    end case;
1049
  end process normalizer_input_select;
1050
 
1051
 
1052
  -- Normalizer & Rounding Unit -------------------------------------------------------------
1053
  -- -------------------------------------------------------------------------------------------
1054
  -- Normalizer/rounding unit instance: fed by the signals selected in normalizer_input_select;
  -- the integer input for int-to-float conversion is taken directly from fu_conv_i2f.result.
  neorv32_cpu_cp_fpu_normalizer_inst: neorv32_cpu_cp_fpu_normalizer
1055
  port map (
1056
    -- control --
1057
    clk_i      => clk_i,                -- global clock, rising edge
1058
    rstn_i     => rstn_i,               -- global reset, low-active, async
1059
    start_i    => normalizer.start,     -- trigger operation
1060
    rmode_i    => fpu_operands.frm,     -- rounding mode
1061
    funct_i    => normalizer.mode,      -- operation mode (0=normalize and round, 1=int-to-float)
1062
    -- input --
1063
    sign_i     => normalizer.sign,      -- sign
1064
    exponent_i => normalizer.xexp,      -- extended exponent
1065
    mantissa_i => normalizer.xmantissa, -- extended mantissa
1066
    integer_i  => fu_conv_i2f.result,   -- integer input
1067
    class_i    => normalizer.class,     -- input number class
1068
    flags_i    => normalizer.flags_in,  -- exception flags input
1069
    -- output --
1070
    result_o   => normalizer.result,    -- result (float or int)
1071
    flags_o    => normalizer.flags_out, -- exception flags
1072
    done_o     => normalizer.done       -- operation done
1073
  );
1074
 
1075
 
1076
-- ****************************************************************************************************************************
1077
-- FPU Core - Result
1078
-- ****************************************************************************************************************************
1079
 
1080
  -- Result Output to CPU Pipeline ----------------------------------------------------------
1081
  -- -------------------------------------------------------------------------------------------
1082
  -- Registered result output: while ctrl_engine.valid is set, the result and exception flags
  -- of the unit selected by funct_ff are forwarded to res_o/fflags_o; in all other cycles
  -- both outputs are forced to zero.
  output_gate: process(clk_i)
1083
  begin
1084
    if rising_edge(clk_i) then
1085
      if (ctrl_engine.valid = '1') then
1086
        case funct_ff is
1087
          when op_class_c => -- classify
1088
            res_o    <= fu_classify.result;
1089
            fflags_o <= fu_classify.flags;
1090
          when op_comp_c => -- compare
1091
            res_o    <= fu_compare.result;
1092
            fflags_o <= fu_compare.flags;
1093
          when op_f2i_c => -- float-to-integer conversion
1094
            res_o    <= fu_conv_f2i.result;
1095
            fflags_o <= fu_conv_f2i.flags;
1096
          when op_sgnj_c => -- sign injection
1097
            res_o    <= fu_sign_inject.result;
1098
            fflags_o <= fu_sign_inject.flags;
1099
          when op_minmax_c => -- minimum/maximum
1100
            res_o    <= fu_min_max.result;
1101
            fflags_o <= fu_min_max.flags;
1102
          when others => -- op_mul_c, op_addsub_c, op_i2f_c, ... (all results that pass through the normalizer)
1103
            res_o    <= normalizer.result;
1104
            fflags_o <= normalizer.flags_out;
1105
        end case;
1106
      else -- no valid result: keep outputs at zero
1107
        res_o    <= (others => '0');
1108
        fflags_o <= (others => '0');
1109
      end if;
1110
    end if;
1111
  end process output_gate;
1112
 
1113
  -- operation done --
1114
  -- OR of all unit-done signals; multiplier, adder/subtractor and int-to-float complete via normalizer.done
  fu_core_done <= fu_compare.done or fu_classify.done or fu_sign_inject.done or fu_min_max.done or normalizer.done or fu_conv_f2i.done;
1115
 
1116
 
1117 52 zero_gravi
end neorv32_cpu_cp_fpu_rtl;
1118 55 zero_gravi
 
1119
-- ###########################################################################################################################################
1120
-- ###########################################################################################################################################
1121
 
1122
-- #################################################################################################
1123
-- # << NEORV32 - Single-Precision Floating-Point Unit: Normalizer and Rounding Unit >>            #
1124
-- # ********************************************************************************************* #
1125
-- # This unit also performs integer-to-float conversions.                                         #
1126
-- # ********************************************************************************************* #
1127
-- # BSD 3-Clause License                                                                          #
1128
-- #                                                                                               #
1129
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1130
-- #                                                                                               #
1131
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1132
-- # permitted provided that the following conditions are met:                                     #
1133
-- #                                                                                               #
1134
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1135
-- #    conditions and the following disclaimer.                                                   #
1136
-- #                                                                                               #
1137
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1138
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1139
-- #    provided with the distribution.                                                            #
1140
-- #                                                                                               #
1141
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1142
-- #    endorse or promote products derived from this software without specific prior written      #
1143
-- #    permission.                                                                                #
1144
-- #                                                                                               #
1145
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1146
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1147
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1148
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1149
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1150
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1151
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1152
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1153
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1154
-- # ********************************************************************************************* #
1155
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1156
-- #################################################################################################
1157
 
1158
library ieee;
1159
use ieee.std_logic_1164.all;
1160
use ieee.numeric_std.all;
1161
 
1162
library neorv32;
1163
use neorv32.neorv32_package.all;
1164
 
1165
-- Normalizer and rounding unit of the FPU (multi-cycle, iterative single-bit shifts).
-- funct_i selects the operation: '0' normalizes and rounds the extended sign/exponent/mantissa
-- operand, '1' converts integer_i to a float. sign_i, exponent_i, class_i and flags_i are
-- registered in the cycle where start_i is high; mantissa_i / integer_i are read one cycle
-- later. done_o is high for one clock cycle once the result has been registered.
entity neorv32_cpu_cp_fpu_normalizer is
1166
  port (
1167
    -- control --
1168
    clk_i      : in  std_ulogic; -- global clock, rising edge
1169
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1170
    start_i    : in  std_ulogic; -- trigger operation
1171
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
1172
    funct_i    : in  std_ulogic; -- operating mode (0=norm&round, 1=int-to-float)
1173
    -- input --
1174
    sign_i     : in  std_ulogic; -- sign
1175
    exponent_i : in  std_ulogic_vector(08 downto 0); -- extended exponent (initial value of the exponent counter)
1176
    mantissa_i : in  std_ulogic_vector(47 downto 0); -- extended mantissa (bits 47:46 = integer part, 45:23 = fraction, 22:0 = guard/round/sticky source)
1177
    integer_i  : in  std_ulogic_vector(31 downto 0); -- integer input (used for int-to-float only)
1178
    class_i    : in  std_ulogic_vector(09 downto 0); -- input number class
1179
    flags_i    : in  std_ulogic_vector(04 downto 0); -- exception flags input
1180
    -- output --
1181
    result_o   : out std_ulogic_vector(31 downto 0); -- float result (sign & exponent & mantissa)
1182
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags output
1183
    done_o     : out std_ulogic -- operation done (single-cycle pulse)
1184
  );
1185
end neorv32_cpu_cp_fpu_normalizer;
1186
 
1187
architecture neorv32_cpu_cp_fpu_normalizer_rtl of neorv32_cpu_cp_fpu_normalizer is

  -- Overview:
  -- The unit works on a sign-magnitude representation: the sign is kept in ctrl.res_sgn while
  -- sreg.upper/sreg.lower hold the magnitude. A first normalization pass shifts the magnitude
  -- (one bit per cycle) until only the hidden one is left in sreg.upper, then the magnitude is
  -- rounded and normalized a second time (rounding may carry into the next integer bit).
  -- ctrl.cnt tracks the exponent and is incremented/decremented with every shift.

  -- controller --
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_I2F, S_CHECK_I2F, S_PREPARE_NORM, S_PREPARE_SHIFT, S_NORMALIZE_BUSY, S_ROUND, S_CHECK, S_FINALIZE);
  type ctrl_t is record
    state   : ctrl_engine_state_t; -- current state
    norm_r  : std_ulogic; -- normalization round 0 or 1
    cnt     : std_ulogic_vector(08 downto 0); -- iteration counter/exponent (incl. overflow)
    cnt_pre : std_ulogic_vector(08 downto 0); -- counter value of the previous cycle
    cnt_of  : std_ulogic; -- counter overflow
    cnt_uf  : std_ulogic; -- counter underflow
    rounded : std_ulogic; -- result is inexact (non-zero bits were discarded)
    res_sgn : std_ulogic; -- result sign
    res_exp : std_ulogic_vector(07 downto 0); -- result exponent
    res_man : std_ulogic_vector(22 downto 0); -- result mantissa
    class   : std_ulogic_vector(09 downto 0); -- registered input number class
    flags   : std_ulogic_vector(04 downto 0); -- registered/updated exception flags
  end record;
  signal ctrl : ctrl_t;

  -- normalization shift register --
  type sreg_t is record
    done  : std_ulogic; -- normalized: only the hidden one is left in "upper"
    dir   : std_ulogic; -- shift direction: 0=right, 1=left
    zero  : std_ulogic; -- "upper" is all-zero (including the hidden-one position)
    upper : std_ulogic_vector(31 downto 0); -- integer part, bit 0 is the hidden-one position
    lower : std_ulogic_vector(22 downto 0); -- fraction (mantissa)
    ext_g : std_ulogic; -- guard bit
    ext_r : std_ulogic; -- round bit
    ext_s : std_ulogic; -- sticky bit
  end record;
  signal sreg : sreg_t;

  -- rounding unit --
  type round_t is record
    en     : std_ulogic; -- enable rounding
    sub    : std_ulogic; -- 0=increment, 1=decrement
    output : std_ulogic_vector(24 downto 0); -- mantissa size + hidden one + 1
  end record;
  signal round : round_t;

begin

  -- Control Engine -------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  ctrl_engine: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl.state   <= S_IDLE;
      ctrl.norm_r  <= '0';
      ctrl.cnt     <= (others => '0');
      ctrl.cnt_pre <= (others => '0');
      ctrl.cnt_of  <= '0';
      ctrl.cnt_uf  <= '0';
      ctrl.rounded <= '0';
      ctrl.res_exp <= (others => '0');
      ctrl.res_man <= (others => '0');
      ctrl.res_sgn <= '0';
      ctrl.class   <= (others => '0');
      ctrl.flags   <= (others => '0');
      --
      sreg.upper   <= (others => '0');
      sreg.lower   <= (others => '0');
      sreg.dir     <= '0';
      sreg.ext_g   <= '0';
      sreg.ext_r   <= '0';
      sreg.ext_s   <= '0';
      --
      done_o       <= '0';
    elsif rising_edge(clk_i) then
      -- defaults --
      ctrl.cnt_pre <= ctrl.cnt;
      done_o       <= '0';

      -- exponent counter underflow/overflow --
      -- (detected by comparing the two MSBs of the previous and the current counter value)
      if ((ctrl.cnt_pre(8 downto 7) = "01") and (ctrl.cnt(8 downto 7) = "10")) then -- overflow
        ctrl.cnt_of <= '1';
      elsif (ctrl.cnt_pre(8 downto 7) = "00") and (ctrl.cnt(8 downto 7) = "11") then -- underflow
        ctrl.cnt_uf <= '1';
      end if;

      -- fsm --
      case ctrl.state is

        when S_IDLE => -- wait for operation trigger
        -- ------------------------------------------------------------
          ctrl.norm_r  <= '0'; -- start with first normalization
          ctrl.rounded <= '0'; -- not rounded yet
          ctrl.cnt_of  <= '0';
          ctrl.cnt_uf  <= '0';
          --
          if (start_i = '1') then
            ctrl.cnt     <= exponent_i;
            ctrl.res_sgn <= sign_i;
            ctrl.class   <= class_i;
            ctrl.flags   <= flags_i;
            if (funct_i = '0') then -- float -> float
              ctrl.state <= S_PREPARE_NORM;
            else -- integer -> float
              ctrl.state <= S_PREPARE_I2F;
            end if;
          end if;

        when S_PREPARE_I2F => -- prepare integer-to-float conversion
        -- ------------------------------------------------------------
          sreg.upper <= integer_i;
          sreg.lower <= (others => '0');
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          sreg.dir   <= '0'; -- shift right
          ctrl.state <= S_CHECK_I2F;

        when S_CHECK_I2F => -- check if converting zero
        -- ------------------------------------------------------------
          if (sreg.zero = '1') then -- all zero
            ctrl.class(fp_class_pos_zero_c) <= '1';
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_NORMALIZE_BUSY;
          end if;

        when S_PREPARE_NORM => -- prepare "normal" normalization & rounding
        -- ------------------------------------------------------------
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= mantissa_i(47 downto 46);
          sreg.lower <= mantissa_i(45 downto 23);
          sreg.ext_g <= mantissa_i(22);
          sreg.ext_r <= mantissa_i(21);
          sreg.ext_s <= or_all_f(mantissa_i(20 downto 0));
          -- check for special cases --
          if ((ctrl.class(fp_class_snan_c)       or ctrl.class(fp_class_qnan_c)       or -- NaN
               ctrl.class(fp_class_neg_zero_c)   or ctrl.class(fp_class_pos_zero_c)   or -- zero
               ctrl.class(fp_class_neg_denorm_c) or ctrl.class(fp_class_pos_denorm_c) or -- subnormal
               ctrl.class(fp_class_neg_inf_c)    or ctrl.class(fp_class_pos_inf_c)    or -- infinity
               ctrl.flags(fp_exc_uf_c) or -- underflow
               ctrl.flags(fp_exc_of_c) or -- overflow
               ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_PREPARE_SHIFT;
          end if;

        when S_PREPARE_SHIFT => -- prepare shift direction (for "normal" normalization only)
        -- ------------------------------------------------------------
          if (sreg.zero = '0') then -- integer part is not empty: number >= 1.0
            sreg.dir <= '0'; -- shift right
          else -- integer part (incl. hidden one) is empty: number < 1.0
            sreg.dir <= '1'; -- shift left
          end if;
          ctrl.state <= S_NORMALIZE_BUSY;

        when S_NORMALIZE_BUSY => -- running normalization cycle
        -- ------------------------------------------------------------
          -- shift until normalized or exception --
          if (sreg.done = '1') or (ctrl.cnt_uf = '1') or (ctrl.cnt_of = '1') then
            -- normalization control --
            ctrl.norm_r <= '1';
            if (ctrl.norm_r = '0') then -- first normalization cycle done
              ctrl.state <= S_ROUND;
            else -- second normalization cycle done
              ctrl.state <= S_CHECK;
            end if;
          else
            if (sreg.dir = '0') then -- shift right
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) + 1);
              sreg.upper <= '0' & sreg.upper(sreg.upper'left downto 1);
              sreg.lower <= sreg.upper(0) & sreg.lower(sreg.lower'left downto 1);
              sreg.ext_g <= sreg.lower(0);
              sreg.ext_r <= sreg.ext_g;
              sreg.ext_s <= sreg.ext_r or sreg.ext_s; -- sticky bit
            else -- shift left
              ctrl.cnt   <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
              sreg.upper <= sreg.upper(sreg.upper'left-1 downto 0) & sreg.lower(sreg.lower'left);
              sreg.lower <= sreg.lower(sreg.lower'left-1 downto 0) & sreg.ext_g;
              sreg.ext_g <= sreg.ext_r;
              sreg.ext_r <= sreg.ext_s;
              sreg.ext_s <= sreg.ext_s; -- sticky bit
            end if;
          end if;

        when S_ROUND => -- rounding cycle (after first normalization)
        -- ------------------------------------------------------------
          -- the result is inexact whenever any discarded bit is set - no matter if the
          -- selected rounding mode modifies the mantissa or just truncates it
          ctrl.rounded <= ctrl.rounded or sreg.ext_g or sreg.ext_r or sreg.ext_s;
          sreg.upper(31 downto 02) <= (others => '0');
          sreg.upper(01 downto 00) <= round.output(24 downto 23);
          sreg.lower <= round.output(22 downto 00);
          sreg.ext_g <= '0';
          sreg.ext_r <= '0';
          sreg.ext_s <= '0';
          ctrl.state <= S_PREPARE_SHIFT;

        when S_CHECK => -- check for overflow/underflow
        -- ------------------------------------------------------------
          if (ctrl.cnt_uf = '1') then -- underflow
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt_of = '1') then -- overflow
            ctrl.flags(fp_exc_of_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"00") then -- subnormal
            ctrl.flags(fp_exc_uf_c) <= '1';
          elsif (ctrl.cnt(7 downto 0) = x"FF") then -- infinity
            ctrl.flags(fp_exc_of_c) <= '1';
          end if;
          ctrl.state  <= S_FINALIZE;

        when S_FINALIZE => -- result finalization
        -- ------------------------------------------------------------
          -- generate result word (the ORDER of checks is important here!) --
          if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') then -- sNaN / qNaN
            ctrl.res_sgn <= fp_single_qnan_c(31);
            ctrl.res_exp <= fp_single_qnan_c(30 downto 23);
            ctrl.res_man <= fp_single_qnan_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_inf_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- infinity
                (ctrl.flags(fp_exc_of_c) = '1') then -- overflow
            ctrl.res_exp <= fp_single_pos_inf_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_inf_c(22 downto 00);
          elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') then -- zero
            ctrl.res_sgn <= ctrl.class(fp_class_neg_zero_c);
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23);
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          elsif (ctrl.flags(fp_exc_uf_c) = '1') or -- underflow
                (sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
            ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
            ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
          else -- result is ok
            ctrl.res_exp <= ctrl.cnt(7 downto 0);
            ctrl.res_man <= sreg.lower;
          end if;
          -- generate exception flags --
          ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if bits were discarded
          --
          done_o     <= '1';
          ctrl.state <= S_IDLE;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl.state <= S_IDLE;

      end case;
    end if;
  end process ctrl_engine;

  -- stop shifting when normalized --
  sreg.done <= (not or_all_f(sreg.upper(sreg.upper'left downto 1))) and sreg.upper(0); -- upper bits are all-zero and hidden one is set

  -- all-zero including hidden bit --
  sreg.zero <= not or_all_f(sreg.upper);

  -- result --
  result_o(31)           <= ctrl.res_sgn;
  result_o(30 downto 23) <= ctrl.res_exp;
  result_o(22 downto  0) <= ctrl.res_man;

  -- exception flags --
  flags_o(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c); -- invalid operation
  flags_o(fp_exc_dz_c) <= ctrl.flags(fp_exc_dz_c); -- divide by zero
  flags_o(fp_exc_of_c) <= ctrl.flags(fp_exc_of_c); -- overflow
  flags_o(fp_exc_uf_c) <= ctrl.flags(fp_exc_uf_c); -- underflow
  flags_o(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c); -- inexact


  -- Rounding -------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- The mantissa is a MAGNITUDE (the sign is kept separately in ctrl.res_sgn). Hence, the
  -- directed rounding modes depend on the sign: rounding towards -infinity enlarges the
  -- magnitude of negative numbers and truncates positive ones; rounding towards +infinity
  -- does the opposite. A correctly rounded magnitude is never smaller than the truncated one,
  -- so the decrement path of the adder is not requested by any rounding mode.
  -- NOTE(review): for integer-to-float conversions this assumes that sign_i carries the sign
  -- of the converted integer - confirm against the instantiating unit.
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl.res_sgn)
  begin
    -- defaults --
    round.en  <= '0';
    round.sub <= '0'; -- increment
    -- rounding mode --
    case rmode_i(2 downto 0) is
      when "000" => -- round to nearest, ties to even
        if (sreg.ext_g = '0') then
          round.en <= '0'; -- round down (do nothing)
        else
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
            round.en <= sreg.lower(0); -- round up if LSB of mantissa is set
          else
            round.en <= '1'; -- round up
          end if;
        end if;
      when "001" => -- round towards zero
        round.en <= '0'; -- no rounding -> just truncate
      when "010" => -- round down (towards -infinity)
        round.en <= ctrl.res_sgn and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- negative: increment magnitude, positive: truncate
      when "011" => -- round up (towards +infinity)
        round.en <= (not ctrl.res_sgn) and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- positive: increment magnitude, negative: truncate
      when "100" => -- round to nearest, ties to max magnitude
        round.en <= '0'; -- FIXME / TODO: not supported yet (truncates)
      when others => -- undefined
        round.en <= '0';
    end case;
  end process rounding_unit_ctrl;


  -- incrementer/decrementer --
  rounding_unit_add: process(round, sreg)
    variable tmp_v : std_ulogic_vector(24 downto 0);
  begin
    tmp_v := '0' & sreg.upper(0) & sreg.lower; -- carry bit & hidden one & mantissa
    if (round.en = '1') then
      if (round.sub = '0') then -- increment
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
      else -- decrement
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
      end if;
    else -- do nothing
      round.output <= tmp_v;
    end if;
  end process rounding_unit_add;


end neorv32_cpu_cp_fpu_normalizer_rtl;
1503
 
1504
-- ###########################################################################################################################################
1505
-- ###########################################################################################################################################
1506
 
1507
-- #################################################################################################
1508
-- # << NEORV32 - Single-Precision Floating-Point Unit: Float-To-Int Converter >>                  #
1509
-- # ********************************************************************************************* #
1510
-- # BSD 3-Clause License                                                                          #
1511
-- #                                                                                               #
1512
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
1513
-- #                                                                                               #
1514
-- # Redistribution and use in source and binary forms, with or without modification, are          #
1515
-- # permitted provided that the following conditions are met:                                     #
1516
-- #                                                                                               #
1517
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
1518
-- #    conditions and the following disclaimer.                                                   #
1519
-- #                                                                                               #
1520
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
1521
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
1522
-- #    provided with the distribution.                                                            #
1523
-- #                                                                                               #
1524
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
1525
-- #    endorse or promote products derived from this software without specific prior written      #
1526
-- #    permission.                                                                                #
1527
-- #                                                                                               #
1528
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
1529
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
1530
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
1531
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
1532
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
1533
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
1534
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
1535
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
1536
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
1537
-- # ********************************************************************************************* #
1538
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
1539
-- #################################################################################################
1540
 
1541
library ieee;
1542
use ieee.std_logic_1164.all;
1543
use ieee.numeric_std.all;
1544
 
1545
library neorv32;
1546
use neorv32.neorv32_package.all;
1547
 
1548
-- Float-to-integer converter of the FPU (multi-cycle, iterative single-bit shifts).
-- The operand (sign_i, exponent_i, mantissa_i, class_i) is registered in the cycle where
-- start_i is high. funct_i selects a signed ('0') or unsigned ('1') integer result.
-- done_o is high for one clock cycle once the result has been registered.
entity neorv32_cpu_cp_fpu_f2i is
1549
  port (
1550
    -- control --
1551
    clk_i      : in  std_ulogic; -- global clock, rising edge
1552
    rstn_i     : in  std_ulogic; -- global reset, low-active, async
1553
    start_i    : in  std_ulogic; -- trigger operation
1554
    rmode_i    : in  std_ulogic_vector(02 downto 0); -- rounding mode
1555
    funct_i    : in  std_ulogic; -- 0=signed, 1=unsigned
1556
    -- input --
1557
    sign_i     : in  std_ulogic; -- sign
1558
    exponent_i : in  std_ulogic_vector(07 downto 0); -- exponent (biased)
1559
    mantissa_i : in  std_ulogic_vector(22 downto 0); -- mantissa (without hidden one)
1560
    class_i    : in  std_ulogic_vector(09 downto 0); -- operand class
1561
    -- output --
1562
    result_o   : out std_ulogic_vector(31 downto 0); -- integer result
1563
    flags_o    : out std_ulogic_vector(04 downto 0); -- exception flags
1564
    done_o     : out std_ulogic -- operation done (single-cycle pulse)
1565
  );
1566
end neorv32_cpu_cp_fpu_f2i;
1567
 
1568
architecture neorv32_cpu_cp_fpu_f2i_rtl of neorv32_cpu_cp_fpu_f2i is

  -- Overview:
  -- The conversion works on the MAGNITUDE of the operand. The mantissa is shifted left (one
  -- bit per cycle) into the integer register "sreg.int" until the unbiased exponent is used
  -- up. The bits remaining in "sreg.mant" are the discarded fraction; they provide the guard,
  -- round and sticky bits for rounding. The sign is applied in the final state.

  -- controller --
  type ctrl_engine_state_t is (S_IDLE, S_PREPARE_F2I, S_NORMALIZE_BUSY, S_ROUND, S_FINALIZE);
  type ctrl_t is record
    state      : ctrl_engine_state_t; -- current state
    unsign     : std_ulogic; -- 1 = unsigned conversion
    cnt        : std_ulogic_vector(07 downto 0); -- iteration counter/exponent
    sign       : std_ulogic; -- operand sign
    class      : std_ulogic_vector(09 downto 0); -- operand class
    rounded    : std_ulogic; -- result is inexact (non-zero fraction bits were discarded)
    over       : std_ulogic; -- output is overflowing
    under      : std_ulogic; -- output is underflowing
    result_tmp : std_ulogic_vector(31 downto 0); -- rounded magnitude
    result     : std_ulogic_vector(31 downto 0); -- final result
  end record;
  signal ctrl : ctrl_t;

  -- conversion shift register --
  type sreg_t is record
    int   : std_ulogic_vector(31 downto 0); -- integer part (bit 0 is initialized with the hidden one)
    mant  : std_ulogic_vector(22 downto 0); -- fraction part
    ext_g : std_ulogic; -- guard bit
    ext_r : std_ulogic; -- round bit
    ext_s : std_ulogic; -- sticky bit
  end record;
  signal sreg : sreg_t;

  -- rounding unit --
  type round_t is record
    en     : std_ulogic; -- enable rounding
    sub    : std_ulogic; -- 0=increment, 1=decrement
    output : std_ulogic_vector(32 downto 0); -- result + overflow
  end record;
  signal round : round_t;

begin

  -- Control Engine -------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  ctrl_engine: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      ctrl.state      <= S_IDLE;
      ctrl.cnt        <= (others => '0');
      ctrl.sign       <= '0';
      ctrl.class      <= (others => '0');
      ctrl.rounded    <= '0';
      ctrl.over       <= '0';
      ctrl.under      <= '0';
      ctrl.unsign     <= '0';
      ctrl.result     <= (others => '0');
      ctrl.result_tmp <= (others => '0');
      sreg.int        <= (others => '0');
      sreg.mant       <= (others => '0');
      sreg.ext_s      <= '0';
      done_o          <= '0';
    elsif rising_edge(clk_i) then
      -- defaults --
      done_o <= '0';

      -- fsm --
      case ctrl.state is

        when S_IDLE => -- wait for operation trigger
        -- ------------------------------------------------------------
          ctrl.rounded <= '0'; -- not rounded yet
          ctrl.over    <= '0'; -- not overflowing yet
          ctrl.under   <= '0'; -- not underflowing yet
          ctrl.unsign  <= funct_i;
          sreg.ext_s   <= '0'; -- init
          if (start_i = '1') then
            ctrl.cnt    <= exponent_i;
            ctrl.sign   <= sign_i;
            ctrl.class  <= class_i;
            sreg.mant   <= mantissa_i;
            ctrl.state  <= S_PREPARE_F2I;
          end if;

        when S_PREPARE_F2I => -- prepare float-to-integer conversion
        -- ------------------------------------------------------------
          if (unsigned(ctrl.cnt) < 126) then -- less than 0.5
            sreg.int    <= (others => '0');
            ctrl.under  <= '1'; -- this is an underflow!
            ctrl.cnt    <= (others => '0');
          elsif (unsigned(ctrl.cnt) = 126) then -- num < 1.0 but num >= 0.5
            sreg.int    <= (others => '0');
            sreg.mant   <= '1' & sreg.mant(sreg.mant'left downto 1); -- move hidden one into the fraction
            sreg.ext_s  <= sreg.mant(0); -- the shifted-out LSB is part of the discarded fraction
            ctrl.cnt    <= (others => '0');
          else
            sreg.int    <= (others => '0');
            sreg.int(0) <= '1'; -- hidden one
            ctrl.cnt    <= std_ulogic_vector(unsigned(ctrl.cnt) - 127); -- remove bias to get raw number of left shifts
          end if;
          -- check terminal cases --
          if ((ctrl.class(fp_class_neg_inf_c)  or ctrl.class(fp_class_pos_inf_c) or
               ctrl.class(fp_class_neg_zero_c) or ctrl.class(fp_class_pos_zero_c) or
               ctrl.class(fp_class_snan_c)     or ctrl.class(fp_class_qnan_c)) = '1') then
            ctrl.state <= S_FINALIZE;
          else
            ctrl.state <= S_NORMALIZE_BUSY;
          end if;

        when S_NORMALIZE_BUSY => -- running normalization cycle
        -- ------------------------------------------------------------
          if (or_all_f(ctrl.cnt(ctrl.cnt'left-1 downto 0)) = '0') then -- all shifts done
            -- The sticky bit is sampled only now: while shifting is still in progress the
            -- low mantissa bits may yet move into the integer part, so they must not be
            -- counted as discarded fraction bits.
            sreg.ext_s <= sreg.ext_s or or_all_f(sreg.mant(sreg.mant'left-2 downto 0)); -- sticky bit
            if (ctrl.unsign = '0') then -- signed conversion
              ctrl.over <= ctrl.over or sreg.int(sreg.int'left); -- update overrun flag again to check for numerical overflow into sign bit
            end if;
            ctrl.state <= S_ROUND;
          else -- shift left
            ctrl.cnt  <= std_ulogic_vector(unsigned(ctrl.cnt) - 1);
            sreg.int  <= sreg.int(sreg.int'left-1 downto 0) & sreg.mant(sreg.mant'left);
            sreg.mant <= sreg.mant(sreg.mant'left-1 downto 0) & '0';
            ctrl.over <= ctrl.over or sreg.int(sreg.int'left);
          end if;

        when S_ROUND => -- rounding cycle
        -- ------------------------------------------------------------
          -- the result is inexact whenever any discarded fraction bit is set - no matter if
          -- the selected rounding mode modifies the integer or just truncates the fraction
          ctrl.rounded    <= ctrl.rounded or sreg.ext_g or sreg.ext_r or sreg.ext_s;
          ctrl.over       <= ctrl.over or round.output(round.output'left); -- overflow after rounding
          ctrl.result_tmp <= round.output(round.output'left-1 downto 0);
          ctrl.state      <= S_FINALIZE;

        when S_FINALIZE => -- check for corner cases and finalize result
        -- ------------------------------------------------------------
          if (ctrl.unsign = '1') then -- unsigned conversion
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or -- NaN or +inf
               ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
              ctrl.result <= x"ffffffff";
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.class(fp_class_neg_inf_c) = '1') or -- zero or -inf
               (ctrl.sign = '1') or (ctrl.under = '1') then -- negative out-of-range or underflow
              ctrl.result <= x"00000000";
            else
              ctrl.result <= ctrl.result_tmp;
            end if;

          else -- signed conversion
            if (ctrl.class(fp_class_snan_c) = '1') or (ctrl.class(fp_class_qnan_c) = '1') or (ctrl.class(fp_class_pos_inf_c) = '1') or  -- NaN or +inf
                  ((ctrl.sign = '0') and (ctrl.over = '1')) then -- positive out-of-range
              ctrl.result <= x"7fffffff";
            elsif (ctrl.class(fp_class_neg_zero_c) = '1') or (ctrl.class(fp_class_pos_zero_c) = '1') or (ctrl.under = '1') then -- zero or underflow
              ctrl.result <= x"00000000";
            elsif (ctrl.class(fp_class_neg_inf_c) = '1') or ((ctrl.sign = '1') and (ctrl.over = '1')) then -- -inf or negative out-of-range
              ctrl.result <= x"80000000";
            else -- result is ok, make sign adaption
              if (ctrl.sign = '1') then
                ctrl.result <= std_ulogic_vector(0 - unsigned(ctrl.result_tmp)); -- two's complement negation of the magnitude
              else
                ctrl.result <= ctrl.result_tmp;
              end if;
            end if;
          end if;
          done_o     <= '1';
          ctrl.state <= S_IDLE;

        when others => -- undefined
        -- ------------------------------------------------------------
          ctrl.state <= S_IDLE;

      end case;
    end if;
  end process ctrl_engine;

  -- result --
  result_o <= ctrl.result;

  -- exception flags --
  flags_o(fp_exc_nv_c) <= ctrl.class(fp_class_snan_c) or ctrl.class(fp_class_qnan_c); -- invalid operation
  flags_o(fp_exc_dz_c) <= '0'; -- divide by zero - not possible here
  flags_o(fp_exc_of_c) <= ctrl.over or ctrl.class(fp_class_pos_inf_c) or ctrl.class(fp_class_neg_inf_c); -- overflow
  flags_o(fp_exc_uf_c) <= ctrl.under; -- underflow
  flags_o(fp_exc_nx_c) <= ctrl.rounded; -- inexact if fraction bits were discarded


  -- Rounding -------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- "sreg.int" is a MAGNITUDE (the sign is applied in S_FINALIZE). Hence, the directed
  -- rounding modes depend on the sign: rounding towards -infinity enlarges the magnitude of
  -- negative numbers and truncates positive ones; rounding towards +infinity does the
  -- opposite. A correctly rounded magnitude is never smaller than the truncated one, so the
  -- decrement path of the adder is not requested by any rounding mode (decrementing a zero
  -- magnitude would also wrap around and raise a false overflow).
  rounding_unit_ctrl: process(rmode_i, sreg, ctrl.sign)
  begin
    -- defaults --
    round.en  <= '0';
    round.sub <= '0'; -- increment
    -- rounding mode --
    case rmode_i(2 downto 0) is
      when "000" => -- round to nearest, ties to even
        if (sreg.ext_g = '0') then
          round.en <= '0'; -- round down (do nothing)
        else
          if (sreg.ext_r = '0') and (sreg.ext_s = '0') then -- tie!
            round.en <= sreg.int(0); -- round up if LSB of integer is set
          else
            round.en <= '1'; -- round up
          end if;
        end if;
      when "001" => -- round towards zero
        round.en <= '0'; -- no rounding -> just truncate
      when "010" => -- round down (towards -infinity)
        round.en <= ctrl.sign and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- negative: increment magnitude, positive: truncate
      when "011" => -- round up (towards +infinity)
        round.en <= (not ctrl.sign) and (sreg.ext_g or sreg.ext_r or sreg.ext_s); -- positive: increment magnitude, negative: truncate
      when "100" => -- round to nearest, ties to max magnitude
        round.en <= '0'; -- FIXME / TODO: not supported yet (truncates)
      when others => -- undefined
        round.en <= '0';
    end case;
  end process rounding_unit_ctrl;

  -- rounding: guard and round bits --
  sreg.ext_g <= sreg.mant(sreg.mant'left);
  sreg.ext_r <= sreg.mant(sreg.mant'left-1);


  -- incrementer/decrementer --
  rounding_unit_add: process(round, sreg)
    variable tmp_v : std_ulogic_vector(32 downto 0); -- including overflow
  begin
    tmp_v := '0' & sreg.int;
    if (round.en = '1') then
      if (round.sub = '0') then -- increment
        round.output <= std_ulogic_vector(unsigned(tmp_v) + 1);
      else -- decrement
        round.output <= std_ulogic_vector(unsigned(tmp_v) - 1);
      end if;
    else -- do nothing
      round.output <= tmp_v;
    end if;
  end process rounding_unit_add;


end neorv32_cpu_cp_fpu_f2i_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.