URL https://opencores.org/ocsvn/qfp32/qfp32/trunk

Subversion Repositories qfp32

[/] [qfp32/] [trunk/] [Units/] [divider.vhd] - Blame information for rev 3

Details | Compare with Previous | View Log


-- Copyright (c) 2013 Malte Graeper (mgraep@t-online.de) All rights reserved.
 
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;
 
package qfp32_divider_p is
 
  -- Reports which leading (most significant) parts of 'data' are all zero.
  --   data       : vector to inspect
  --   block_size : width in bits of one block
  -- Returns one flag per whole block, i.e. data'length/block_size flags
  -- indexed (n-1 downto 0). Flag i is '1' when the (i+1)*block_size most
  -- significant bits of 'data' are all '0', otherwise '0'.
  function zero_blocks (
    data : unsigned;
    block_size : integer)
    return std_ulogic_vector;
 
end package qfp32_divider_p;
 
package body qfp32_divider_p is

  -- Leading-zero detection at block granularity.
  --
  -- 'data' is viewed as data'length/block_size blocks of block_size bits each,
  -- counted from the most significant end. Flag i of the result is '1' when
  -- blocks 0..i (the (i+1)*block_size top bits) are all '0', otherwise '0'.
  -- Bits below the last whole block are never examined.
  function zero_blocks (
    data : unsigned;
    block_size : integer)
    return std_ulogic_vector is

    constant width    : integer := data'length;
    constant n_blocks : integer := width/block_size;
    -- copy of the argument with a fixed (width-1 downto 0) index range, so the
    -- slices below do not depend on how the caller's actual is indexed
    variable normalized : unsigned(width-1 downto 0);
    variable flags      : std_ulogic_vector(n_blocks-1 downto 0);

  begin

    normalized := data;
    flags := (others => '0');

    for blk in 0 to n_blocks-1 loop
      -- block 'blk' taken on its own is all zero ...
      if normalized(width-1-blk*block_size downto width-(blk+1)*block_size) = 0 then
        -- ... so the whole prefix is zero exactly when every block above it is
        if blk = 0 then
          flags(blk) := '1';
        else
          flags(blk) := flags(blk-1);
        end if;
      end if;
    end loop;  -- blk

    return flags;
  end zero_blocks;

end package body qfp32_divider_p;
 
 
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;
 
library work;
use work.qfp_p.all;
use work.qfp32_divider_p.all;
 
-- Multi-cycle divider: result = regA_i / regB_i.
-- NOTE(review): qfp32_t and qfp32_raw_t are declared outside this file
-- (presumably in work.qfp_p); their exact layout is not visible here.
entity qfp32_divider is
 
  port (
    clk_i     : in  std_ulogic;         -- clock, registers update on the rising edge
    reset_n_i : in  std_ulogic;         -- asynchronous reset, active low
 
    start_i : in  std_ulogic;           -- request a division
    ready_o : out std_ulogic;           -- '1' while no division is pending or running
 
    -- operands; they are evaluated combinationally in the cycle after the
    -- start request was registered
    regA_i : in  qfp32_t;               -- dividend
    regB_i : in  qfp32_t;               -- divisor
 
    complete_o : out std_ulogic;        -- one-cycle pulse after the last iteration
    result_o   : out qfp32_raw_t);      -- quotient, exponent/overflow and sign
 
end qfp32_divider;
 
architecture Rtl of qfp32_divider is
 
  -- r=(1/d)*2^(29)
  -- shifting rem left each time (in the loop), the result is effectively multiplied by 2^(29)
  -- QFPx0: d = v*2^24
  -- QFPx8: d = v*2^16
  -- QFPx16: d = v*2^8
  -- QFPx24: d = v*2^0
 
  -- naming: p1_* signals are combinational stage-1 values derived from the
  -- operand inputs, p2_* signals belong to the iterative stage 2

  -- start_i delayed by one clock; while '1' the stage-1 values are loaded
  -- into the stage-2 registers
  signal start_1d : std_ulogic;
 
  signal p1_divisor_mant : unsigned(28 downto 0);
  signal p1_dividend_mant : unsigned(28 downto 0);
  signal p1_divisor_zero : std_ulogic_vector(3 downto 0); -- zero_blocks flags of the divisor mantissa (8-bit blocks)
  signal p1_allowed_dividend_shift : unsigned(1 downto 0);
  -- if the msb of divisor is set, the possible additional shift cannot happen because
  -- the condition 'dividend_top_bits >= 2*divisor_top_bits' can never be
  -- fulfilled (both vectors are 29 bits); therefore, if an additional shift happens,
  -- at most the 28th bit of divisor is set and shifted by 8, i.e. 36 bits are enough
  signal p1_divisor : unsigned(35 downto 0); -- 28+8 buffer for shifting
  signal p1_dividend : unsigned(32 downto 0); -- 29+4
  signal p1_exp : unsigned(2 downto 0);
  signal p1_delta_exp : unsigned(2 downto 0);
  signal p1_adjust_divisor : unsigned(1 downto 0);
  signal p1_adjust_divisor_final : unsigned(2 downto 0);
  signal p1_adjust_dividend : unsigned(1 downto 0);
  signal p1_top_bits : unsigned(7 downto 0);
  signal p1_sign : std_ulogic;
  signal p1_exp_ov : std_ulogic; -- set on division by zero or if p1_exp_sum > 7 => result will be maximum
  signal p1_exp_sum : unsigned(3 downto 0);
  signal p1_rem : unsigned(41 downto 0); -- +1 bit for shift buffer, +5 for division correction, +2 to make size after division correction same as divisor
  signal p1_div_by_zero : std_ulogic;
 
  signal p2_busy : std_ulogic; -- '1' while the shift-subtract iteration is running
  signal p2_divisor : unsigned(35 downto 0);
  signal p2_exp : unsigned(2 downto 0);
  signal p2_exp_ov : std_ulogic;
  signal p2_exp_adjusted : unsigned(2 downto 0);
  signal p2_sign : std_ulogic;
  signal p2_rem : unsigned(41 downto 0);
  signal p2_rem_shft : unsigned(41 downto 0); -- p2_rem shifted left by one
  signal p2_rem_next : unsigned(41 downto 0); -- remainder after the current step
 
  signal p2_sub : unsigned(36 downto 0); -- trial subtraction, bit 36 is the borrow
  signal p2_quo : unsigned(28 downto 0); -- extend for rounding bit calculation!!
  signal p2_quo_adjusted : unsigned(36 downto 0);
  signal p2_quo_shft : unsigned(28 downto 0);
  signal p2_quo_next : unsigned(28 downto 0);
  signal p2_cnt : unsigned(4 downto 0); -- remaining iterations, counts down from 28
  signal p2_complete : std_ulogic; -- '1' while p2_cnt is zero
  signal p2_complete_1d : std_ulogic; -- registered completion pulse, drives complete_o
 
begin  -- Rtl
 
  -- Handshake and stage-2 registers.
  --
  -- Sequence of one division:
  --   1. start_i is sampled while the unit is idle and registered as start_1d
  --   2. while start_1d = '1' the combinational stage-1 results (p1_*) are
  --      loaded, the quotient is cleared and the counter is set to 28
  --   3. while p2_busy = '1' one shift-subtract step is taken per clock; the
  --      step taken with p2_cnt = 0 is the last one (29 steps in total) and
  --      raises p2_complete_1d for one clock
  process (clk_i, reset_n_i)
  begin  -- process
    if reset_n_i = '0' then             -- asynchronous reset (active low)
      start_1d <= '0';
      p2_busy <= '0';
      p2_rem <= to_unsigned(0,42);
      p2_exp <= to_unsigned(0,3);
      p2_exp_ov <= '0';
      p2_sign <= '0';
      p2_divisor <= to_unsigned(0,36);
      p2_quo <= to_unsigned(0,29);
      p2_cnt <= to_unsigned(0,5);
      p2_complete_1d <= '0';
    elsif clk_i'event and clk_i = '1' then  -- rising clock edge

      -- Accept a request only when the unit is really idle, i.e. under the
      -- same condition for which ready_o is '1': not iterating (p2_busy) and
      -- no accepted request waiting to be loaded (start_1d). Checking p2_busy
      -- alone would accept a start_i held for a second clock once more, and
      -- the division loaded in that clock would be reloaded and restarted.
      start_1d <= '0';
      if start_i = '1' and p2_busy = '0' and start_1d = '0' then
        start_1d <= '1';
      end if;

      p2_complete_1d <= '0';            -- default: completion is a one-clock pulse
      if start_1d = '1' then
        -- load stage 2 from the stage-1 results
        p2_rem <= p1_rem;
        p2_exp <= p1_exp;
        p2_exp_ov <= p1_exp_ov;
        p2_sign <= p1_sign;
        p2_divisor <= p1_divisor;
        p2_quo <= to_unsigned(0,29);
        p2_cnt <= to_unsigned(28,5);
        p2_busy <= '1';
      elsif p2_busy = '1' then
        -- one shift-subtract step
        p2_rem <= p2_rem_next;
        p2_quo <= p2_quo_next;
        p2_cnt <= p2_cnt-1;
        if p2_complete = '1' then
          p2_complete_1d <= '1';
          p2_busy <= '0';
          -- reset count (overrides the decrement above)
          p2_cnt <= to_unsigned(28,5);
        end if;
      end if;
    end if;
  end process;
 
  -- Combinational logic of both stages.
  --   stage 1 (p1_*): pre-scales dividend and divisor in steps of 8 bits and
  --                   derives sign, exponent and the overflow flag from the
  --                   operand inputs
  --   stage 2 (p2_*): one shift-subtract step on the stage-2 registers and the
  --                   final quotient/exponent adjustment for the result
  -- Most signals get a default value first and are overridden further down;
  -- the last assignment executed in the process is the one that takes effect.
  -- NOTE(review): fast_shift / fast_shift_left are not declared in this file
  -- (presumably they come from work.qfp_p); the calls below request a left
  -- shift by a multiple of 8 bits - confirm against that package.
  process (p1_adjust_dividend, p1_adjust_divisor, p1_adjust_divisor_final,
           p1_allowed_dividend_shift, p1_delta_exp, p1_div_by_zero,
           p1_dividend, p1_dividend(32 downto 25), p1_dividend_mant,
           p1_divisor_mant, p1_divisor_mant(12 downto 5),
           p1_divisor_mant(20 downto 13), p1_divisor_mant(28 downto 21),
           p1_divisor_mant(4 downto 0), p1_divisor_zero(0),
           p1_divisor_zero(1 downto 0), p1_divisor_zero(1),
           p1_divisor_zero(2 downto 1), p1_divisor_zero(2),
           p1_divisor_zero(3 downto 2), p1_divisor_zero(3), p1_exp_sum,
           p1_exp_sum(2 downto 0), p1_top_bits, p2_divisor(35 downto 0),
           p2_exp, p2_quo(27 downto 0), p2_quo(28 downto 0), p2_quo_shft,
           p2_rem(40 downto 0), p2_rem_shft, p2_rem_shft(41 downto 5),
           p2_sub(35 downto 0), p2_sub(36), regA_i.fmt.exp, regA_i.fmt.sign,
           regA_i.mant, regB_i.fmt.exp, regB_i.fmt.sign, regB_i.mant)
  begin  -- process
 
    -- stage 1
 
    -- regA_i is the dividend, regB_i the divisor
    p1_dividend_mant <= regA_i.mant;
    p1_divisor_mant <= regB_i.mant;
 
    -- the 29-bit divisor mantissa is padded with 3 zero lsbs to 32 bits, which
    -- gives four 8-bit blocks; flag i = '1' when the top (i+1)*8 bits are zero
    p1_divisor_zero <= zero_blocks(p1_divisor_mant & to_unsigned(0,3),8);
   -- p1_dividend_zero <= zero_blocks(p1_dividend_mant,8);
 
    -- biased exponent difference: 3 + exp(A) - exp(B)
    p1_delta_exp <= to_unsigned(3,3)+('0' & regA_i.fmt.exp)-('0' & regB_i.fmt.exp);
 
    -- determine maximum allowed left shift of dividend (in units of 8 bits)
    p1_allowed_dividend_shift <= to_unsigned(0,2);
 
    if p1_divisor_zero(1 downto 0) = "01" or p1_delta_exp = to_unsigned(4,3) then
      p1_allowed_dividend_shift <= to_unsigned(1,2);
    elsif p1_divisor_zero(2 downto 1) = "01" or p1_delta_exp = to_unsigned(5,3) then
      p1_allowed_dividend_shift <= to_unsigned(2,2);
    elsif p1_divisor_zero(3 downto 2) = "01" or p1_delta_exp = to_unsigned(6,3) then
      p1_allowed_dividend_shift <= to_unsigned(3,2);
    end if;
 
    -- adjust dividend: choose the left shift (in units of 8 bits), limited by
    -- the allowed shift determined above
    -- NOTE(review): the first two comparisons below use 3-bit constants against
    -- the 2-bit p1_allowed_dividend_shift while the third uses a 2-bit constant;
    -- numeric_std extends the shorter operand, so the results are the same
    p1_adjust_dividend <= to_unsigned(0,2);
 
    if p1_dividend_mant < to_unsigned(2**25,29) and p1_allowed_dividend_shift > to_unsigned(0,2) then
      if p1_dividend_mant >= to_unsigned(2**17,29) or p1_allowed_dividend_shift = to_unsigned(1,3) then
        p1_adjust_dividend <= to_unsigned(1,2);
      elsif p1_dividend_mant >= to_unsigned(2**9,29) or p1_allowed_dividend_shift = to_unsigned(2,3) then
        p1_adjust_dividend <= to_unsigned(2,2);
      elsif p1_dividend_mant >= to_unsigned(2**1,29) or p1_allowed_dividend_shift = to_unsigned(3,2) then
        p1_adjust_dividend <= to_unsigned(3,2);
      end if;
    end if;
 
    p1_dividend <= fast_shift(to_unsigned(0,4) & p1_dividend_mant,to_integer(p1_adjust_dividend)*8,fast_shift_left); -- extend with 4 bits
 
    -- adjust divisor so that divisor >= dividend (when possible):
    -- pick the first 8-bit block of the divisor that is not entirely zero as
    -- p1_top_bits and remember its block index as the shift amount
    p1_div_by_zero <= '0';
    p1_adjust_divisor <= to_unsigned(0,2);
    p1_top_bits <= p1_divisor_mant(28 downto 21);
 
    if p1_divisor_zero(0) = '1' then
      if p1_divisor_zero(1) = '0' then
        p1_top_bits <= p1_divisor_mant(20 downto 13);
        p1_adjust_divisor <= to_unsigned(1,2);
      elsif p1_divisor_zero(2) = '0' then
        p1_top_bits <= p1_divisor_mant(12 downto 5);
        p1_adjust_divisor <= to_unsigned(2,2);
      elsif p1_divisor_zero(3) = '0' then
        p1_top_bits <= p1_divisor_mant(4 downto 0) & "000";
        p1_adjust_divisor <= to_unsigned(3,2);
      else
        -- all four flags set: the divisor mantissa is zero
        p1_div_by_zero <= '1';
      end if;
    end if;
 
    -- because dividend will be shifted right by 5 and left by 1 (= shifted right by 4) before division, only the
    -- top 4 bits are used for extra shift determination; p1_top_bits will be
    -- shifted left by 1 because only the most significant bit position must be the same; an example:
    -- dividend: XXXXXAAA
    -- divisor:  BBBBBBBB
    -- the msb position counts, e.g.
    -- XXXXX111
    -- 00000100
    -- is a valid combination, therefore the <= operator is not enough
 
    p1_adjust_divisor_final <= '0' & p1_adjust_divisor;
    if ('0' & p1_dividend(32 downto 25)) >= (p1_top_bits & '0') then
      -- dividend top bits are at least twice the divisor top bits: one more block
      p1_adjust_divisor_final <= ('0' & p1_adjust_divisor)+1;
    end if;
 
    p1_divisor <= fast_shift(to_unsigned(0,7) & p1_divisor_mant,to_integer(p1_adjust_divisor_final)*8,fast_shift_left); -- 8 bit overhead for shifting left
 
    -- build resulting fmt
    p1_sign <= regA_i.fmt.sign xor regB_i.fmt.sign;
 
    -- exponent difference corrected by the shifts applied to both operands
    p1_exp_sum <= ('0' & p1_delta_exp)-('0' & p1_adjust_dividend)+('0' & p1_adjust_divisor_final);
 
    -- division by zero or an exponent sum of 8 or more sets the overflow flag
    -- and leaves the exponent at its default of 7
    p1_exp_ov <= '0';
    p1_exp <= to_unsigned(7,3);
    if p1_div_by_zero = '1' or p1_exp_sum >= to_unsigned(8,4) then
      p1_exp_ov <= '1';
    else
      p1_exp <= p1_exp_sum(2 downto 0);
    end if;
 
    -- initial remainder: the shifted dividend, zero-extended to 42 bits
    p1_rem <= "000000000" & p1_dividend;
 
    -- stage 2
 
    -- shift remainder and quotient left by one
    p2_rem_shft <= p2_rem(40 downto 0) & '0';
    p2_quo_shft <= p2_quo(27 downto 0) & '0';
 
    -- rem and divisor can have the same msb position while rem is still the
    -- greater one: bit 41 of p2_rem_shft is normally '0', but in that case it
    -- is '1', and the zero-extended p2_divisor (always '0' at this position)
    -- is smaller
    p2_sub <= p2_rem_shft(41 downto 5)-('0' & p2_divisor(35 downto 0));
 
    -- default: keep the shifted values (quotient bit 0 stays '0')
    p2_rem_next <= p2_rem_shft;
    p2_quo_next <= p2_quo_shft;
 
    -- p2_sub(36) = '0' means the subtraction did not borrow,
    -- i.e. p2_rem_shft(41 downto 5) >= p2_divisor
    if p2_sub(36) = '0' then -- no borrow: therefore do the subtraction
      p2_rem_next(41 downto 5) <= '0' & p2_sub(35 downto 0);
      p2_quo_next(0) <= '1';
    end if;
 
    -- if exp > 3, normalization cannot fully correct it, therefore pre-shift the
    -- quotient left by 8 bits and reduce the exponent by one (but losing precision)
    p2_exp_adjusted <= p2_exp;
    p2_quo_adjusted <= "00000000" & p2_quo(28 downto 0);
 
    if p2_exp >= to_unsigned(4,3) then
      p2_exp_adjusted <= p2_exp-1;
      p2_quo_adjusted <= p2_quo(28 downto 0) & "00000000";
    end if;
 
  end process;
 
  -- the step taken while the iteration counter reads zero is the last one
  p2_complete <= '1' when p2_cnt = 0 else '0';

  -- handshake: completion pulse, and ready only while nothing is pending or running
  complete_o <= p2_complete_1d;
  ready_o <= not (p2_busy or start_1d);

  -- result record: zero-padded adjusted quotient, overflow flag, exponent, sign
  result_o <= ((15 downto 0 => '0') & p2_quo_adjusted,to_unsigned(0,4) & p2_exp_ov,p2_exp_adjusted,p2_sign);
 
end Rtl;
 

Browse

Tools

Subversion Repositories qfp32

[/] [qfp32/] [trunk/] [Units/] [divider.vhd] - Blame information for rev 3

Line No.	Rev	Author	Line
1	3	mgraep	`-- Copyright (c) 2013 Malte Graeper (mgraep@t-online.de) All rights reserved.`
2
3	2	mgraep	`library IEEE;`
4			`use IEEE.std_logic_1164.all;`
5			`use IEEE.numeric_std.all;`
6
7			`package qfp32_divider_p is`
8
9			`function zero_blocks (`
10			`data : unsigned;`
11			`block_size : integer)`
12			`return std_ulogic_vector;`
13
14			`end package qfp32_divider_p;`
15
16			`package body qfp32_divider_p is`
17
18			`function zero_blocks (`
19			`data : unsigned;`
20			`block_size : integer)`
21			`return std_ulogic_vector is`
22
23			`constant max_blocks : integer := data'length/block_size;`
24			`variable data_zero : std_ulogic_vector(max_blocks-1 downto 0);`
25			`variable data_downto : unsigned(data'length-1 downto 0);`
26
27			`begin`
28
29			`-- workaround for slice problems`
30			`-- if parameter is unsigned with std_ulogic_vector concat there are`
31			`-- problems => use to_unsigned instead of std_ulogic_vector`
32			`data_downto := data;`
33
34			`data_zero := (others => '0');`
35			`for i in 0 to max_blocks-1 loop`
36			`if data_downto(data'length-1 downto data'length-(i+1)*block_size) = to_unsigned(0,(i+1)*block_size) then`
37			`data_zero(i) := '1';`
38			`end if;`
39			`end loop; -- i`
40
41			`return data_zero;`
42			`end zero_blocks;`
43
44			`end package body qfp32_divider_p;`
45
46
47			`library IEEE;`
48			`use IEEE.std_logic_1164.all;`
49			`use IEEE.numeric_std.all;`
50
51			`library work;`
52			`use work.qfp_p.all;`
53			`use work.qfp32_divider_p.all;`
54
55			`entity qfp32_divider is`
56
57			`port (`
58			`clk_i : in std_ulogic;`
59			`reset_n_i : in std_ulogic;`
60
61			`start_i : in std_ulogic;`
62			`ready_o : out std_ulogic;`
63
64			`regA_i : in qfp32_t;`
65			`regB_i : in qfp32_t;`
66
67			`complete_o : out std_ulogic;`
68			`result_o : out qfp32_raw_t);`
69
70			`end qfp32_divider;`
71
72			`architecture Rtl of qfp32_divider is`
73
74			`-- r=(1/d)*2^(29)`
75			`-- shifting rem left each time (in loop), the result is effectivly multiplied by 2^(29)`
76			`-- QFPx0: d = v*2^24`
77			`-- QFPx8: d = v*2^16`
78			`-- QFPx16: d = v*2^8`
79			`-- QFPx24: d = v*2^0`
80
81			`signal start_1d : std_ulogic;`
82
83			`signal p1_divisor_mant : unsigned(28 downto 0);`
84			`signal p1_dividend_mant : unsigned(28 downto 0);`
85			`signal p1_divisor_zero : std_ulogic_vector(3 downto 0);`
86			`signal p1_allowed_dividend_shift : unsigned(1 downto 0);`
87			`-- if the msb of divisor is set, the possible additional shift cannot happen because`
88			`-- the condition 'dividend_top_bits >= 2*divisor_top_bits can never be`
89			`-- fullfilled (both vectors are 29 bits) therefore if an additional shift happens`
90			`-- at most the 28th bit of divisor is set and shifted by 8 eg 36bits is enough`
91			`signal p1_divisor : unsigned(35 downto 0); -- 28+8 buffer for shifting`
92			`signal p1_dividend : unsigned(32 downto 0); -- 29+4`
93			`signal p1_exp : unsigned(2 downto 0);`
94			`signal p1_delta_exp : unsigned(2 downto 0);`
95			`signal p1_adjust_divisor : unsigned(1 downto 0);`
96			`signal p1_adjust_divisor_final : unsigned(2 downto 0);`
97			`signal p1_adjust_dividend : unsigned(1 downto 0);`
98			`signal p1_top_bits : unsigned(7 downto 0);`
99			`signal p1_sign : std_ulogic;`
100			`signal p1_exp_ov : std_ulogic; -- if p1_exp_sum > 7 => result will be maximum`
101			`signal p1_exp_sum : unsigned(3 downto 0);`
102			`signal p1_rem : unsigned(41 downto 0); -- +1 bit for shift buffer, +5 for division correction, +2 to make size after division correction same as divisor`
103			`signal p1_div_by_zero : std_ulogic;`
104
105			`signal p2_busy : std_ulogic;`
106			`signal p2_divisor : unsigned(35 downto 0);`
107			`signal p2_exp : unsigned(2 downto 0);`
108			`signal p2_exp_ov : std_ulogic;`
109			`signal p2_exp_adjusted : unsigned(2 downto 0);`
110			`signal p2_sign : std_ulogic;`
111			`signal p2_rem : unsigned(41 downto 0);`
112			`signal p2_rem_shft : unsigned(41 downto 0);`
113			`signal p2_rem_next : unsigned(41 downto 0);`
114
115			`signal p2_sub : unsigned(36 downto 0);`
116			`signal p2_quo : unsigned(28 downto 0); -- extend for rounding bit calculation!!`
117			`signal p2_quo_adjusted : unsigned(36 downto 0);`
118			`signal p2_quo_shft : unsigned(28 downto 0);`
119			`signal p2_quo_next : unsigned(28 downto 0);`
120			`signal p2_cnt : unsigned(4 downto 0);`
121			`signal p2_complete : std_ulogic;`
122			`signal p2_complete_1d : std_ulogic;`
123
124			`begin -- Rtl`
125
126			`process (clk_i, reset_n_i)`
127			`begin -- process`
128			`if reset_n_i = '0' then -- asynchronous reset (active low)`
129			`start_1d <= '0';`
130			`p2_busy <= '0';`
131			`p2_rem <= to_unsigned(0,42);`
132			`p2_exp <= to_unsigned(0,3);`
133			`p2_exp_ov <= '0';`
134			`p2_sign <= '0';`
135			`p2_divisor <= to_unsigned(0,36);`
136			`p2_quo <= to_unsigned(0,29);`
137			`p2_cnt <= to_unsigned(0,5);`
138			`p2_complete_1d <= '0';`
139			`elsif clk_i'event and clk_i = '1' then -- rising clock edge`
140
141			`start_1d <= '0';`
142			`if start_i = '1' and p2_busy = '0' then`
143			`start_1d <= '1';`
144			`end if;`
145
146			`p2_complete_1d <= '0';`
147			`if start_1d = '1' then`
148			`p2_rem <= p1_rem;`
149			`p2_exp <= p1_exp;`
150			`p2_exp_ov <= p1_exp_ov;`
151			`p2_sign <= p1_sign;`
152			`p2_divisor <= p1_divisor;`
153			`p2_quo <= to_unsigned(0,29);`
154			`p2_cnt <= to_unsigned(28,5);`
155			`p2_busy <= '1';`
156			`elsif p2_busy = '1' then`
157			`p2_rem <= p2_rem_next;`
158			`p2_quo <= p2_quo_next;`
159			`p2_cnt <= p2_cnt-1;`
160			`if p2_complete = '1' then`
161			`p2_complete_1d <= '1';`
162			`p2_busy <= '0';`
163			`-- reset count`
164			`p2_cnt <= to_unsigned(28,5);`
165			`end if;`
166			`end if;`
167			`end if;`
168			`end process;`
169
170			`process (p1_adjust_dividend, p1_adjust_divisor, p1_adjust_divisor_final,`
171			`p1_allowed_dividend_shift, p1_delta_exp, p1_div_by_zero,`
172			`p1_dividend, p1_dividend(32 downto 25), p1_dividend_mant,`
173			`p1_divisor_mant, p1_divisor_mant(12 downto 5),`
174			`p1_divisor_mant(20 downto 13), p1_divisor_mant(28 downto 21),`
175			`p1_divisor_mant(4 downto 0), p1_divisor_zero(0),`
176			`p1_divisor_zero(1 downto 0), p1_divisor_zero(1),`
177			`p1_divisor_zero(2 downto 1), p1_divisor_zero(2),`
178			`p1_divisor_zero(3 downto 2), p1_divisor_zero(3), p1_exp_sum,`
179			`p1_exp_sum(2 downto 0), p1_top_bits, p2_divisor(35 downto 0),`
180			`p2_exp, p2_quo(27 downto 0), p2_quo(28 downto 0), p2_quo_shft,`
181			`p2_rem(40 downto 0), p2_rem_shft, p2_rem_shft(41 downto 5),`
182			`p2_sub(35 downto 0), p2_sub(36), regA_i.fmt.exp, regA_i.fmt.sign,`
183			`regA_i.mant, regB_i.fmt.exp, regB_i.fmt.sign, regB_i.mant)`
184			`begin -- process`
185
186			`-- stage 1`
187
188			`p1_dividend_mant <= regA_i.mant;`
189			`p1_divisor_mant <= regB_i.mant;`
190
191			`p1_divisor_zero <= zero_blocks(p1_divisor_mant & to_unsigned(0,3),8);`
192			`-- p1_dividend_zero <= zero_blocks(p1_dividend_mant,8);`
193
194			`p1_delta_exp <= to_unsigned(3,3)+('0' & regA_i.fmt.exp)-('0' & regB_i.fmt.exp);`
195
196			`-- determine maximum allowed left shift of dividend`
197			`p1_allowed_dividend_shift <= to_unsigned(0,2);`
198
199			`if p1_divisor_zero(1 downto 0) = "01" or p1_delta_exp = to_unsigned(4,3) then`
200			`p1_allowed_dividend_shift <= to_unsigned(1,2);`
201			`elsif p1_divisor_zero(2 downto 1) = "01" or p1_delta_exp = to_unsigned(5,3) then`
202			`p1_allowed_dividend_shift <= to_unsigned(2,2);`
203			`elsif p1_divisor_zero(3 downto 2) = "01" or p1_delta_exp = to_unsigned(6,3) then`
204			`p1_allowed_dividend_shift <= to_unsigned(3,2);`
205			`end if;`
206
207			`-- adjust dividend`
208			`p1_adjust_dividend <= to_unsigned(0,2);`
209
210			`if p1_dividend_mant < to_unsigned(2**25,29) and p1_allowed_dividend_shift > to_unsigned(0,2) then`
211			`if p1_dividend_mant >= to_unsigned(2**17,29) or p1_allowed_dividend_shift = to_unsigned(1,3) then`
212			`p1_adjust_dividend <= to_unsigned(1,2);`
213			`elsif p1_dividend_mant >= to_unsigned(2**9,29) or p1_allowed_dividend_shift = to_unsigned(2,3) then`
214			`p1_adjust_dividend <= to_unsigned(2,2);`
215			`elsif p1_dividend_mant >= to_unsigned(2**1,29) or p1_allowed_dividend_shift = to_unsigned(3,2) then`
216			`p1_adjust_dividend <= to_unsigned(3,2);`
217			`end if;`
218			`end if;`
219
220			`p1_dividend <= fast_shift(to_unsigned(0,4) & p1_dividend_mant,to_integer(p1_adjust_dividend)*8,fast_shift_left); -- extend with 4bits`
221
222			`-- adjust divisor so that divisor >= dividend (when possible)`
223			`p1_div_by_zero <= '0';`
224			`p1_adjust_divisor <= to_unsigned(0,2);`
225			`p1_top_bits <= p1_divisor_mant(28 downto 21);`
226
227			`if p1_divisor_zero(0) = '1' then`
228			`if p1_divisor_zero(1) = '0' then`
229			`p1_top_bits <= p1_divisor_mant(20 downto 13);`
230			`p1_adjust_divisor <= to_unsigned(1,2);`
231			`elsif p1_divisor_zero(2) = '0' then`
232			`p1_top_bits <= p1_divisor_mant(12 downto 5);`
233			`p1_adjust_divisor <= to_unsigned(2,2);`
234			`elsif p1_divisor_zero(3) = '0' then`
235			`p1_top_bits <= p1_divisor_mant(4 downto 0) & "000";`
236			`p1_adjust_divisor <= to_unsigned(3,2);`
237			`else`
238			`p1_div_by_zero <= '1';`
239			`end if;`
240			`end if;`
241
242			`-- because dividend will be shifted right by 5 and left by 1 (= shifted right by 4) before division, only the`
243			`-- top 4 bits are used for extra shift determination; p1_top_bits will be`
244			`-- shifted left by 1 cause only most significant bit position must be same; some example`
245			`-- dividend: XXXXXAAA`
246			`-- divisor: BBBBBBBB`
247			`-- msb position counts eg`
248			`-- XXXXX111`
249			`-- 00000100`
250			`-- is a valid combination therefore the <= operator is not enough`
251
252			`p1_adjust_divisor_final <= '0' & p1_adjust_divisor;`
253			`if ('0' & p1_dividend(32 downto 25)) >= (p1_top_bits & '0') then`
254			`p1_adjust_divisor_final <= ('0' & p1_adjust_divisor)+1;`
255			`end if;`
256
257			`p1_divisor <= fast_shift(to_unsigned(0,7) & p1_divisor_mant,to_integer(p1_adjust_divisor_final)*8,fast_shift_left); -- 8bit overhead for shifting left`
258
259			`-- build resulting fmt`
260			`p1_sign <= regA_i.fmt.sign xor regB_i.fmt.sign;`
261
262			`p1_exp_sum <= ('0' & p1_delta_exp)-('0' & p1_adjust_dividend)+('0' & p1_adjust_divisor_final);`
263
264			`p1_exp_ov <= '0';`
265			`p1_exp <= to_unsigned(7,3);`
266			`if p1_div_by_zero = '1' or p1_exp_sum >= to_unsigned(8,4) then`
267			`p1_exp_ov <= '1';`
268			`else`
269			`p1_exp <= p1_exp_sum(2 downto 0);`
270			`end if;`
271
272			`p1_rem <= "000000000" & p1_dividend;`
273
274			`-- stage 2`
275
276			`-- shift`
277			`p2_rem_shft <= p2_rem(40 downto 0) & '0';`
278			`p2_quo_shft <= p2_quo(27 downto 0) & '0';`
279
280			`-- situation when rem and divisor have same msb position but rem is still greater`
281			`-- therefore the 41th of p2_rem_shft is mostly zero but in the case above`
282			`-- it will '1' and p2_divisor is less (always has a '0' at this position, see below)`
283			`p2_sub <= p2_rem_shft(41 downto 5)-('0' & p2_divisor(35 downto 0));`
284
285			`p2_rem_next <= p2_rem_shft;`
286			`p2_quo_next <= p2_quo_shft;`
287
288			`-- check for sub overflow eg. p2_rem_shft >= p2_divisor`
289			`if p2_sub(36) = '0' then -- no overflow: therefore do sub`
290			`p2_rem_next(41 downto 5) <= '0' & p2_sub(35 downto 0);`
291			`p2_quo_next(0) <= '1';`
292			`end if;`
293
294			`-- if exp > 3 normalize cannot correct it full therefore pre shift left (but loosing precision)`
295			`p2_exp_adjusted <= p2_exp;`
296			`p2_quo_adjusted <= "00000000" & p2_quo(28 downto 0);`
297
298			`if p2_exp >= to_unsigned(4,3) then`
299			`p2_exp_adjusted <= p2_exp-1;`
300			`p2_quo_adjusted <= p2_quo(28 downto 0) & "00000000";`
301			`end if;`
302
303			`end process;`
304
305			`p2_complete <= '1' when p2_cnt = to_unsigned(0,5) else '0';`
306
307			`ready_o <= not p2_busy and not start_1d;`
308			`result_o <= ((15 downto 0 => '0') & p2_quo_adjusted,to_unsigned(0,4) & p2_exp_ov,p2_exp_adjusted,p2_sign);`
309			`complete_o <= p2_complete_1d;`
310
311			`end Rtl;`
312