OpenCores
URL https://opencores.org/ocsvn/qfp32/qfp32/trunk

Subversion Repositories qfp32

[/] [qfp32/] [trunk/] [Units/] [divider.vhd] - Blame information for rev 2

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 mgraep
library IEEE;
2
use IEEE.std_logic_1164.all;
3
use IEEE.numeric_std.all;
4
 
5
-- Helper package for the qfp32 divider.
package qfp32_divider_p is

  -- Reports which leading (most significant) prefixes of 'data' are all zero.
  --
  -- 'data' is viewed as data'length/block_size blocks of 'block_size' bits,
  -- counted from the most significant end. Element i of the result is '1'
  -- when the (i+1)*block_size most significant bits of 'data' are all '0',
  -- and '0' otherwise. The result range is (data'length/block_size-1 downto 0).
  -- Bits left over when data'length is not a multiple of block_size are never
  -- examined.
  function zero_blocks (data : unsigned; block_size : integer)
    return std_ulogic_vector;

end package qfp32_divider_p;

package body qfp32_divider_p is

  function zero_blocks (data : unsigned; block_size : integer)
    return std_ulogic_vector is

    constant width      : integer := data'length;
    constant num_blocks : integer := width/block_size;

    -- Local copy with a known (width-1 downto 0) range, so that slicing does
    -- not depend on the index range of the actual parameter (e.g. when the
    -- caller passes the result of a concatenation).
    variable normalized : unsigned(width-1 downto 0);
    variable flags      : std_ulogic_vector(num_blocks-1 downto 0);
    variable lead_zero  : boolean;  -- true while every block seen so far is zero
    variable hi         : integer;  -- index of the msb of the current block

  begin

    normalized := data;
    flags      := (others => '0');
    lead_zero  := true;

    for blk in 0 to num_blocks-1 loop
      hi := width-1-blk*block_size;
      -- the prefix ending with this block is zero iff the previous prefix was
      -- zero and this block is zero as well
      lead_zero := lead_zero and
                   (normalized(hi downto hi-block_size+1) = to_unsigned(0,block_size));
      if lead_zero then
        flags(blk) := '1';
      end if;
    end loop;  -- blk

    return flags;
  end zero_blocks;

end package body qfp32_divider_p;
43
 
44
 
45
library IEEE;
46
use IEEE.std_logic_1164.all;
47
use IEEE.numeric_std.all;
48
 
49
library work;
50
use work.qfp_p.all;
51
use work.qfp32_divider_p.all;
52
 
53
-- Iterative divider for qfp32 operands (regA_i / regB_i).
--
-- A division is requested with start_i, runs for several clock cycles and
-- signals its end with a pulse on complete_o.
entity qfp32_divider is
  port (
    -- clock (registers update on the rising edge) and asynchronous,
    -- active-low reset
    clk_i      : in  std_ulogic;
    reset_n_i  : in  std_ulogic;

    -- handshake: start_i is only taken while no division is in progress;
    -- ready_o is low while a division is running and in the cycle directly
    -- after an accepted start
    start_i    : in  std_ulogic;
    ready_o    : out std_ulogic;

    -- operands: regA_i is the dividend, regB_i the divisor
    regA_i     : in  qfp32_t;
    regB_i     : in  qfp32_t;

    -- complete_o is high for one clock cycle after the last iteration;
    -- result_o is built from the adjusted quotient, the exponent-overflow
    -- flag, the adjusted exponent and the sign
    complete_o : out std_ulogic;
    result_o   : out qfp32_raw_t
  );
end entity qfp32_divider;
69
 
70
architecture Rtl of qfp32_divider is

  -- r=(1/d)*2^(29)
  -- The remainder is shifted left once per loop iteration, so the result is
  -- effectively multiplied by 2^(29).
  -- QFPx0:  d = v*2^24
  -- QFPx8:  d = v*2^16
  -- QFPx16: d = v*2^8
  -- QFPx24: d = v*2^0

  -- start_i delayed by one clock (only set when the divider is not busy)
  signal start_1d : std_ulogic;

  -----------------------------------------------------------------------------
  -- stage 1: combinational operand preparation
  -----------------------------------------------------------------------------
  signal p1_divisor_mant, p1_dividend_mant : unsigned(28 downto 0);
  signal p1_divisor_zero                   : std_ulogic_vector(3 downto 0);
  signal p1_allowed_dividend_shift         : unsigned(1 downto 0);

  -- If the msb of the divisor is set, the possible additional shift cannot
  -- happen because the condition 'dividend_top_bits >= 2*divisor_top_bits'
  -- can never be fulfilled (both vectors are 29 bits). Therefore, if an
  -- additional shift happens, at most the 28th bit of the divisor is set and
  -- shifted by 8, i.e. 36 bits are enough.
  signal p1_divisor  : unsigned(35 downto 0);  -- 28+8 buffer for shifting
  signal p1_dividend : unsigned(32 downto 0);  -- 29+4

  signal p1_exp, p1_delta_exp                  : unsigned(2 downto 0);
  signal p1_adjust_divisor, p1_adjust_dividend : unsigned(1 downto 0);
  signal p1_adjust_divisor_final               : unsigned(2 downto 0);
  signal p1_top_bits                           : unsigned(7 downto 0);
  signal p1_exp_sum                            : unsigned(3 downto 0);

  signal p1_sign        : std_ulogic;
  signal p1_exp_ov      : std_ulogic;  -- if p1_exp_sum > 7 => result will be maximum
  signal p1_div_by_zero : std_ulogic;

  -- +1 bit for shift buffer, +5 for division correction, +2 to make the size
  -- after division correction the same as the divisor
  signal p1_rem : unsigned(41 downto 0);

  -----------------------------------------------------------------------------
  -- stage 2: iterative shift/subtract division
  -----------------------------------------------------------------------------
  signal p2_busy                    : std_ulogic;
  signal p2_sign, p2_exp_ov         : std_ulogic;
  signal p2_complete                : std_ulogic;
  signal p2_complete_1d             : std_ulogic;

  signal p2_divisor                       : unsigned(35 downto 0);
  signal p2_exp, p2_exp_adjusted          : unsigned(2 downto 0);
  signal p2_rem, p2_rem_shft, p2_rem_next : unsigned(41 downto 0);
  signal p2_sub                           : unsigned(36 downto 0);

  signal p2_quo                   : unsigned(28 downto 0);  -- extend for rounding bit calculation!!
  signal p2_quo_shft, p2_quo_next : unsigned(28 downto 0);
  signal p2_quo_adjusted          : unsigned(36 downto 0);

  signal p2_cnt : unsigned(4 downto 0);
121
 
122
begin  -- Rtl
123
 
124
  -- Sequencer and stage-2 registers.
  --
  -- An accepted start_i sets start_1d for one clock. In that clock the
  -- stage-1 results (p1_*) are captured into the p2_* registers and the
  -- divider becomes busy. While busy, one shift/subtract step is stored per
  -- clock; p2_cnt counts from 28 down to 0, i.e. 29 steps. The step with
  -- p2_complete = '1' is the last one: it clears p2_busy and raises
  -- p2_complete_1d for one clock.
  --
  -- NOTE(review): start_i is qualified with p2_busy only, not with start_1d,
  -- although ready_o is low while start_1d = '1'. A start_i held for two
  -- consecutive clocks therefore reloads the stage-2 registers a second time
  -- -- confirm that callers never do this.
  process (clk_i, reset_n_i)
    -- reload value of the iteration counter
    constant c_cnt_reload : unsigned(4 downto 0) := to_unsigned(28,5);
  begin  -- process
    if reset_n_i = '0' then             -- asynchronous reset (active low)
      start_1d       <= '0';
      p2_busy        <= '0';
      p2_complete_1d <= '0';
      p2_sign        <= '0';
      p2_exp_ov      <= '0';
      p2_exp         <= (others => '0');
      p2_cnt         <= (others => '0');
      p2_quo         <= (others => '0');
      p2_divisor     <= (others => '0');
      p2_rem         <= (others => '0');
    elsif clk_i'event and clk_i = '1' then  -- rising clock edge

      -- register the start request; it is only taken when idle
      if start_i = '1' and p2_busy = '0' then
        start_1d <= '1';
      else
        start_1d <= '0';
      end if;

      -- single-cycle pulse, overridden below on the last iteration
      p2_complete_1d <= '0';

      if start_1d = '1' then
        -- capture the prepared operands and begin iterating
        p2_busy    <= '1';
        p2_cnt     <= c_cnt_reload;
        p2_quo     <= (others => '0');
        p2_rem     <= p1_rem;
        p2_divisor <= p1_divisor;
        p2_exp     <= p1_exp;
        p2_exp_ov  <= p1_exp_ov;
        p2_sign    <= p1_sign;
      elsif p2_busy = '1' then
        -- store one division step
        p2_rem <= p2_rem_next;
        p2_quo <= p2_quo_next;
        if p2_complete = '1' then
          -- last step: go idle and reset the count
          p2_busy        <= '0';
          p2_complete_1d <= '1';
          p2_cnt         <= c_cnt_reload;
        else
          p2_cnt <= p2_cnt-1;
        end if;
      end if;
    end if;
  end process;
167
 
168
  -- Combinational logic of both stages.
  --
  -- Stage 1 derives, directly from regA_i/regB_i, the pre-shifted dividend and
  -- divisor, the result sign and the result exponent (with overflow flag).
  -- Stage 2 computes one shift/subtract step from the p2_* registers and the
  -- exponent/quotient adjustment that feeds result_o.
  --
  -- Several signals are both driven and read here, so each of them is in the
  -- sensitivity list and the process re-evaluates until the values settle.
  process (p1_adjust_dividend, p1_adjust_divisor, p1_adjust_divisor_final,
           p1_allowed_dividend_shift, p1_delta_exp, p1_div_by_zero,
           p1_dividend, p1_dividend_mant, p1_divisor_mant, p1_divisor_zero,
           p1_exp_sum, p1_top_bits, p2_divisor, p2_exp, p2_quo, p2_quo_shft,
           p2_rem, p2_rem_shft, p2_sub, regA_i.fmt.exp, regA_i.fmt.sign,
           regA_i.mant, regB_i.fmt.exp, regB_i.fmt.sign, regB_i.mant)
  begin  -- process

    ---------------------------------------------------------------------------
    -- stage 1
    ---------------------------------------------------------------------------

    p1_dividend_mant <= regA_i.mant;
    p1_divisor_mant <= regB_i.mant;

    -- divisor mantissa padded with 3 low zero bits to 32 bits; bit i of the
    -- result is '1' when the top (i+1) bytes of that vector are all zero
    p1_divisor_zero <= zero_blocks(p1_divisor_mant & to_unsigned(0,3),8);

    -- exponent difference, biased by 3
    p1_delta_exp <= to_unsigned(3,3)+('0' & regA_i.fmt.exp)-('0' & regB_i.fmt.exp);

    -- determine maximum allowed left shift of dividend
    p1_allowed_dividend_shift <= to_unsigned(0,2);

    if p1_divisor_zero(1 downto 0) = "01" or p1_delta_exp = to_unsigned(4,3) then
      p1_allowed_dividend_shift <= to_unsigned(1,2);
    elsif p1_divisor_zero(2 downto 1) = "01" or p1_delta_exp = to_unsigned(5,3) then
      p1_allowed_dividend_shift <= to_unsigned(2,2);
    elsif p1_divisor_zero(3 downto 2) = "01" or p1_delta_exp = to_unsigned(6,3) then
      p1_allowed_dividend_shift <= to_unsigned(3,2);
    end if;

    -- adjust dividend (number of byte shifts to the left)
    p1_adjust_dividend <= to_unsigned(0,2);

    -- all comparisons against p1_allowed_dividend_shift use 2-bit literals,
    -- matching the width of the signal
    if p1_dividend_mant < to_unsigned(2**25,29) and p1_allowed_dividend_shift > to_unsigned(0,2) then
      if p1_dividend_mant >= to_unsigned(2**17,29) or p1_allowed_dividend_shift = to_unsigned(1,2) then
        p1_adjust_dividend <= to_unsigned(1,2);
      elsif p1_dividend_mant >= to_unsigned(2**9,29) or p1_allowed_dividend_shift = to_unsigned(2,2) then
        p1_adjust_dividend <= to_unsigned(2,2);
      elsif p1_dividend_mant >= to_unsigned(2**1,29) or p1_allowed_dividend_shift = to_unsigned(3,2) then
        p1_adjust_dividend <= to_unsigned(3,2);
      end if;
    end if;

    p1_dividend <= fast_shift(to_unsigned(0,4) & p1_dividend_mant,to_integer(p1_adjust_dividend)*8,fast_shift_left); -- extend with 4bits

    -- adjust divisor so that divisor >= dividend (when possible):
    -- pick the first non-zero byte of the divisor as p1_top_bits and remember
    -- how many zero bytes were skipped
    p1_div_by_zero <= '0';
    p1_adjust_divisor <= to_unsigned(0,2);
    p1_top_bits <= p1_divisor_mant(28 downto 21);

    if p1_divisor_zero(0) = '1' then
      if p1_divisor_zero(1) = '0' then
        p1_top_bits <= p1_divisor_mant(20 downto 13);
        p1_adjust_divisor <= to_unsigned(1,2);
      elsif p1_divisor_zero(2) = '0' then
        p1_top_bits <= p1_divisor_mant(12 downto 5);
        p1_adjust_divisor <= to_unsigned(2,2);
      elsif p1_divisor_zero(3) = '0' then
        p1_top_bits <= p1_divisor_mant(4 downto 0) & "000";
        p1_adjust_divisor <= to_unsigned(3,2);
      else
        -- every block of the divisor is zero
        p1_div_by_zero <= '1';
      end if;
    end if;

    -- Because the dividend will be shifted right by 5 and left by 1
    -- (= shifted right by 4) before division, only the top 4 bits are used
    -- for the extra shift determination; p1_top_bits is shifted left by 1
    -- because only the most significant bit position must be the same.
    -- Example:
    -- dividend: XXXXXAAA
    -- divisor:  BBBBBBBB
    -- The msb position counts, e.g.
    -- XXXXX111
    -- 00000100
    -- is a valid combination, therefore the <= operator is not enough.

    p1_adjust_divisor_final <= '0' & p1_adjust_divisor;
    if ('0' & p1_dividend(32 downto 25)) >= (p1_top_bits & '0') then
      p1_adjust_divisor_final <= ('0' & p1_adjust_divisor)+1;
    end if;

    p1_divisor <= fast_shift(to_unsigned(0,7) & p1_divisor_mant,to_integer(p1_adjust_divisor_final)*8,fast_shift_left); -- 8bit overhead for shifting left

    -- build resulting fmt
    p1_sign <= regA_i.fmt.sign xor regB_i.fmt.sign;

    p1_exp_sum <= ('0' & p1_delta_exp)-('0' & p1_adjust_dividend)+('0' & p1_adjust_divisor_final);

    -- saturate the exponent at 7 and flag overflow on division by zero or
    -- when the exponent sum does not fit into 3 bits
    p1_exp_ov <= '0';
    p1_exp <= to_unsigned(7,3);
    if p1_div_by_zero = '1' or p1_exp_sum >= to_unsigned(8,4) then
      p1_exp_ov <= '1';
    else
      p1_exp <= p1_exp_sum(2 downto 0);
    end if;

    -- initial remainder: dividend zero-extended to 42 bits
    p1_rem <= "000000000" & p1_dividend;

    ---------------------------------------------------------------------------
    -- stage 2
    ---------------------------------------------------------------------------

    -- shift remainder and quotient left by one
    p2_rem_shft <= p2_rem(40 downto 0) & '0';
    p2_quo_shft <= p2_quo(27 downto 0) & '0';

    -- Situation when rem and divisor have the same msb position but rem is
    -- still greater: bit 41 of p2_rem_shft is mostly zero, but in that case it
    -- will be '1' while p2_divisor is less (it always has a '0' at this
    -- position, see below).
    p2_sub <= p2_rem_shft(41 downto 5)-('0' & p2_divisor(35 downto 0));

    p2_rem_next <= p2_rem_shft;
    p2_quo_next <= p2_quo_shft;

    -- check for sub overflow, i.e. whether p2_rem_shft >= p2_divisor
    if p2_sub(36) = '0' then -- no overflow: therefore do sub
      p2_rem_next(41 downto 5) <= '0' & p2_sub(35 downto 0);
      p2_quo_next(0) <= '1';
    end if;

    -- if exp > 3, normalization cannot fully correct it, therefore pre-shift
    -- left (but losing precision)
    p2_exp_adjusted <= p2_exp;
    p2_quo_adjusted <= "00000000" & p2_quo(28 downto 0);

    if p2_exp >= to_unsigned(4,3) then
      p2_exp_adjusted <= p2_exp-1;
      p2_quo_adjusted <= p2_quo(28 downto 0) & "00000000";
    end if;

  end process;
302
 
303
  -- the step executed while the counter is zero is the last one
  p2_complete <= '1' when p2_cnt = 0 else '0';

  -- one-clock pulse after the last division step
  complete_o <= p2_complete_1d;

  -- not ready while a division is running or a start has just been taken
  ready_o <= not (p2_busy or start_1d);

  -- result record: adjusted quotient (extended with 16 leading zero bits),
  -- exponent-overflow flag (extended with 4 leading zero bits), adjusted
  -- exponent and sign
  result_o <= ((15 downto 0 => '0') & p2_quo_adjusted,
               to_unsigned(0,4) & p2_exp_ov,
               p2_exp_adjusted,
               p2_sign);
308
 
309
end Rtl;
310
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.