OpenCores
URL https://opencores.org/ocsvn/qfp32/qfp32/trunk

Subversion Repositories qfp32

[/] [qfp32/] [trunk/] [Units/] [divider.vhd] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 mgraep
-- Copyright (c) 2013 Malte Graeper (mgraep@t-online.de) All rights reserved.
2
 
3 2 mgraep
library IEEE;
4
use IEEE.std_logic_1164.all;
5
use IEEE.numeric_std.all;
6
 
7
-- Helper package for the qfp32 divider.
package qfp32_divider_p is

  -- Leading-zero detection at block granularity.
  --
  -- data is viewed as data'length/block_size blocks of block_size bits each,
  -- counted from the most significant end. Bit i of the result is '1' when
  -- the top (i+1)*block_size bits of data are all zero and '0' otherwise.
  -- Bits of data below the last complete block are never examined.
  -- The result has the range (data'length/block_size)-1 downto 0.
  function zero_blocks (
    data : unsigned;
    block_size : integer)
    return std_ulogic_vector;

end package qfp32_divider_p;

package body qfp32_divider_p is

  function zero_blocks (
    data : unsigned;
    block_size : integer)
    return std_ulogic_vector is

    constant width : integer := data'length;
    constant block_count : integer := width/block_size;

    -- one flag per block prefix; starts out as "not zero" everywhere
    variable prefix_is_zero : std_ulogic_vector(block_count-1 downto 0) := (others => '0');
    -- copy of data with a fixed descending index range, so that the slices
    -- below do not depend on the index range of the actual parameter
    variable data_norm : unsigned(width-1 downto 0);

  begin

    data_norm := data;

    for blk in 0 to block_count-1 loop
      -- the prefix grows by one block per iteration, always anchored at the msb
      if data_norm(width-1 downto width-(blk+1)*block_size) = 0 then
        prefix_is_zero(blk) := '1';
      end if;
    end loop;  -- blk

    return prefix_is_zero;
  end zero_blocks;

end package body qfp32_divider_p;
45
 
46
 
47
library IEEE;
48
use IEEE.std_logic_1164.all;
49
use IEEE.numeric_std.all;
50
 
51
library work;
52
use work.qfp_p.all;
53
use work.qfp32_divider_p.all;
54
 
55
-- Iterative divider for qfp32 operands (regA_i divided by regB_i).
entity qfp32_divider is
  port (
    -- registers update on the rising edge of clk_i
    clk_i      : in  std_ulogic;
    -- asynchronous reset, active low
    reset_n_i  : in  std_ulogic;

    -- request a division; only accepted while no division is in progress
    start_i    : in  std_ulogic;
    -- '1' while the unit is neither busy nor has just accepted a start
    ready_o    : out std_ulogic;

    -- dividend (its mantissa feeds the dividend path)
    regA_i     : in  qfp32_t;
    -- divisor (its mantissa feeds the divisor path)
    regB_i     : in  qfp32_t;

    -- '1' for one clock cycle after the last iteration has finished
    complete_o : out std_ulogic;
    -- raw result record built from quotient, overflow flag, exponent and sign
    result_o   : out qfp32_raw_t);
end entity qfp32_divider;
71
 
72
architecture Rtl of qfp32_divider is
73
 
74
  -- r=(1/d)*2^(29)
75
  -- shifting rem left each time (in loop), the result is effectively multiplied by 2^(29)
76
  -- QFPx0: d = v*2^24
77
  -- QFPx8: d = v*2^16
78
  -- QFPx16: d = v*2^8
79
  -- QFPx24: d = v*2^0
80
 
81
  signal start_1d : std_ulogic;
82
 
83
  signal p1_divisor_mant : unsigned(28 downto 0);
84
  signal p1_dividend_mant : unsigned(28 downto 0);
85
  signal p1_divisor_zero : std_ulogic_vector(3 downto 0);
86
  signal p1_allowed_dividend_shift : unsigned(1 downto 0);
87
  -- if the msb of divisor is set, the possible additional shift cannot happen because
88
  -- the condition 'dividend_top_bits >= 2*divisor_top_bits' can never be
89
  -- fulfilled (both vectors are 29 bits); therefore, if an additional shift happens,
90
  -- at most the 28th bit of divisor is set and shifted by 8, i.e. 36 bits are enough
91
  signal p1_divisor : unsigned(35 downto 0); -- 28+8 buffer for shifting
92
  signal p1_dividend : unsigned(32 downto 0); -- 29+4
93
  signal p1_exp : unsigned(2 downto 0);
94
  signal p1_delta_exp : unsigned(2 downto 0);
95
  signal p1_adjust_divisor : unsigned(1 downto 0);
96
  signal p1_adjust_divisor_final : unsigned(2 downto 0);
97
  signal p1_adjust_dividend : unsigned(1 downto 0);
98
  signal p1_top_bits : unsigned(7 downto 0);
99
  signal p1_sign : std_ulogic;
100
  signal p1_exp_ov : std_ulogic; -- if p1_exp_sum > 7 => result will be maximum 
101
  signal p1_exp_sum : unsigned(3 downto 0);
102
  signal p1_rem : unsigned(41 downto 0); -- +1 bit for shift buffer, +5 for division correction, +2 to make size after division correction same as divisor
103
  signal p1_div_by_zero : std_ulogic;
104
 
105
  signal p2_busy : std_ulogic;
106
  signal p2_divisor : unsigned(35 downto 0);
107
  signal p2_exp : unsigned(2 downto 0);
108
  signal p2_exp_ov : std_ulogic;
109
  signal p2_exp_adjusted : unsigned(2 downto 0);
110
  signal p2_sign : std_ulogic;
111
  signal p2_rem : unsigned(41 downto 0);
112
  signal p2_rem_shft : unsigned(41 downto 0);
113
  signal p2_rem_next : unsigned(41 downto 0);
114
 
115
  signal p2_sub : unsigned(36 downto 0);
116
  signal p2_quo : unsigned(28 downto 0); -- extend for rounding bit calculation!!
117
  signal p2_quo_adjusted : unsigned(36 downto 0);
118
  signal p2_quo_shft : unsigned(28 downto 0);
119
  signal p2_quo_next : unsigned(28 downto 0);
120
  signal p2_cnt : unsigned(4 downto 0);
121
  signal p2_complete : std_ulogic;
122
  signal p2_complete_1d : std_ulogic;
123
 
124
begin  -- Rtl
125
 
126
  -- Registers of the divider: start strobe, busy/complete handshake and the
  -- working state (remainder, quotient, divisor, exponent, sign, counter)
  -- of the iterative stage.
  process (clk_i, reset_n_i)
  begin  -- process
    if reset_n_i = '0' then             -- asynchronous reset (active low)
      -- handshake
      start_1d       <= '0';
      p2_busy        <= '0';
      p2_complete_1d <= '0';
      -- working state
      p2_rem         <= (others => '0');
      p2_quo         <= (others => '0');
      p2_divisor     <= (others => '0');
      p2_cnt         <= (others => '0');
      p2_exp         <= (others => '0');
      p2_exp_ov      <= '0';
      p2_sign        <= '0';
    elsif clk_i'event and clk_i = '1' then  -- rising clock edge

      -- a start request is only taken over while the unit is idle
      if start_i = '1' and p2_busy = '0' then
        start_1d <= '1';
      else
        start_1d <= '0';
      end if;

      -- complete is a pulse: cleared every cycle unless set again below
      p2_complete_1d <= '0';

      if start_1d = '1' then
        -- load the prepared stage 1 values and begin iterating
        p2_busy    <= '1';
        p2_cnt     <= to_unsigned(28,5);
        p2_quo     <= (others => '0');
        p2_rem     <= p1_rem;
        p2_divisor <= p1_divisor;
        p2_exp     <= p1_exp;
        p2_exp_ov  <= p1_exp_ov;
        p2_sign    <= p1_sign;
      elsif p2_busy = '1' then
        -- one shift/subtract step per clock
        p2_rem <= p2_rem_next;
        p2_quo <= p2_quo_next;

        if p2_complete = '1' then
          -- last step: signal completion, go idle and reset count
          p2_complete_1d <= '1';
          p2_busy        <= '0';
          p2_cnt         <= to_unsigned(28,5);
        else
          p2_cnt <= p2_cnt-1;
        end if;
      end if;
    end if;
  end process;
169
 
170
  -- Combinational logic of both stages.
  --   stage 1: aligns dividend and divisor in steps of 8 bits and derives
  --            sign, exponent and overflow of the result
  --   stage 2: one shift/subtract step of the division plus the final
  --            quotient/exponent adjustment
  -- Every signal driven here gets a default value before any conditional
  -- override, so the statement order inside each group matters.
  process (p1_adjust_dividend, p1_adjust_divisor, p1_adjust_divisor_final,
           p1_allowed_dividend_shift, p1_delta_exp, p1_div_by_zero,
           p1_dividend, p1_dividend_mant, p1_divisor_mant, p1_divisor_zero,
           p1_exp_sum, p1_top_bits, p2_divisor, p2_exp, p2_quo, p2_quo_shft,
           p2_rem, p2_rem_shft, p2_sub, regA_i.fmt.exp, regA_i.fmt.sign,
           regA_i.mant, regB_i.fmt.exp, regB_i.fmt.sign, regB_i.mant)
  begin  -- process

    -- stage 1

    p1_dividend_mant <= regA_i.mant;
    p1_divisor_mant <= regB_i.mant;

    -- divisor mantissa padded with 3 zero bits, examined in blocks of 8 bits:
    -- bit i is '1' when the top (i+1)*8 bits are all zero
    p1_divisor_zero <= zero_blocks(p1_divisor_mant & to_unsigned(0,3),8);

    -- exponent difference (dividend - divisor) with an offset of 3
    p1_delta_exp <= to_unsigned(3,3)+('0' & regA_i.fmt.exp)-('0' & regB_i.fmt.exp);

    -- determine maximum allowed left shift of dividend (in units of 8 bits)
    p1_allowed_dividend_shift <= to_unsigned(0,2);

    if p1_divisor_zero(1 downto 0) = "01" or p1_delta_exp = to_unsigned(4,3) then
      p1_allowed_dividend_shift <= to_unsigned(1,2);
    elsif p1_divisor_zero(2 downto 1) = "01" or p1_delta_exp = to_unsigned(5,3) then
      p1_allowed_dividend_shift <= to_unsigned(2,2);
    elsif p1_divisor_zero(3 downto 2) = "01" or p1_delta_exp = to_unsigned(6,3) then
      p1_allowed_dividend_shift <= to_unsigned(3,2);
    end if;

    -- adjust dividend: shift left as far as its magnitude and the allowed
    -- shift permit
    p1_adjust_dividend <= to_unsigned(0,2);

    if p1_dividend_mant < to_unsigned(2**25,29) and p1_allowed_dividend_shift > to_unsigned(0,2) then
      -- the comparison constants have the same width (2 bits) as
      -- p1_allowed_dividend_shift
      if p1_dividend_mant >= to_unsigned(2**17,29) or p1_allowed_dividend_shift = to_unsigned(1,2) then
        p1_adjust_dividend <= to_unsigned(1,2);
      elsif p1_dividend_mant >= to_unsigned(2**9,29) or p1_allowed_dividend_shift = to_unsigned(2,2) then
        p1_adjust_dividend <= to_unsigned(2,2);
      elsif p1_dividend_mant >= to_unsigned(2**1,29) or p1_allowed_dividend_shift = to_unsigned(3,2) then
        p1_adjust_dividend <= to_unsigned(3,2);
      end if;
    end if;

    p1_dividend <= fast_shift(to_unsigned(0,4) & p1_dividend_mant,to_integer(p1_adjust_dividend)*8,fast_shift_left); -- extend with 4bits

    -- adjust divisor so that divisor >= dividend (when possible)
    p1_div_by_zero <= '0';
    p1_adjust_divisor <= to_unsigned(0,2);
    p1_top_bits <= p1_divisor_mant(28 downto 21);

    -- pick the first non-zero 8 bit block of the divisor as its top bits;
    -- if every block is zero the divisor is zero
    if p1_divisor_zero(0) = '1' then
      if p1_divisor_zero(1) = '0' then
        p1_top_bits <= p1_divisor_mant(20 downto 13);
        p1_adjust_divisor <= to_unsigned(1,2);
      elsif p1_divisor_zero(2) = '0' then
        p1_top_bits <= p1_divisor_mant(12 downto 5);
        p1_adjust_divisor <= to_unsigned(2,2);
      elsif p1_divisor_zero(3) = '0' then
        p1_top_bits <= p1_divisor_mant(4 downto 0) & "000";
        p1_adjust_divisor <= to_unsigned(3,2);
      else
        p1_div_by_zero <= '1';
      end if;
    end if;

    -- because dividend will be shifted right by 5 and left by 1 (= shifted
    -- right by 4) before division, only the top 4 bits are used for extra
    -- shift determination; p1_top_bits is shifted left by 1 because only
    -- the most significant bit position must be the same; an example:
    -- dividend: XXXXXAAA
    -- divisor:  BBBBBBBB
    -- the msb position counts, e.g.
    -- XXXXX111
    -- 00000100
    -- is a valid combination, therefore the <= operator is not enough

    p1_adjust_divisor_final <= '0' & p1_adjust_divisor;
    if ('0' & p1_dividend(32 downto 25)) >= (p1_top_bits & '0') then
      p1_adjust_divisor_final <= ('0' & p1_adjust_divisor)+1;
    end if;

    p1_divisor <= fast_shift(to_unsigned(0,7) & p1_divisor_mant,to_integer(p1_adjust_divisor_final)*8,fast_shift_left); -- 8bit overhead for shifting left

    -- build resulting fmt
    p1_sign <= regA_i.fmt.sign xor regB_i.fmt.sign;

    p1_exp_sum <= ('0' & p1_delta_exp)-('0' & p1_adjust_dividend)+('0' & p1_adjust_divisor_final);

    -- division by zero or an exponent sum of 8 and above is reported as
    -- overflow with the exponent held at 7
    p1_exp_ov <= '0';
    p1_exp <= to_unsigned(7,3);
    if p1_div_by_zero = '1' or p1_exp_sum >= to_unsigned(8,4) then
      p1_exp_ov <= '1';
    else
      p1_exp <= p1_exp_sum(2 downto 0);
    end if;

    p1_rem <= "000000000" & p1_dividend;

    -- stage 2

    -- shift
    p2_rem_shft <= p2_rem(40 downto 0) & '0';
    p2_quo_shft <= p2_quo(27 downto 0) & '0';

    -- bit 41 of p2_rem_shft is mostly zero; it is '1' in the situation where
    -- rem and divisor have the same msb position but rem is still greater.
    -- The divisor is extended with a '0' at this position, so it is the
    -- smaller operand in that case.
    p2_sub <= p2_rem_shft(41 downto 5)-('0' & p2_divisor(35 downto 0));

    p2_rem_next <= p2_rem_shft;
    p2_quo_next <= p2_quo_shft;

    -- check for sub overflow, i.e. whether p2_rem_shft >= p2_divisor
    if p2_sub(36) = '0' then -- no overflow: therefore do sub
      p2_rem_next(41 downto 5) <= '0' & p2_sub(35 downto 0);
      p2_quo_next(0) <= '1';
    end if;

    -- if exp > 3, normalize cannot correct it fully; therefore pre-shift the
    -- quotient left by 8 bits (losing precision) and decrement the exponent
    p2_exp_adjusted <= p2_exp;
    p2_quo_adjusted <= "00000000" & p2_quo(28 downto 0);

    if p2_exp >= to_unsigned(4,3) then
      p2_exp_adjusted <= p2_exp-1;
      p2_quo_adjusted <= p2_quo(28 downto 0) & "00000000";
    end if;

  end process;
304
 
305
  -- the iteration counter has run down to zero
  p2_complete <= '1' when p2_cnt = 0 else '0';

  -- ready only when idle and no start has just been accepted
  ready_o <= not (p2_busy or start_1d);

  -- one-cycle completion pulse from the register stage
  complete_o <= p2_complete_1d;

  -- raw result: zero-extended adjusted quotient, overflow flag, exponent, sign
  result_o <= ((15 downto 0 => '0') & p2_quo_adjusted,
               to_unsigned(0,4) & p2_exp_ov,
               p2_exp_adjusted,
               p2_sign);
310
 
311
end Rtl;
312
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.