1 |
46 |
dgisselq |
////////////////////////////////////////////////////////////////////////////////
|
2 |
|
|
//
|
3 |
|
|
// Filename: div.v
|
4 |
|
|
//
|
5 |
|
|
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
|
6 |
|
|
//
|
7 |
|
|
// Purpose: Provide an Integer divide capability to the Zip CPU. Provides
|
8 |
|
|
// for both signed and unsigned divide.
|
9 |
|
|
//
|
10 |
|
|
// Steps:
|
11 |
|
|
// i_rst The DIVide unit starts in idle. It can also be placed into an
|
12 |
|
|
// idle by asserting the reset input.
|
13 |
|
|
//
|
14 |
|
|
// i_wr When i_wr is asserted, a divide begins. On the next clock:
|
15 |
|
|
//
|
16 |
|
|
// o_busy is set high so everyone else knows we are at work and they can
|
17 |
|
|
// wait for us to complete.
|
18 |
|
|
//
|
19 |
|
|
// pre_sign is set to true if we need to do a signed divide. In this
|
20 |
|
|
// case, we take a clock cycle to turn the divide into an unsigned
|
21 |
|
|
// divide.
|
22 |
|
|
//
|
23 |
|
|
// o_quotient, a place to store our result, is initialized to all zeros.
|
24 |
|
|
//
|
25 |
|
|
// r_dividend is set to the numerator
|
26 |
|
|
//
|
27 |
|
|
// r_divisor is set to 2^31 * the denominator (shift left by 31, or add
|
28 |
|
|
// 31 zeros to the right of the number).
|
29 |
|
|
//
|
30 |
|
|
// pre_sign When true (clock cycle after i_wr), a clock cycle is used
|
31 |
|
|
// to take the absolute value of the various arguments (r_dividend
|
32 |
|
|
// and r_divisor), and to calculate what sign the output result
|
33 |
|
|
// should be.
|
34 |
|
|
//
|
35 |
|
|
//
|
36 |
|
|
// At this point, the divide has started. The divide works by walking
|
37 |
|
|
// through every shift of the
|
38 |
|
|
//
|
39 |
|
|
// DIVIDEND over the
|
40 |
|
|
// DIVISOR
|
41 |
|
|
//
|
42 |
|
|
// If the DIVISOR is bigger than the dividend, the divisor is shifted
|
43 |
|
|
// right, and nothing is done to the output quotient.
|
44 |
|
|
//
|
45 |
|
|
// DIVIDEND
|
46 |
|
|
// DIVISOR
|
47 |
|
|
//
|
48 |
|
|
// This repeats, until DIVISOR is less than or equal to the dividend, as in
|
49 |
|
|
//
|
50 |
|
|
// DIVIDEND
|
51 |
|
|
// DIVISOR
|
52 |
|
|
//
|
53 |
|
|
// At this point, if the DIVISOR is less than or equal to the dividend, the
|
54 |
|
|
// divisor is subtracted from the dividend, and the DIVISOR is again
|
55 |
|
|
// shifted to the right. Further, a '1' bit gets set in the output
|
56 |
|
|
// quotient.
|
57 |
|
|
//
|
58 |
|
|
// Once we've done this for 32 clocks, we've accumulated our answer into
|
59 |
|
|
// the output quotient, and we can proceed to the next step. If the
|
60 |
|
|
// result will be signed, the next step negates the quotient, otherwise
|
61 |
|
|
// it returns the result.
|
62 |
|
|
//
|
63 |
|
|
// On the clock when we are done, o_busy is set to false, and o_valid set
|
64 |
|
|
// to true. (It is a violation of the ZipCPU internal protocol for both
|
65 |
|
|
// busy and valid to ever be true on the same clock. It is also a
|
66 |
|
|
// violation for busy to be false with valid true thereafter.)
|
67 |
|
|
//
|
68 |
|
|
//
|
69 |
|
|
// Creator: Dan Gisselquist, Ph.D.
|
70 |
|
|
// Gisselquist Technology, LLC
|
71 |
|
|
//
|
72 |
|
|
////////////////////////////////////////////////////////////////////////////////
|
73 |
|
|
//
|
74 |
|
|
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
|
75 |
|
|
//
|
76 |
|
|
// This program is free software (firmware): you can redistribute it and/or
|
77 |
|
|
// modify it under the terms of the GNU General Public License as published
|
78 |
|
|
// by the Free Software Foundation, either version 3 of the License, or (at
|
79 |
|
|
// your option) any later version.
|
80 |
|
|
//
|
81 |
|
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
82 |
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
|
83 |
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
84 |
|
|
// for more details.
|
85 |
|
|
//
|
86 |
|
|
// You should have received a copy of the GNU General Public License along
|
87 |
|
|
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
|
88 |
|
|
// target there if the PDF file isn't present.) If not, see
|
89 |
|
|
// <http://www.gnu.org/licenses/> for a copy.
|
90 |
|
|
//
|
91 |
|
|
// License: GPL, v3, as defined and found on www.gnu.org,
|
92 |
|
|
// http://www.gnu.org/licenses/gpl.html
|
93 |
|
|
//
|
94 |
|
|
//
|
95 |
|
|
////////////////////////////////////////////////////////////////////////////////
|
96 |
|
|
//
|
97 |
|
|
//
|
98 |
|
|
// `include "cpudefs.v"
|
99 |
|
|
//
|
100 |
|
|
module div(i_clk, i_rst, i_wr, i_signed, i_numerator, i_denominator,
		o_busy, o_valid, o_err, o_quotient, o_flags);
	// BW is the operand/result width in bits.  LGBW is the width of the
	// bit counter, r_bit, which starts at all ones and indexes o_quotient;
	// for every quotient bit to be visited, 2^LGBW must equal BW.
	parameter		BW=32, LGBW = 5;
	input			i_clk, i_rst;
	// Input parameters
	//	i_wr		starts a new divide of i_numerator by
	//			i_denominator
	//	i_signed	sampled together with i_wr; requests a signed
	//			divide
	input			i_wr, i_signed;
	input	[(BW-1):0]	i_numerator, i_denominator;
	// Output parameters
	//	o_busy		high from the clock after i_wr until the
	//			result (or error) is presented
	//	o_valid		pulses when o_quotient holds the result
	//	o_err		pulses when the divisor was found to be zero
	//	o_flags		{ 1'b0, N, C, Z } -- see the bottom of the file
	output	reg		o_busy, o_valid, o_err;
	output	reg [(BW-1):0]	o_quotient;
	output	wire	[3:0]	o_flags;

	// r_busy is an internal busy register.  It will clear one clock
	// before we are valid, so it can't be o_busy ...
	//
	reg			r_busy;
	// The divisor starts out shifted left by BW-1 bits, and is shifted
	// right by one bit on every clock of the divide.
	reg	[(2*BW-2):0]	r_divisor;
	reg	[(BW-1):0]	r_dividend;
	// diff is one bit wider than its operands, so that diff[BW] is set
	// whenever the (aligned) divisor is larger than the dividend.
	wire	[(BW):0]	diff; // , xdiff[(BW-1):0];
	assign	diff = r_dividend - r_divisor[(BW-1):0];
	// assign	xdiff= r_dividend - { 1'b0, r_divisor[(BW-1):1] };

	reg			r_sign, pre_sign, r_z, r_c, last_bit;
	reg	[(LGBW-1):0]	r_bit;

	//
	// zero_divisor: registered divide-by-zero detect, only true while a
	// divide is in progress.
	//
	reg	zero_divisor;
	initial	zero_divisor = 1'b0;
	always @(posedge i_clk)
		zero_divisor <= (r_divisor == 0)&&(r_busy);

	//
	// r_busy: true while the shift-and-subtract loop is running.  Reset
	// has priority over a new request.
	//
	initial	r_busy = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
			r_busy <= 1'b0;
		else if (i_wr)
			r_busy <= 1'b1;
		else if ((last_bit)||(zero_divisor))
			r_busy <= 1'b0;

	//
	// o_busy: as r_busy, but held for one extra clock when the quotient
	// still needs to be negated (r_sign set).
	//
	initial	o_busy = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
			o_busy <= 1'b0;
		else if (i_wr)
			o_busy <= 1'b1;
		else if (((last_bit)&&(~r_sign))||(zero_divisor))
			o_busy <= 1'b0;
		else if (~r_busy)
			o_busy <= 1'b0;

	//
	// o_valid: raised as the loop ends when no negation is required (or
	// on a zero divisor), otherwise raised one clock later together with
	// the negated quotient.
	//
	// The initial value matches every other control register in this
	// module, so that o_valid is defined before the first divide.
	//
	initial	o_valid = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(i_wr))
			o_valid <= 1'b0;
		else if (r_busy)
		begin
			if ((last_bit)||(zero_divisor))
				o_valid <= (zero_divisor)||(~r_sign);
		end else if (r_sign)
		begin
			o_valid <= (~zero_divisor); // 1'b1;
		end else
			o_valid <= 1'b0;

	//
	// o_err: single clock strobe reporting a divide by zero.
	//
	initial	o_err = 1'b0;
	always @(posedge i_clk)
		if((i_rst)||(o_valid))
			o_err <= 1'b0;
		else if (((r_busy)||(r_sign))&&(zero_divisor))
			o_err <= 1'b1;
		else
			o_err <= 1'b0;

	//
	// last_bit: set while r_bit is one, so that it is seen high on the
	// clock where quotient bit zero is evaluated.
	//
	initial	last_bit = 1'b0;
	always @(posedge i_clk)
		if ((i_wr)||(pre_sign)||(i_rst))
			last_bit <= 1'b0;
		else if (r_busy)
			last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});

	//
	// pre_sign: true for the one clock following a signed request.  That
	// clock is used to take the absolute value of both operands.
	//
	// This register must be cleared by i_rst.  Otherwise a request
	// arriving together with the reset would still set r_sign on the
	// following clock, even though r_busy had been cleared.
	//
	initial	pre_sign = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
			pre_sign <= 1'b0;
		else if (i_wr)
			pre_sign <= i_signed;
		else if (pre_sign)
			pre_sign <= 1'b0;

	//
	// r_sign: true if the quotient must be negated once the unsigned
	// divide has finished.
	//
	// This register must be cleared by i_rst as well.  Without that, a
	// reset in the middle of a signed divide leaves r_sign set while
	// r_busy is clear, and the idle unit then raises o_valid (and
	// negates o_quotient) on the clock after the reset.
	//
	initial	r_sign = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
			r_sign <= 1'b0;
		else if (i_wr)
			r_sign <= 1'b0;
		else if (pre_sign)
			// The signs of the two operands differ
			r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
		else if (r_busy)
			// No negation is wanted on a divide by zero
			r_sign <= (r_sign)&&(~zero_divisor);
		else
			// Idle, or the negation takes place on this clock
			r_sign <= 1'b0;

	//
	// The data path: o_quotient, r_bit, r_divisor, r_dividend and r_z
	//
	always @(posedge i_clk)
		if (i_wr)
		begin
			//
			// Set our values upon an initial command.  Here's
			// where we come in and start.
			//
			o_quotient <= 0;
			r_bit <= {(LGBW){1'b1}};
			r_divisor <= {  i_denominator, {(BW-1){1'b0}} };
			r_dividend <=  i_numerator;
			r_z <= 1'b1;
		end else if (pre_sign)
		begin
			//
			// Note that we only come in here, for one clock, if
			// our initial value may have been signed.  If we are
			// doing an unsigned divide, we then skip this step.
			//
			// Negate our dividend if necessary so that it becomes
			// a magnitude only value
			if (r_dividend[BW-1])
				r_dividend <= -r_dividend;
			// Do the same with the divisor--rendering it into
			// a magnitude only.  Only the upper BW bits are
			// touched, the lower BW-1 bits are still zero.
			if (r_divisor[(2*BW-2)])
				r_divisor[(2*BW-2):(BW-1)]
					<= -r_divisor[(2*BW-2):(BW-1)];
		end else if (r_busy)
		begin
			// While the divide is taking place, we examine each bit
			// in turn here.
			//
			r_bit  <= r_bit + {(LGBW){1'b1}}; // r_bit = r_bit - 1;
			r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };

			// Nothing is subtracted while the shifted divisor
			// still has bits above the dividend's width, nor when
			// diff (= r_dividend - r_divisor[(BW-1):0]) came out
			// negative: in both cases there isn't enough value
			// in the dividend to support pulling off this bit, so
			// we just move down a bit and try again.
			if ((~(|r_divisor[(2*BW-2):(BW)]))&&(~diff[BW]))
			begin
				//
				// Put a '1' into our output accumulator.
				// Subtract the divisor from the dividend,
				// and then move on to the next bit
				//
				r_dividend <= diff[(BW-1):0];
				o_quotient[r_bit[(LGBW-1):0]] <= 1'b1;
				r_z <= 1'b0;
			end
		end else if (r_sign)
		begin
			// The loop has finished, and the operand signs
			// differed: negate the magnitude we calculated.
			o_quotient <= -o_quotient;
		end

	// Set Carry on an exact divide
	wire	w_n;
	always @(posedge i_clk)
		r_c <= (r_busy)&&((diff == 0)||(r_dividend == 0));
	// The negative flag is the sign bit of the quotient
	assign w_n = o_quotient[(BW-1)];

	// r_z (zero) is set when a divide starts, and cleared as soon as any
	// quotient bit gets set.
	assign o_flags = { 1'b0, w_n, r_c, r_z };
endmodule
|