1 |
2 |
sbdesign |
/****************************************************************************
|
2 |
|
|
sbd_sqrt_fp
|
3 |
|
|
|
4 |
|
|
- square root function for single or double precision IEEE 754
|
5 |
|
|
floating-point numbers
|
6 |
|
|
|
7 |
|
|
This core is capable of handling single or double precision floating-
|
8 |
|
|
point numbers. For single precision, set the bitlength parameter to 32;
|
9 |
|
|
for double precision, set the bitlength parameter to 64. The algorithm
|
10 |
|
|
used is iterative and will take 52 clock cycles for single precision and
|
11 |
|
|
110 clock cycles for double precision. The sign bit is simply passed
|
12 |
|
|
through from input to output, so if given negative input, this core
|
13 |
|
|
will produce a negative output corresponding to -1 * sqrt(|input|).
|
14 |
|
|
|
15 |
|
|
Ports: - all signals are active high
|
16 |
|
|
|
17 |
|
|
input [bitlength-1:0] D_IN; // single or double precision input
|
18 |
|
|
input VAL_IN; // Assert VAL_IN to signal a valid
|
19 |
|
|
input value. The module will only
|
20 |
|
|
accept input when RDY_IN is asserted.
|
21 |
|
|
If RDY_IN is low, then VAL_IN should
|
22 |
|
|
remain asserted until RDY_IN goes
|
23 |
|
|
high. VAL_IN and RDY_IN must both be
|
24 |
|
|
asserted for one clock cycle for
|
25 |
|
|
computation to begin.
|
26 |
|
|
output wire RDY_IN; // module is ready to accept input
|
27 |
|
|
input CLK; // clock
|
28 |
|
|
output reg [bitlength-1:0] D_OUT; // single or double precision output
|
29 |
|
|
output reg VAL_OUT; // VAL_OUT is asserted when the output
|
30 |
|
|
is valid. VAL_OUT will remain asserted
|
31 |
|
|
and D_OUT will persist until VAL_OUT and
|
32 |
|
|
RDY_OUT have both been asserted for one
|
33 |
|
|
clock cycle.
|
34 |
|
|
input RDY_OUT; // when asserted, downstream logic is
|
35 |
|
|
ready to accept output of module
|
36 |
|
|
|
37 |
|
|
This core was designed using synchronous resets, for use in FPGAs. The
|
38 |
|
|
synchronous resets could easily be made asynchronous for use in ASICs by
|
39 |
|
|
editing the always @ (posedge CLK) blocks in this and all dependent files.
|
40 |
|
|
|
41 |
|
|
Resource Utilization:
|
42 |
|
|
(synthesized with XST for Virtex4 - no architectural dependencies exist in this core
|
43 |
|
|
- this is merely an example)
|
44 |
|
|
|
45 |
|
|
Single Precision:
|
46 |
|
|
Slices: 155
|
47 |
|
|
Flip Flops: 204
|
48 |
|
|
4-Input LUTs: 269
|
49 |
|
|
|
50 |
|
|
Double Precision:
|
51 |
|
|
Slices: 324
|
52 |
|
|
Flip Flops: 417
|
53 |
|
|
4-Input LUTs: 566
|
54 |
|
|
|
55 |
|
|
Both numerical accuracy and performance of the single and double precision versions of
|
56 |
|
|
this core have been verified in a Xilinx XC4VLX25-10 at 100MHz. Without optimizations,
|
57 |
|
|
the single precision core should operate at up to at least 190MHz, and the double precision
|
58 |
|
|
core should operate at up to at least 134MHz in this platform. Higher performance should
|
59 |
|
|
be possible with a little effort.
|
60 |
|
|
|
61 |
|
|
This module uses the following files:
|
62 |
|
|
|
63 |
|
|
sbd_sqrt_fp_calc_mant.v
|
64 |
|
|
sbd_sqrt_fp_state_mach.v
|
65 |
|
|
sbd_shifter_left2.v
|
66 |
|
|
sbd_shifter_left3_right2.v
|
67 |
|
|
sbd_adsu.v
|
68 |
|
|
|
69 |
|
|
Copyright (C) 2005 Samuel Brown
|
70 |
|
|
sam.brown@sbdesign.org
|
71 |
|
|
|
72 |
|
|
This library is free software; you can redistribute it and/or
|
73 |
|
|
modify it under the terms of the GNU Lesser General Public
|
74 |
|
|
License as published by the Free Software Foundation; either
|
75 |
|
|
version 2.1 of the License, or (at your option) any later version.
|
76 |
|
|
|
77 |
|
|
This library is distributed in the hope that it will be useful,
|
78 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
79 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
80 |
|
|
Lesser General Public License for more details.
|
81 |
|
|
|
82 |
|
|
You should have received a copy of the GNU Lesser General Public
|
83 |
|
|
License along with this library; if not, write to the Free Software
|
84 |
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
85 |
|
|
|
86 |
|
|
****************************************************************************/
|
87 |
|
|
|
88 |
|
|
module sbd_sqrt_fp ( D_IN, VAL_IN, RDY_IN, CLK, D_OUT, VAL_OUT, RDY_OUT );

    // Floating-point square root wrapper.
    //
    // Captures an operand on a VAL_IN/RDY_IN handshake, hands the prepared
    // mantissa to sbd_sqrt_fp_calc_mant, reassembles { sign, exponent,
    // mantissa } into sqrt_final and presents it on D_OUT with a
    // VAL_OUT/RDY_OUT handshake.
    //
    // Only bitlength == 32 and bitlength == 64 are handled by the generate
    // block below; for any other value sqrt_final and mant_out are left
    // undriven.
    //
    // NOTE(review): the exponent fields are not checked for the all-ones
    // (Inf/NaN) encoding anywhere in this module - confirm whether callers
    // are expected to filter those operands.

    parameter bitlength = 32;

    input  [bitlength-1:0]     D_IN;     // operand
    input                      VAL_IN;   // operand valid
    output wire                RDY_IN;   // module can accept an operand
    input                      CLK;      // clock
    output reg [bitlength-1:0] D_OUT;    // result
    output reg                 VAL_OUT;  // result valid
    input                      RDY_OUT;  // downstream can accept a result

    reg  [bitlength-1:0] raw_data_reg;   // operand captured on beg_in
    reg                  beg_in_reg;     // beg_in delayed by one clock
    reg                  d_rdy_reg;      // 1 while a computation is in flight
    wire                 mant_out;       // VAL_OUT of the mantissa calculator
    reg                  mant_out_reg;   // mant_out delayed by one clock

    wire [bitlength-1:0] sqrt_final;     // reassembled result word

    generate
        if(bitlength == 32)
        begin:single

            //------------------ Single Precision --------------------------------------

            wire [7:0] exp_off, exp_adj, exp_final;

            // 1 when the stored exponent field is not all zeros.
            wire non_zero_exp = |raw_data_reg[30:23];

            wire [23:0] mant_final;
            wire [22:0] mant_final_imp = mant_final[22:0];  // drop the explicit leading bit
            reg  [23:0] mant_adj;   // mantissa with explicit leading bit - adjusted
                                    // for even/odd exponent

            //------------ Exponent Calculation & Mantissa Preparation --------------

            // Combines the stored exponent with the bias constant 8'h7F.
            // presumably ADD=0 selects subtraction (exponent - bias) - confirm
            // against sbd_adsu.v
            sbd_adsu #(.bitlength(8)) exp_offset_adsu (
                .A(raw_data_reg[30:23]),
                .B(8'h7F),
                .ADD(1'b0),
                .C_IN(1'b1),
                .S(exp_off));

            // Adds the bias constant to exp_off arithmetically shifted right
            // by one (sign bit replicated).
            sbd_adsu #(.bitlength(8)) exp_adjust_adsu (
                .A(8'h7F),
                .B({ exp_off[7], exp_off[7:1] }),
                .ADD(1'b1),
                .C_IN(1'b0),
                .S(exp_adj));

            // Force the result exponent to zero when the input exponent was zero.
            assign exp_final = exp_adj & {8{non_zero_exp}};

            // Select the mantissa alignment from the LSB of the stored exponent.
            always @*
            begin:mant_adj_mux
                if(raw_data_reg[23]) mant_adj = { 1'b0, non_zero_exp, raw_data_reg[22:1] };
                else                 mant_adj = { non_zero_exp, raw_data_reg[22:0] };
            end

            //------------- Mantissa Calculation --------------------------------------

            sbd_sqrt_fp_calc_mant #(.mantlength(24)) mant_iterations (
                .MANT_IN(mant_adj),
                .CLK(CLK),
                .VAL_IN(beg_in_reg),
                .MANT_OUT(mant_final),
                .VAL_OUT(mant_out));

            // The sign bit is passed straight through from the captured operand.
            assign sqrt_final = { raw_data_reg[bitlength-1], exp_final, mant_final_imp };

        end
        else if(bitlength == 64)
        begin:double

            //------------------ Double Precision --------------------------------------

            wire [10:0] exp_off, exp_adj, exp_final;

            // 1 when the stored exponent field is not all zeros.
            wire non_zero_exp = |raw_data_reg[62:52];

            wire [52:0] mant_final;
            wire [51:0] mant_final_imp = mant_final[51:0];  // drop the explicit leading bit
            reg  [52:0] mant_adj;   // mantissa with explicit leading bit - adjusted
                                    // for even/odd exponent

            //------------ Exponent Calculation & Mantissa Preparation --------------

            // Combines the stored exponent with the bias constant 11'h3FF.
            // presumably ADD=0 selects subtraction (exponent - bias) - confirm
            // against sbd_adsu.v
            sbd_adsu #(.bitlength(11)) exp_offset_adsu (
                .A(raw_data_reg[62:52]),
                .B(11'h3FF),
                .ADD(1'b0),
                .C_IN(1'b1),
                .S(exp_off));

            // Adds the bias constant to exp_off arithmetically shifted right
            // by one (sign bit replicated).
            sbd_adsu #(.bitlength(11)) exp_adjust_adsu (
                .A(11'h3FF),
                .B({ exp_off[10], exp_off[10:1] }),
                .ADD(1'b1),
                .C_IN(1'b0),
                .S(exp_adj));

            // Force the result exponent to zero when the input exponent was zero.
            assign exp_final = exp_adj & {11{non_zero_exp}};

            // Select the mantissa alignment from the LSB of the stored exponent.
            always @*
            begin:mant_adj_mux
                if(raw_data_reg[52]) mant_adj = { 1'b0, non_zero_exp, raw_data_reg[51:1] };
                else                 mant_adj = { non_zero_exp, raw_data_reg[51:0] };
            end

            //------------- Mantissa Calculation --------------------------------------

            sbd_sqrt_fp_calc_mant #(.mantlength(53)) mant_iterations (
                .MANT_IN(mant_adj),
                .CLK(CLK),
                .VAL_IN(beg_in_reg),
                .MANT_OUT(mant_final),
                .VAL_OUT(mant_out));

            // The sign bit is passed straight through from the captured operand.
            assign sqrt_final = { raw_data_reg[bitlength-1], exp_final, mant_final_imp };

        end
    endgenerate

    // Ready for a new operand whenever no computation is in flight.
    assign RDY_IN = ~d_rdy_reg;

    // Single-cycle strobe: an operand is accepted this clock.
    wire beg_in = VAL_IN & RDY_IN;

    // Power-up / simulation start values (the module has no reset port).
    initial
    begin:simzero

        D_OUT        <= 0;
        VAL_OUT      <= 0;
        raw_data_reg <= 0;
        beg_in_reg   <= 0;
        d_rdy_reg    <= 0;
        mant_out_reg <= 0;

    end

    always @ (posedge CLK)
    begin:reg_input
        // input buffer
        if(beg_in) raw_data_reg <= D_IN;

        // val delay: the mantissa calculator is started one clock after the
        // operand is captured, so mant_adj is already stable
        beg_in_reg <= beg_in;

        // RDY state: busy from operand acceptance until the mantissa
        // calculator reports completion
        if(mant_out)    d_rdy_reg <= 0;
        else if(beg_in) d_rdy_reg <= 1;

        // output registers
        // NOTE(review): D_OUT is overwritten even if the previous result is
        // still pending (VAL_OUT high, RDY_OUT low); RDY_IN does not apply
        // back-pressure for that case.
        if(mant_out_reg) D_OUT <= sqrt_final;

        // A freshly latched result takes priority over the handshake clear.
        // If downstream consumes the previous result on the same clock that
        // a new one is loaded into D_OUT, VAL_OUT must stay high so the new
        // result is still announced instead of being silently dropped.
        if(mant_out_reg)            VAL_OUT <= 1;
        else if(VAL_OUT && RDY_OUT) VAL_OUT <= 0;

        // delay output latch
        mant_out_reg <= mant_out;

    end

endmodule
|