URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float
- from Rev 63 to Rev 64
- ↔ Reverse comparison
Rev 63 → Rev 64
/trunk/rtl/verilog2/DFPAddsub128.sv
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2021 Robert Finch, Waterloo |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
46,6 → 46,7
input DFP128 b; |
output DFP128UD o; |
localparam N=34; // number of BCD digits |
localparam RIP_STAGES = 3; |
|
parameter TRUE = 1'b1; |
parameter FALSE = 1'b0; |
52,27 → 53,22
|
DFP128U au; |
DFP128U bu; |
wire sa, sb; |
wire sxa, sxb; |
wire adn, bdn; |
wire xainf, xbinf; |
wire ainf, binf; |
wire aNan, bNan; |
wire [13:0] xa, xb; |
wire [N*4-1:0] siga, sigb; |
|
DFPUnpack128 u00 (a, au); |
DFPUnpack128 u01 (b, bu); |
|
reg [(N+1)*4-1:0] oaa10; |
reg [(N+1)*4-1:0] obb10; |
wire [(N+1)*4-1:0] oss10; |
wire oss10c; |
|
BCDAddN #(.N(N+1)) ubcdan1 |
BCDAdd8NClk #(.N((N+2)/2)) ubcdadn1 |
( |
.clk(clk), |
.a({8'h00,oaa10}), |
.b({8'h00,obb10}), |
.o(oss10), |
.ci(1'b0), |
.a(oaa10), |
.b(obb10), |
.o(oss10), |
.co(oss10c) |
); |
|
79,12 → 75,13
wire [(N+1)*4-1:0] odd10; |
wire odd10c; |
|
BCDSubN #(.N(N+1)) ubcdsn1 |
BCDSub8NClk #(.N((N+2)/2)) ubcdsdn1 |
( |
.clk(clk), |
.a({8'h00,oaa10}), |
.b({8'h00,obb10}), |
.o(odd10), |
.ci(1'b0), |
.a(oaa10), |
.b(obb10), |
.o(odd10), |
.co(odd10c) |
); |
|
93,11 → 90,11
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg op1; |
reg az, bz; |
always @(posedge clk) |
always_ff @(posedge clk) |
op1 <= op; |
always @(posedge clk) |
always_ff @(posedge clk) |
az <= au.sig==136'd0 && au.exp==14'd0; |
always @(posedge clk) |
always_ff @(posedge clk) |
bz <= bu.sig==136'd0 && bu.exp==14'd0; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
126,31 → 123,31
reg expeq; |
reg sxo2; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) realOp2 = op1 ^ au.sign ^ bu.sign; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) op2 <= op1; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xa2 <= au.exp; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xb2 <= bu.exp; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) siga2 <= au.sig; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) sigb2 <= bu.sig; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) az2 <= az; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) bz2 <= bz; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) |
xa_gt_xb2 <= au.exp > bu.exp; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) sigeq <= au.sig==bu.sig; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) siga_gt_sigb <= au.sig > bu.sig; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) expeq <= au.exp==bu.exp; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
170,27 → 167,27
wire [2:0] rm3; |
reg [N*4-1:0] mfs3; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) resZero3 <= (realOp2 & expeq & sigeq) || // subtract, same magnitude |
(az2 & bz2); // both a,b zero |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xa3 <= xa2; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xb3 <= xb2; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xa_gt_xb3 <= xa_gt_xb2; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) a_gt_b3 <= xa_gt_xb2 | (expeq & siga_gt_sigb); |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) op3 <= op2; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) mfs3 = xa_gt_xb2 ? sigb2 : siga2; |
|
delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(au.sign), .o(sa3)); |
delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(bu.sign), .o(sb3)); |
delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3)); |
delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(aInf), .o(aInf3)); |
delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bInf), .o(bInf3)); |
ft_delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(au.sign), .o(sa3)); |
ft_delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(bu.sign), .o(sb3)); |
ft_delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3)); |
ft_delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(aInf), .o(aInf3)); |
ft_delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bInf), .o(bInf3)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #4 |
207,18 → 204,18
reg [13:0] xo4; |
reg xa_gt_xb4; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xa4 <= xa3; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xb4 <= xb3; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xo4 <= resZero3 ? 14'd0 : xa_gt_xb3 ? xa3 : xb3; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xa_gt_xb4 <= xa_gt_xb3; |
|
// Compute output sign |
reg so4; |
always @* |
always_comb |
case ({resZero3,sa3,op3,sb3}) // synopsys full_case parallel_case |
4'b0000: so4 <= 0; // + + + = + |
4'b0001: so4 <= !a_gt_b3; // + + - = sign of larger |
244,7 → 241,7
// Compute the difference in exponents, provides shift amount |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [13:0] xdiff5; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xdiff5 <= xa_gt_xb4 ? xa4 - xb4 : xb4 - xa4; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
257,9 → 254,9
// keep track of a difference more than 24. |
reg [6:0] xdif6; |
wire [N*4-1:0] mfs6; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) xdif6 <= xdiff5 > N ? N : xdiff5[6:0]; |
delay #(.WID(N*4), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6)); |
ft_delay #(.WID(N*4), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #7 |
274,7 → 271,8
wire [N*4-1:0] mfs7; |
wire [8:0] xdif6a = {xdif6,2'b00}; // *4 |
integer n; |
always @* begin |
always @* |
begin |
sticky6 = 1'b0; |
for (n = 0; n < N*4; n = n + 4) |
if (n <= xdif6a) |
293,15 → 291,15
wire [N*4-1:0] siga8, sigb8; |
wire xa_gt_xb8; |
wire a_gt_b8; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) md8 <= ({mfs7,4'b0} >> xdif7)|sticky7; // xdif7 is a multiple of four |
|
// sync control signals |
delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8)); |
delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8)); |
delay #(.WID(N*4), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8)); |
delay #(.WID(N*4), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8)); |
delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8)); |
ft_delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8)); |
ft_delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8)); |
ft_delay #(.WID(N*4), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8)); |
ft_delay #(.WID(N*4), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8)); |
ft_delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #9 |
310,56 → 308,48
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [(N+1)*4-1:0] oa9, ob9; |
reg a_gt_b9; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) oa9 <= xa_gt_xb8 ? {siga8,4'b0} : md8; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) ob9 <= xa_gt_xb8 ? md8 : {sigb8,4'b0}; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) a_gt_b9 <= a_gt_b8; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #10 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [(N+1)*4-1:0] oaa10; |
reg [(N+1)*4-1:0] obb10; |
wire realOp10; |
reg [13:0] xo10; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) oaa10 <= a_gt_b9 ? oa9 : ob9; |
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) obb10 <= a_gt_b9 ? ob9 : oa9; |
delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10)); |
delay #(.WID(14), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10)); |
ft_delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10)); |
ft_delay #(.WID(14), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #11 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [(N+1)*4-1:0] mab11; |
reg mab11c; |
wire [(N+1)*4-1:0] mab11; |
wire mab11c; |
wire [N*4-1:0] siga11, sigb11; |
wire abInf11; |
wire aNan11, bNan11; |
reg xoinf11; |
wire xoinf11; |
wire op11; |
|
always @(posedge clk) |
if (ce) mab11 <= realOp10 ? odd10 : oss10; |
always @(posedge clk) |
if (ce) mab11c <= realOp10 ? odd10c : oss10c; |
ft_delay #(.WID(1), .DEP(8+RIP_STAGES)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11)); |
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11c (.clk(clk), .ce(ce), .i(au.nan), .o(aNan11)); |
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11d (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan11)); |
ft_delay #(.WID(1), .DEP(3+RIP_STAGES)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11)); |
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11)); |
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11)); |
ft_delay #(.WID(1), .DEP(1+RIP_STAGES)) udly11h (.clk(clk), .ce(ce), .i(xo10==14'h2FFF), .o(xoinf11)); |
ft_delay #(.WID((N+1)*4+1), .DEP(1+RIP_STAGES)) udly11i (.clk(clk), .ce(ce), .i(realOp10 ? {odd10c,odd10} : {oss10c,oss10}), .o({mab11c,mab11})); |
|
delay #(.WID(1), .DEP(8)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11)); |
delay #(.WID(1), .DEP(10)) udly11c (.clk(clk), .ce(ce), .i(aNan), .o(aNan11)); |
delay #(.WID(1), .DEP(10)) udly11d (.clk(clk), .ce(ce), .i(bNan), .o(bNan11)); |
delay #(.WID(1), .DEP(3)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11)); |
delay #(.WID(N*4), .DEP(3)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11)); |
delay #(.WID(N*4), .DEP(3)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11)); |
|
always @(posedge clk) |
if (ce) xoinf11 <= xo10==14'h2FFF; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #12 |
// Clock #12+RIP_STAGES |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [(N+1)*4*2-1:0] mo12; // mantissa output |
reg nan12; |
367,14 → 357,14
reg infinity12; |
wire sxo11; |
wire so11; |
delay #(.WID(1), .DEP(9)) udly12a (.clk(clk), .ce(ce), .i(sxo2), .o(sxo11)); |
delay #(.WID(1), .DEP(7)) udly12b (.clk(clk), .ce(ce), .i(so4), .o(so11)); |
ft_delay #(.WID(1), .DEP(9)) udly12a (.clk(clk), .ce(ce), .i(sxo2), .o(sxo11)); |
ft_delay #(.WID(1), .DEP(7)) udly12b (.clk(clk), .ce(ce), .i(so4), .o(so11)); |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) |
nan12 <= aNan11|bNan11; |
|
always @(posedge clk) |
always_ff @(posedge clk) |
if (ce) begin |
infinity12 <= 1'b0; |
qnan12 <= 1'b0; |
402,13 → 392,13
wire [15:0] xo; // de normalized exponent output |
wire [(N+1)*4*2-1:0] mo; // mantissa output |
|
delay #(.WID(1), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(nan12), .o(o.nan) ); |
delay #(.WID(1), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(qnan12), .o(o.qnan) ); |
delay #(.WID(1), .DEP(1)) u13e (.clk(clk), .ce(ce), .i(infinity12), .o(o.infinity) ); |
delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(o.sign)); |
delay #(.WID(14), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(o.exp)); |
delay #(.WID((N+1)*4*2), .DEP(1)) u13f (.clk(clk), .ce(ce), .i(mo12), .o(o.sig)); |
delay #(.WID(1), .DEP(1)) udly13g (.clk(clk), .ce(ce), .i(1'b0), .o(o.snan)); |
ft_delay #(.WID(1), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(nan12), .o(o.nan) ); |
ft_delay #(.WID(1), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(qnan12), .o(o.qnan) ); |
ft_delay #(.WID(1), .DEP(1)) u13e (.clk(clk), .ce(ce), .i(infinity12), .o(o.infinity) ); |
ft_delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(o.sign)); |
ft_delay #(.WID(14), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(o.exp)); |
ft_delay #(.WID((N+1)*4*2), .DEP(1)) u13f (.clk(clk), .ce(ce), .i(mo12), .o(o.sig)); |
ft_delay #(.WID(1), .DEP(1)) udly13g (.clk(clk), .ce(ce), .i(1'b0), .o(o.snan)); |
|
endmodule |
|
/trunk/rtl/verilog2/DFPMultiply128.sv
0,0 → 1,246
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPMultiply128.sv |
// - decimal floating point multiplier |
// - parameterized width |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// |
// Floating Point Multiplier |
// |
// Properties: |
// +-inf * +-inf = -+inf (this is handled by exOver) |
// +-inf * 0 = QNaN |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
//`define DFPMUL_PARALLEL 1'b1 |
|
module DFPMultiply128(clk, ce, ld, a, b, o, sign_exe, inf, overflow, underflow, done); |
localparam N=34; |
localparam DELAY = 2; |
input clk; |
input ce; |
input ld; |
input DFP128 a, b; |
output DFP128UD o; |
output sign_exe; |
output inf; |
output overflow; |
output underflow; |
output done; |
|
reg [13:0] xo1; // extra bit for sign |
reg [N*4*2-1:0] mo1; |
|
// constants |
wire [13:0] infXp = 14'h2FFF; // infinite / NaN - all ones |
wire [13:0] bias = 14'h17FF; |
// The following is the value for an exponent of zero, with the offset |
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc. |
// The following is a template for a quiet nan. (MSB=1) |
wire [N*4-1:0] qNaN = {4'h1,{104{1'b0}}}; |
|
// variables |
reg [N*4*2-1:0] sig1; |
wire [13:0] ex2; |
|
DFP128U au, bu; |
DFPUnpack128 u01 (a, au); |
DFPUnpack128 u02 (b, bu); |
|
// Decompose the operands |
wire sa, sb; // sign bit |
wire [13:0] xa, xb; // exponent bits |
wire sxa, sxb; |
wire [N*4-1:0] siga, sigb; |
wire a_dn, b_dn; // a/b is denormalized |
wire aNan1, bNan1; |
wire az, bz; |
wire aInf1, bInf1; |
|
assign siga = au.sig; |
assign sigb = bu.sig; |
assign az = au.exp==14'h0 && au.sig==136'd0; |
assign bz = bu.exp==14'h0 && bu.sig==136'd0; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #1 |
// - decode the input operands |
// - derive basic information |
// - calculate exponent |
// - calculate fraction |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
// ----------------------------------------------------------- |
// First clock |
// Compute the sum of the exponents. |
// ----------------------------------------------------------- |
|
wire under, over; |
wire [15:0] sum_ex = au.exp + bu.exp - bias; |
reg [15:0] sum_ex; |
reg sx0; |
wire done1; |
assign under = &sum_ex[15:14]; |
assign over = sum_ex > 16'h2FFF; |
|
wire [N*4*2-1:0] sigoo; |
`ifdef DFPMUL_PARALLEL |
BCDMul32 u1f (.a({20'h0,siga}),.b({20'h0,sigb}),.o(sigoo)); |
`else |
dfmul #(.N(N)) u1g |
( |
.clk(clk), |
.ld(ld), |
.a(siga), |
.b(sigb), |
.p(sigoo), |
.done(done1) |
); |
`endif |
|
always @(posedge clk) |
if (ce) sig1 <= sigoo[N*4*2-1:0]; |
|
// Status |
wire under1, over1; |
|
ft_delay #(.WID(14),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(sum_ex[13:0]), .o(ex2) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(au.infinity), .o(aInf1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bu.infinity), .o(bInf1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u6 (.clk(clk), .ce(ce), .i(under), .o(under1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u7 (.clk(clk), .ce(ce), .i(over), .o(over1) ); |
|
// determine when a NaN is output |
wire qNaNOut; |
wire DFP128U a1,b1; |
wire asnan, bsnan, aqnan, bqnan; |
ft_delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((au.infinity&bz)|(bu.infinity&az)), .o(qNaNOut) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(au.nan), .o(aNan1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u18 (.clk(clk), .ce(ce), .i(au.snan), .o(asnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u19 (.clk(clk), .ce(ce), .i(bu.snan), .o(bsnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u18a (.clk(clk), .ce(ce), .i(au.qnan), .o(aqnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u19a (.clk(clk), .ce(ce), .i(bu.qnan), .o(bqnan) ); |
ft_delay #(.WID($bits(a1)),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
ft_delay #(.WID($bits(b1)),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
|
// ----------------------------------------------------------- |
// Second clock |
// - correct xponent and mantissa for exceptional conditions |
// ----------------------------------------------------------- |
|
wire so1, sx1; |
reg [3:0] st; |
wire done1a; |
|
ft_delay #(.WID(1),.DEP(1)) u8 (.clk(clk), .ce(ce), .i(au.sign ^ bu.sign), .o(so1) );// two clock delay! |
|
always @(posedge clk) |
if (ce) |
casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1}) |
5'b1????: xo1 = infXp; // qNaN - infinity * zero |
5'b01???: xo1 = infXp; // 'a' infinite |
5'b001??: xo1 = infXp; // 'b' infinite |
5'b0001?: xo1 = infXp; // result overflow |
5'b00001: xo1 = ex2[13:0];//0; // underflow |
default: xo1 = ex2[13:0]; // situation normal |
endcase |
|
// Force mantissa to zero when underflow or zero exponent when not supporting denormals. |
always @(posedge clk) |
if (ce) |
casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1}) |
6'b1?????: mo1 = {4'h1,a1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b01????: mo1 = {4'h1,b1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b001???: mo1 = {4'h1,qNaN|3'd4,{N*4{1'b0}}}; // multiply inf * zero |
6'b0001??: mo1 = 0; // mul inf's |
6'b00001?: mo1 = 0; // mul inf's |
6'b000001: mo1 = 0; // mul overflow |
default: mo1 = sig1; |
endcase |
|
ft_delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) ); |
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) ); |
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) ); |
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) ); |
ft_delay #(.WID(1),.DEP(3)) u18b (.clk(clk), .ce(ce), .i(done1), .o(done1a) ); |
|
assign o.nan = aNan1|bNan1|qNaNOut; |
assign o.qnan = qNaNOut|aqnan|bqnan; |
assign o.snan = qNaNOut ? 1'b0 : asnan|bsnan; |
assign o.infinity = aInf1|bInf1|over; |
assign o.sign = so1; |
assign o.exp = xo1; |
assign o.sig = {mo1,8'h00}; |
assign done = done1&done1a; |
|
endmodule |
|
|
// Multiplier with normalization and rounding. |
|
module DFPMultiply128nr(clk, ce, ld, a, b, o, rm, sign_exe, inf, overflow, underflow, done); |
localparam N=34; |
input clk; |
input ce; |
input ld; |
input DFP128 a, b; |
output DFP128 o; |
input [2:0] rm; |
output sign_exe; |
output inf; |
output overflow; |
output underflow; |
output done; |
|
wire done1, done1a; |
DFP128UD o1; |
wire sign_exe1, inf1, overflow1, underflow1; |
DFP128UN fpn0; |
|
DFPMultiply128 u1 (clk, ce, ld, a, b, o1, sign_exe1, inf1, overflow1, underflow1, done1); |
DFPNormalize128 u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
DFPRound128 u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe)); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow)); |
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow)); |
ft_delay #(.WID(1),.DEP(12)) u10 (.clk(clk), .ce(ce), .i(done1), .o(done1a) ); |
assign done = done1 & done1a; |
|
endmodule |
/trunk/rtl/verilog2/DFPRound128.sv
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2021 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
52,7 → 52,7
input ce; |
input [2:0] rm; // rounding mode |
input DFP128UN i; // intermediate format input |
output DFP128U o; // rounded output |
output DFP128 o; // packed rounded output |
|
parameter ROUND_CEILING = 3'd0; |
parameter ROUND_FLOOR = 3'd1; |
71,16 → 71,7
reg [N*4-1:0] mo1; |
wire xInf = i.exp==14'h3FFF; |
wire so0 = i.sign; |
assign o = {so,xo,mo}; |
|
assign o.nan = nano; |
assign o.qnan = qnano; |
assign o.snan = snano; |
assign o.infinity = infinity; |
assign o.sign = so; |
assign o.exp = xo; |
assign o.sig = mo; |
|
wire [3:0] l = i.sig[7:4]; |
wire [3:0] r = i.sig[3:0]; |
|
180,4 → 171,24
4'b11??: mo <= {N*4{1'd0}}; // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite |
endcase |
|
//------------------------------------------------------------ |
// Clock #4 |
// - Pack output |
//------------------------------------------------------------ |
|
DFP128U o1; |
DFP128 o2; |
|
assign o1.nan = nano; |
assign o1.qnan = qnano; |
assign o1.snan = snano; |
assign o1.infinity = infinity; |
assign o1.sign = so; |
assign o1.exp = xo; |
assign o1.sig = mo; |
|
DFPPack128 u41 (o1, o2); |
always_ff @(posedge clk) |
if (ce) o <= o2; |
|
endmodule |
/trunk/rtl/verilog2/df128Toi.sv
59,7 → 59,7
DFPUnpack128 uunpk1 (i, ui); |
|
wire [127:0] maxInt = op ? {1'd0,{127{1'b1}}} : {128{1'b1}}; // maximum integer value |
wire [13:0] zeroXp = {1'd0,{13{1'b1}}}; |
wire [13:0] zeroXp = 14'h17FF; |
|
reg sgn; // sign |
always @(posedge clk) |
/trunk/rtl/verilog2/i2df128.sv
66,7 → 66,7
); |
|
DFP128U ui; |
wire [13:0] zeroXp = {13{1'b1}}; |
wire [13:0] zeroXp = 14'h17FF; |
|
reg iz; // zero input ? |
wire [7:0] lz; // count the leading zeros in the number |
/trunk/test_bench/DFPAddsub128_tb.v
0,0 → 1,149
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPAddsub128_tb.v |
// - decimal floating point addsub test bench |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
module DFPAddsub128_tb(); |
reg rst; |
reg clk; |
reg [15:0] adr; |
reg [127:0] a,b; |
wire [127:0] o; |
reg [127:0] ad,bd; |
reg [127:0] od; |
reg [3:0] rm; |
|
integer n; |
reg [127:0] a1, b1; |
wire [63:0] doubleA = {a[31], a[30], {3{~a[30]}}, a[29:23], a[22:0], {29{1'b0}}}; |
wire [63:0] doubleB = {b[31], b[30], {3{~b[30]}}, b[29:23], b[22:0], {29{1'b0}}}; |
|
integer outfile; |
|
initial begin |
rst = 1'b0; |
clk = 1'b0; |
adr = 0; |
a = $urandom(1); |
b = 1; |
#20 rst = 1; |
#50 rst = 0; |
#10000000 $fclose(outfile); |
#10 $finish; |
end |
|
always #5 |
clk = ~clk; |
|
genvar g; |
generate begin : gRand |
for (g = 0; g < 128; g = g + 4) begin |
always @(posedge clk) begin |
a1[g+3:g] <= $urandom() % 10; |
b1[g+3:g] <= $urandom() % 10; |
end |
end |
end |
endgenerate |
|
reg [7:0] count; |
always @(posedge clk) |
if (rst) begin |
adr <= 0; |
count <= 0; |
end |
else |
begin |
if (adr==0) begin |
outfile = $fopen("d:/cores2022/rf6809/rtl/dfpu/test_bench/DFPAddsub128_tvo.txt", "wb"); |
$fwrite(outfile, " rm ------- A ------ ------- B ------ ------ sum ----- -- SIM Sum --\n"); |
end |
count <= count + 1; |
if (count > 35) |
count <= 1'd1; |
if (count==2) begin |
a <= a1; |
b <= b1; |
a[127:124] <= 4'h5; |
b[127:124] <= 4'h5; |
rm <= adr[14:12]; |
//ad <= memd[adr][63: 0]; |
//bd <= memd[adr][127:64]; |
end |
if (adr==1 && count==2) begin |
a <= 128'h50000700000000000000000000000000; |
b <= 128'h50000200000000000000000000000000; |
end |
if (adr==2 && count==2) begin |
a <= 128'h50000900000000000000000000000000; |
b <= 128'h50000200000000000000000000000000; |
end |
if (adr==3 && count==2) begin |
a <= 128'h50000000000000000000000000000000; |
b <= 128'h50000000000000000000000000000000; |
end |
if (adr==4 && count==2) begin |
a <= 128'h44000000000000000000000000000000; |
b <= 128'h44000000000000000000000000000000; |
end |
if (adr==5 && count==2) begin |
a <= 128'h44004000000000000000000000000000; |
b <= 128'h44004000000000000000000000000000; |
end |
if (count==35) begin |
if (adr[11]) begin |
$fwrite(outfile, "%c%h\t%h\t%h\t%h\n", "-",rm, a, b, o); |
end |
else begin |
$fwrite(outfile, "%c%h\t%h\t%h\t%h\n", "+",rm, a, b, o); |
end |
adr <= adr + 1; |
end |
end |
|
//fpMulnr #(64) u1 (clk, 1'b1, a, b, o, rm);//, sign_exe, inf, overflow, underflow); |
DFPAddsub128nr u6 ( |
.clk(clk), |
.ce(1'b1), |
.op(adr[11]), |
.a(a), |
.b(b), |
.o(o), |
.rm(rm) |
); |
|
endmodule |
/trunk/test_bench/DFPMultiply128_tb.v
0,0 → 1,139
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPMultiply128_tb.v |
// - decimal floating point multiplier test bench |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// Floating Point Multiplier / Divider |
// |
// This multiplier/divider handles denormalized numbers. |
// The output format is of an internal expanded representation |
// in preparation to be fed into a normalization unit, then |
// rounding. Basically, it's the same as the regular format |
// except the mantissa is doubled in size, the leading two |
// bits of which are assumed to be whole bits. |
// |
// |
// ============================================================================ |
|
module DFPMultiply128_tb(); |
parameter N=33; |
reg rst; |
reg clk; |
reg [15:0] adr; |
reg [127:0] a,b; |
wire [127:0] o; |
reg [3:0] rm; |
|
integer n; |
reg [127:0] a1, b1; |
wire done; |
reg ld; |
|
integer outfile; |
|
initial begin |
rst = 1'b0; |
clk = 1'b0; |
adr = 0; |
a = $urandom(1); |
#20 rst = 1; |
#50 rst = 0; |
#2000000 $fclose(outfile); |
#10 $finish; |
end |
|
always #5 |
clk = ~clk; |
|
genvar g; |
generate begin : gRand |
for (g = 0; g < N*4+16+4; g = g + 4) begin |
always @(posedge clk) begin |
a1[g+3:g] <= $urandom() % 16; |
b1[g+3:g] <= $urandom() % 16; |
end |
end |
end |
endgenerate |
|
reg [9:0] count; |
always @(posedge clk) |
if (rst) begin |
adr <= 0; |
count <= 0; |
end |
else |
begin |
ld <= 1'b0; |
if (adr==0) begin |
outfile = $fopen("d:/cores2022/rf6809/rtl/dfpu/test_bench/DFPMultiply128_tvo.txt", "wb"); |
$fwrite(outfile, "rm ------ A ------ ------- B ------ - DUT Product - - SIM Product -\n"); |
end |
count <= count + 1; |
if (count > 750) |
count <= 1'd1; |
if (count==2) begin |
a <= a1; |
b <= b1; |
rm <= adr[15:13]; |
ld <= 1'b1; |
//ad <= memd[adr][63: 0]; |
//bd <= memd[adr][127:64]; |
end |
if (adr==1 && count==2) begin |
a <= 128'h25ffc000000000000000000000000000; // 1 |
b <= 128'h25ffc000000000000000000000000000; // 1 |
end |
if (adr==2 && count==2) begin |
a <= 128'h26000000000000000000000000000000; // 10 |
b <= 128'h26000000000000000000000000000000; // 10 |
end |
if (adr==3 && count==2) begin |
a <= 128'h26004000000000000000000000000000; // 100 |
b <= 128'h26004000000000000000000000000000; // 100 |
end |
if (adr==4 && count==2) begin |
a <= 128'h26008000000000000000000000000000; // 1000 |
b <= 128'h26008000000000000000000000000000; // 1000 |
end |
if (adr==5 && count==2) begin |
a <= 128'h2601934B9C0C00000000000000000000; // 12345678 |
b <= 128'h26000000000000000000000000000000; // 10 |
end |
if (adr==6 && count==2) begin |
a <= 128'h44000000000000000000000000000000; |
b <= 128'h44000000000000000000000000000000; |
end |
if (adr==7 && count==2) begin |
a <= 128'h44004000000000000000000000000000; |
b <= 128'h44004000000000000000000000000000; |
end |
if (count==750) begin |
$fwrite(outfile, "%h\t%h\t%h\t%h\n", rm, a, b, o); |
adr <= adr + 1; |
end |
end |
|
//fpMulnr #(64) u1 (clk, 1'b1, a, b, o, rm);//, sign_exe, inf, overflow, underflow); |
DFPMultiply128nr u6 (clk, 1'b1, ld, a, b, o, rm, done);//, sign_exe, inf, overflow, underflow); |
|
endmodule |
/trunk/test_bench/df128Toi_tb.sv
89,19 → 89,19
if (count > 140) |
count <= 1'd1; |
if (adr==1) begin |
flt <= 128'h27ffc000000000000000000000000000; |
flt <= 128'h25ffc000000000000000000000000000; // 1 |
end |
if (adr==2) begin |
flt <= 128'h44000000000000000000000000000000; |
flt <= 128'h26000000000000000000000000000000; // 10 |
end |
if (adr==3) begin |
flt <= 128'h44004000000000000000000000000000; |
flt <= 128'h26004000000000000000000000000000; // 100 |
end |
if (adr==4) begin |
flt <= 128'h44008000000000000000000000000000; |
flt <= 128'h26008000000000000000000000000000; // 1000 |
end |
if (adr==5) begin |
flt <= 128'h48000000000000000000000000000000; |
flt <= 128'h2601934B9C0C00000000000000000000; // 12345678 |
end |
if (count==140) begin |
$fwrite(outfile, "%c %h\t%h%c\n", adr[11] ? "s" : "u", flt, bin, vf ? "v": " "); |