URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk
- from Rev 54 to Rev 55
- ↔ Reverse comparison
Rev 54 → Rev 55
/rtl/verilog2/DFPAddsub.sv
36,13 → 36,14
// ============================================================================ |
|
module DFPAddsub(clk, ce, rm, op, a, b, o); |
parameter N=33; |
input clk; |
input ce; |
input [2:0] rm; |
input op; |
input [127:0] a; |
input [127:0] b; |
output [243:0] o; |
input [N*4+16+4-1:0] a; |
input [N*4+16+4-1:0] b; |
output [(N+1)*4*2+16+4-1:0] o; |
|
parameter TRUE = 1'b1; |
parameter FALSE = 1'b0; |
54,7 → 55,7
wire ainf, binf; |
wire aNan, bNan; |
wire [15:0] xa, xb; |
wire [107:0] siga, sigb; |
wire [N*4-1:0] siga, sigb; |
|
wire [15:0] xabdif4; |
BCDSub4 ubcds1( |
66,10 → 67,10
.c8() |
); |
|
wire [111:0] oss10; |
wire [(N+1)*4-1:0] oss10; |
wire oss10c; |
|
BCDAddN #(.N(28)) ubcdan1 |
BCDAddN #(.N(N+1)) ubcdan1 |
( |
.ci(1'b0), |
.a(oaa10), |
78,10 → 79,10
.co(oss10c) |
); |
|
wire [111:0] odd10; |
wire [(N+1)*4-1:0] odd10; |
wire odd10c; |
|
BCDSubN #(.N(28)) ubcdsn1 |
BCDSubN #(.N(N+1)) ubcdsn1 |
( |
.ci(1'b0), |
.a(oaa10), |
96,8 → 97,8
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg op1; |
|
DFPDecomposeReg u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa), .sx(sxa), .exp(xa), .sig(siga), .xz(adn), .vz(az), .inf(aInf), .nan(aNan) ); |
DFPDecomposeReg u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb), .sx(sxb), .exp(xb), .sig(sigb), .xz(bdn), .vz(bz), .inf(bInf), .nan(bNan) ); |
DFPDecomposeReg #(.N(N)) u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa), .sx(sxa), .exp(xa), .sig(siga), .xz(adn), .vz(az), .inf(aInf), .nan(aNan) ); |
DFPDecomposeReg #(.N(N)) u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb), .sx(sxb), .exp(xb), .sig(sigb), .xz(bdn), .vz(bz), .inf(bInf), .nan(bNan) ); |
|
always @(posedge clk) |
if (ce) op1 <= op; |
122,7 → 123,7
reg [15:0] xa2, xb2; |
reg az2, bz2; |
reg xa_gt_xb2; |
reg [107:0] siga2, sigb2; |
reg [N*4-1:0] siga2, sigb2; |
reg sigeq, siga_gt_sigb; |
reg xa_gt_xb2; |
reg expeq; |
178,7 → 179,7
reg op3; |
wire sa3, sb3; |
wire [2:0] rm3; |
reg [107:0] mfs3; |
reg [N*4-1:0] mfs3; |
|
always @(posedge clk) |
if (ce) resZero3 <= (realOp2 & expeq & sigeq) || // subtract, same magnitude |
230,22 → 231,22
reg so4; |
always @* |
case ({resZero3,sa3,op3,sb3}) // synopsys full_case parallel_case |
4'b0000: so4 <= 0; // + + + = + |
4'b0001: so4 <= !a_gt_b3; // + + - = sign of larger |
4'b0010: so4 <= !a_gt_b3; // + - + = sign of larger |
4'b0011: so4 <= 0; // + - - = + |
4'b0100: so4 <= a_gt_b3; // - + + = sign of larger |
4'b0101: so4 <= 1; // - + - = - |
4'b0110: so4 <= 1; // - - + = - |
4'b0111: so4 <= a_gt_b3; // - - - = sign of larger |
4'b1000: so4 <= 0; // A + B, sign = + |
4'b1001: so4 <= rm3==3'd3; // A + -B, sign = + unless rounding down |
4'b1010: so4 <= rm3==3'd3; // A - B, sign = + unless rounding down |
4'b1011: so4 <= 0; // +A - -B, sign = + |
4'b1100: so4 <= rm3==3'd3; // -A + B, sign = + unless rounding down |
4'b1101: so4 <= 1; // -A + -B, sign = - |
4'b1110: so4 <= 1; // -A - +B, sign = - |
4'b1111: so4 <= rm3==3'd3; // -A - -B, sign = + unless rounding down |
4'b0000: so4 <= 0; // - + - = - |
4'b0001: so4 <= !a_gt_b3; // - + + = sign of larger |
4'b0010: so4 <= !a_gt_b3; // - - - = sign of larger |
4'b0011: so4 <= 0; // - - + = - |
4'b0100: so4 <= a_gt_b3; // + + - = sign of larger |
4'b0101: so4 <= 1; // + + + = + |
4'b0110: so4 <= 1; // + - - = + |
4'b0111: so4 <= a_gt_b3; // + - + = sign of larger |
4'b1000: so4 <= 0; // -A + -B, sign = - |
4'b1001: so4 <= (rm3==3'd3); // -A + B, sign = + unless rounding down |
4'b1010: so4 <= (rm3==3'd3); // -A - -B, sign = + unless rounding down |
4'b1011: so4 <= 0; // -A - B, sign = - |
4'b1100: so4 <= (rm3==3'd3); // A - B, sign = + unless rounding down |
4'b1101: so4 <= 1; // A + B, sign = + |
4'b1110: so4 <= 1; // A - -B, sign = + |
4'b1111: so4 <= (rm3==3'd3); // -A - -B, sign = + unless rounding down |
endcase |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
266,10 → 267,10
// less) then all of the bits will be shifted out to zero. There is no need to |
// keep track of a difference more than 24. |
reg [11:0] xdif6; |
wire [107:0] mfs6; |
wire [N*4-1:0] mfs6; |
always @(posedge clk) |
if (ce) xdif6 <= xdiff5 > 16'h0024 ? 8'h24 : xdiff5[7:0]; |
delay #(.WID(108), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6)); |
if (ce) xdif6 <= xdiff5 > N ? N : xdiff5[7:0]; |
delay #(.WID(N*4), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #7 |
281,12 → 282,12
reg sticky6; |
wire sticky7; |
wire [7:0] xdif7; |
wire [107:0] mfs7; |
wire [N*4-1:0] mfs7; |
wire [7:0] xdif6a = {xdif6[7:4] * 10 + xdif6[3:0],2'b00}; // Convert base then *4 |
integer n; |
always @* begin |
sticky6 = 1'b0; |
for (n = 0; n < 96; n = n + 4) |
for (n = 0; n < N*4; n = n + 4) |
if (n <= xdif6a) |
sticky6 = sticky6| mfs6[n]|mfs6[n+1]|mfs6[n+2]|mfs6[n+3]; // non-zero nybble |
end |
294,13 → 295,13
// register inputs to shifter and shift |
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky6), .o(sticky7) ); |
delay1 #(8) d15(.clk(clk), .ce(ce), .i(xdif6a), .o(xdif7) ); |
delay1 #(108) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) ); |
delay1 #(N*4) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) ); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #8 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [111:0] md8; |
wire [107:0] siga8, sigb8; |
reg [(N+1)*4-1:0] md8; |
wire [N*4-1:0] siga8, sigb8; |
wire xa_gt_xb8; |
wire a_gt_b8; |
always @(posedge clk) |
309,8 → 310,8
// sync control signals |
delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8)); |
delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8)); |
delay #(.WID(108), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8)); |
delay #(.WID(108), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8)); |
delay #(.WID(N*4), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8)); |
delay #(.WID(N*4), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8)); |
delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
318,7 → 319,7
// Sort operands and perform add/subtract |
// addition can generate an extra bit, subtract can't go negative |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [111:0] oa9, ob9; |
reg [(N+1)*4-1:0] oa9, ob9; |
reg a_gt_b9; |
always @(posedge clk) |
if (ce) oa9 <= xa_gt_xb8 ? {siga8,4'b0} : md8; |
330,8 → 331,8
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #10 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [111:0] oaa10; |
reg [111:0] obb10; |
reg [(N+1)*4-1:0] oaa10; |
reg [(N+1)*4-1:0] obb10; |
wire realOp10; |
reg [15:0] xo10; |
|
345,9 → 346,9
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #11 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [111:0] mab11; |
reg [(N+1)*4-1:0] mab11; |
reg mab11c; |
wire [107:0] siga11, sigb11; |
wire [N*4-1:0] siga11, sigb11; |
wire abInf11; |
wire aNan11, bNan11; |
reg xoinf11; |
362,8 → 363,8
delay #(.WID(1), .DEP(10)) udly11c (.clk(clk), .ce(ce), .i(aNan), .o(aNan11)); |
delay #(.WID(1), .DEP(10)) udly11d (.clk(clk), .ce(ce), .i(bNan), .o(bNan11)); |
delay #(.WID(1), .DEP(3)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11)); |
delay #(.WID(108), .DEP(3)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11)); |
delay #(.WID(108), .DEP(3)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11)); |
delay #(.WID(N*4), .DEP(3)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11)); |
delay #(.WID(N*4), .DEP(3)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11)); |
|
always @(posedge clk) |
if (ce) xoinf11 <= xo10==16'h9999; |
371,7 → 372,7
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #12 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [223:0] mo12; // mantissa output |
reg [(N+1)*4*2-1:0] mo12; // mantissa output |
reg [3:0] st12; |
wire sxo11; |
wire so11; |
391,13 → 392,13
casez({abInf11,aNan11,bNan11,xoinf11}) |
4'b1???: // inf +/- inf - generate QNaN on subtract, inf on add |
if (op11) |
mo12 <= {4'h9,220'd0}; |
mo12 <= {4'h9,{(N+1)*4*2-4{1'd0}}}; |
else |
mo12 <= {56{4'h9}}; |
4'b01??: mo12 <= {4'b0,siga11[107:0],112'd0}; |
4'b001?: mo12 <= {4'b0,sigb11[107:0],112'd0}; |
4'b0001: mo12 <= 224'd0; |
default: mo12 <= {3'b0,mab11c,mab11,108'd0}; // mab has an extra lead bit and four trailing bits |
mo12 <= {(N+1)*2{4'h9}}; |
4'b01??: mo12 <= {4'b0,siga11[107:0],{(N+1)*4{1'd0}}}; |
4'b001?: mo12 <= {4'b0,sigb11[107:0],{(N+1)*4{1'd0}}}; |
4'b0001: mo12 <= {(N+1)*4*2{1'd0}}; |
default: mo12 <= {3'b0,mab11c,mab11,{N*4{1'd0}}}; // mab has an extra lead bit and four trailing bits |
endcase |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
406,12 → 407,12
wire so; // sign output |
wire [3:0] st; |
wire [15:0] xo; // de normalized exponent output |
wire [223:0] mo; // mantissa output |
wire [(N+1)*4*2-1:0] mo; // mantissa output |
|
delay #(.WID(4), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(st12), .o(st[3:0]) ); |
delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(so)); |
delay #(.WID(16), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(xo)); |
delay #(.WID(224), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(mo12), .o(mo) ); |
delay #(.WID((N+1)*4*2), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(mo12), .o(mo) ); |
|
assign o = {st,xo,mo}; |
|
419,16 → 420,17
|
|
module DFPAddsubnr(clk, ce, rm, op, a, b, o); |
parameter N=33; |
input clk; // system clock |
input ce; // core clock enable |
input [2:0] rm; // rounding mode |
input op; // operation 0 = add, 1 = subtract |
input [127:0] a; // operand a |
input [127:0] b; // operand b |
output [127:0] o; // output |
input [N*4+16+4-1:0] a; // operand a |
input [N*4+16+4-1:0] b; // operand b |
output [N*4+16+4-1:0] o; // output |
|
wire [243:0] o1; |
wire [131:0] fpn0; |
wire [(N+1)*4*2+16+4-1:0] o1; |
wire [N*4+16+4-1+4:0] fpn0; |
|
DFPAddsub u1 (clk, ce, rm, op, a, b, o1); |
DFPNormalize u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) ); |
/rtl/verilog2/DFPDecompose.sv
36,22 → 36,23
// ============================================================================ |
|
module DFPDecompose(i, sgn, sx, exp, sig, xz, vz, inf, nan); |
input [127:0] i; |
parameter N=33; |
input [(N*4)+16+4-1:0] i; |
output sgn; |
output sx; |
output [15:0] exp; |
output [107:0] sig; |
output [N*4-1:0] sig; |
output xz; |
output vz; |
output inf; |
output nan; |
|
assign nan = i[127]; |
assign sgn = i[126]; |
assign inf = i[125]; |
assign sx = i[124]; |
assign exp = i[123:108]; |
assign sig = i[107:0]; |
assign nan = i[N*4+19]; |
assign sgn = i[N*4+18]; |
assign inf = i[N*4+17]; |
assign sx = i[N*4+16]; |
assign exp = i[N*4+15:N*4]; |
assign sig = i[N*4-1:0]; |
assign xz = ~|exp; |
assign vz = ~|{exp,sig}; |
|
59,13 → 60,14
|
|
module DFPDecomposeReg(clk, ce, i, sgn, sx, exp, sig, xz, vz, inf, nan); |
parameter N=33; |
input clk; |
input ce; |
input [127:0] i; |
input [N*4+16+4-1:0] i; |
output reg sgn; |
output reg sx; |
output reg [15:0] exp; |
output reg [107:0] sig; |
output reg [N*4-1:0] sig; |
output reg xz; |
output reg vz; |
output reg inf; |
73,12 → 75,12
|
always @(posedge clk) |
if (ce) begin |
nan <= i[127]; |
sgn <= i[126]; |
inf <= i[125]; |
sx <= i[124]; |
exp <= i[123:108]; |
sig <= i[107:0]; |
nan <= i[N*4+19]; |
sgn <= i[N*4+18]; |
inf <= i[N*4+17]; |
sx <= i[N*4+16]; |
exp <= i[N*4+15:N*4]; |
sig <= i[N*4-1:0]; |
xz <= ~|exp; |
vz <= ~|{exp,sig}; |
end |
/rtl/verilog2/DFPDivide.sv
47,6 → 47,7
import fp::*; |
|
module DFPDivide(rst, clk, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow); |
parameter N=33; |
// FADD is a constant that makes the divider width a multiple of four and includes eight extra bits. |
input rst; |
input clk; |
53,8 → 54,8
input ce; |
input ld; |
input op; |
input [127:0] a, b; |
output [243:0] o; |
input [N*4+16+4-1:0] a, b; |
output [(N+1)*4*2+16+4-1:0] o; |
output reg done; |
output sign_exe; |
output overflow; |
69,7 → 70,7
reg so, sxo; |
reg [3:0] st; |
reg [15:0] xo; |
reg [223:0] mo; |
reg [(N+1)*4*2-1:0] mo; |
assign o = {st,xo,mo}; |
|
// constants |
77,16 → 78,16
// The following is the value for an exponent of zero, with the offset |
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc. |
// The following is a template for a quiet nan. (MSB=1) |
wire [107:0] qNaN = {4'h1,{104{1'b0}}}; |
wire [N*4-1:0] qNaN = {4'h1,{(N-1)*4{1'b0}}}; |
|
// variables |
wire [231:0] divo; |
wire [(N+2)*4*2-1:0] divo; |
|
// Operands |
wire sa, sb; // sign bit |
wire sxa, sxb; |
wire [15:0] xa, xb; // exponent bits |
wire [107:0] siga, sigb; |
wire [N*4-1:0] siga, sigb; |
wire a_dn, b_dn; // a/b is denormalized |
wire az, bz; |
wire aInf, bInf; |
111,9 → 112,9
// ----------------------------------------------------------- |
wire done3a,done3; |
// Perform divide |
dfdiv #(108+8) u2 (.clk(clk), .ld(ld1), .a({siga,8'b0}), .b({sigb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt)); |
dfdiv #(N+2) u2 (.clk(clk), .ld(ld1), .a({siga,8'b0}), .b({sigb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt)); |
wire [7:0] lzcnt_bin = lzcnt[3:0] + (lzcnt[7:4] * 10); |
wire [231:0] divo1 = divo[231:0] << ({lzcnt_bin,2'b0}+(FPWID+44)); |
wire [(N+2)*4*2-1:0] divo1 = divo[(N+2)*4*2-1:0] << ({lzcnt_bin,2'b0}+(N*4));//WAS FPWID=128?+44 |
delay #(.WID(1), .DEP(3)) u3 (.clk(clk), .ce(ce), .i(done1), .o(done3a)); |
assign done3 = done1&done3a; |
|
208,15 → 209,15
endcase |
|
casez({aNan,bNan,qNaNOut,bInf,bz,over,aInf&bInf,az&bz}) |
8'b1???????: begin mo <= {4'h1,a[107:0],{111{1'b0}}}; st[3] <= 1'b1; end |
8'b01??????: begin mo <= {4'h1,b[107:0],{111{1'b0}}}; st[3] <= 1'b1; end |
8'b001?????: begin mo <= {4'h1,qNaN[107:0]|{aInf,1'b0}|{az,bz},{1111{1'b0}}}; st[3] <= 1'b1; end |
8'b0001????: begin mo <= 224'd0; st[3] <= 1'b0; end // div by inf |
8'b00001???: begin mo <= 224'd0; st[3] <= 1'b0; end // div by zero |
8'b000001??: begin mo <= 224'd0; st[3] <= 1'b0; end // Inf exponent |
8'b0000001?: begin mo <= {4'h1,qNaN|`QINFDIV,{111{1'b0}}}; st[3] <= 1'b1; end // infinity / infinity |
8'b00000001: begin mo <= {4'h1,qNaN|`QZEROZERO,{111{1'b0}}}; st[3] <= 1'b1; end // zero / zero |
default: begin mo <= divo1[231:8]; st[3] <= 1'b0; end // plain div |
8'b1???????: begin mo <= {4'h1,a[N*4-1:0],{(N+1)*4-1{1'b0}}}; st[3] <= 1'b1; end |
8'b01??????: begin mo <= {4'h1,b[N*4-1:0],{(N+1)*4-1{1'b0}}}; st[3] <= 1'b1; end |
8'b001?????: begin mo <= {4'h1,qNaN[N*4-1:0]|{aInf,1'b0}|{az,bz},{(N+1)*4-1{1'b0}}}; st[3] <= 1'b1; end |
8'b0001????: begin mo <= {(N+1)*4*2-1{1'd0}}; st[3] <= 1'b0; end // div by inf |
8'b00001???: begin mo <= {(N+1)*4*2-1{1'd0}}; st[3] <= 1'b0; end // div by zero |
8'b000001??: begin mo <= {(N+1)*4*2-1{1'd0}}; st[3] <= 1'b0; end // Inf exponent |
8'b0000001?: begin mo <= {4'h1,qNaN|`QINFDIV,{(N+1)*4-1{1'b0}}}; st[3] <= 1'b1; end // infinity / infinity |
8'b00000001: begin mo <= {4'h1,qNaN|`QZEROZERO,{(N+1)*4-1{1'b0}}}; st[3] <= 1'b1; end // zero / zero |
default: begin mo <= divo1[(N+2)*4*2-1:8]; st[3] <= 1'b0; end // plain div |
endcase |
|
st[0] <= sxo; |
232,13 → 233,14
endmodule |
|
module DFPDividenr(rst, clk, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow); |
parameter N=33; |
input rst; |
input clk; |
input ce; |
input ld; |
input op; |
input [127:0] a, b; |
output [127:0] o; |
input [N*4+16+4-1:0] a, b; |
output [N*4+16+4-1:0] o; |
input [2:0] rm; |
output sign_exe; |
output done; |
246,14 → 248,14
output overflow; |
output underflow; |
|
wire [243:0] o1; |
wire [(N+1)*4*2+16+4-1:0] o1; |
wire sign_exe1, inf1, overflow1, underflow1; |
wire [131:0] fpn0; |
wire [N*4+16+4-1+4:0] fpn0; |
wire done1, done1a; |
|
DFPDivide #(FPWID) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1); |
DFPNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
DFPRound #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
DFPDivide #(.N(N)) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1); |
DFPNormalize #(.N(N)) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
DFPRound #(.N(N)) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe)); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow)); |
/rtl/verilog2/DFPMultiply.sv
61,11 → 61,12
//`define DFPMUL_PARALLEL 1'b1 |
|
module DFPMultiply(clk, ce, ld, a, b, o, sign_exe, inf, overflow, underflow, done); |
parameter N=33; |
input clk; |
input ce; |
input ld; |
input [127:0] a, b; |
output [243:0] o; |
input [N*4+16+4-1:0] a, b; |
output [(N+1)*4*2+16+4-1:0] o; |
output sign_exe; |
output inf; |
output overflow; |
80,7 → 81,7
FPWID == 16 ? 2 : 2); |
|
reg [15:0] xo1; // extra bit for sign |
reg [215:0] mo1; |
reg [N*4*2-1:0] mo1; |
|
// constants |
wire [15:0] infXp = 16'h9999; // infinite / NaN - all ones |
87,10 → 88,10
// The following is the value for an exponent of zero, with the offset |
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc. |
// The following is a template for a quiet nan. (MSB=1) |
wire [107:0] qNaN = {4'h1,{104{1'b0}}}; |
wire [N*4-1:0] qNaN = {4'h1,{104{1'b0}}}; |
|
// variables |
reg [215:0] sig1; |
reg [N*4*2-1:0] sig1; |
wire [15:0] ex2; |
|
// Decompose the operands |
97,7 → 98,7
wire sa, sb; // sign bit |
wire [15:0] xa, xb; // exponent bits |
wire sxa, sxb; |
wire [107:0] siga, sigb; |
wire [N*4-1:0] siga, sigb; |
wire a_dn, b_dn; // a/b is denormalized |
wire aNan, bNan, aNan1, bNan1; |
wire az, bz; |
152,11 → 153,11
else |
sum_ex <= sum_ex1; |
|
wire [255:0] sigoo; |
wire [N*4*2-1:0] sigoo; |
`ifdef DFPMUL_PARALLEL |
BCDMul32 u1f (.a({20'h0,siga}),.b({20'h0,sigb}),.o(sigoo)); |
`else |
dfmul u1g |
dfmul #(.N(N)) u1g |
( |
.clk(clk), |
.ld(ld), |
168,7 → 169,7
`endif |
|
always @(posedge clk) |
if (ce) sig1 <= sigoo[215:0]; |
if (ce) sig1 <= sigoo[N*4*2-1:0]; |
|
// Status |
wire under1, over1; |
181,12 → 182,12
|
// determine when a NaN is output |
wire qNaNOut; |
wire [127:0] a1,b1; |
wire [N*4+16+4-1:0] a1,b1; |
delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) ); |
delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) ); |
delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) ); |
delay #(.WID(128),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
delay #(.WID(128),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
delay #(.WID(N*4+16+4),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
delay #(.WID(N*4+16+4),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
|
// ----------------------------------------------------------- |
// Second clock |
215,9 → 216,9
always @(posedge clk) |
if (ce) |
casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1}) |
6'b1?????: mo1 = {4'h1,a1[103:0],108'b0}; |
6'b01????: mo1 = {4'h1,b1[103:0],108'b0}; |
6'b001???: mo1 = {4'h1,qNaN|3'd4,108'b0}; // multiply inf * zero |
6'b1?????: mo1 = {4'h1,a1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b01????: mo1 = {4'h1,b1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b001???: mo1 = {4'h1,qNaN|3'd4,{N*4{1'b0}}}; // multiply inf * zero |
6'b0001??: mo1 = 0; // mul inf's |
6'b00001?: mo1 = 0; // mul inf's |
6'b000001: mo1 = 0; // mul overflow |
247,11 → 248,12
// Multiplier with normalization and rounding. |
|
module DFPMultiplynr(clk, ce, ld, a, b, o, rm, sign_exe, inf, overflow, underflow, done); |
parameter N=33; |
input clk; |
input ce; |
input ld; |
input [127:0] a, b; |
output [127:0] o; |
input [N*4+16+4-1:0] a, b; |
output [N*4+16+4-1:0] o; |
input [2:0] rm; |
output sign_exe; |
output inf; |
260,9 → 262,9
output done; |
|
wire done1, done1a; |
wire [243:0] o1; |
wire [(N+1)*4*2+16+4-1:0] o1; |
wire sign_exe1, inf1, overflow1, underflow1; |
wire [131:0] fpn0; |
wire [N*4+16+4-1+4:0] fpn0; |
|
DFPMultiply u1 (clk, ce, ld, a, b, o1, sign_exe1, inf1, overflow1, underflow1, done1); |
DFPNormalize u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
/rtl/verilog2/DFPNormalize.sv
41,10 → 41,11
import fp::*; |
|
module DFPNormalize(clk, ce, i, o, under_i, under_o, inexact_o); |
parameter N=33; |
input clk; |
input ce; |
input [243:0] i; // expanded format input |
output [131:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit |
input [(N+1)*4*2+16+4-1:0] i; // expanded format input |
output [N*4+16+4-1+4:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit |
input under_i; |
output under_o; |
output inexact_o; |
60,15 → 61,15
reg inf0; |
|
always @* |
xo0 <= i[239:224]; |
xo0 <= i[(N+1)*4*2+15:(N+1)*4*2]; |
always @* |
so0 <= i[242]; // sign doesn't change |
so0 <= i[(N+1)*4*2+16+4-2]; // sign doesn't change |
always @* |
sx0 <= i[240]; |
sx0 <= i[(N+1)*4*2+16+4-4]; |
always @* |
nan0 <= i[243]; |
nan0 <= i[(N+1)*4*2+16+4-1]; |
always @* |
inf0 <= i[241] || xo0==16'h9999 && i[220]; |
inf0 <= i[(N+1)*4*2+16+4-3] || xo0==16'h9999 && i[(N+1)*4*2-4]; |
|
// ---------------------------------------------------------------------------- |
// Clock #1 |
75,8 → 76,8
// - Capture exponent information |
// ---------------------------------------------------------------------------- |
reg xInf1a, xInf1b, xInf1c; |
wire [243:0] i1; |
delay #(.WID(244),.DEP(1)) u11 (.clk(clk), .ce(ce), .i(i), .o(i1)); |
wire [(N+1)*4*2+16+4-1:0] i1; |
delay #(.WID((N+2)*4*2+16+4),.DEP(1)) u11 (.clk(clk), .ce(ce), .i(i), .o(i1)); |
|
always @(posedge clk) |
if (ce) xInf1a <= xo0==16'h9999 & !under_i; |
101,7 → 102,7
delay #(.WID(1),.DEP(2)) u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2)); |
|
always @(posedge clk) |
if (ce) incExpByOne2 <= !xInf1a & i1[220]; |
if (ce) incExpByOne2 <= !xInf1a & i1[(N+1)*4*2-4]; |
|
// ---------------------------------------------------------------------------- |
// Clock #3 |
110,11 → 111,11
// ---------------------------------------------------------------------------- |
|
wire incExpByOne3; |
wire [243:0] i3; |
wire [(N+1)*4*2+16+4-1:0] i3; |
reg [15:0] xo3; |
reg zeroMan3; |
delay #(.WID(1),.DEP(1)) u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3)); |
delay #(.WID(244),.DEP(3)) u33 (.clk(clk), .ce(ce), .i(i[243:0]), .o(i3)); |
delay #(.WID((N+1)*4*2+16+4),.DEP(3)) u33 (.clk(clk), .ce(ce), .i(i[(N+3)*4*2+16+4-1:0]), .o(i3)); |
|
wire [15:0] xo2a; |
BCDAddN #(.N(4)) ubcdan1 |
140,7 → 141,7
// - create sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [115:0] mo4; |
reg [(N+2)*4-1:0] mo4; |
reg inexact4; |
|
always @(posedge clk) |
147,8 → 148,8
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: mo4 <= 1'd0; |
2'b01: mo4 <= {i3[223:112],3'b0,|i3[111:0]}; |
default: mo4 <= {i3[219:108],3'b0,|i3[107:0]}; |
2'b01: mo4 <= {i3[(N+1)*4*2-1:(N+1)*4],3'b0,|i3[(N+1)*4-1:0]}; |
default: mo4 <= {i3[(N+1)*4*2-1-4:N*4],3'b0,|i3[N*4-1:0]}; |
endcase |
|
always @(posedge clk) |
155,8 → 156,8
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: inexact4 <= 1'd0; |
2'b01: inexact4 <= |i3[111:0]; |
default: inexact4 <= |i3[107:0]; |
2'b01: inexact4 <= |i3[(N+1)*4-1:0]; |
default: inexact4 <= |i3[N*4-1:0]; |
endcase |
|
// ---------------------------------------------------------------------------- |
211,7 → 212,7
begin |
got_one = 1'b0; |
lzc = 8'h00; |
for (n = 115; n >= 0; n = n - 4) begin |
for (n = (N+2)*4-1; n >= 0; n = n - 4) begin |
if (!got_one) begin |
if (mo4[n]|mo4[n-1]|mo4[n-2]|mo4[n-3]) |
got_one = 1'b1; |
225,7 → 226,7
`else |
always @(posedge clk) |
if (ce) |
casez(mo4[111:103]) |
casez(mo4[(N+1)*4-1:(N-1)*4-1]) |
8'h00000000: leadingZeros5 <= 8'd2; |
8'h0000????: leadingZeros5 <= 8'd1; |
default: leadingZeros5 <= 8'd0; |
249,11 → 250,11
wire rightOrLeft6; // 0=left,1=right |
wire xInf6; |
wire [15:0] xo6; |
wire [115:0] mo6; |
wire [(N+2)*4-1:0] mo6; |
wire zeroMan6; |
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) ); |
delay #(.WID(16),.DEP(1)) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6)); |
delay #(.WID(116),.DEP(2)) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) ); |
delay #(.WID((N+2)*4),.DEP(2)) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) ); |
delay #(.WID(1),.DEP(1)) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) ); |
delay #(.WID(1),.DEP(3)) u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6)); |
delay #(.WID(1),.DEP(5)) u66 (.clk(clk), .ce(ce), .i(sx0), .o(sx5) ); |
264,7 → 265,7
if (ce) lshiftAmt6 <= {leadingZeros5 > xo5d ? xo5d : leadingZeros5,2'b0}; |
|
always @(posedge clk) |
if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : sx5 ? 1'd0 : xo5d > 14'd24 ? 8'd96 : {xo5d[5:0],2'b00}; // xo2 is negative ! |
if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : sx5 ? 1'd0 : xo5d > N ? N*4 : {xo5d[5:0],2'b00}; // xo2 is negative ! |
|
// ---------------------------------------------------------------------------- |
// Clock edge #7 |
275,7 → 276,7
|
reg [15:0] xo7; |
wire rightOrLeft7; |
reg [115:0] mo7l, mo7r; |
reg [(N+2)*4-1:0] mo7l, mo7r; |
reg St6,St7; |
delay #(.WID(1),.DEP(1)) u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7)); |
|
308,7 → 309,7
always @* |
begin |
St6 = 1'b0; |
for (n = 0; n < 116; n = n + 1) |
for (n = 0; n < (N+2)*4; n = n + 1) |
if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n]; |
end |
always @(posedge clk) |
321,7 → 322,7
|
wire so,sxo,nano,info; |
wire [15:0] xo; |
reg [115:0] mo; |
reg [(N+2)*4-1:0] mo; |
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) ); |
delay #(.WID(16),.DEP(1)) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo)); |
vtdl #(.WID(1)) u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o)); |
333,7 → 334,7
always @(posedge clk) |
if (ce) mo <= rightOrLeft7 ? mo7r|{St7,4'b0} : mo7l; |
|
assign o = {nano,so,info,sxo,xo,mo[115:4]}; |
assign o = {nano,so,info,sxo,xo,mo[(N+2)*4-1:4]}; |
|
endmodule |
|
/rtl/verilog2/DFPRound.sv
48,11 → 48,12
`endif |
|
module DFPRound(clk, ce, rm, i, o); |
parameter N=33; |
input clk; |
input ce; |
input [2:0] rm; // rounding mode |
input [131:0] i; // intermediate format input |
output [127:0] o; // rounded output |
input [N*4+16+4+4-1:0] i; // intermediate format input |
output [N*4+16+4-1:0] o; // rounded output |
|
parameter ROUND_CEILING = 3'd0; |
parameter ROUND_FLOOR = 3'd1; |
64,11 → 65,11
// variables |
wire [3:0] so; |
wire [15:0] xo; |
reg [107:0] mo; |
reg [N*4-1:0] mo; |
reg [15:0] xo1; |
reg [107:0] mo1; |
wire xInf = i[127:112]==16'h9999; |
wire so0 = i[130]; |
reg [N*4-1:0] mo1; |
wire xInf = i[N*4+16+4-1:(N+1)*4]==16'h9999; |
wire so0 = i[N*4+16+4-2]; |
assign o = {so,xo,mo}; |
|
wire [3:0] l = i[7:4]; |
82,9 → 83,9
//------------------------------------------------------------ |
|
always @`PIPE_ADV |
if (ce) xo1 <= i[127:112]; |
if (ce) xo1 <= i[N*4+16+4-1:(N+1)*4]; |
always @`PIPE_ADV |
if (ce) mo1 <= i[111:4]; |
if (ce) mo1 <= i[(N+1)*4-1:4]; |
|
// Compute the round bit |
// Infinities and NaNs are not rounded! |
109,20 → 110,20
// note: exponent increments if there is a carry (can only increment to infinity) |
//------------------------------------------------------------ |
|
wire [123:0] rounded1; |
wire [N*4+16+4-1-4:0] rounded1; |
wire co1; |
|
BCDAddN #(.N(31)) ubcdan1 |
BCDAddN #(.N(N+4)) ubcdan1 |
( |
.ci(1'b0), |
.a({xo1,mo1}), |
.b({123'd0,rnd}), |
.b({{N*4+16+4-1-4{1'd0}},rnd}), |
.o(rounded1), |
.co(co1) |
); |
|
|
reg [123:0] rounded2; |
reg [N*4+16+4-1-4:0] rounded2; |
reg carry2; |
reg rnd2; |
reg dn2; |
135,7 → 136,7
if (ce) rnd2 <= rnd; |
always @`PIPE_ADV |
if (ce) dn2 <= !(|xo1); |
assign xo2 = rounded2[123:108]; |
assign xo2 = rounded2[N*4+16+4-1-4:N*4]; |
|
//------------------------------------------------------------ |
// Clock #3 |
142,10 → 143,10
// - shift mantissa if required. |
//------------------------------------------------------------ |
`ifdef MIN_LATENCY |
assign so = i[131:128]; |
assign so = i[N*4+16+4+3:N*4+16+4]; |
assign xo = xo2; |
`else |
delay3 #(4) u21 (.clk(clk), .ce(ce), .i(i[131:128]), .o(so)); |
delay3 #(4) u21 (.clk(clk), .ce(ce), .i(i[N*4+16+4+3:N*4+16+4]), .o(so)); |
delay1 #(16) u22 (.clk(clk), .ce(ce), .i(xo2), .o(xo)); |
`endif |
|
152,13 → 153,13
always @`PIPE_ADV |
if (ce) |
casez({rnd2,xo2==16'h9999,carry2,dn2}) |
4'b0??0: mo <= mo1[107:0]; // not rounding, not denormalized |
4'b0??1: mo <= mo1[107:0]; // not rounding, denormalized |
4'b1000: mo <= rounded2[107: 0]; // exponent didn't change, number was normalized |
4'b1001: mo <= rounded2[107: 0]; // exponent didn't change, but number was denormalized |
4'b1010: mo <= {4'h1,rounded2[107: 4]}; // exponent incremented (new MSD generated), number was normalized |
4'b1011: mo <= rounded2[107:0]; // exponent incremented (new MSB generated), number was denormalized, number became normalized |
4'b11??: mo <= 108'd0; // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite |
4'b0??0: mo <= mo1[N*4-1:0]; // not rounding, not denormalized |
4'b0??1: mo <= mo1[N*4-1:0]; // not rounding, denormalized |
4'b1000: mo <= rounded2[N*4-1: 0]; // exponent didn't change, number was normalized |
4'b1001: mo <= rounded2[N*4-1: 0]; // exponent didn't change, but number was denormalized |
4'b1010: mo <= {4'h1,rounded2[N*4-1: 4]}; // exponent incremented (new MSD generated), number was normalized |
4'b1011: mo <= rounded2[N*4-1:0]; // exponent incremented (new MSB generated), number was denormalized, number became normalized |
4'b11??: mo <= {N*4{1'd0}}; // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite |
endcase |
|
endmodule |
/rtl/verilog2/DPD1000Decode.sv
0,0 → 1,43
|
// DPD1000Decode
//
// Registered lookup table that converts a 10-bit binary value into three
// packed BCD digits (hundreds in o[11:8], tens in o[7:4], ones in o[3:0]).
// It is the inverse of DPD1000Encode for inputs 0..999.
//
// Ports:
//   clk - clock; the input code is captured on the rising edge
//   i   - 10-bit code to decode
//   o   - 12-bit result, read combinationally from the table using the
//         registered code (so the output follows the input by one clock)
//
// NOTE(review): the table maps plain binary 0..999 to BCD; it is not the
// IEEE 754 densely-packed-decimal declet mapping the module name suggests.
// Confirm that this is the intended encoding.
module DPD1000Decode(clk, i, o);
input clk;
input [9:0] i;
output [11:0] o;

reg [9:0] i1;     // registered copy of the input code (table address)
integer k;        // table initialisation index

(* ram_style="block" *)
reg [11:0] tbl [0:1023];

// Fill the table once. Each decimal digit is isolated with a modulo-10;
// the previous "(k / 10) & 15" masked the binary quotient instead and
// produced a wrong tens digit (e.g. 123 decoded to 12'h1C3).
// Codes 1000..1023 are outside the 0..999 range; the hundreds field keeps
// its original "& 15" form so those entries are unchanged apart from the
// corrected tens digit.
initial begin
  for (k = 0; k < 1024; k = k + 1)
    tbl[k] = (k % 10)
           | (((k / 10) % 10) << 4)
           | (((k / 100) & 15) << 8);
end

// Register the address.
always @(posedge clk)
  i1 <= i;

assign o = tbl[i1];

endmodule
|
// DPDDecodeN
//
// Decodes N independent 10-bit codes in parallel by instantiating one
// DPD1000Decode per code.
//
// Parameters:
//   N   - number of 10-bit codes (default 11)
// Ports:
//   clk - clock, passed to every decoder
//   i   - N packed 10-bit codes; code g occupies i[g*10+9:g*10]
//   o   - N packed 12-bit results; result g occupies o[g*12+11:g*12]
module DPDDecodeN(clk, i, o);
parameter N=11;
input clk;
input [N*10-1:0] i;
output [N*12-1:0] o;

genvar g;

generate begin : gDPD
  for (g = 0; g < N; g = g + 1) begin : gDeclet
    // A module instantiation requires an instance name; the original
    // "DPD1000Decode(clk, ...)" omitted it and could not be elaborated.
    DPD1000Decode u1 (
      .clk(clk),
      .i(i[g*10 +: 10]),
      .o(o[g*12 +: 12])
    );
  end
end
endgenerate

endmodule
/rtl/verilog2/DPD1000Encode.sv
0,0 → 1,22
|
// DPD1000Encode
//
// Combinational conversion of three packed BCD digits into a 10-bit
// binary value: o = ones + 10*tens + 100*hundreds, truncated to 10 bits.
//
// Ports:
//   i - 12-bit input; hundreds in i[11:8], tens in i[7:4], ones in i[3:0]
//   o - 10-bit weighted sum of the three digit fields
module DPD1000Encode(
  input  [11:0] i,
  output [9:0]  o
);

wire [3:0] ones     = i[3:0];
wire [3:0] tens     = i[7:4];
wire [3:0] hundreds = i[11:8];

// The unsized constants widen the arithmetic, so the products do not
// overflow before the final truncation to 10 bits.
assign o = ones + tens * 10 + hundreds * 100;

endmodule
|
// DPD1000EncodeN
//
// Encodes N independent 12-bit (three BCD digit) groups in parallel by
// instantiating one DPD1000Encode per group.
//
// Parameters:
//   N - number of 12-bit groups (default 11)
// Ports:
//   i - N packed 12-bit groups; group g occupies i[g*12+11:g*12]
//   o - N packed 10-bit results; result g occupies o[g*10+9:g*10]
module DPD1000EncodeN #(
  parameter N = 11
) (
  input  [N*12-1:0] i,
  output [N*10-1:0] o
);

genvar g;
generate begin : gDPDEncodeN
  for (g = 0; g < N; g = g + 1)
    DPD1000Encode u1 (
      .i(i[g*12 +: 12]),
      .o(o[g*10 +: 10])
    );
end
endgenerate

endmodule
/rtl/verilog2/dfdiv.v
37,7 → 37,8
// ============================================================================ |
|
module dfdiv(clk, ld, a, b, q, r, done, lzcnt); |
parameter FPWID = 108; |
parameter N=33; |
localparam FPWID = N*4; |
parameter RADIX = 10; |
localparam FPWID1 = FPWID;//((FPWID+2)/3)*3; // make FPWIDth a multiple of three |
localparam DMSB = FPWID1-1; |
57,7 → 58,7
parameter DONE = 2'd2; |
|
reg [3:0] cnt; // iteration count |
reg [5:0] dcnt; // digit count |
reg [7:0] dcnt; // digit count |
reg [9:0] clkcnt; |
reg [FPWID*2-1:0] qi = 0; |
reg [FPWID+4-1:0] ri = 0; |
168,7 → 169,7
|
always #5 clk = ~clk; |
|
dfdiv #(108) u1 ( |
dfdiv #(.N(27)) u1 ( |
.clk(clk), |
.ld(ld), |
.a(a), |
/rtl/verilog2/dfmul.sv
37,7 → 37,8
// ============================================================================ |
|
module dfmul(clk, ld, a, b, p, done); |
parameter FPWID = 108; |
parameter N=33; |
localparam FPWID = N*4; |
parameter RADIX = 10; |
localparam FPWID1 = FPWID;//((FPWID+2)/3)*3; // make FPWIDth a multiple of three |
localparam DMSB = FPWID1-1; |
55,7 → 56,7
parameter DONE = 2'd2; |
|
reg [3:0] cnt; // iteration count |
reg [5:0] dcnt; // digit count |
reg [7:0] dcnt; // digit count |
reg [9:0] clkcnt; |
reg [FPWID*2-1:0] pi = 0; |
reg [FPWID-1:0] ai = 0; |
131,7 → 132,7
|
always #5 clk = ~clk; |
|
dfmul #(108) u1 ( |
dfmul #(27) u1 ( |
.clk(clk), |
.ld(ld), |
.a(a), |
/test_bench/DFPAddsub_tb.v
41,14 → 41,14
reg rst; |
reg clk; |
reg [15:0] adr; |
reg [127:0] a,b; |
wire [127:0] o; |
reg [127:0] ad,bd; |
reg [127:0] od; |
reg [151:0] a,b; |
wire [151:0] o; |
reg [151:0] ad,bd; |
reg [151:0] od; |
reg [3:0] rm; |
|
integer n; |
reg [127:0] a1, b1; |
reg [151:0] a1, b1; |
wire [63:0] doubleA = {a[31], a[30], {3{~a[30]}}, a[29:23], a[22:0], {29{1'b0}}}; |
wire [63:0] doubleB = {b[31], b[30], {3{~b[30]}}, b[29:23], b[22:0], {29{1'b0}}}; |
|
71,7 → 71,7
|
genvar g; |
generate begin : gRand |
for (g = 0; g < 128; g = g + 4) begin |
for (g = 0; g < 152; g = g + 4) begin |
always @(posedge clk) begin |
a1[g+3:g] <= $urandom() % 10; |
b1[g+3:g] <= $urandom() % 10; |
96,25 → 96,25
if (count > 32) |
count <= 1'd1; |
if (count==2) begin |
a[127:0] <= a1; |
b[127:0] <= b1; |
a[127:124] <= 4'h5; |
b[127:124] <= 4'h5; |
a <= a1; |
b <= b1; |
a[151:148] <= 4'h5; |
b[151:148] <= 4'h5; |
rm <= adr[14:12]; |
//ad <= memd[adr][63: 0]; |
//bd <= memd[adr][127:64]; |
end |
if (adr==1 && count==2) begin |
a <= 127'h50000700000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000700000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==2 && count==2) begin |
a <= 127'h50000900000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000900000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==3 && count==2) begin |
a <= 127'h50000000000000000000000000000000; |
b <= 127'h50000000000000000000000000000000; |
a <= 152'h50000000000000000000000000000000000000; |
b <= 152'h50000000000000000000000000000000000000; |
end |
if (count==31) begin |
if (adr[11]) begin |
/test_bench/DFPDivide_tb.v
49,18 → 49,19
// ============================================================================ |
|
module DFPDivide_tb(); |
parameter N=33; |
reg rst; |
reg clk; |
reg [15:0] adr; |
reg [127:0] a,b; |
wire [127:0] o; |
reg [127:0] ad,bd; |
wire [127:0] od; |
reg [N*4+16+4-1:0] a,b; |
wire [N*4+16+4-1:0] o; |
reg [N*4+16+4-1:0] ad,bd; |
wire [N*4+16+4-1:0] od; |
reg [3:0] rm; |
wire done; |
|
integer n; |
reg [127:0] a1, b1; |
reg [N*4+16+4-1:0] a1, b1; |
reg [39:0] sum_cc; |
|
wire [63:0] doubleA = {a[31], a[30], {3{~a[30]}}, a[29:23], a[22:0], {29{1'b0}}}; |
85,7 → 86,7
|
genvar g; |
generate begin : gRand |
for (g = 0; g < 128; g = g + 4) begin |
for (g = 0; g < N*4+16+4; g = g + 4) begin |
always @(posedge clk) begin |
a1[g+3:g] <= $urandom() % 10; |
b1[g+3:g] <= $urandom() % 10; |
109,42 → 110,42
sum_cc = 0; |
end |
count <= count + 1; |
if (count > 700) |
if (count > 750) |
count <= 1'd1; |
if (count==2) begin |
a[127:0] <= a1; |
b[127:0] <= b1; |
a[127:124] <= 4'h5; |
b[127:124] <= 4'h5; |
a[N*4+16+4-1:0] <= a1; |
b[N*4+16+4-1:0] <= b1; |
a[N*4+16+4-1:N*4+16+4-4] <= 4'h5; |
b[N*4+16+4-1:N*4+16+4-4] <= 4'h5; |
rm <= adr[15:13]; |
//ad <= memd[adr][63: 0]; |
//bd <= memd[adr][127:64]; |
end |
if (adr==1 && count==2) begin |
a <= 127'h50000700000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000700000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==1 && count==2) begin |
a <= 127'h50000100000000000000000000000000; |
b <= 127'h50000300000000000000000000000000; |
a <= 152'h50000100000000000000000000000000000000; |
b <= 152'h50000300000000000000000000000000000000; |
end |
if (adr==2 && count==2) begin |
a <= 127'h50000900000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000900000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==3 && count==2) begin |
a <= 127'h50000000000000000000000000000000; |
b <= 127'h50000000000000000000000000000000; |
a <= 152'h50000000000000000000000000000000000000; |
b <= 152'h50000000000000000000000000000000000000; |
end |
if (adr==4 && count==2) begin |
a <= 127'h50001100000000000000000000000000; |
b <= 127'h50001100000000000000000000000000; |
a <= 152'h50001100000000000000000000000000000000; |
b <= 152'h50001100000000000000000000000000000000; |
end |
if (adr==4 && count==2) begin |
a <= 127'h50000100000000000000000000000000; |
b <= 127'h50000300000000000000000000000000; |
a <= 152'h50000100000000000000000000000000000000; |
b <= 152'h50000300000000000000000000000000000000; |
end |
if (count > 700) begin |
if (count > 750) begin |
sum_cc = sum_cc + u6.u1.u2.clkcnt; |
$fwrite(outfile, "%h\t%h\t%h\t%h\t%d\t%f\n", rm, a, b, o, u6.u1.u2.clkcnt, $itor(sum_cc) / $itor(adr)); |
adr <= adr + 1; |
152,7 → 153,7
end |
|
//fpMulnr #(64) u1 (clk, 1'b1, a, b, o, rm);//, sign_exe, inf, overflow, underflow); |
DFPDividenr u6 ( |
DFPDividenr #(.N(N)) u6 ( |
.rst(rst), |
.clk(clk), |
.ce(1'b1), |
/test_bench/DFPMultiply_tb.v
35,13 → 35,14
// ============================================================================ |
|
module DFPMultiply_tb(); |
parameter N=33; |
reg rst; |
reg clk; |
reg [15:0] adr; |
reg [127:0] a,b; |
wire [127:0] o; |
reg [127:0] ad,bd; |
wire [127:0] od; |
reg [N*4+16+4-1:0] a,b; |
wire [N*4+16+4-1:0] o; |
reg [N*4+16+4-1:0] ad,bd; |
wire [N*4+16+4-1:0] od; |
reg [3:0] rm; |
|
integer n; |
60,7 → 61,7
a = $urandom(1); |
#20 rst = 1; |
#50 rst = 0; |
#1000000 $fclose(outfile); |
#2000000 $fclose(outfile); |
#10 $finish; |
end |
|
69,7 → 70,7
|
genvar g; |
generate begin : gRand |
for (g = 0; g < 128; g = g + 4) begin |
for (g = 0; g < N*4+16+4; g = g + 4) begin |
always @(posedge clk) begin |
a1[g+3:g] <= $urandom() % 10; |
b1[g+3:g] <= $urandom() % 10; |
92,13 → 93,17
$fwrite(outfile, "rm ------ A ------ ------- B ------ - DUT Product - - SIM Product -\n"); |
end |
count <= count + 1; |
if (count > 600) |
if (count > 750) |
count <= 1'd1; |
if (count==2) begin |
a[127:0] <= a1; |
b[127:0] <= b1; |
a[127:124] <= 4'h5; |
b[127:124] <= 4'h5; |
a[N*4+16+4-1:0] <= a1; |
b[N*4+16+4-1:0] <= b1; |
a[N*4+16+4-1:N*4+16+4-4] <= 4'h5; |
b[N*4+16+4-1:N*4+16+4-4] <= 4'h5; |
a[N*4+16+4-2] <= adr[7]; |
a[N*4+16+4-3] <= adr[6]; |
b[N*4+16+4-1] <= adr[5]; |
b[N*4+16+4-4] <= adr[4]; |
ld <= 1'b1; |
rm <= adr[15:13]; |
//ad <= memd[adr][63: 0]; |
105,26 → 110,26
//bd <= memd[adr][127:64]; |
end |
if (adr==1 && count==2) begin |
a <= 127'h50000700000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000700000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==1 && count==2) begin |
a <= 127'h40001333333333333333333333333333; |
b <= 127'h50000300000000000000000000000000; |
a <= 152'h40001333333333333333333333333333333333; |
b <= 152'h50000300000000000000000000000000000000; |
end |
if (adr==2 && count==2) begin |
a <= 127'h50000900000000000000000000000000; |
b <= 127'h50000200000000000000000000000000; |
a <= 152'h50000900000000000000000000000000000000; |
b <= 152'h50000200000000000000000000000000000000; |
end |
if (adr==3 && count==2) begin |
a <= 127'h50000000000000000000000000000000; |
b <= 127'h50000000000000000000000000000000; |
a <= 152'h50000000000000000000000000000000000000; |
b <= 152'h50000000000000000000000000000000000000; |
end |
if (adr==4 && count==2) begin |
a <= 127'h50001100000000000000000000000000; |
b <= 127'h50001100000000000000000000000000; |
a <= 152'h50001100000000000000000000000000000000; |
b <= 152'h50001100000000000000000000000000000000; |
end |
if (count==600) begin |
if (count==750) begin |
$fwrite(outfile, "%h\t%h\t%h\t%h\n", rm, a, b, o); |
adr <= adr + 1; |
end |