URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float
- from Rev 49 to Rev 50
- ↔ Reverse comparison
Rev 49 → Rev 50
/trunk/rtl/verilog2/BCDMath.v
0,0 → 1,409
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2012-2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// BCDMath.v |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
// |
// Two-digit packed-BCD adder: o = a + b + ci, computed one digit at a time.
//   ci  : carry in to the low digit
//   a,b : operands, low digit in [3:0], high digit in [7:4]
//   o   : two-digit sum
//   c   : carry out of the high digit
module BCDAdd(ci,a,b,o,c);
input ci;		// carry input
input [7:0] a;
input [7:0] b;
output [7:0] o;
output c;

wire lowCarry;			// carry from the low digit into the high digit
wire [4:0] lowSum;
wire [4:0] highSum;

// Raw binary digit sums; five bits wide so the binary carry is retained.
assign lowSum  = a[3:0] + b[3:0] + ci;
assign highSum = a[7:4] + b[7:4] + lowCarry;

// Correct each raw sum back to a decimal digit plus carry.
BCDAddAdjust u1 (.i(lowSum),  .o(o[3:0]), .c(lowCarry));
BCDAddAdjust u2 (.i(highSum), .o(o[7:4]), .c(c));

endmodule
|
// Four-digit packed-BCD adder: o = a + b + ci, rippling the carry digit by digit.
//   ci  : carry in to the lowest digit
//   a,b : operands, four digits each, least significant digit in [3:0]
//   o   : four-digit sum
//   c   : carry out of the top (fourth) digit
//   c8  : carry out of the second digit (i.e. out of the low eight bits)
module BCDAdd4(ci,a,b,o,c,c8);
input ci;		// carry input
input [15:0] a;
input [15:0] b;
output [15:0] o;
output c;
output c8;

wire carry0, carry1, carry2;	// carries out of digits 0, 1 and 2
wire [4:0] sum0, sum1, sum2, sum3;

assign c8 = carry1;

// Raw binary digit sums; five bits wide so the binary carry is retained.
assign sum0 = a[3:0]   + b[3:0]   + ci;
assign sum1 = a[7:4]   + b[7:4]   + carry0;
assign sum2 = a[11:8]  + b[11:8]  + carry1;
assign sum3 = a[15:12] + b[15:12] + carry2;

BCDAddAdjust u1 (.i(sum0), .o(o[3:0]),   .c(carry0));
BCDAddAdjust u2 (.i(sum1), .o(o[7:4]),   .c(carry1));
BCDAddAdjust u3 (.i(sum2), .o(o[11:8]),  .c(carry2));
BCDAddAdjust u4 (.i(sum3), .o(o[15:12]), .c(c));

endmodule
|
// N-digit packed-BCD ripple adder: o = a + b + ci.
//   N   : number of BCD digits (default 24)
//   ci  : carry in to the lowest digit
//   a,b : operands, N digits each, least significant digit in [3:0]
//   o   : N-digit sum
//   co  : carry out of the most significant digit
module BCDAddN(ci,a,b,o,co);
parameter N=24;
input ci;		// carry input
input [N*4-1:0] a;
input [N*4-1:0] b;
output [N*4-1:0] o;
output co;

// carry[k] is the carry into digit k; carry[N] leaves the top digit.
wire [N:0] carry;
assign carry[0] = ci;
assign co = carry[N];

genvar g;
generate
	for (g = 0; g < N; g = g + 1) begin : gDigit
		// Raw binary sum of one digit pair; five bits keep the binary carry.
		wire [4:0] rawSum = a[g*4+3:g*4] + b[g*4+3:g*4] + carry[g];
		BCDAddAdjust u1 (.i(rawSum), .o(o[g*4+3:g*4]), .c(carry[g+1]));
	end
endgenerate

endmodule
|
// Two-digit packed-BCD subtractor: o = a - b - ci, computed one digit at a time.
//   ci  : borrow in to the low digit
//   a,b : operands, low digit in [3:0], high digit in [7:4]
//   o   : two-digit difference
//   c   : borrow out of the high digit
module BCDSub(ci,a,b,o,c);
input ci;		// carry input
input [7:0] a;
input [7:0] b;
output [7:0] o;
output c;

wire lowBorrow;			// borrow from the low digit into the high digit
wire [4:0] lowDiff;
wire [4:0] highDiff;

// Raw binary digit differences; the fifth bit records a negative result.
assign lowDiff  = a[3:0] - b[3:0] - ci;
assign highDiff = a[7:4] - b[7:4] - lowBorrow;

// Correct each raw difference back to a decimal digit plus borrow.
BCDSubAdjust u1 (.i(lowDiff),  .o(o[3:0]), .c(lowBorrow));
BCDSubAdjust u2 (.i(highDiff), .o(o[7:4]), .c(c));

endmodule
|
// Four-digit packed-BCD subtractor: o = a - b - ci, rippling the borrow digit by digit.
//   ci  : borrow in to the lowest digit
//   a,b : operands, four digits each, least significant digit in [3:0]
//   o   : four-digit difference
//   c   : borrow out of the top (fourth) digit
//   c8  : borrow out of the second digit (i.e. out of the low eight bits)
module BCDSub4(ci,a,b,o,c,c8);
input ci;		// carry input
input [15:0] a;
input [15:0] b;
output [15:0] o;
output c;
output c8;

wire borrow0, borrow1, borrow2;	// borrows out of digits 0, 1 and 2
wire [4:0] diff0, diff1, diff2, diff3;

assign c8 = borrow1;

// Raw binary digit differences; the fifth bit records a negative result.
assign diff0 = a[3:0]   - b[3:0]   - ci;
assign diff1 = a[7:4]   - b[7:4]   - borrow0;
assign diff2 = a[11:8]  - b[11:8]  - borrow1;
assign diff3 = a[15:12] - b[15:12] - borrow2;

BCDSubAdjust u1 (.i(diff0), .o(o[3:0]),   .c(borrow0));
BCDSubAdjust u2 (.i(diff1), .o(o[7:4]),   .c(borrow1));
BCDSubAdjust u3 (.i(diff2), .o(o[11:8]),  .c(borrow2));
BCDSubAdjust u4 (.i(diff3), .o(o[15:12]), .c(c));

endmodule
|
// N-digit packed-BCD ripple subtractor: o = a - b - ci.
//   N   : number of BCD digits (default 24)
//   ci  : borrow in to the lowest digit
//   a,b : operands, N digits each, least significant digit in [3:0]
//   o   : N-digit difference
//   co  : borrow out of the most significant digit
module BCDSubN(ci,a,b,o,co);
parameter N=24;
input ci;		// carry input
input [N*4-1:0] a;
input [N*4-1:0] b;
output [N*4-1:0] o;
output co;

// borrow[k] is the borrow into digit k; borrow[N] leaves the top digit.
wire [N:0] borrow;
assign borrow[0] = ci;
assign co = borrow[N];

genvar g;
generate
	for (g = 0; g < N; g = g + 1) begin : gDigit
		// Raw binary difference of one digit pair; bit 4 records a negative result.
		wire [4:0] rawDiff = a[g*4+3:g*4] - b[g*4+3:g*4] - borrow[g];
		BCDSubAdjust u1 (.i(rawDiff), .o(o[g*4+3:g*4]), .c(borrow[g+1]));
	end
endgenerate

endmodule
|
// Decimal correction for one digit of a BCD addition.
//   i : raw five-bit binary sum of two digits plus carry
//   o : corrected decimal digit
//   c : decimal carry out
// Sums 0..9 pass through with no carry, sums 10..19 yield (i - 10) with a
// carry, and anything else (20..31, or an unknown input) yields 9 with a carry.
module BCDAddAdjust(i,o,c);
input [4:0] i;
output [3:0] o;
reg [3:0] o;
output c;
reg c;

wire [4:0] lessTen = i - 5'd10;

always @*
	if (i < 5'd10) begin
		o = i[3:0];
		c = 1'b0;
	end
	else if (i < 5'd20) begin
		o = lessTen[3:0];
		c = 1'b1;
	end
	else begin
		o = 4'h9;
		c = 1'b1;
	end

endmodule
|
// Decimal correction for one digit of a BCD subtraction.
//   i : raw five-bit two's-complement difference of two digits minus borrow
//   o : corrected decimal digit
//   c : decimal borrow out
// Differences 0..9 pass through with no borrow, 5'h16..5'h1F (-10..-1) yield
// (i + 10) with a borrow, and anything else (10..21, or an unknown input)
// yields 9 with a borrow.
module BCDSubAdjust(i,o,c);
input [4:0] i;
output [3:0] o;
reg [3:0] o;
output c;
reg c;

wire [4:0] plusTen = i + 5'd10;

always @*
	if (i < 5'd10) begin
		o = i[3:0];
		c = 1'b0;
	end
	else if (i >= 5'h16) begin
		o = plusTen[3:0];
		c = 1'b1;
	end
	else begin
		o = 4'h9;
		c = 1'b1;
	end

endmodule
|
// Multiply two BCD digits.
//   a,b : single BCD digits (0..9)
//   o   : two-digit BCD product, tens digit in [7:4], ones digit in [3:0]
// If either input is not a valid BCD digit (10..15) the output is 8'h00.
//
// The product is formed in binary and converted to two decimal digits by
// repeated subtraction of ten. Unlike a casex lookup, unknown (X/Z) inputs
// are not silently matched against a table entry; they propagate to the
// output in simulation.
module BCDMul1(a,b,o);
input [3:0] a;
input [3:0] b;
output [7:0] o;
reg [7:0] o;

wire [7:0] binProd = a * b;		// at most 81 for valid digits
reg [7:0] ones;
reg [3:0] tens;
integer k;

always @* begin
	ones = binProd;
	tens = 4'd0;
	// 81 is the largest valid product, so eight subtractions always suffice.
	for (k = 0; k < 8; k = k + 1)
		if (ones >= 8'd10) begin
			ones = ones - 8'd10;
			tens = tens + 4'd1;
		end
	if (a > 4'd9 || b > 4'd9)
		o = 8'h00;
	else
		o = {tens, ones[3:0]};
end

endmodule
|
|
// Multiply two pairs of BCD digits.
// Handles everything from 0 x 0 to 99 x 99.
//   a,b : two-digit BCD operands
//   o   : four-digit BCD product
module BCDMul2(a,b,o);
input [7:0] a;
input [7:0] b;
output [15:0] o;

// Single-digit partial products.
wire [7:0] prodLL;	// a low  x b low
wire [7:0] prodHL;	// a high x b low
wire [7:0] prodLH;	// a low  x b high
wire [7:0] prodHH;	// a high x b high
wire [15:0] partial;

BCDMul1 u1 (.a(a[3:0]), .b(b[3:0]), .o(prodLL));
BCDMul1 u2 (.a(a[7:4]), .b(b[3:0]), .o(prodHL));
BCDMul1 u3 (.a(a[3:0]), .b(b[7:4]), .o(prodLH));
BCDMul1 u4 (.a(a[7:4]), .b(b[7:4]), .o(prodHH));

// prodHH and prodLL occupy disjoint digit positions and are simply
// concatenated; the two cross products are added in one digit to the left.
BCDAdd4 u5 (.ci(1'b0), .a({prodHH,prodLL}), .b({4'h0,prodHL,4'h0}), .o(partial), .c(), .c8());
BCDAdd4 u6 (.ci(1'b0), .a(partial),         .b({4'h0,prodLH,4'h0}), .o(o),       .c(), .c8());

endmodule
|
// Self-checking testbench for BCDMul2.
// Drives four constant operand pairs and reports any product that differs
// from the expected four-digit BCD result.
module BCDMul_tb();

wire [15:0] o1,o2,o3,o4;

BCDMul2 u1 (.a(8'h00), .b(8'h00), .o(o1));
BCDMul2 u2 (.a(8'h99), .b(8'h99), .o(o2));
BCDMul2 u3 (.a(8'h25), .b(8'h18), .o(o3));
BCDMul2 u4 (.a(8'h37), .b(8'h21), .o(o4));

initial begin
	#1;		// let the combinational outputs settle
	if (o1 !== 16'h0000) $display("BCDMul_tb FAIL: 00 x 00 = %h (expected 0000)", o1);
	if (o2 !== 16'h9801) $display("BCDMul_tb FAIL: 99 x 99 = %h (expected 9801)", o2);
	if (o3 !== 16'h0450) $display("BCDMul_tb FAIL: 25 x 18 = %h (expected 0450)", o3);
	if (o4 !== 16'h0777) $display("BCDMul_tb FAIL: 37 x 21 = %h (expected 0777)", o4);
end

endmodule
|
// Convert an eight-bit binary value to three packed BCD digits by table lookup.
//   i : binary input, 0..255
//   o : {hundreds, tens, ones}, one BCD digit per nibble
module BinToBCD(i, o);
input [7:0] i;
output [11:0] o;

reg [11:0] tbl [0:255];

integer k;
reg [3:0] hundreds, tens, ones;

// Build the lookup table once at time zero.
initial begin
	for (k = 0; k < 256; k = k + 1) begin
		ones = k % 10;
		// The tens digit must be reduced modulo 10; plain k / 10 exceeds
		// a single digit for k >= 100 and would be truncated to a wrong nibble.
		tens = (k / 10) % 10;
		hundreds = k / 100;
		tbl[k] = {hundreds, tens, ones};
	end
end

assign o = tbl[i];

endmodule
/trunk/rtl/verilog2/DFPAddsub.sv
0,0 → 1,437
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPAddsub.sv |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Decimal floating point adder/subtractor, pipelined (stages are labelled
// Clock #1 .. Clock #13 below).
//   clk : clock
//   ce  : clock enable for every pipeline register
//   rm  : rounding mode; only rm == 3 is examined here, to pick the sign of
//         an exactly-zero result
//   op  : operation select (0 = add, 1 = subtract)
//   a,b : operands; fields are extracted by DFPDecomposeReg
//   o   : un-normalized result {st[3:0], xo[15:0], mo[199:0]} where
//         st = {NaN, sign, infinity, exponent-sign}
module DFPAddsub(clk, ce, rm, op, a, b, o);
input clk;
input ce;
input [2:0] rm;
input op;
input [127:0] a;
input [127:0] b;
output [219:0] o;

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

wire sa, sb;
wire sxa, sxb;
wire adn, bdn;
wire az, bz;
wire aInf, bInf;
wire aNan, bNan;
wire [15:0] xa, xb;
wire [95:0] siga, sigb;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #1
// - decode the input operands
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg op1;

DFPDecomposeReg u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa), .sx(sxa), .exp(xa), .sig(siga), .xz(adn), .vz(az), .inf(aInf), .nan(aNan) );
DFPDecomposeReg u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb), .sx(sxb), .exp(xb), .sig(sigb), .xz(bdn), .vz(bz), .inf(bInf), .nan(bNan) );

always @(posedge clk)
	if (ce) op1 <= op;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #2
//
// Figure out which operation is really needed: an add or a subtract?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, sign of larger
// -a +  b = sub, sign of larger
// -a + -b = add,-
//  a -  b = sub, sign of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, sign of larger
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg realOp2;
reg op2;
reg [15:0] xa2, xb2;
reg az2, bz2;
reg xa_gt_xb2;
reg [95:0] siga2, sigb2;
reg sigeq, siga_gt_sigb;
reg expeq;
reg sxo2;

// Nonblocking so that the stage-3 logic reading realOp2 on the same clock
// edge always sees the previous value.
always @(posedge clk)
	if (ce) realOp2 <= op1 ^ sa ^ sb;
always @(posedge clk)
	if (ce) op2 <= op1;
always @(posedge clk)
	if (ce) xa2 <= xa;
always @(posedge clk)
	if (ce) xb2 <= xb;
always @(posedge clk)
	if (ce) siga2 <= siga;
always @(posedge clk)
	if (ce) sigb2 <= sigb;
always @(posedge clk)
	if (ce) az2 <= az;
always @(posedge clk)
	if (ce) bz2 <= bz;
// Exponent comparison: differing sx flags decide directly, otherwise the
// exponent fields are compared in the direction selected by sxa.
always @(posedge clk)
	if (ce)
		if (sxa & ~sxb)
			xa_gt_xb2 <= TRUE;
		else if (~sxa & sxb)
			xa_gt_xb2 <= FALSE;
		else
			xa_gt_xb2 <= sxa ? xa > xb : xa < xb;
always @(posedge clk)
	if (ce)
		sxo2 <= sxa|sxb;

always @(posedge clk)
	if (ce) sigeq <= siga==sigb;
always @(posedge clk)
	if (ce) siga_gt_sigb <= siga > sigb;
always @(posedge clk)
	if (ce) expeq <= {sxa,xa}=={sxb,xb};

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #3
//
// Find out if the result will be zero.
// Determine which fraction to denormalize
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
//
reg [15:0] xa3, xb3;
reg resZero3;
wire aInf3, bInf3;
reg xa_gt_xb3;
reg a_gt_b3;
reg op3;
wire sa3, sb3;
wire [2:0] rm3;
reg [95:0] mfs3;

always @(posedge clk)
	if (ce) resZero3 <= (realOp2 & expeq & sigeq) ||	// subtract, same magnitude
			   (az2 & bz2);				// both a,b zero
always @(posedge clk)
	if (ce) xa3 <= xa2;
always @(posedge clk)
	if (ce) xb3 <= xb2;
always @(posedge clk)
	if (ce) xa_gt_xb3 <= xa_gt_xb2;
always @(posedge clk)
	if (ce) a_gt_b3 <= xa_gt_xb2 | (expeq & siga_gt_sigb);
always @(posedge clk)
	if (ce) op3 <= op2;
// The significand belonging to the smaller exponent is the one to be shifted.
always @(posedge clk)
	if (ce) mfs3 <= xa_gt_xb2 ? sigb2 : siga2;

delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(sa), .o(sa3));
delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(sb), .o(sb3));
delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3));
delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(aInf), .o(aInf3));
delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bInf), .o(bInf3));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #4
//
// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

reg [15:0] xa4, xb4;
reg [15:0] xo4;
reg xa_gt_xb4;

always @(posedge clk)
	if (ce) xa4 <= xa3;
always @(posedge clk)
	if (ce) xb4 <= xb3;
always @(posedge clk)
	if (ce) xo4 <= resZero3 ? 16'd0 : xa_gt_xb3 ? xa3 : xb3;
always @(posedge clk)
	if (ce) xa_gt_xb4 <= xa_gt_xb3;

// Compute output sign
// NOTE(review): so4 is combinational on stage-3 signals, while the delay
// lines fed from it (udly12b, udly13a) use depths that look sized for a
// registered stage-4 value -- confirm the sign lines up with the mantissa
// when operations are issued back to back.
reg so4;
always @*
	case ({resZero3,sa3,op3,sb3})	// synopsys full_case parallel_case
	4'b0000: so4 = 0;			// + + + = +
	4'b0001: so4 = !a_gt_b3;		// + + - = sign of larger
	4'b0010: so4 = !a_gt_b3;		// + - + = sign of larger
	4'b0011: so4 = 0;			// + - - = +
	4'b0100: so4 = a_gt_b3;			// - + + = sign of larger
	4'b0101: so4 = 1;			// - + - = -
	4'b0110: so4 = 1;			// - - + = -
	4'b0111: so4 = a_gt_b3;			// - - - = sign of larger
	4'b1000: so4 = 0;			//  A +  B, sign = +
	4'b1001: so4 = rm3==3'd3;		//  A + -B, sign = + unless rounding down
	4'b1010: so4 = rm3==3'd3;		//  A -  B, sign = + unless rounding down
	4'b1011: so4 = 0;			// +A - -B, sign = +
	4'b1100: so4 = rm3==3'd3;		// -A +  B, sign = + unless rounding down
	4'b1101: so4 = 1;			// -A + -B, sign = -
	4'b1110: so4 = 1;			// -A - +B, sign = -
	4'b1111: so4 = rm3==3'd3;		// -A - -B, sign = + unless rounding down
	endcase

// BCD difference of the two exponent fields, larger minus smaller.
// NOTE(review): only the exponent fields are subtracted; if sxa and sxb can
// differ the true distance may need the sum instead -- confirm.
wire [15:0] xabdif4;
BCDSub4 ubcds1(
	.ci(1'b0),
	.a(xa_gt_xb4 ? xa4 : xb4),
	.b(xa_gt_xb4 ? xb4 : xa4),
	.o(xabdif4),
	.c(),
	.c8()
);

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #5
//
// Compute the difference in exponents, provides shift amount
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [15:0] xdiff5;
always @(posedge clk)
	if (ce) xdiff5 <= xabdif4;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #6
//
// Limit the shift amount
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// If the difference in the exponent is 24 or greater (assuming 24 nybble dfp or
// less) then all of the bits will be shifted out to zero. There is no need to
// keep track of a difference more than 24.
reg [11:0] xdif6;
wire [95:0] mfs6;
always @(posedge clk)
	if (ce) xdif6 <= xdiff5 > 16'h0024 ? 8'h24 : xdiff5[7:0];
delay #(.WID(96), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #7
//
// Determine the sticky bit. The sticky bit is the bitwise or of all the bits
// being shifted out the right side. The sticky bit is computed here to
// reduce the number of regs required.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg sticky6;
wire sticky7;
wire [7:0] xdif7;
wire [95:0] mfs7;
wire [7:0] xdif6a = {xdif6[7:4] * 10 + xdif6[3:0],2'b00};	// Convert base then *4
integer n;
always @* begin
	sticky6 = 1'b0;
	for (n = 0; n < 96; n = n + 4)
		if (n <= xdif6a)
			sticky6 = sticky6| mfs6[n]|mfs6[n+1]|mfs6[n+2]|mfs6[n+3];	// non-zero nybble
end

// register inputs to shifter and shift
delay1 #(1)  d16(.clk(clk), .ce(ce), .i(sticky6), .o(sticky7) );
delay1 #(8)  d15(.clk(clk), .ce(ce), .i(xdif6a), .o(xdif7) );
delay1 #(96) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) );

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #8
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [99:0] md8;
wire [95:0] siga8, sigb8;
wire xa_gt_xb8;
wire a_gt_b8;
wire op8;
always @(posedge clk)
	if (ce) md8 <= ({mfs7,4'b0} >> xdif7)|sticky7;	// xdif7 is a multiple of four

// sync control signals
delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8));
delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8));
delay #(.WID(96), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8));
delay #(.WID(96), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8));
delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #9
// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [99:0] oa9, ob9;
reg a_gt_b9;
always @(posedge clk)
	if (ce) oa9 <= xa_gt_xb8 ? {siga8,4'b0} : md8;
always @(posedge clk)
	if (ce) ob9 <= xa_gt_xb8 ? md8 : {sigb8,4'b0};
always @(posedge clk)
	if (ce) a_gt_b9 <= a_gt_b8;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #10
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [99:0] oaa10;
reg [99:0] obb10;
wire realOp10;
wire [15:0] xo10;

// Put the larger magnitude in oaa10 so the subtraction cannot go negative.
always @(posedge clk)
	if (ce) oaa10 <= a_gt_b9 ? oa9 : ob9;
always @(posedge clk)
	if (ce) obb10 <= a_gt_b9 ? ob9 : oa9;
delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10));
delay #(.WID(16), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10));

// Both the sum and the difference are formed; the one needed is selected
// at the next clock.
wire [99:0] oss10;
wire oss10c;

BCDAddN #(.N(25)) ubcdan1
(
	.ci(1'b0),
	.a(oaa10),
	.b(obb10),
	.o(oss10),
	.co(oss10c)
);

wire [99:0] odd10;
wire odd10c;

BCDSubN #(.N(25)) ubcdsn1
(
	.ci(1'b0),
	.a(oaa10),
	.b(obb10),
	.o(odd10),
	.co(odd10c)
);

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #11
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [99:0] mab11;
reg mab11c;
wire [95:0] siga11, sigb11;
wire abInf11;
wire aNan11, bNan11;
reg xoinf11;
wire op11;

always @(posedge clk)
	if (ce) mab11 <= realOp10 ? odd10 : oss10;
always @(posedge clk)
	if (ce) mab11c <= realOp10 ? odd10c : oss10c;

delay #(.WID(1), .DEP(8)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11));
delay #(.WID(1), .DEP(10)) udly11c (.clk(clk), .ce(ce), .i(aNan), .o(aNan11));
delay #(.WID(1), .DEP(10)) udly11d (.clk(clk), .ce(ce), .i(bNan), .o(bNan11));
delay #(.WID(1), .DEP(3)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11));
delay #(.WID(96), .DEP(3)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11));
delay #(.WID(96), .DEP(3)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11));

always @(posedge clk)
	if (ce) xoinf11 <= xo10==16'h9999;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #12
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [199:0] mo12;	// mantissa output
reg [3:0] st12;
wire sxo11;
wire so11;
delay #(.WID(1), .DEP(9)) udly12a (.clk(clk), .ce(ce), .i(sxo2), .o(sxo11));
delay #(.WID(1), .DEP(7)) udly12b (.clk(clk), .ce(ce), .i(so4), .o(so11));

always @(posedge clk)
	if (ce) begin
		st12[0] <= sxo11;
		st12[1] <= abInf11;
		st12[2] <= so11;
		st12[3] <= aNan11|bNan11;
	end

always @(posedge clk)
	if (ce)
		casez({abInf11,aNan11,bNan11,xoinf11})
		4'b1???:	// inf +/- inf - generate QNaN on subtract, inf on add
			if (op11)
				mo12 <= {4'h9,196'd0};
			else
				mo12 <= {50{4'h9}};
		4'b01??:	mo12 <= {4'b0,siga11[95:0],100'd0};
		4'b001?:	mo12 <= {4'b0,sigb11[95:0],100'd0};
		4'b0001:	mo12 <= 200'd0;
		default:	mo12 <= {3'b0,mab11c,mab11,96'd0};	// mab has an extra lead bit and four trailing bits
		endcase

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #13
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire so;		// sign output
wire [3:0] st;
wire [15:0] xo;		// de normalized exponent output
wire [199:0] mo;	// mantissa output

delay #(.WID(4), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(st12), .o(st[3:0]) );
delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(so));
delay #(.WID(16), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(xo));
delay #(.WID(200), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(mo12), .o(mo) );

assign o = {st,xo,mo};

endmodule
|
|
// Decimal floating point add/subtract followed by normalization and rounding.
// Chains DFPAddsub -> DFPNormalize -> DFPRound.
module DFPAddsubnr(clk, ce, rm, op, a, b, o);
input clk;		// system clock
input ce;		// core clock enable
input [2:0] rm;		// rounding mode
input op;		// operation 0 = add, 1 = subtract
input [127:0] a;	// operand a
input [127:0] b;	// operand b
output [127:0] o;	// output

wire [219:0] rawSum;		// expanded-format result from the adder
wire [119:0] normalized;	// normalizer output handed to the rounder

DFPAddsub u1 (
	.clk(clk),
	.ce(ce),
	.rm(rm),
	.op(op),
	.a(a),
	.b(b),
	.o(rawSum)
);

// The underflow input is tied low; the normalizer's remaining ports are left open.
DFPNormalize u2 (
	.clk(clk),
	.ce(ce),
	.under_i(1'b0),
	.i(rawSum),
	.o(normalized)
);

DFPRound u3 (
	.clk(clk),
	.ce(ce),
	.rm(rm),
	.i(normalized),
	.o(o)
);

endmodule
/trunk/rtl/verilog2/DFPDecompose.sv
0,0 → 1,86
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPDecompose.sv |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Combinational field extraction from a 128-bit decimal floating point word.
//   i[115]    -> nan
//   i[114]    -> sgn
//   i[113]    -> inf
//   i[112]    -> sx
//   i[111:96] -> exp
//   i[95:0]   -> sig
//   xz : exp field is all zeros
//   vz : exp and sig fields are both all zeros
// Bits i[127:116] are not used.
module DFPDecompose(i, sgn, sx, exp, sig, xz, vz, inf, nan);
input [127:0] i;
output sgn;
output sx;
output [15:0] exp;
output [95:0] sig;
output xz;
output vz;
output inf;
output nan;

// The fields sit back to back, so one slice unpacks all of them.
assign {nan, sgn, inf, sx, exp, sig} = i[115:0];

assign xz = ~(|i[111:96]);
assign vz = ~(|i[111:0]);

endmodule
|
|
// Registered field extraction from a 128-bit decimal floating point word.
// On each rising clock edge with ce high, every output is loaded from i:
//   i[115]    -> nan
//   i[114]    -> sgn
//   i[113]    -> inf
//   i[112]    -> sx
//   i[111:96] -> exp
//   i[95:0]   -> sig
//   xz : exp field of i is all zeros
//   vz : exp and sig fields of i are both all zeros
// Bits i[127:116] are not used.
module DFPDecomposeReg(clk, ce, i, sgn, sx, exp, sig, xz, vz, inf, nan);
input clk;
input ce;
input [127:0] i;
output reg sgn;
output reg sx;
output reg [15:0] exp;
output reg [95:0] sig;
output reg xz;
output reg vz;
output reg inf;
output reg nan;

always @(posedge clk)
	if (ce) begin
		nan <= i[115];
		sgn <= i[114];
		inf <= i[113];
		sx <= i[112];
		exp <= i[111:96];
		sig <= i[95:0];
		// The zero flags are taken straight from the input word. Reading
		// the exp/sig registers here would sample their previous contents
		// and leave xz/vz one cycle behind the other outputs.
		xz <= ~|i[111:96];
		vz <= ~|i[111:0];
	end

endmodule
/trunk/rtl/verilog2/DFPNormalize.sv
0,0 → 1,339
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPNormalize.sv |
// - decimal floating point normalization unit |
// - eight cycle latency |
// - parameterized width |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// This unit takes a floating point number in an intermediate |
// format and normalizes it. No normalization occurs |
// for NaN's or infinities. The unit has a two cycle latency. |
// |
// The mantissa is assumed to start with two whole bits on |
// the left. The remaining bits are fractional. |
// |
// The width of the incoming format is reduced via a generation |
// of sticky bit in place of the low order fractional bits. |
// |
// On an underflowed input, the incoming exponent is assumed |
// to be negative. A right shift is needed. |
// ============================================================================ |
|
import fp::*; |
|
module DFPNormalize(clk, ce, i, o, under_i, under_o, inexact_o); |
input clk; |
input ce; |
input [219:0] i; // expanded format input |
output [119:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit |
input under_i; |
output under_o; |
output inexact_o; |
|
integer n; |
// ---------------------------------------------------------------------------- |
// No Clock required |
// ---------------------------------------------------------------------------- |
reg [15:0] xo0; |
reg so0; |
reg sx0; |
reg nan0; |
reg inf0; |
|
always @* |
xo0 <= i[215:200]; |
always @* |
so0 <= i[218]; // sign doesn't change |
always @* |
sx0 <= i[216]; |
always @* |
nan0 <= i[219]; |
always @* |
inf0 <= i[217] || xo0==16'h9999 && i[196]; |
|
// ---------------------------------------------------------------------------- |
// Clock #1 |
// - Capture exponent information |
// ---------------------------------------------------------------------------- |
reg xInf1a, xInf1b, xInf1c; |
wire [219:0] i1; |
delay #(.WID(220),.DEP(1)) u11 (.clk(clk), .ce(ce), .i(i), .o(i1)); |
|
always @(posedge clk) |
if (ce) xInf1a <= xo0==16'h9999 & !under_i; |
always @(posedge clk) |
if (ce) xInf1b <= xo0==16'h9998 & !under_i; |
always @(posedge clk) |
if (ce) xInf1c <= xo0==16'h9999; |
|
// ---------------------------------------------------------------------------- |
// Clock #2 |
// - determine exponent increment |
// Since the there are *three* whole digits in the incoming format |
// the number of whole digits needs to be reduced. If the MSB is |
// set, then increment the exponent and no shift is needed. |
// ---------------------------------------------------------------------------- |
wire xInf2c, xInf2b; |
wire [15:0] xo2; |
reg incExpByOne2, incExpByTwo2; |
delay #(.WID(1),.DEP(1)) u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c)); |
delay #(.WID(1),.DEP(1)) u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b)); |
delay #(.WID(16),.DEP(2)) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2)); |
delay #(.WID(1),.DEP(2)) u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2)); |
|
always @(posedge clk) |
if (ce) incExpByOne2 <= !xInf1a & i1[196]; |
|
// ---------------------------------------------------------------------------- |
// Clock #3 |
// - increment exponent |
// - detect a zero mantissa |
// ---------------------------------------------------------------------------- |
|
wire incExpByOne3; |
wire [219:0] i3; |
reg [15:0] xo3; |
reg zeroMan3; |
delay #(.WID(1),.DEP(1)) u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3)); |
delay #(.WID(220),.DEP(3)) u33 (.clk(clk), .ce(ce), .i(i[219:0]), .o(i3)); |
|
wire [15:0] xo2a; |
BCDAddN #(.N(4)) ubcdan1 |
( |
.ci(1'b0), |
.a(xo2), |
.b(16'h0001), |
.o(xo2a), |
.co() |
); |
|
always @(posedge clk) |
if (ce) xo3 <= (incExpByOne2 ? xo2a : xo2); |
|
always @(posedge clk) |
if(ce) zeroMan3 <= 1'b0; |
|
// ---------------------------------------------------------------------------- |
// Clock #4 |
// - Shift mantissa left |
// - If infinity is reached then set the mantissa to zero |
// shift mantissa left to reduce to a single whole digit |
// - create sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [103:0] mo4; |
reg inexact4; |
|
always @(posedge clk) |
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: mo4 <= 1'd0; |
2'b01: mo4 <= {i3[199:100],3'b0,|i3[99:0]}; |
default: mo4 <= {i3[195:96],3'b0,|i3[95:0]}; |
endcase |
|
always @(posedge clk) |
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: inexact4 <= 1'd0; |
2'b01: inexact4 <= |i3[99:0]; |
default: inexact4 <= |i3[95:0]; |
endcase |
|
// ---------------------------------------------------------------------------- |
// Clock edge #5 |
// - count leading zeros |
// ---------------------------------------------------------------------------- |
reg [7:0] leadingZeros5; |
wire [15:0] xo5; |
wire xInf5; |
delay #(.WID(16),.DEP(2)) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5)); |
delay #(.WID(1),.DEP(3)) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) ); |
|
/* Lookup table based leading zero count modules give slightly better |
performance but cases must be coded. |
generate |
begin |
if (FPWID <= 32) begin |
cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,4'b0}), .o(leadingZeros5) ); |
assign leadingZeros5[7:6] = 2'b00; |
end |
else if (FPWID<=64) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,7'h0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=80) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=84) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,23'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=96) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=128) |
cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
endgenerate |
*/ |
|
// Sideways add. |
// Normally there would be only one to two leading zeros. It is tempting then |
// to check for only one or two. But, denormalized numbers might have more |
// leading zeros. If denormals were not supported this could be made smaller |
// and faster. |
`ifdef SUPPORT_DENORMALS |
reg [7:0] lzc; |
reg got_one; |
always @* |
begin |
got_one = 1'b0; |
lzc = 8'h00; |
for (n = 103; n >= 0; n = n - 4) begin |
if (!got_one) begin |
if (mo4[n]|mo4[n-1]|mo4[n-2]|mo4[n-3]) |
got_one = 1'b1; |
else |
lzc = lzc + 1'b1; |
end |
end |
end |
always @(posedge clk) |
if (ce) leadingZeros5 <= lzc; |
`else |
always @(posedge clk) |
if (ce) |
casez(mo4[99:92]) |
8'h00000000: leadingZeros5 <= 8'd2; |
8'h0000????: leadingZeros5 <= 8'd1; |
default: leadingZeros5 <= 8'd0; |
endcase |
`endif |
|
|
// ---------------------------------------------------------------------------- |
// Clock edge #6 |
// - Compute how much we want to decrement exponent by |
// - compute amount to shift left and right |
// - at infinity the exponent can't be incremented, so we can't shift right |
// otherwise it was an underflow situation so the exponent was negative |
// shift amount needs to be negated for shift register |
// If the exponent underflowed, then the shift direction must be to the |
// right regardless of mantissa bits; the number is denormalized. |
// Otherwise the shift direction must be to the left. |
// ---------------------------------------------------------------------------- |
reg [7:0] lshiftAmt6; |
reg [7:0] rshiftAmt6; |
wire rightOrLeft6; // 0=left,1=right |
wire xInf6; |
wire [15:0] xo6; |
wire [103:0] mo6; |
wire zeroMan6; |
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) ); |
delay #(.WID(16),.DEP(1)) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6)); |
delay #(.WID(104),.DEP(2)) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) ); |
delay #(.WID(1),.DEP(1)) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) ); |
delay #(.WID(1),.DEP(3)) u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6)); |
delay #(.WID(1),.DEP(5)) u66 (.clk(clk), .ce(ce), .i(sx0), .o(sx5) ); |
|
wire [13:0] xo5d = xo5[3:0] + xo5[7:4] * 10 + xo5[11:8] * 100 + xo5[15:12] * 1000; |
|
always @(posedge clk) |
if (ce) lshiftAmt6 <= {leadingZeros5 > xo5d ? xo5d : leadingZeros5,2'b0}; |
|
always @(posedge clk) |
if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : sx5 ? 1'd0 : xo5d > 14'd24 ? 8'd96 : {xo5d[5:0],2'b00}; // xo2 is negative ! |
|
// ---------------------------------------------------------------------------- |
// Clock edge #7 |
// - figure exponent |
// - shift mantissa |
// - figure sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [15:0] xo7; |
wire rightOrLeft7; |
reg [103:0] mo7l, mo7r; |
reg St6,St7; |
delay #(.WID(1),.DEP(1)) u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7)); |
|
wire [11:0] lshftAmtBCD; |
wire [15:0] xo7d; |
BinToBCD ubbcd1 (lshiftAmt6, lshftAmtBCD); |
BCDSubN #(.N(4)) ubcdsn1 |
( |
.ci(1'b0), |
.a(xo6), |
.b({4'h0,lshftAmtBCD}), |
.o(xo7d), |
.co() |
); |
|
|
always @(posedge clk) |
if (ce) |
xo7 <= zeroMan6 ? xo6 : |
xInf6 ? xo6 : // an infinite exponent is either a NaN or infinity; no need to change |
rightOrLeft6 ? 1'd0 : // on a right shift, the exponent was negative, it's being made to zero |
xo7d; // on a left shift, the exponent can't be decremented below zero |
|
always @(posedge clk) |
if (ce) mo7r <= mo6 >> rshiftAmt6; |
always @(posedge clk) |
if (ce) mo7l <= mo6 << lshiftAmt6; |
|
// The sticky bit is set if the bits shifted out on a right shift are set. |
always @* |
begin |
St6 = 1'b0; |
for (n = 0; n < 104; n = n + 1) |
if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n]; |
end |
always @(posedge clk) |
if (ce) St7 <= St6; |
|
// ---------------------------------------------------------------------------- |
// Clock edge #8 |
// - select mantissa |
// ---------------------------------------------------------------------------- |
|
wire so,sxo,nano,info; |
wire [15:0] xo; |
reg [103:0] mo; |
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) ); |
delay #(.WID(16),.DEP(1)) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo)); |
vtdl #(.WID(1)) u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o)); |
delay #(.WID(1),.DEP(1)) u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o)); |
vtdl #(1) u85 (.clk(clk), .ce(ce), .a(4'd7), .d(sx0), .q(sxo) ); |
vtdl #(1) u86 (.clk(clk), .ce(ce), .a(4'd7), .d(nan0), .q(nano) ); |
vtdl #(1) u87 (.clk(clk), .ce(ce), .a(4'd7), .d(inf0), .q(info) ); |
|
always @(posedge clk) |
if (ce) mo <= rightOrLeft7 ? mo7r|{St7,4'b0} : mo7l; |
|
assign o = {nano,so,info,sxo,xo,mo[103:4]}; |
|
endmodule |
|
/trunk/rtl/verilog2/DFPRound.sv
0,0 → 1,164
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPRound.sv |
// - decimal floating point rounding unit |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import fp::*; |
|
// PIPE_ADV is the event control used by every pipeline-stage always block
// below. With MIN_LATENCY defined the stages are level sensitive (always @*);
// otherwise each stage is a register clocked on the rising edge of clk.
`ifdef MIN_LATENCY
`define PIPE_ADV *
`else
`define PIPE_ADV (posedge clk)
`endif

// ----------------------------------------------------------------------------
// DFPRound
// - rounds a 120-bit intermediate decimal floating point value and packs it
//   into the 128-bit output format
//
// Input fields, as sliced by this module:
//   i[119:116]  four flag bits, passed through to the output unchanged
//   i[115:100]  exponent, four BCD digits (16'h9999 is treated as infinite)
//   i[99:4]     mantissa, 24 BCD digits
//   i[3:0]      the digit being rounded away
//
// Output: o = {12'hDF0, flags, exponent, mantissa}
//
// Three pipeline stages unless MIN_LATENCY is defined, in which case the
// whole unit is level sensitive.
// ----------------------------------------------------------------------------
module DFPRound(clk, ce, rm, i, o);
input clk;                // pipeline clock (not used when MIN_LATENCY is defined)
input ce;                 // enable; a stage only updates while ce is high
input [2:0] rm;           // rounding mode, one of the ROUND_* codes below
input [119:0] i;          // intermediate format input
output [127:0] o;         // rounded output

parameter ROUND_CEILING = 3'd0;
parameter ROUND_FLOOR = 3'd1;
parameter ROUND_HALF_UP = 3'd2;
parameter ROUND_HALF_EVEN = 3'd3;
parameter ROUND_DOWN = 3'd4;

//------------------------------------------------------------
// variables
wire [3:0] so;            // flag nibble, aligned with the output stage
wire [15:0] xo;           // exponent, aligned with the output stage
reg [95:0] mo;            // mantissa, output stage
reg [15:0] xo1;           // exponent captured in stage 1
reg [95:0] mo1;           // mantissa captured in stage 1
reg [95:0] mo2;           // mo1 carried into stage 2 so stage 3 sees aligned data
// NOTE(review): xInf is not referenced anywhere in this module even though the
// Clock #2 notes say an infinite exponent suppresses rounding - confirm intent.
wire xInf = i[115:100]==16'h9999;
wire [3:0] si = i[119:116];   // flag nibble of the operand currently at the input
wire so0 = i[118];            // same bit as si[2]
assign o = {12'hDF0,so,xo,mo};

wire [3:0] l = i[7:4];    // least significant digit that is kept
wire [3:0] r = i[3:0];    // digit that is rounded away

reg rnd;                  // 1 = add one unit in the last place

//------------------------------------------------------------
// Clock #1
// - determine round amount (add 1 or 0)
//------------------------------------------------------------

always @`PIPE_ADV
  if (ce) xo1 <= i[115:100];
always @`PIPE_ADV
  if (ce) mo1 <= i[99:4];

// Compute the round bit
// Infinities and NaNs are not rounded!
// The decision uses the flags of the operand that is at the input right now
// (si / so0). The output-aligned copy "so" lags by three stages when the unit
// is pipelined and would describe an older operand.
// NOTE(review): only flag bits [1:0] suppress rounding - confirm these are the
// infinity / NaN indicators in the producer's flag layout.
always @`PIPE_ADV
  if (ce) begin
    if (|si[1:0])
      rnd <= 1'b0;        // nonblocking, like every other assignment to rnd
    else
      case (rm)
      // increment only when a non-zero digit is dropped and flag bit 2 is set
      ROUND_CEILING:    rnd <= (r == 4'd0 || so0==1'b0) ? 1'b0 : 1'b1;
      // increment only when a non-zero digit is dropped and flag bit 2 is clear
      ROUND_FLOOR:      rnd <= (r == 4'd0 || so0==1'b1) ? 1'b0 : 1'b1;
      ROUND_HALF_UP:    rnd <= r >= 4'h5;
      // an exact 5 rounds to the neighbour whose last digit is even
      ROUND_HALF_EVEN:  rnd <= r==4'h5 ? l[0] : r > 4'h5 ? 1'b1 : 1'b0;
      ROUND_DOWN:       rnd <= 1'b0;
      default:          rnd <= 1'b0;
      endcase
  end

//------------------------------------------------------------
// Clock #2
// round the number, check for carry
// note: rounding is suppressed in stage 1 (rnd = 0) when flag bits [1:0] are set
// note: exponent and mantissa are added as one BCD number, so a carry out of
//       the mantissa increments the exponent
//------------------------------------------------------------

wire [111:0] rounded1;
wire co1;

// NOTE(review): the operands are 112 bits (28 BCD digits) while N is 29;
// confirm against BCDAddN's port widths which digit co1 is the carry out of.
BCDAddN #(.N(29)) ubcdan1
(
  .ci(1'b0),
  .a({xo1,mo1}),
  .b({111'd0,rnd}),
  .o(rounded1),
  .co(co1)
);

reg [111:0] rounded2;     // {exponent, mantissa} after the increment
reg carry2;               // carry out reported by the adder
reg rnd2;                 // rnd, carried into stage 2
reg dn2;                  // stage-1 exponent was zero (denormalized)
wire [15:0] xo2;
always @`PIPE_ADV
  if (ce) rounded2 <= rounded1;
always @`PIPE_ADV
  if (ce) carry2 <= co1;
always @`PIPE_ADV
  if (ce) rnd2 <= rnd;
always @`PIPE_ADV
  if (ce) dn2 <= !(|xo1);
always @`PIPE_ADV
  if (ce) mo2 <= mo1;
assign xo2 = rounded2[111:96];

//------------------------------------------------------------
// Clock #3
// - shift mantissa if required.
//------------------------------------------------------------
`ifdef MIN_LATENCY
assign so = i[119:116];
assign xo = xo2;
`else
delay3 #(4) u21 (.clk(clk), .ce(ce), .i(i[119:116]), .o(so));
delay1 #(16) u22 (.clk(clk), .ce(ce), .i(xo2), .o(xo));
`endif

// Select bits: {rounded?, exponent now infinite?, adder carry, was denormalized?}
// The unrounded mantissa is taken from mo2 rather than rounded2 so that it is
// passed through untouched by the BCD adder.
always @`PIPE_ADV
  if (ce)
    casez({rnd2,xo2==16'h9999,carry2,dn2})
    4'b0??0: mo <= mo2[95:0];              // not rounding, not denormalized
    4'b0??1: mo <= mo2[95:0];              // not rounding, denormalized
    4'b1000: mo <= rounded2[95: 0];        // exponent didn't change, number was normalized
    4'b1001: mo <= rounded2[95: 0];        // exponent didn't change, but number was denormalized
    4'b1010: mo <= {4'h1,rounded2[95: 4]}; // exponent incremented (new MSD generated), number was normalized
    4'b1011: mo <= rounded2[95:0];         // exponent incremented (new MSD generated), number was denormalized, number became normalized
    4'b11??: mo <= 96'd0;                  // number became infinite, no need to check carry etc.
    endcase

endmodule
/trunk/test_bench/DFPAddsub_tb.v
0,0 → 1,143
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPAddsub_tb.v |
// - decimal floating point addsub test bench |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// ----------------------------------------------------------------------------
// DFPAddsub_tb
// - drives DFPAddsubnr with random BCD operands plus three fixed vectors and
//   logs operation, rounding mode, operands and result to a text file.
//
// One test vector is applied every 32 clocks: operands are loaded when
// count==2 and the result is written when count==31. adr counts completed
// vectors; adr[11] selects the operation and adr[14:12] the rounding mode.
// ----------------------------------------------------------------------------
module DFPAddsub_tb();
reg rst;
reg clk;
reg [15:0] adr;           // index of the vector being applied
reg [127:0] a,b;          // operands presented to the unit under test
wire [127:0] o;           // result from the unit under test
reg [3:0] rm;             // rounding mode presented to the unit under test
reg [115:0] a1, b1;       // free-running random BCD digits, 29 per operand
reg [7:0] count;          // clock counter within the current vector

integer outfile;

initial begin
  rst = 1'b0;
  clk = 1'b0;
  adr = 0;
  a = $urandom(1);
  b = 1;
  // Open the log once, up front. Opening it on every clock while adr==0 leaks
  // a file descriptor per cycle and rewrites the header each time.
  outfile = $fopen("d:/cores2020/rtf64/v2/rtl/verilog/cpu/fpu/test_bench/DFPAddsub_tvo.txt", "wb");
  if (outfile == 0)
    $display("DFPAddsub_tb: could not open the output file; results will not be logged");
  $fwrite(outfile, " rm ------- A ------ ------- B ------ ------ sum ----- -- SIM Sum --\n");
  #20 rst = 1;
  #50 rst = 0;
  #10000000 $fclose(outfile);
  #10 $finish;
end

// 100 MHz clock under the 1ns timescale
always #5
  clk = ~clk;

// Refresh every 4-bit digit of a1/b1 with a random value 0..9 on each clock.
genvar g;
generate
  for (g = 0; g < 116; g = g + 4) begin : gRand
    always @(posedge clk) begin
      a1[g+3:g] <= $urandom() % 10;
      b1[g+3:g] <= $urandom() % 10;
    end
  end
endgenerate

always @(posedge clk)
if (rst) begin
  adr <= 0;
  count <= 0;
end
else begin
  count <= count + 1;
  if (count > 32)
    count <= 1'd1;
  // Load a random operand pair. The later assignments win, so the top digit
  // of the random field is forced to 5 and the upper 12 bits to DF0.
  if (count==2) begin
    a[115:0] <= a1;
    b[115:0] <= b1;
    a[115:112] <= 4'h5;
    b[115:112] <= 4'h5;
    a[127:116] <= 12'hDF0;
    b[127:116] <= 12'hDF0;
    rm <= adr[14:12];
  end
  // Fixed vectors for the first addresses override the random pair.
  // The literals are 32 hex digits, so they must be sized 128 bits; a 127-bit
  // size drops the top bit and turns the leading D into 5.
  if (adr==1 && count==2) begin
    a <= 128'hDF050000700000000000000000000000;
    b <= 128'hDF050000200000000000000000000000;
  end
  if (adr==2 && count==2) begin
    a <= 128'hDF050000900000000000000000000000;
    b <= 128'hDF050000200000000000000000000000;
  end
  if (adr==3 && count==2) begin
    a <= 128'hDF050000000000000000000000000000;
    b <= 128'hDF050000000000000000000000000000;
  end
  // Log the result, tagged with the operation that was selected by adr[11].
  if (count==31) begin
    if (adr[11]) begin
      $fwrite(outfile, "%c%h\t%h\t%h\t%h\n", "-",rm, a, b, o);
    end
    else begin
      $fwrite(outfile, "%c%h\t%h\t%h\t%h\n", "+",rm, a, b, o);
    end
    adr <= adr + 1;
  end
end

// Unit under test
DFPAddsubnr u6 (
  .clk(clk),
  .ce(1'b1),
  .op(adr[11]),
  .a(a),
  .b(b),
  .o(o),
  .rm(rm)
);

endmodule