URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk/rtl
- from Rev 74 to Rev 75
- ↔ Reverse comparison
Rev 74 → Rev 75
/verilog2/DFPAddsub96.sv
0,0 → 1,422
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPAddsub96.sv |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
// ----------------------------------------------------------------------------
// DFPAddsub96
//   Pipelined decimal floating-point adder / subtractor for the 96-bit format.
//   The operands are unpacked, the significand belonging to the operand with
//   the smaller exponent is shifted right by the exponent difference (one BCD
//   digit per exponent step), the magnitudes are ordered and then fed to a
//   clocked BCD adder and a clocked BCD subtractor; the result that matches
//   the effective operation is selected.
//
// Ports:
//   clk  - clock
//   ce   - clock enable for the pipeline registers (see the note at clock #1)
//   rm   - rounding mode; here it only selects the sign of an exact-zero
//          result (mode 3 is treated as "round down" by the sign table)
//   op   - operation select; it is XORed with the operand signs to obtain the
//          effective operation (0 = add, 1 = subtract at the wrapper level)
//   a, b - packed operands
//   o    - unpacked result, not yet normalized or rounded
// ----------------------------------------------------------------------------
module DFPAddsub96(clk, ce, rm, op, a, b, o);
input clk;
input ce;
input [2:0] rm;
input op;
input DFP96 a;
input DFP96 b;
output DFP96UD o;
localparam N=25;			// number of BCD digits
// Extra stages added to the side-band delay lines that have to line up with
// the output of the clocked BCD adder/subtractor.
// NOTE(review): presumably equal to the latency of BCDAdd8NClk/BCDSub8NClk - confirm.
localparam RIP_STAGES = 3;

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

DFP96U au;
DFP96U bu;

DFPUnpack96 u00 (a, au);
DFPUnpack96 u01 (b, bu);

// Ordered operands for the BCD adder / subtractor (registered at clock #10).
reg [(N+1)*4-1:0] oaa10;
reg [(N+1)*4-1:0] obb10;
wire [(N+1)*4-1:0] oss10;
wire oss10c;

BCDAdd8NClk #(.N((N+2)/2)) ubcdadn1
(
	.clk(clk),
	.a({8'h00,oaa10}),
	.b({8'h00,obb10}),
	.o(oss10),
	.ci(1'b0),
	.co(oss10c)
);

wire [(N+1)*4-1:0] odd10;
wire odd10c;

BCDSub8NClk #(.N((N+2)/2)) ubcdsdn1
(
	.clk(clk),
	.a({8'h00,oaa10}),
	.b({8'h00,obb10}),
	.o(odd10),
	.ci(1'b0),
	.co(odd10c)
);

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #1
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// NOTE(review): these three registers are not gated by ce, unlike every other
// register in the pipeline. Left as found; confirm whether that is intended.
reg op1;
reg az, bz;
always_ff @(posedge clk)
	op1 <= op;
always_ff @(posedge clk)
	az <= au.sig==100'd0 && au.exp==12'd0;
always_ff @(posedge clk)
	bz <= bu.sig==100'd0 && bu.exp==12'd0;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #2
//
// Figure out which operation is really needed: an add or a subtract?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, sign of larger
// -a +  b = sub, sign of larger
// -a + -b = add,-
//  a -  b = sub, sign of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, sign of larger
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg realOp2;
reg op2;
reg [13:0] xa2, xb2;
reg az2, bz2;
reg xa_gt_xb2;
reg [N*4-1:0] siga2, sigb2;
reg sigeq, siga_gt_sigb;
reg expeq;

// Non-blocking assignment: realOp2 is a pipeline register read by other
// clocked processes, so a blocking assignment here would be a simulation race.
always_ff @(posedge clk)
	if (ce) realOp2 <= op1 ^ au.sign ^ bu.sign;
always_ff @(posedge clk)
	if (ce) op2 <= op1;
always_ff @(posedge clk)
	if (ce) xa2 <= au.exp;
always_ff @(posedge clk)
	if (ce) xb2 <= bu.exp;
always_ff @(posedge clk)
	if (ce) siga2 <= au.sig;
always_ff @(posedge clk)
	if (ce) sigb2 <= bu.sig;
always_ff @(posedge clk)
	if (ce) az2 <= az;
always_ff @(posedge clk)
	if (ce) bz2 <= bz;
always_ff @(posedge clk)
	if (ce)
		xa_gt_xb2 <= au.exp > bu.exp;

always_ff @(posedge clk)
	if (ce) sigeq <= au.sig==bu.sig;
always_ff @(posedge clk)
	if (ce) siga_gt_sigb <= au.sig > bu.sig;
always_ff @(posedge clk)
	if (ce) expeq <= au.exp==bu.exp;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #3
//
// Find out if the result will be zero.
// Determine which fraction to denormalize
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
//
reg [11:0] xa3, xb3;
reg resZero3;
wire aInf3, bInf3;
reg xa_gt_xb3;
reg a_gt_b3;
reg op3;
wire sa3, sb3;
wire [2:0] rm3;
reg [N*4-1:0] mfs3;

always_ff @(posedge clk)
	if (ce) resZero3 <= (realOp2 & expeq & sigeq) ||	// subtract, same magnitude
			   (az2 & bz2);		// both a,b zero
always_ff @(posedge clk)
	if (ce) xa3 <= xa2;
always_ff @(posedge clk)
	if (ce) xb3 <= xb2;
always_ff @(posedge clk)
	if (ce) xa_gt_xb3 <= xa_gt_xb2;
always_ff @(posedge clk)
	if (ce) a_gt_b3 <= xa_gt_xb2 | (expeq & siga_gt_sigb);
always_ff @(posedge clk)
	if (ce) op3 <= op2;
// mfs3 is the significand that will be shifted: the one with the smaller exponent.
always_ff @(posedge clk)
	if (ce) mfs3 <= xa_gt_xb2 ? sigb2 : siga2;

ft_delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(au.sign), .o(sa3));
ft_delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(bu.sign), .o(sb3));
ft_delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3));
// The infinity flags come from the unpacked operands. The original connected
// undeclared (implicit, undriven) nets aInf/bInf here, so the inf +/- inf
// detection further down never saw a defined value.
ft_delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(au.infinity), .o(aInf3));
ft_delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bu.infinity), .o(bInf3));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #4
//
// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

reg [11:0] xa4, xb4;
reg [11:0] xo4;
reg xa_gt_xb4;

always_ff @(posedge clk)
	if (ce) xa4 <= xa3;
always_ff @(posedge clk)
	if (ce) xb4 <= xb3;
always_ff @(posedge clk)
	if (ce) xo4 <= resZero3 ? 12'd0 : xa_gt_xb3 ? xa3 : xb3;
always_ff @(posedge clk)
	if (ce) xa_gt_xb4 <= xa_gt_xb3;

// Compute output sign.
// so4 is purely combinational (a function of the clock #3 values), so blocking
// assignments are used inside always_comb.
reg so4;
always_comb
	case ({resZero3,sa3,op3,sb3})	// synopsys full_case parallel_case
	4'b0000: so4 = 0;			// + + + = +
	4'b0001: so4 = !a_gt_b3;	// + + - = sign of larger
	4'b0010: so4 = !a_gt_b3;	// + - + = sign of larger
	4'b0011: so4 = 0;			// + - - = +
	4'b0100: so4 = a_gt_b3;		// - + + = sign of larger
	4'b0101: so4 = 1;			// - + - = -
	4'b0110: so4 = 1;			// - - + = -
	4'b0111: so4 = a_gt_b3;		// - - - = sign of larger
	4'b1000: so4 = 0;			//  A +  B, sign = +
	4'b1001: so4 = (rm3==3'd3);	//  A + -B, sign = + unless rounding down
	4'b1010: so4 = (rm3==3'd3);	//  A -  B, sign = + unless rounding down
	4'b1011: so4 = 0;			//  A - -B, sign = +
	4'b1100: so4 = (rm3==3'd3);	// -A +  B, sign = + unless rounding down
	4'b1101: so4 = 1;			// -A + -B, sign = -
	4'b1110: so4 = 1;			// -A -  B, sign = -
	4'b1111: so4 = (rm3==3'd3);	// -A - -B, sign = + unless rounding down
	endcase

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #5
//
// Compute the difference in exponents, provides shift amount
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [11:0] xdiff5;
always_ff @(posedge clk)
	if (ce) xdiff5 <= xa_gt_xb4 ? xa4 - xb4 : xb4 - xa4;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #6
//
// Clamp the shift amount.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// A difference larger than N digits shifts every digit of the significand
// out, so there is no need to keep track of a difference greater than N.
reg [6:0] xdif6;
wire [N*4-1:0] mfs6;
always_ff @(posedge clk)
	if (ce) xdif6 <= xdiff5 > N ? N : xdiff5[6:0];
ft_delay #(.WID(N*4), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #7
//
// Determine the sticky bit. The sticky bit is the bitwise or of all the bits
// being shifted out the right side. The sticky bit is computed here to
// reduce the number of regs required.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg sticky6;
wire sticky7;
wire [8:0] xdif7;			// same width as xdif6a / the delay1 below
wire [N*4-1:0] mfs7;
wire [8:0] xdif6a = {xdif6,2'b00};	// *4: shift amount in bits
integer n;
// NOTE(review): the test n <= xdif6a also includes nybbles that are not
// shifted out of {mfs7,4'b0} (for example nybble 0 when the shift is zero).
// Behaviour kept as found; confirm the intended sticky window.
always @*
begin
	sticky6 = 1'b0;
	for (n = 0; n < N*4; n = n + 4)
		if (n <= xdif6a)
			sticky6 = sticky6| mfs6[n]|mfs6[n+1]|mfs6[n+2]|mfs6[n+3];	// non-zero nybble
end

// register inputs to shifter and shift
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky6), .o(sticky7) );
delay1 #(9) d15(.clk(clk), .ce(ce), .i(xdif6a), .o(xdif7) );
delay1 #(N*4) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) );

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #8
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4-1:0] md8;
wire [N*4-1:0] siga8, sigb8;
wire xa_gt_xb8;
wire a_gt_b8;
wire op8;
// Shift with one extra low-order digit appended; the sticky bit is ORed into bit 0.
always_ff @(posedge clk)
	if (ce) md8 <= ({mfs7,4'b0} >> xdif7)|sticky7;	// xdif7 is a multiple of four

// sync control signals
ft_delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8));
ft_delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8));
ft_delay #(.WID(N*4), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8));
ft_delay #(.WID(N*4), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8));
ft_delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #9
// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4-1:0] oa9, ob9;
reg a_gt_b9;
always_ff @(posedge clk)
	if (ce) oa9 <= xa_gt_xb8 ? {siga8,4'b0} : md8;
always_ff @(posedge clk)
	if (ce) ob9 <= xa_gt_xb8 ? md8 : {sigb8,4'b0};
always_ff @(posedge clk)
	if (ce) a_gt_b9 <= a_gt_b8;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #10
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire realOp10;
wire [11:0] xo10;

// The larger magnitude always goes to the first adder/subtractor input.
always_ff @(posedge clk)
	if (ce) oaa10 <= a_gt_b9 ? oa9 : ob9;
always_ff @(posedge clk)
	if (ce) obb10 <= a_gt_b9 ? ob9 : oa9;
ft_delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10));
ft_delay #(.WID(12), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #11
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire [(N+1)*4-1:0] mab11;
wire mab11c;
wire [N*4-1:0] siga11, sigb11;
wire abInf11;
wire aNan11, bNan11;
wire xoinf11;
wire op11;

ft_delay #(.WID(1), .DEP(8+RIP_STAGES)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11));
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11c (.clk(clk), .ce(ce), .i(au.nan), .o(aNan11));
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11d (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan11));
ft_delay #(.WID(1), .DEP(3+RIP_STAGES)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11));
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11));
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11));
// xo10 is 12 bits wide, so the original comparison against 14'h2FFF could
// never be true. 12'hBFF is the infinity/NaN exponent (infXp) used by the
// 96-bit divide and multiply modules.
ft_delay #(.WID(1), .DEP(1+RIP_STAGES)) udly11h (.clk(clk), .ce(ce), .i(xo10==12'hBFF), .o(xoinf11));
// Pick the subtractor result for an effective subtract, the adder result otherwise.
ft_delay #(.WID((N+1)*4+1), .DEP(1+RIP_STAGES)) udly11i (.clk(clk), .ce(ce), .i(realOp10 ? {odd10c,odd10} : {oss10c,oss10}), .o({mab11c,mab11}));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #12+RIP_STAGES
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4*2-1:0] mo12;	// mantissa output
reg nan12;
reg qnan12;
reg infinity12;

always_ff @(posedge clk)
	if (ce)
		nan12 <= aNan11|bNan11;

always_ff @(posedge clk)
	if (ce) begin
		infinity12 <= 1'b0;
		qnan12 <= 1'b0;
		casez({abInf11,aNan11,bNan11,xoinf11})
		4'b1???:	// inf +/- inf - generate QNaN on subtract, inf on add
			if (op11) begin
				mo12 <= {4'h9,{(N+1)*4*2-4{1'd0}}};
				qnan12 <= 1'b1;
			end
			else begin
				mo12 <= {(N+1)*2{4'h9}};
				infinity12 <= 1'b1;
			end
		// NaN operand: pass its payload through.
		// NOTE(review): the [87:0] slice is narrower than the N*4-bit
		// significand; kept as found - confirm the intended payload width.
		4'b01??:	mo12 <= {4'b0,siga11[87:0],{(N+1)*4{1'd0}}};
		4'b001?: 	mo12 <= {4'b0,sigb11[87:0],{(N+1)*4{1'd0}}};
		4'b0001:	begin mo12 <= {(N+1)*4*2{1'd0}}; infinity12 <= 1'b1; end
		default:	mo12 <= {3'b0,mab11c,mab11,{N*4{1'd0}}};	// mab has an extra lead bit and four trailing bits
		endcase
	end

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #13
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ft_delay #(.WID(1), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(nan12), .o(o.nan) );
ft_delay #(.WID(1), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(qnan12), .o(o.qnan) );
ft_delay #(.WID(1), .DEP(1)) u13e (.clk(clk), .ce(ce), .i(infinity12), .o(o.infinity) );
ft_delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(o.sign));
ft_delay #(.WID(12), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(o.exp));
ft_delay #(.WID((N+1)*4*2), .DEP(1)) u13f (.clk(clk), .ce(ce), .i(mo12), .o(o.sig));
ft_delay #(.WID(1), .DEP(1)) udly13g (.clk(clk), .ce(ce), .i(1'b0), .o(o.snan));

endmodule
|
|
// ----------------------------------------------------------------------------
// DFPAddsub96nr
//   Add/subtract followed by normalization and rounding: chains DFPAddsub96,
//   DFPNormalize96 and DFPRound96 and returns a packed result.
// ----------------------------------------------------------------------------
module DFPAddsub96nr(clk, ce, rm, op, a, b, o);
input clk;			// system clock
input ce;			// core clock enable
input [2:0] rm;		// rounding mode
input op;			// operation 0 = add, 1 = subtract
input DFP96 a;		// operand a
input DFP96 b;		// operand b
output DFP96 o;		// output

wire DFP96UD sum_raw;	// adder output: unpacked, not normalized
wire DFP96UN sum_norm;	// normalizer output, input to the rounder

// Stage 1: raw add/subtract.
DFPAddsub96 u1
(
	.clk(clk),
	.ce(ce),
	.rm(rm),
	.op(op),
	.a(a),
	.b(b),
	.o(sum_raw)
);

// Stage 2: normalize; the underflow input is tied off.
DFPNormalize96 u2
(
	.clk(clk),
	.ce(ce),
	.under_i(1'b0),
	.i(sum_raw),
	.o(sum_norm)
);

// Stage 3: round according to rm and repack.
DFPRound96 u3
(
	.clk(clk),
	.ce(ce),
	.rm(rm),
	.i(sum_norm),
	.o(o)
);

endmodule
/verilog2/DFPCompare96.sv
0,0 → 1,89
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPCompare96.sv |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
// ----------------------------------------------------------------------------
// DFPCompare96
//   Combinational comparison of two packed 96-bit decimal floats.
//
//   Result vector o:
//     o[0]  a == b (both zero counts as equal; never set when unordered)
//     o[1]  a <  b
//     o[2]  a <= b
//     o[3]  {exp,sig} of a less than {exp,sig} of b (signs ignored)
//     o[4]  unordered (either operand is a NaN)
//     o[5]  complement of o[0]
//     o[6]  complement of o[1]
//     o[7]  complement of o[2]
//     o[8]  complement of o[3]
//     o[9]  complement of o[4]
//     o[10] always 0
//     o[11] same as o[1]
// ----------------------------------------------------------------------------
module DFPCompare96(a, b, o);
input DFP96 a;
input DFP96 b;
output reg [11:0] o ='d0;
localparam N=34;			// number of BCD digits (not referenced in this module)

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

DFP96U au;
DFP96U bu;

DFPUnpack96 u00 (a, au);
DFPUnpack96 u01 (b, bu);

// Operand signs.
wire sa;
wire sb;
assign sa = au.sign;
assign sb = bu.sign;

// An operand is zero when neither its exponent nor its significand has a bit set.
wire az = ~|{au.exp,au.sig};
wire bz = ~|{bu.exp,bu.sig};
wire both_zero = az & bz;

// Either operand being a NaN makes the comparison unordered.
wire unordered = au.nan | bu.nan;

// Equal: identical bit patterns, or both zero (special test for zero).
wire eq = !unordered & (both_zero || (a==b));

// Comparisons of exponent and significand taken together as one unsigned value.
wire gt1 = {au.exp,au.sig} > {bu.exp,bu.sig};
wire lt1 = {au.exp,au.sig} < {bu.exp,bu.sig};

// Signed less-than: with differing signs the negative operand is smaller
// (unless both are zero); with equal signs the magnitude test decides, and it
// is reversed when both operands are negative.
wire signs_differ = sa ^ sb;
wire lt = signs_differ ? (sa & !both_zero) : (sa ? gt1 : lt1);

// Assemble the whole flag vector at once, bit 11 first.
always_comb
	o = {
		lt,				// [11]
		1'b0,			// [10]
		~unordered,		// [9]
		~lt1,			// [8]
		~(lt|eq),		// [7]
		~lt,			// [6]
		~eq,			// [5]
		unordered,		// [4]
		lt1,			// [3]
		lt|eq,			// [2]
		lt,				// [1]
		eq				// [0]
	};

// an unordered comparison will signal a nan exception
//assign nanx = op!=`FCOR && op!=`FCUN && unordered;

endmodule
/verilog2/DFPDivide96.sv
0,0 → 1,255
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPDivide96.sv |
// - decimal floating point divider |
// - parameterized width |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// Floating Point Divider |
// |
//Properties: |
//+-inf / +-inf = QNaN |
//+-inf * 0 = QNaN |
//+-0 / +-0 = QNaN |
// ============================================================================ |
|
import DFPPkg::*; |
|
`define QINFDIV 4'd2 |
`define QZEROZERO 4'd3 |
|
// ----------------------------------------------------------------------------
// DFPDivide96
//   Decimal floating-point divider for the 96-bit format. The operands are
//   unpacked and registered, the significands are handed to the sequential
//   dfdiv core, and once the core reports done the exponent, significand and
//   status flags of the (not yet normalized) result are registered.
//
// Ports:
//   rst       - synchronous reset of the result registers
//   clk, ce   - clock and clock enable
//   ld        - start a division (delayed one clock before reaching dfdiv)
//   op        - not referenced in this module
//   a, b      - packed dividend and divisor
//   o         - unpacked, un-normalized quotient
//   done      - registered completion flag
//   sign_exe  - set when both operands are negative
//   overflow  - tied to the constant 'over' (always 0 here)
//   underflow - set when the computed exponent went negative
// ----------------------------------------------------------------------------
module DFPDivide96(rst, clk, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow);
parameter N=25;				// number of BCD digits in the significand
input rst;
input clk;
input ce;
input ld;
input op;
input DFP96 a, b;
output DFP96UD o;
output reg done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe=0;
reg overflow=0;
reg underflow=0;

reg [11:0] xo;				// result exponent
reg [(N+1)*4*2-1:0] mo;		// result significand

DFP96U au, bu;
DFPUnpack96 u01 (a, au);
DFPUnpack96 u02 (b, bu);

// constants
wire [11:0] infXp = 12'hBFF;	// exponent value used for infinity / NaN
wire [11:0] bias = 12'h5FF;		// exponent bias
// The following is a template for a quiet nan (leading digit = 1).
wire [N*4-1:0] qNaN  = {4'h1,{(N-1)*4{1'b0}}};

// variables
wire [(N+2)*4*2-1:0] divo;

// Operands
reg sa, sb;			// sign bit
reg [N*4-1:0] siga, sigb;
reg az, bz;
reg aInf, bInf;
reg aNan,bNan;
wire done1;
wire signed [7:0] lzcnt;

// -----------------------------------------------------------
// Clock #1
// - decode the input operands
// - derive basic information
// -----------------------------------------------------------
wire ld1;
always_ff @(posedge clk)
	if (ce) sa <= au.sign;
always_ff @(posedge clk)
	if (ce) sb <= bu.sign;
always_ff @(posedge clk)
	if (ce) siga <= au.sig;
always_ff @(posedge clk)
	if (ce) sigb <= bu.sig;
always_ff @(posedge clk)
	if (ce) az <= au.exp==12'd0 && au.sig==100'd0;
always_ff @(posedge clk)
	if (ce) bz <= bu.exp==12'd0 && bu.sig==100'd0;
always_ff @(posedge clk)
	if (ce) aInf <= au.infinity;
always_ff @(posedge clk)
	if (ce) bInf <= bu.infinity;
always_ff @(posedge clk)
	if (ce) aNan <= au.nan;
always_ff @(posedge clk)
	if (ce) bNan <= bu.nan;
ft_delay #(.WID(1), .DEP(1)) udly1 (.clk(clk), .ce(ce), .i(ld), .o(ld1));

// -----------------------------------------------------------
// Clock #2 to N
// - calculate fraction
// -----------------------------------------------------------
wire done3a,done3;
// Perform divide
dfdiv #(N+2) u2 (.clk(clk), .ld(ld1), .a({siga,8'b0}), .b({sigb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
// lzcnt is treated as two BCD digits; convert it to binary.
wire [7:0] lzcnt_bin = lzcnt[3:0] + (lzcnt[7:4] * 10);
// Shift the quotient left by the leading-zero digit count plus N digits.
wire [(N+2)*4*2-1:0] divo1 = divo[(N+2)*4*2-1:0] << ({lzcnt_bin,2'b0}+N*4);//WAS FPWID=128?+44
ft_delay #(.WID(1), .DEP(3)) u3 (.clk(clk), .ce(ce), .i(done1), .o(done3a));
// done3 is asserted once done1 has been high for the delay above and still is.
assign done3 = done1&done3a;

// -----------------------------------------------------------
// Clock #N+1
// - calculate exponent
// - determine when a NaN is output
// -----------------------------------------------------------
// Compute the exponent.
// - difference of the operand exponents, re-biased
// - reduced by the number of leading zero digits in the quotient
reg [13:0] ex1;	// exponent difference, two extra bits to detect a negative result
reg qNaNOut;

always_ff @(posedge clk)
	if (ce) ex1 <= au.exp - bu.exp + bias - lzcnt_bin;

// 0/0 and inf/inf produce a quiet NaN.
always_ff @(posedge clk)
	if (ce) qNaNOut <= (az&bz)|(aInf&bInf);

wire over = 1'b0;
wire under = &ex1[13:12];	// both extra bits set: the subtraction wrapped below zero

// -----------------------------------------------------------
// Clock #N+3
// -----------------------------------------------------------
always_ff @(posedge clk)
// Simulation likes to see these values reset to zero on reset. Otherwise the
// values propagate in sim as X's.
if (rst) begin
	xo <= 1'd0;
	mo <= 1'd0;
	sign_exe <= 1'd0;
	overflow <= 1'd0;
	underflow <= 1'd0;
	done <= 1'b1;
end
else if (ce) begin
	done <= 1'b0;
	if (done3) begin
		done <= 1'b1;

		casez({qNaNOut|aNan|bNan,bInf,bz,over,under})
		5'b1????:		xo <= infXp;	// NaN exponent value
		5'b01???:		xo <= 1'd0;		// divide by inf
		5'b001??:		xo <= infXp;	// divide by zero
		5'b0001?:		xo <= infXp;	// overflow
		5'b00001:		xo <= 1'd0;		// underflow
		default:		xo <= ex1[11:0];	// normal case: low 12 bits of the computed exponent
		endcase

		// The NaN patterns are padded with (N+1)*4 zeros so that the whole
		// concatenation is exactly as wide as mo and every BCD digit sits on
		// a nybble boundary (the original padded with one bit less, which
		// shifted all digits by one bit position).
		casez({aNan,bNan,qNaNOut,bInf,bz,over,aInf&bInf,az&bz})
		8'b1???????:	mo <= {4'h1,au[N*4-1:0],{(N+1)*4{1'b0}}};
		8'b01??????:	mo <= {4'h1,bu[N*4-1:0],{(N+1)*4{1'b0}}};
		8'b001?????:	mo <= {4'h1,qNaN[N*4-1:0]|{aInf,1'b0}|{az,bz},{(N+1)*4{1'b0}}};
		8'b0001????:	mo <= {(N+1)*4*2{1'd0}};	// div by inf
		8'b00001???:	mo <= {(N+1)*4*2{1'd0}};	// div by zero
		8'b000001??:	mo <= {(N+1)*4*2{1'd0}};	// Inf exponent
		8'b0000001?:	mo <= {4'h1,qNaN|`QINFDIV,{(N+1)*4{1'b0}}};		// infinity / infinity
		8'b00000001:	mo <= {4'h1,qNaN|`QZEROZERO,{(N+1)*4{1'b0}}};	// zero / zero
		default:		mo <= divo1[(N+2)*4*2-1:8];	// plain div
		endcase

		sign_exe <= sa & sb;
		overflow <= over;
		underflow <= under;

		// NOTE(review): the NaN condition drives o.snan while o.qnan is held at
		// zero, although the patterns above are built from the quiet NaN
		// template. Kept as found - confirm which flag is intended.
		o.nan <= aNan|bNan|qNaNOut;
		o.snan <= aNan|bNan|qNaNOut;
		o.qnan <= 1'b0;
		o.infinity <= over|aInf;
		o.sign <= sa ^ sb;
		// xo and mo are read before this edge's updates take effect, so o
		// carries the values they held on the previous enabled clock.
		o.exp <= xo;
		o.sig <= mo;
	end
end

endmodule
|
// ----------------------------------------------------------------------------
// DFPDivide96nr
//   Divide followed by normalization and rounding: chains DFPDivide96,
//   DFPNormalize96 and DFPRound96. The status flags of the divider are
//   delayed with delay2 before being output.
//
// Ports:
//   rst, clk, ce - reset, clock, clock enable
//   ld           - start a division
//   op           - passed through to DFPDivide96
//   a, b         - packed dividend and divisor
//   o            - packed, normalized and rounded quotient
//   rm           - rounding mode for DFPRound96
//   done         - high when the divider's done flag is high and has been
//                  high for the delay applied below
//   sign_exe, inf, overflow, underflow - delayed status flags
// ----------------------------------------------------------------------------
module DFPDivide96nr(rst, clk, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow);
parameter N=25;
input rst;
input clk;
input ce;
input ld;
input op;
input DFP96 a, b;
output DFP96 o;
input [2:0] rm;
output sign_exe;
output done;
output inf;
output overflow;
output underflow;

DFP96UD o1;
wire sign_exe1, inf1, overflow1, underflow1;
DFP96UN fpn0;
wire done1, done1a;

DFPDivide96 #(.N(N)) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1);
// DFPDivide96 has no separate infinity port, so inf1 was left undriven and
// the inf output was a delayed floating net. Take the flag from the
// infinity field of the divider's result instead.
assign inf1 = o1.infinity;
DFPNormalize96 #(.N(N)) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );
DFPRound96 #(.N(N)) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));
// Hold off done until the divider's done flag has propagated through the
// delay line below and is still asserted.
ft_delay #(.WID(1),.DEP(11)) u8(.clk(clk), .ce(ce), .i(done1), .o(done1a));
assign done = done1&done1a;

endmodule
|
/verilog2/DFPMultiply96.sv
0,0 → 1,246
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPMultiply96.sv |
// - decimal floating point multiplier |
// - parameterized width |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// |
// Floating Point Multiplier |
// |
// Properties: |
// +-inf * +-inf = -+inf (this is handled by exOver) |
// +-inf * 0 = QNaN |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
//`define DFPMUL_PARALLEL 1'b1 |
|
module DFPMultiply96(clk, ce, ld, a, b, o, sign_exe, inf, overflow, underflow, done); |
localparam N=25; |
localparam DELAY = 2; |
input clk; |
input ce; |
input ld; |
input DFP96 a, b; |
output DFP96UD o; |
output sign_exe; |
output inf; |
output overflow; |
output underflow; |
output done; |
|
reg [11:0] xo1; // extra bit for sign |
reg [N*4*2-1:0] mo1; |
|
// constants |
wire [11:0] infXp = 12'hBFF; // infinite / NaN - all ones |
wire [11:0] bias = 12'h5FF; |
// The following is the value for an exponent of zero, with the offset |
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc. |
// The following is a template for a quiet nan. (MSB=1) |
wire [N*4-1:0] qNaN = {4'h1,{96{1'b0}}}; |
|
// variables |
reg [N*4*2-1:0] sig1; |
wire [13:0] ex2; |
|
DFP96U au, bu; |
DFPUnpack96 u01 (a, au); |
DFPUnpack96 u02 (b, bu); |
|
// Decompose the operands |
wire sa, sb; // sign bit |
wire [14:0] xa, xb; // exponent bits |
wire sxa, sxb; |
wire [N*4-1:0] siga, sigb; |
wire a_dn, b_dn; // a/b is denormalized |
wire aNan1, bNan1; |
wire az, bz; |
wire aInf1, bInf1; |
|
assign siga = au.sig; |
assign sigb = bu.sig; |
assign az = au.exp==12'h0 && au.sig==100'd0; |
assign bz = bu.exp==12'h0 && bu.sig==100'd0; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #1 |
// - decode the input operands |
// - derive basic information |
// - calculate exponent |
// - calculate fraction |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
// ----------------------------------------------------------- |
// First clock |
// Compute the sum of the exponents. |
// ----------------------------------------------------------- |
|
wire under, over; |
wire [13:0] sum_ex = au.exp + bu.exp - bias; |
reg sx0; |
wire done1; |
assign under = &sum_ex[13:12]; |
assign over = sum_ex > 14'hBFF && !under; |
|
wire [N*4*2-1:0] sigoo; |
`ifdef DFPMUL_PARALLEL |
BCDMul32 u1f (.a({20'h0,siga}),.b({20'h0,sigb}),.o(sigoo)); |
`else |
dfmul #(.N(N)) u1g |
( |
.clk(clk), |
.ld(ld), |
.a(siga), |
.b(sigb), |
.p(sigoo), |
.done(done1) |
); |
`endif |
|
always_ff @(posedge clk) |
if (ce) sig1 <= sigoo[N*4*2-1:0]; |
|
// Status |
wire under1, over1; |
|
ft_delay #(.WID(12),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(sum_ex[11:0]), .o(ex2) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(au.infinity), .o(aInf1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bu.infinity), .o(bInf1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u6 (.clk(clk), .ce(ce), .i(under), .o(under1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u7 (.clk(clk), .ce(ce), .i(over), .o(over1) ); |
|
// determine when a NaN is output |
wire qNaNOut; |
wire DFP96U a1,b1; |
wire asnan, bsnan, aqnan, bqnan; |
ft_delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((au.infinity&bz)|(bu.infinity&az)), .o(qNaNOut) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(au.nan), .o(aNan1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan1) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u18 (.clk(clk), .ce(ce), .i(au.snan), .o(asnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u19 (.clk(clk), .ce(ce), .i(bu.snan), .o(bsnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u18a (.clk(clk), .ce(ce), .i(au.qnan), .o(aqnan) ); |
ft_delay #(.WID(1),.DEP(DELAY)) u19a (.clk(clk), .ce(ce), .i(bu.qnan), .o(bqnan) ); |
ft_delay #(.WID($bits(a1)),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
ft_delay #(.WID($bits(b1)),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
|
// ----------------------------------------------------------- |
// Second clock |
// - correct xponent and mantissa for exceptional conditions |
// ----------------------------------------------------------- |
|
wire so1, sx1; |
reg [3:0] st; |
wire done1a; |
|
ft_delay #(.WID(1),.DEP(1)) u8 (.clk(clk), .ce(ce), .i(au.sign ^ bu.sign), .o(so1) );// two clock delay! |
|
always_ff @(posedge clk) |
if (ce) |
casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1}) |
5'b1????: xo1 = infXp; // qNaN - infinity * zero |
5'b01???: xo1 = infXp; // 'a' infinite |
5'b001??: xo1 = infXp; // 'b' infinite |
5'b0001?: xo1 = infXp; // result overflow |
5'b00001: xo1 = ex2[11:0];//0; // underflow |
default: xo1 = ex2[11:0]; // situation normal |
endcase |
|
// Force mantissa to zero when underflow or zero exponent when not supporting denormals. |
always_ff @(posedge clk) |
if (ce) |
casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1}) |
6'b1?????: mo1 = {4'h1,a1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b01????: mo1 = {4'h1,b1[N*4-4-1:0],{N*4{1'b0}}}; |
6'b001???: mo1 = {4'h1,qNaN|3'd4,{N*4{1'b0}}}; // multiply inf * zero |
6'b0001??: mo1 = 0; // mul inf's |
6'b00001?: mo1 = 0; // mul inf's |
6'b000001: mo1 = 0; // mul overflow |
default: mo1 = sig1; |
endcase |
|
ft_delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) ); |
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) ); |
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) ); |
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) ); |
ft_delay #(.WID(1),.DEP(3)) u18b (.clk(clk), .ce(ce), .i(done1), .o(done1a) ); |
|
assign o.nan = aNan1|bNan1|qNaNOut; |
assign o.qnan = qNaNOut|aqnan|bqnan; |
assign o.snan = qNaNOut ? 1'b0 : asnan|bsnan; |
assign o.infinity = aInf1|bInf1|over; |
assign o.sign = so1; |
assign o.exp = xo1; |
assign o.sig = {mo1,8'h00}; |
assign done = done1&done1a; |
|
endmodule |
|
|
// Multiplier with normalization and rounding. |
|
// DFPMultiply96nr: chains the multiplier core (DFPMultiply96), the normalizer |
// (DFPNormalize96) and the rounder (DFPRound96) so that packed operands go in |
// and a packed, rounded product comes out. |
//   clk, ce   - clock and clock enable shared by every stage |
//   ld        - passed straight to the multiplier core |
//   a, b      - packed DFP96 operands |
//   o         - packed DFP96 result taken from DFPRound96 |
//   rm        - rounding mode handed to DFPRound96 |
//   sign_exe, inf, overflow, underflow - the core's status flags, each passed |
//               through a delay2 instance |
//   done      - done1 from the core ANDed with a copy of itself taken from a |
//               12-deep ft_delay |
module DFPMultiply96nr(clk, ce, ld, a, b, o, rm, sign_exe, inf, overflow, underflow, done); |
localparam N=25; |
input clk; |
input ce; |
input ld; |
input DFP96 a, b; |
output DFP96 o; |
input [2:0] rm; |
output sign_exe; |
output inf; |
output overflow; |
output underflow; |
output done; |
|
// o1 is the double-width product from the core, fpn0 the normalizer output. |
wire done1, done1a; |
DFP96UD o1; |
wire sign_exe1, inf1, overflow1, underflow1; |
DFP96UN fpn0; |
|
// The core is connected by position; the normalizer's under_o and inexact_o |
// ports are left unconnected. |
DFPMultiply96 u1 (clk, ce, ld, a, b, o1, sign_exe1, inf1, overflow1, underflow1, done1); |
DFPNormalize96 u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
DFPRound96 u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
// NOTE(review): presumably delay2 is a two-stage delay; that is far shorter than |
// the normalize + round path, so confirm these flags line up with o. |
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe)); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow)); |
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow)); |
ft_delay #(.WID(1),.DEP(12)) u10 (.clk(clk), .ce(ce), .i(done1), .o(done1a) ); |
// done requires both the current and the delayed done1 to be high. |
assign done = done1 & done1a; |
|
endmodule |
/verilog2/DFPNormalize96.sv
0,0 → 1,331
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPNormalize96.sv |
// - decimal floating point normalization unit |
// - eight cycle latency |
// - parameterized width |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// This unit takes a floating point number in an intermediate |
// format and normalizes it. No normalization occurs |
// for NaNs or infinities. The unit has an eight cycle latency. |
// |
// The mantissa is assumed to start with two whole bits on |
// the left. The remaining bits are fractional. |
// |
// The width of the incoming format is reduced via a generation |
// of sticky bit in place of the low order fractional bits. |
// |
// On an underflowed input, the incoming exponent is assumed |
// to be negative. A right shift is needed. |
// ============================================================================ |
|
import DFPPkg::*; |
|
// DFPNormalize96: pipelined normalizer for the expanded (double-width |
// significand) intermediate format. |
//   clk, ce   - clock and clock enable for every pipeline register |
//   i         - DFP96UD input |
//   o         - DFP96UN output: flags and sign delayed alongside the adjusted |
//               exponent and shifted significand |
//   under_i   - underflow indication; a delayed copy selects the right-shift path |
//   under_o   - the delayed copy of under_i that leaves the pipeline |
//   inexact_o - delayed OR of the low-order significand bits dropped at clock #4 |
module DFPNormalize96(clk, ce, i, o, under_i, under_o, inexact_o); |
parameter N=25; |
input clk; |
input ce; |
input DFP96UD i; // expanded format input |
output DFP96UN o; // normalized output + guard, sticky and round bits, + 1 whole digit |
input under_i; |
output under_o; |
output inexact_o; |
|
integer n; |
// ---------------------------------------------------------------------------- |
// No Clock required |
// - plain combinational copies of the input fields |
// ---------------------------------------------------------------------------- |
reg [11:0] xo0; |
reg so0; |
// NOTE(review): sx0 is declared but never assigned in this module; it only feeds u66. |
reg sx0; |
reg nan0, qnan0, snan0; |
reg inf0; |
|
always_comb |
xo0 <= i.exp; |
always_comb |
so0 <= i.sign; // sign doesn't change |
always_comb |
nan0 <= i.nan; |
always_comb |
qnan0 <= i.qnan; |
always_comb |
snan0 <= i.snan; |
always_comb |
inf0 <= i.infinity; |
|
// ---------------------------------------------------------------------------- |
// Clock #1 |
// - Capture exponent information |
// - xInf1a/xInf1b flag exponents 12'hBFF / 12'hBFE when not underflowed, |
//   xInf1c flags 12'hBFF regardless of under_i |
// ---------------------------------------------------------------------------- |
reg xInf1a, xInf1b, xInf1c; |
DFP96UD i1; |
always_ff @(posedge clk) |
if (ce) |
i1 <= i; |
|
always_ff @(posedge clk) |
if (ce) xInf1a <= xo0==12'hBFF & !under_i; |
always_ff @(posedge clk) |
if (ce) xInf1b <= xo0==12'hBFE & !under_i; |
always_ff @(posedge clk) |
if (ce) xInf1c <= xo0==12'hBFF; |
|
// ---------------------------------------------------------------------------- |
// Clock #2 |
// - determine exponent increment |
// Since there are *three* whole digits in the incoming format |
// the number of whole digits needs to be reduced. If the MSB is |
// set, then increment the exponent and no shift is needed. |
// ---------------------------------------------------------------------------- |
wire xInf2c, xInf2b; |
wire [11:0] xo2; |
reg incExpByOne2; |
ft_delay #(.WID(1),.DEP(1)) u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c)); |
ft_delay #(.WID(1),.DEP(1)) u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b)); |
ft_delay #(.WID(12),.DEP(2)) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2)); |
// NOTE(review): under2 has no declaration in this module (implicit net) and is |
// not read anywhere in it. |
ft_delay #(.WID(1),.DEP(2)) u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2)); |
|
// Increment only when the top significand digit is non-zero and the exponent |
// was not already 12'hBFF. |
always_ff @(posedge clk) |
if (ce) incExpByOne2 <= !xInf1a & |i1.sig[207:204]; |
|
// ---------------------------------------------------------------------------- |
// Clock #3 |
// - increment exponent |
// - detect a zero mantissa |
// ---------------------------------------------------------------------------- |
|
wire incExpByOne3; |
DFP96UD i3; |
reg [11:0] xo3; |
reg zeroMan3; |
ft_delay #(.WID(1),.DEP(1)) u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3)); |
ft_delay #(.WID($bits(i3)),.DEP(3)) u33 (.clk(clk), .ce(ce), .i(i), .o(i3)); |
|
wire [11:0] xo2a = xo2 + 1'd1; |
|
always_ff @(posedge clk) |
if (ce) xo3 <= (incExpByOne2 ? xo2a : xo2); |
|
// NOTE(review): zeroMan3 is loaded with constant 0, so once clocked the |
// zero-mantissa branches below are not taken; no detection is performed here. |
always_ff @(posedge clk) |
if(ce) zeroMan3 <= 1'b0; |
|
// ---------------------------------------------------------------------------- |
// Clock #4 |
// - Shift mantissa left |
// - If infinity is reached then set the mantissa to zero |
// shift mantissa left to reduce to a single whole digit |
// - create sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [(N+2)*4-1:0] mo4; |
reg inexact4; |
|
// i3 is indexed as a flat vector; sig occupies the low bits of the packed struct. |
// The kept window is one digit higher when the exponent was incremented; the |
// discarded low bits are OR-reduced into the LSB as a sticky bit. |
always_ff @(posedge clk) |
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: mo4 <= 1'd0; |
2'b01: mo4 <= {i3[(N+1)*4*2-1:(N+1)*4],3'b0,|i3[(N+1)*4-1:0]}; |
default: mo4 <= {i3[(N+1)*4*2-1-4:N*4],3'b0,|i3[N*4-1:0]}; |
endcase |
|
// inexact4 repeats the same OR-reduction of the discarded bits. |
always_ff @(posedge clk) |
if(ce) |
casez({zeroMan3,incExpByOne3}) |
2'b1?: inexact4 <= 1'd0; |
2'b01: inexact4 <= |i3[(N+1)*4-1:0]; |
default: inexact4 <= |i3[N*4-1:0]; |
endcase |
|
// ---------------------------------------------------------------------------- |
// Clock edge #5 |
// - count leading zeros |
// ---------------------------------------------------------------------------- |
reg [7:0] leadingZeros5; |
wire [11:0] xo5; |
wire xInf5; |
ft_delay #(.WID(12),.DEP(2)) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5)); |
ft_delay #(.WID(1),.DEP(3)) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) ); |
|
/* Lookup table based leading zero count modules give slightly better |
performance but cases must be coded. |
generate |
begin |
if (FPWID <= 32) begin |
cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,4'b0}), .o(leadingZeros5) ); |
assign leadingZeros5[7:6] = 2'b00; |
end |
else if (FPWID<=64) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,7'h0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=80) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=84) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,23'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=96) begin |
assign leadingZeros5[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
else if (FPWID<=128) |
cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) ); |
end |
endgenerate |
*/ |
|
// Sideways add. |
// Normally there would be only one to two leading zeros. It is tempting then |
// to check for only one or two. But, denormalized numbers might have more |
// leading zeros. If denormals were not supported this could be made smaller |
// and faster. |
// The count is in digits: the loop steps four bits at a time and stops |
// counting at the first non-zero nybble. |
`ifdef SUPPORT_DENORMALS |
reg [7:0] lzc; |
reg got_one; |
always @* |
begin |
got_one = 1'b0; |
lzc = 8'h00; |
for (n = (N+2)*4-1; n >= 0; n = n - 4) begin |
if (!got_one) begin |
if (mo4[n]|mo4[n-1]|mo4[n-2]|mo4[n-3]) |
got_one = 1'b1; |
else |
lzc = lzc + 1'b1; |
end |
end |
end |
always_ff @(posedge clk) |
if (ce) leadingZeros5 <= lzc; |
`else |
wire [7:0] lead2 = mo4[(N+2)*4-1:N*4]; |
always_ff @(posedge clk) |
if (ce) |
casez(lead2) |
8'b00000000: leadingZeros5 <= 8'd2; |
8'b0000????: leadingZeros5 <= 8'd1; |
default: leadingZeros5 <= 8'd0; |
endcase |
`endif |
|
|
// ---------------------------------------------------------------------------- |
// Clock edge #6 |
// - Compute how much we want to decrement exponent by |
// - compute amount to shift left and right |
// - at infinity the exponent can't be incremented, so we can't shift right |
// otherwise it was an underflow situation so the exponent was negative |
// shift amount needs to be negated for shift register |
// If the exponent underflowed, then the shift direction must be to the |
// right regardless of mantissa bits; the number is denormalized. |
// Otherwise the shift direction must be to the left. |
// ---------------------------------------------------------------------------- |
reg [7:0] lshiftAmt6; |
reg [7:0] rshiftAmt6; |
wire rightOrLeft6; // 0=left,1=right |
wire xInf6; |
wire [11:0] xo6; |
wire [(N+2)*4-1:0] mo6; |
wire zeroMan6; |
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) ); |
ft_delay #(.WID(12),.DEP(1)) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6)); |
ft_delay #(.WID((N+2)*4),.DEP(2)) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) ); |
ft_delay #(.WID(1),.DEP(1)) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) ); |
ft_delay #(.WID(1),.DEP(3)) u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6)); |
// NOTE(review): sx5 has no declaration in this module (implicit net) and is |
// not read anywhere in it. |
ft_delay #(.WID(1),.DEP(5)) u66 (.clk(clk), .ce(ce), .i(sx0), .o(sx5) ); |
|
// The appended 2'b0 converts a digit count into a bit count (x4). The left |
// shift is capped at the exponent value. |
always_ff @(posedge clk) |
if (ce) lshiftAmt6 <= {leadingZeros5 > xo5 ? xo5 : leadingZeros5,2'b0}; |
|
// NOTE(review): $signed(xo5) is compared against an unsigned literal; confirm |
// the comparison behaves as the "negative exponent" test the comment implies. |
always_ff @(posedge clk) |
if (ce) rshiftAmt6 <= {xInf5 ? 1'd0 : $signed(xo5) > 14'd0 ? 8'd0 : ~xo5+2'd1,2'b00}; // xo2 is negative ! |
|
// ---------------------------------------------------------------------------- |
// Clock edge #7 |
// - figure exponent |
// - shift mantissa |
// - figure sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [13:0] xo7; |
wire rightOrLeft7; |
reg [(N+2)*4-1:0] mo7l, mo7r; |
reg St6,St7; |
ft_delay #(.WID(1),.DEP(1)) u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7)); |
|
wire [11:0] xo7d = xo6 - lshiftAmt6; |
|
always_ff @(posedge clk) |
if (ce) |
xo7 <= zeroMan6 ? xo6 : |
xInf6 ? xo6 : // an infinite exponent is either a NaN or infinity; no need to change |
rightOrLeft6 ? 1'd0 : // on a right shift, the exponent was negative, it's being made to zero |
xo7d; // on a left shift, the exponent can't be decremented below zero |
|
// Both shifted versions are computed; the selection happens at clock #8. |
always_ff @(posedge clk) |
if (ce) mo7r <= mo6 >> rshiftAmt6; |
always_ff @(posedge clk) |
if (ce) mo7l <= mo6 << lshiftAmt6; |
|
// The sticky bit is set if the bits shifted out on a right shift are set. |
always @* |
begin |
St6 = 1'b0; |
for (n = 0; n < (N+2)*4; n = n + 1) |
if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n]; |
end |
always_ff @(posedge clk) |
if (ce) St7 <= St6; |
|
// ---------------------------------------------------------------------------- |
// Clock edge #8 |
// - select mantissa |
// ---------------------------------------------------------------------------- |
|
wire so,sxo,nano,info,qnano,snano; |
wire [11:0] xo; |
reg [(N+2)*4-1:0] mo; |
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) ); |
ft_delay #(.WID(12),.DEP(1)) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo)); |
vtdl #(.WID(1)) u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o)); |
ft_delay #(.WID(1),.DEP(1)) u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o)); |
vtdl #(1) u86 (.clk(clk), .ce(ce), .a(4'd7), .d(nan0), .q(nano) ); |
vtdl #(1) u87 (.clk(clk), .ce(ce), .a(4'd7), .d(qnan0), .q(qnano) ); |
vtdl #(1) u88 (.clk(clk), .ce(ce), .a(4'd7), .d(snan0), .q(snano) ); |
vtdl #(1) u89 (.clk(clk), .ce(ce), .a(4'd7), .d(inf0), .q(info) ); |
|
// Right-shifted results get the sticky bit ORed in at bit 4; left-shifted |
// results are used as they are. |
always_ff @(posedge clk) |
if (ce) mo <= rightOrLeft7 ? mo7r|{St7,4'b0} : mo7l; |
|
// The lowest nybble of mo is dropped when forming the output significand. |
assign o.nan = nano; |
assign o.qnan = qnano; |
assign o.snan = snano; |
assign o.infinity = info; |
assign o.sign = so; |
assign o.exp = xo; |
assign o.sig = mo[(N+2)*4-1:4]; |
|
endmodule |
|
/verilog2/DFPPack.sv
44,7 → 44,7
wire [109:0] enc_sig; |
DPDEncodeN #(.N(11)) u1 (i.sig[131:0], enc_sig); |
|
always @* |
always_comb |
begin |
// sign |
o.sign <= i.sign; |
68,6 → 68,36
|
endmodule |
|
// DFPPack96: converts the unpacked DFP96U record into the packed DFP96 |
// storage layout (sign, combination field, exponent continuation and |
// significand continuation). |
module DFPPack96(i, o); |
input DFP96U i; |
output DFP96 o; |
|
// The low 96 significand bits are encoded by DPDEncodeN into the 80-bit |
// continuation field. |
wire [79:0] enc_sig; |
DPDEncodeN #(.N(8)) u1 (i.sig[95:0], enc_sig); |
|
// Pieces of the combination field for an ordinary (finite) number. |
wire [3:0] lead_digit = i.sig[99:96]; |
wire [1:0] exp_top = i.exp[11:10]; |
wire [4:0] finite_combo = lead_digit > 4'h7 ? {2'b11,exp_top,lead_digit[0]} : {exp_top,lead_digit[2:0]}; |
|
always_comb |
begin |
// sign |
o.sign <= i.sign; |
// combo: NaN takes priority over infinity, which takes priority over a finite value |
casez ({i.qnan|i.snan,i.infinity}) |
2'b1?: o.combo <= 5'b11111; |
2'b?1: o.combo <= 5'b11110; |
default: o.combo <= finite_combo; |
endcase |
// exponent continuation: the top bit separates quiet (0) from signalling (1) NaNs |
casez ({i.qnan,i.snan}) |
2'b1?: o.expc <= {1'b0,i.exp[8:0]}; |
2'b?1: o.expc <= {1'b1,i.exp[8:0]}; |
default: o.expc <= i.exp[9:0]; |
endcase |
// significand continuation |
o.sigc <= enc_sig; |
end |
|
endmodule |
module DFPPack64(i, o); |
input DFP64U i; |
output DFP64 o; |
75,7 → 105,7
wire [49:0] enc_sig; |
DPDEncodeN #(.N(5)) u1 (i.sig[59:0], enc_sig); |
|
always @* |
always_comb |
begin |
// sign |
o.sign <= i.sign; |
/verilog2/DFPPkg.sv
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2021 Robert Finch, Waterloo |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
40,6 → 40,9
|
`define SUPPORT_DENORMALS 1'b1 |
|
`define QINFDIV 4'd2 |
`define QZEROZERO 4'd3 |
|
typedef struct packed |
{ |
logic sign; |
57,9 → 60,33
logic [109:0] sigc; // significand continuation field |
} DFP128; |
|
// Packed 96 bit (storage) format |
// Field widths: 1 + 5 + 10 + 80 = 96 bits. |
typedef struct packed |
{ |
logic sign; |
// combination field |
logic [4:0] combo; |
logic [9:0] expc; // exponent continuation field |
logic [79:0] sigc; // significand continuation field |
} DFP96; |
|
// Scalar exponent / significand types for the 96-bit format; the widths match |
// the exp and sig fields of DFP96U. |
typedef logic [11:0] DFP96EXP; |
typedef logic [99:0] DFP96SIG; |
|
// Scalar exponent / significand types for the 128-bit format. |
typedef logic [13:0] DFP128EXP; |
typedef logic [135:0] DFP128SIG; |
|
// Unpacked 96 bit format |
// Special-value flags are carried as separate bits next to the sign, the |
// 12-bit exponent and the 100-bit (four bits per digit) significand. |
typedef struct packed |
{ |
logic nan; |
logic qnan; |
logic snan; |
logic infinity; |
logic sign; |
logic [11:0] exp; |
logic [99:0] sig; // significand 25 digits |
} DFP96U; |
|
// Unpacked 128 bit format |
typedef struct packed |
{ |
80,10 → 107,34
logic snan; |
logic infinity; |
logic sign; |
logic [11:0] exp; |
logic [103:0] sig; // significand 26 digits |
} DFP96UN; |
|
// 128-bit normalizer output to rounding, one extra digit |
// Same flag layout as the other unpacked records, with a 14-bit exponent. |
typedef struct packed |
{ |
logic nan; |
logic qnan; |
logic snan; |
logic infinity; |
logic sign; |
logic [13:0] exp; |
logic [139:0] sig; // significand 35 digits |
} DFP128UN; |
|
// 96-bit Double width significand, normalizer input |
// sig is 208 bits wide, i.e. 52 four-bit digits. |
typedef struct packed |
{ |
logic nan; |
logic qnan; |
logic snan; |
logic infinity; |
logic sign; |
logic [11:0] exp; |
logic [207:0] sig; // significand 50+ 1 lead, 1-trail digit |
} DFP96UD; |
|
// 128-bit Double width significand, normalizer input |
typedef struct packed |
{ |
/verilog2/DFPRound96.sv
0,0 → 1,194
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPRound96.sv |
// - decimal floating point rounding unit |
// - parameterized width |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
`ifdef MIN_LATENCY |
`define PIPE_ADV * |
`else |
`define PIPE_ADV (posedge clk) |
`endif |
|
// DFPRound96: rounds the normalizer output to N digits and packs the result. |
//   clk, ce - clock and clock enable |
//   rm      - rounding mode, one of the ROUND_* parameter values below |
//   i       - DFP96UN input; sig[3:0] is the digit that is rounded away and |
//             sig[7:4] is the lowest digit that is kept |
//   o       - packed DFP96 result, registered on clk |
// Stages clocked with `PIPE_ADV become combinational when MIN_LATENCY is defined. |
module DFPRound96(clk, ce, rm, i, o); |
parameter N=25; |
input clk; |
input ce; |
input [2:0] rm; // rounding mode |
input DFP96UN i; // intermediate format input |
output DFP96 o; // packed rounded output |
|
parameter ROUND_CEILING = 3'd0; |
parameter ROUND_FLOOR = 3'd1; |
parameter ROUND_HALF_UP = 3'd2; |
parameter ROUND_HALF_EVEN = 3'd3; |
parameter ROUND_DOWN = 3'd4; |
|
//------------------------------------------------------------ |
// variables |
wire nano, qnano, snano; |
wire infinity; |
wire so; |
wire [11:0] xo; |
reg [N*4-1:0] mo; |
reg [11:0] xo1; |
reg [N*4-1:0] mo1; |
wire xInf = i.exp==12'hBFF; |
wire so0 = i.sign; |
|
// l = lowest retained digit, r = digit being rounded off |
wire [3:0] l = i.sig[7:4]; |
wire [3:0] r = i.sig[3:0]; |
|
reg rnd; |
|
//------------------------------------------------------------ |
// Clock #1 |
// - determine round amount (add 1 or 0) |
//------------------------------------------------------------ |
|
always @`PIPE_ADV |
if (ce) xo1 <= i.exp; |
always @`PIPE_ADV |
if (ce) mo1 <= i.sig[(N+1)*4-1:4]; |
|
// Compute the round bit |
// Infinities and NaNs are not rounded! |
// Every assignment to rnd is non-blocking so the register is updated |
// consistently on all paths (the NaN/infinity path used a blocking assignment). |
always @`PIPE_ADV |
if (ce) |
if (i.nan | i.infinity) |
rnd <= 1'b0; |
else |
case (rm) |
ROUND_CEILING: rnd <= (r == 4'd0 || i.sign==1'b1) ? 1'b0 : 1'b1; |
ROUND_FLOOR: rnd <= (r == 4'd0 || i.sign==1'b0) ? 1'b0 : 1'b1; |
ROUND_HALF_UP: rnd <= r >= 4'h5; |
ROUND_HALF_EVEN: rnd <= r==4'h5 ? l[0] : r > 4'h5 ? 1'b1 : 1'b0; |
ROUND_DOWN: rnd <= 1'b0; |
default: rnd <= 1'b0; |
endcase |
|
//------------------------------------------------------------ |
// Clock #2 |
// round the number, check for carry |
// note: inf. exponent checked above (if the exponent was infinite already, then no rounding occurs as rnd = 0) |
// note: exponent increments if there is a carry (can only increment to infinity) |
//------------------------------------------------------------ |
|
wire [N*4-1:0] rounded1; |
wire cobcd; |
|
// BCD add of the round bit into the least significant digit. |
BCDAddN #(.N(N)) ubcdan1 |
( |
.ci(1'b0), |
.a(mo1), |
.b({{N*4-1{1'd0}},rnd}), |
.o(rounded1), |
.co(cobcd) |
); |
|
reg [N*4-1:0] rounded2; |
reg rnd2; |
reg dn2; |
reg [12:0] xo2; |
always @`PIPE_ADV |
if (ce) rounded2 <= rounded1; |
always @`PIPE_ADV |
if (ce) rnd2 <= rnd; |
always @`PIPE_ADV |
if (ce) dn2 <= !(|xo1); |
always @`PIPE_ADV |
if (ce) xo2 <= xo1 + cobcd; |
|
//------------------------------------------------------------ |
// Clock #3 |
// - shift mantissa if required. |
//------------------------------------------------------------ |
wire infinity2; |
`ifdef MIN_LATENCY |
assign nano = i.nan; |
assign qnano = i.qnan; |
assign snano = i.snan; |
assign infinity = i.infinity | (rnd2 && xo2[11:0]==12'hBFF); |
assign so = i.sign; |
assign xo = xo2[11:0]; |
`else |
delay3 #(1) u21 (.clk(clk), .ce(ce), .i(i.nan), .o(nano)); |
delay3 #(1) u22 (.clk(clk), .ce(ce), .i(i.qnan), .o(qnano)); |
delay3 #(1) u23 (.clk(clk), .ce(ce), .i(i.snan), .o(snano)); |
delay2 #(1) u24 (.clk(clk), .ce(ce), .i(i.infinity), .o(infinity2)); |
delay3 #(1) u25 (.clk(clk), .ce(ce), .i(i.sign), .o(so)); |
// xo2 is 13 bits wide and this delay is 12 bits wide, so only the 12 exponent |
// bits are selected (the select used to be [13:0], which is out of range). |
delay1 #(12) u26 (.clk(clk), .ce(ce), .i(xo2[11:0]), .o(xo)); |
delay1 #(1) u27 (.clk(clk), .ce(ce), .i(infinity2 | (rnd2 && xo2[11:0]==12'hBFF)), .o(infinity)); |
`endif |
|
// NOTE(review): carry2 is the carry out of the 12-bit exponent add, not the |
// BCD significand carry (cobcd); confirm this is the intended source for the |
// "exponent incremented" cases below. |
wire carry2 = xo2[12]; |
|
// NOTE(review): the not-rounding cases read mo1, which is one stage earlier than |
// rnd2/xo2/dn2 in the pipelined build; confirm operands are held steady. |
always @`PIPE_ADV |
if (ce) |
casez({rnd2,xo2[11:0]==12'hBFF,carry2,dn2}) |
4'b0??0: mo <= mo1[N*4-1:0]; // not rounding, not denormalized |
4'b0??1: mo <= mo1[N*4-1:0]; // not rounding, denormalized |
4'b1000: mo <= rounded2[N*4-1: 0]; // exponent didn't change, number was normalized |
4'b1001: mo <= rounded2[N*4-1: 0]; // exponent didn't change, but number was denormalized |
4'b1010: mo <= {4'h1,rounded2[N*4-1: 4]}; // exponent incremented (new MSD generated), number was normalized |
4'b1011: mo <= rounded2[N*4-1:0]; // exponent incremented (new MSB generated), number was denormalized, number became normalized |
4'b11??: mo <= {N*4{1'd0}}; // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite |
endcase |
|
//------------------------------------------------------------ |
// Clock #4 |
// - Pack output |
//------------------------------------------------------------ |
|
DFP96U o1; |
DFP96 o2; |
|
assign o1.nan = nano; |
assign o1.qnan = qnano; |
assign o1.snan = snano; |
assign o1.infinity = infinity; |
assign o1.sign = so; |
assign o1.exp = xo; |
assign o1.sig = mo; |
|
DFPPack96 u41 (o1, o2); |
always_ff @(posedge clk) |
if (ce) o <= o2; |
|
endmodule |
/verilog2/DFPSqrt96.sv
0,0 → 1,204
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2018-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// DFPSqrt96.sv |
// - decimal floating point square root |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
import fp::*; |
|
// DFPSqrt96: decimal floating point square root producing an un-normalized, |
// double-width result for the normalize/round stages. |
//   rst, clk, ce - reset, clock and clock enable |
//   ld           - start; delayed one cycle before it reaches dfisqrt |
//   a            - packed DFP96 operand |
//   o            - DFP96UD result |
//   done         - taken directly from dfisqrt |
//   sqrinf       - registered "operand is infinite" flag |
//   sqrneg       - operand is negative and not zero |
// Changes from the previous revision: |
//   - the special-case sign used an undeclared identifier (sign); the registered |
//     operand sign sa is used instead |
//   - sqrneg was computed from so, which is tied to 0, so it could never assert; |
//     it now uses sa |
//   - o.qnan, o.snan and o.infinity were never driven; they are now assigned in |
//     every branch |
module DFPSqrt96(rst, clk, ce, ld, a, o, done, sqrinf, sqrneg); |
parameter N=25; |
localparam pShiftAmt = |
FPWID==80 ? 48 : |
FPWID==64 ? 36 : |
FPWID==32 ? 7 : (FMSB+1-16); |
input rst; |
input clk; |
input ce; |
input ld; |
input DFP96 a; |
output DFP96UD o; |
output done; |
output sqrinf; |
output sqrneg; |
|
// registered outputs |
reg sign_exe; |
reg inf; |
reg overflow; |
reg underflow; |
|
wire so; |
wire [13:0] xo; |
wire [(N+1)*4*2-1:0] mo; |
|
// constants |
wire [13:0] infXp = 12'hBFF; // infinite / NaN - all ones |
// The following is a template for a quiet nan. (MSB=1) |
wire [N*4-1:0] qNaN = {4'h1,{N*4-4{1'b0}}}; |
|
// variables |
wire [13:0] ex1; // sum of exponents |
wire ex1c; |
wire [(N+1)*4*2-1:0] sqrto; |
|
// Operands |
reg sa; // sign bit |
reg [11:0] xa; // exponent bits |
reg [N*4-1:0] siga; |
reg a_dn; // a/b is denormalized |
reg az; |
reg aInf; |
reg aNan; |
wire done1; |
wire [7:0] lzcnt; |
wire [N*4-1:0] aa; |
DFP96U au; |
|
// ----------------------------------------------------------- |
// - decode the input operand |
// - derive basic information |
// - calculate exponent |
// - calculate fraction |
// ----------------------------------------------------------- |
|
DFPUnpack96 u01 (a, au); |
always @(posedge clk) |
if (ce) sa <= au.sign; |
always @(posedge clk) |
if (ce) xa <= au.exp; |
always @(posedge clk) |
if (ce) siga <= au.sig; |
always @(posedge clk) |
if (ce) a_dn <= au.exp==12'd0; |
always @(posedge clk) |
if (ce) az <= au.exp==12'd0 && au.sig==100'd0; |
always @(posedge clk) |
if (ce) aInf <= au.infinity; |
always @(posedge clk) |
if (ce) aNan <= au.nan; |
|
// NOTE(review): the exponent is halved without removing or restoring a bias; |
// confirm this matches the exponent encoding used by the later stages. |
assign ex1 = xa + 1'd1; |
assign xo = ex1 >> 1'd1; |
|
assign so = 1'b0; // square root of positive numbers only |
assign mo = aNan ? {4'h1,aa[N*4-1:0],{N*4{1'b0}}} : sqrto; //(sqrto << pShiftAmt); |
assign sqrinf = aInf; |
// A negative operand is flagged unless it is zero. |
assign sqrneg = !az & sa; |
|
// Align the significand by one digit when the exponent is odd. |
wire [(N+1)*4-1:0] siga1 = xa[0] ? {siga,4'h0} : {4'h0,siga}; |
|
wire ldd; |
delay1 #(1) u3 (.clk(clk), .ce(ce), .i(ld), .o(ldd)); |
|
// Ensure an even number of digits are processed. |
dfisqrt #((N+2)&-2) u2 |
( |
.rst(rst), |
.clk(clk), |
.ce(ce), |
.ld(ldd), |
.a({4'h0,siga1}), |
.o(sqrto), |
.done(done) |
); |
|
// Result selection, highest priority first: NaN operand, infinite operand, |
// negative operand, ordinary result. All three special cases produce a NaN; |
// the payloads differ (operand significand, code 5, code 6). |
always @* |
casez({aNan,sqrinf,sqrneg}) |
3'b1??: |
begin |
o.sign <= sa; |
o.nan <= 1'b1; |
o.qnan <= 1'b1; |
o.snan <= 1'b0; |
o.infinity <= 1'b0; |
o.exp <= 12'hBFF; |
o.sig <= {siga,{N*4-4{1'b0}}}; |
end |
3'b01?: |
begin |
o.sign <= sa; |
o.nan <= 1'b1; |
o.qnan <= 1'b1; |
o.snan <= 1'b0; |
o.infinity <= 1'b0; |
o.exp <= 12'hBFF; |
o.sig <= {4'h1,qNaN|4'h5,{N*4-4{1'b0}}}; |
end |
3'b001: |
begin |
o.sign <= sa; |
o.nan <= 1'b1; |
o.qnan <= 1'b1; |
o.snan <= 1'b0; |
o.infinity <= 1'b0; |
o.exp <= 12'hBFF; |
o.sig <= {4'h1,qNaN|4'h6,{N*4-4{1'b0}}}; |
end |
default: |
begin |
o.sign <= 1'b0; |
o.nan <= 1'b0; |
o.qnan <= 1'b0; |
o.snan <= 1'b0; |
o.infinity <= 1'b0; |
o.exp <= xo; |
o.sig <= mo; |
end |
endcase |
|
|
endmodule |
|
// DFPSqrt96nr: square root followed by normalization and rounding. |
//   rst, clk, ce - reset, clock and clock enable |
//   ld           - start, passed to DFPSqrt96 |
//   a            - packed DFP96 operand |
//   o            - packed, rounded DFP96 result from DFPRound96 |
//   rm           - rounding mode handed to DFPRound96 |
//   done         - done1 from the core ANDed with a delay2 copy of itself |
//   inf          - sqrinf passed through a delay2 instance |
//   sqrinf, sqrneg - status flags straight from DFPSqrt96 |
module DFPSqrt96nr(rst, clk, ce, ld, a, o, rm, done, inf, sqrinf, sqrneg); |
parameter N=25; |
input rst; |
input clk; |
input ce; |
input ld; |
input DFP96 a; |
output DFP96 o; |
input [2:0] rm; |
output done; |
output inf; |
output sqrinf; |
output sqrneg; |
|
wire DFP96UD o1; |
wire inf1; |
wire DFP96UN fpn0; |
wire done1; |
wire done2; |
|
// inf1 previously had no driver, which left the inf output undriven. The |
// infinite-operand flag is the only infinity indication this wrapper has. |
// NOTE(review): confirm sqrinf is the intended source for inf. |
assign inf1 = sqrinf; |
|
DFPSqrt96 #(.N(N)) u1 (rst, clk, ce, ld, a, o1, done1, sqrinf, sqrneg); |
DFPNormalize96 #(.N(N)) u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) ); |
DFPRound96 #(.N(N)) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
// NOTE(review): presumably delay2 is a two-stage delay, which is shorter than |
// the normalize + round path; confirm done is not asserted before o is valid. |
delay2 #(1) u8(.clk(clk), .ce(ce), .i(done1), .o(done2)); |
assign done = done1&done2; |
|
endmodule |
/verilog2/DFPUnpack.sv
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2021 Robert Finch, Waterloo |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
51,6 → 51,21
assign o.sig[135:132] = i.combo[4:3]==2'b11 ? {3'b100,i.combo[0]} : {1'b0,i.combo[2:0]}; |
endmodule |
|
// DFPUnpack96: expands the packed DFP96 storage layout into the unpacked |
// DFP96U record (flags, sign, 12-bit exponent, 100-bit significand). |
module DFPUnpack96(i, o); |
input DFP96 i; |
output DFP96U o; |
|
// A combination field starting with 2'b11 uses the alternate layout: the |
// exponent top bits sit in combo[2:1] and the leading digit is 4'b100x. |
wire alt_layout = i.combo[4:3]==2'b11; |
wire nan_code = i.combo==5'b11111; |
wire [1:0] exp_top = alt_layout ? i.combo[2:1] : i.combo[4:3]; |
wire [3:0] lead_digit = alt_layout ? {3'b100,i.combo[0]} : {1'b0,i.combo[2:0]}; |
|
assign o.sign = i.sign; |
assign o.exp = {exp_top,i.expc}; |
// The top bit of the exponent continuation separates quiet (0) from signalling (1) NaNs. |
assign o.nan = nan_code; |
assign o.qnan = nan_code && i.expc[9]==1'b0; |
assign o.snan = nan_code && i.expc[9]==1'b1; |
assign o.infinity = i.combo==5'b11110; |
// The 80-bit continuation field is decoded by DPDDecodeN into the low 96 significand bits. |
DPDDecodeN #(.N(8)) u1 (i.sigc, o.sig[95:0]); |
assign o.sig[99:96] = lead_digit; |
|
endmodule |
|
module DFPUnpack64(i, o); |
input DFP64 i; |
output DFP64U o; |
/verilog2/df96Toi.sv
0,0 → 1,110
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// df96Toi.sv |
// - convert decimal floating point to integer |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
// Convert a packed 96-bit decimal floating-point value to a 96-bit integer. |
// |
// Ports: |
//   rst, clk  - reset and clock; both are forwarded to the BCD-to-binary converter |
//   ce        - clock enable for the registers declared in this module |
//   ld        - forwarded to the converter's ld port (presumably a start strobe) |
//   op        - 1 = signed conversion, 0 = unsigned |
//   i         - packed decimal float input |
//   o         - integer result; two's-complement negated when op and the |
//               registered input sign are both set |
//   overflow  - combinational flag derived from the unbiased exponent |
//   done      - converter done AND its one-clock delayed copy |
// |
// All state is written with non-blocking assignments so that simulation |
// ordering between clocked processes cannot affect the result. |
module df96Toi (rst, clk, ce, ld, op, i, o, overflow, done); |
input rst; |
input clk; |
input ce; |
input ld; |
input op; // 1 = signed, 0 = unsigned |
input [95:0] i; // float input |
output [95:0] o; // integer output |
output overflow; |
output done; |
|
wire done1; // done flag from the BCD-to-binary converter |
reg done2; // done1 delayed by one enabled clock |
assign done = done1 & done2; |
|
wire [95:0] sig; |
|
// Unpacked view of the input |
DFP96U ui; |
DFPUnpack96 uunpk1 (i, ui); |
|
wire [95:0] maxInt = op ? {1'd0,{95{1'b1}}} : {96{1'b1}}; // maximum integer value |
wire [11:0] zeroXp = 12'h5FF; // exponent value treated as zero (bias) |
|
// Registered copy of the input sign, used to negate the result |
reg sgn; // sign |
always_ff @(posedge clk) |
  if (ce) sgn <= ui.sign; |
wire [11:0] exp = ui.exp; // exponent |
|
wire iz = i[94:0]==0; // zero value (special) |
|
wire [12:0] ovx = exp - zeroXp; // unbiased exponent, bit 12 set when negative |
assign overflow = ovx > 23 && !ovx[12]; // lots of numbers are too big - don't forget one less bit is available due to signed values |
wire underflow = exp < zeroXp - 2'd1; // value less than 1/2 |
|
// Right-shift amount in bits: four bits per decimal digit |
wire [7:0] shamt = 8'd128 - {(exp - zeroXp),2'd0}; // exp - zeroXp will be <= MSB |
|
// NOTE(review): {ui.sig,33'b0} may be wider than o1; any bits above bit 128 |
// after the shift are dropped by this assignment - confirm this is intended. |
wire [128:0] o1 = {ui.sig,33'b0} >> shamt; // keep an extra bit for rounding |
wire [95:0] o2; // converter output (binary) |
reg [95:0] o3; // registered, range-checked result |
|
// NOTE(review): the rounding increment below is a binary add applied to a |
// BCD value - confirm a digit cannot be pushed past 9 before conversion. |
DDBCDToBin #(.WID(96)) ub2b1 |
( |
  .rst(rst), |
  .clk(clk), |
  .ld(ld), |
  .bcd({o1[128:1]+o1[0]}), |
  .bin(o2), |
  .done(done1) |
); |
|
|
// Select the result: zero, saturated maximum, one, or the converted value |
always_ff @(posedge clk) |
  if (ce) begin |
    if (underflow|iz) |
      o3 <='d0; |
    else if (overflow) |
      o3 <= maxInt; |
    // value between 1/2 and 1 - round up |
    else if (exp==zeroXp-1) |
      o3 <= 96'd1; |
    // value > 1 |
    else |
      o3 <= o2; |
  end |
always_ff @(posedge clk) |
  if (ce) done2 <= done1; |
|
assign o = (op & sgn) ? -o3 : o3; // adjust output for correct signed value |
|
endmodule |
/verilog2/i2df96.sv
0,0 → 1,144
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// i2df96.sv |
// - convert integer to decimal floating point |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import DFPPkg::*; |
|
// Convert a 96-bit integer to a packed 96-bit decimal floating-point value. |
// |
// Ports: |
//   rst, clk  - reset and clock; both are forwarded to the binary-to-BCD converter |
//   ce        - clock enable for the delay1 and cntlz128Reg instances |
//   ld        - forwarded to the converter's ld port (presumably a start strobe) |
//   op        - 1 = signed, 0 = unsigned |
//   rm        - rounding mode (see the case statement below) |
//   i         - integer input |
//   o         - packed decimal float output |
//   done      - converter done AND a delayed copy of it (ft_delay, DEP=4) |
// |
// All clocked state is written with non-blocking assignments so that |
// simulation ordering between processes cannot affect the result. |
module i2df96 (rst, clk, ce, ld, op, rm, i, o, done); |
parameter FPWID=96; |
input rst; |
input clk; |
input ce; |
input ld; |
input op; // 1 = signed, 0 = unsigned |
input [2:0] rm; // rounding mode |
input [95:0] i; // integer input |
output [95:0] o; // float output |
output done; |
|
// Magnitude of the input: negate when signed and negative |
wire [95:0] i1 = (op & i[95]) ? -i : i; |
wire [127:0] bcd; // converter output, 4 bits per decimal digit |
wire done1, done2; |
assign done = done1 & done2; |
|
DDBinToBCD #(.WID(96)) ub2b1 |
( |
  .rst(rst), |
  .clk(clk), |
  .ld(ld), |
  .bin(i1), |
  .bcd(bcd), |
  .done(done1) |
); |
|
DFP96U ui; // unpacked result handed to the packer |
wire [11:0] zeroXp = 12'h5FF; // exponent value treated as zero (bias) |
|
reg iz; // zero input ? |
wire [7:0] lz; // count the leading zeros in the number |
reg [7:0] lz4; // leading zero count divided by four (whole leading zero digits) |
wire [13:0] wd; // compute number of whole digits |
reg so; // copy the sign of the input (easy) |
reg [2:0] rmd; // registered rounding mode |
wire [127:0] bcd1; // bcd passed through delay1 |
reg [99:0] simag; // aligned significand, 25 BCD digits |
|
always_ff @(posedge clk) |
  rmd <= rm; |
always_ff @(posedge clk) |
  iz <= i==0; |
always_ff @(posedge clk) |
  so <= i[95]; |
|
delay1 #(128) u2 (.clk(clk), .ce(ce), .i(bcd), .o(bcd1) ); |
cntlz128Reg u4 (.clk(clk), .ce(ce), .i(bcd), .o(lz) ); |
|
always_comb |
  lz4 = lz >> 2'd2; |
|
assign wd = zeroXp - 8'd1 + 8'd25 - lz4 + 8'd9; // constant except for lz |
|
// Result exponent; forced to zero for a zero input. |
// wd is 14 bits wide, so only its low 12 bits are kept here. |
reg [11:0] xo; |
|
always_ff @(posedge clk) |
  xo <= iz ? 'd0 : wd; |
|
// left align number |
// The number may be too large to represent exactly, in which case a right |
// shift is required. The significand register holds 100 bits (25 BCD digits), |
// but the converted input occupies up to 128 bits of BCD. |
// Rounding is required only when the number needs to be right-shifted. |
// Shift amounts are whole digits: the digit count is scaled by four bits. |
|
always_ff @(posedge clk) |
  if (lz4 < 8'd9) |
    simag <= bcd1 >> {8'd9 - lz4,2'd0}; |
  else |
    simag <= bcd1 << {lz4 - 8'd9,2'd0}; |
|
// Rounding inputs; only consumed when lz4 < 9 (right-shift case). |
// NOTE(review): the sticky mask covers the bits below position (shift-2), so |
// bit (shift-2) itself is in neither r nor s - confirm this is intended. |
wire g = bcd1[{8'd9 - lz4,2'd0}]; // guard bit (lsb) |
wire r = bcd1[{8'd9 - lz4,2'd0}-1]; // rounding bit |
wire s = |(bcd1 & (128'd1 << {8'd9 - lz4,2'd0}-2) - 2'd1); // "sticky" bit |
reg rnd; |
|
// Compute the round bit |
always_ff @(posedge clk) |
  if (lz4 < 8'd9) |
    case (rmd) |
    3'd0: rnd <= (g & r) | (r & s); // round to nearest even |
    3'd1: rnd <= 0; // round to zero (truncate) |
    3'd2: rnd <= (r | s) & !so; // round towards +infinity |
    3'd3: rnd <= (r | s) & so; // round towards -infinity |
    3'd4: rnd <= (r | s); |
    default: rnd <= (g & r) | (r & s); // round to nearest even |
    endcase |
  else |
    rnd <= 1'b0; |
|
// round the result |
// NOTE(review): this is a binary increment of a BCD significand - confirm a |
// trailing digit of 9 is handled downstream. |
assign ui.sig = simag[99:0] + rnd; |
assign ui.exp = xo[11:0]; |
assign ui.sign = op & so; |
assign ui.nan = 1'b0; |
assign ui.qnan = 1'b0; |
assign ui.snan = 1'b0; |
assign ui.infinity = 1'b0; |
|
DFPPack96 upk1 (ui, o); |
|
// done2 is done1 passed through ft_delay (DEP=4), always enabled |
ft_delay #(.WID(1), .DEP(4)) udly1 (.clk(clk), .ce(1'b1), .i(done1), .o(done2)); |
|
endmodule |