OpenCores
URL https://opencores.org/ocsvn/ft816float/ft816float/trunk

Subversion Repositories ft816float

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /ft816float/trunk/rtl
    from Rev 74 to Rev 75
    Reverse comparison

Rev 74 → Rev 75

/verilog2/DFPAddsub96.sv
0,0 → 1,422
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPAddsub96.sv
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
 
// ----------------------------------------------------------------------------
// DFPAddsub96
//
// Pipelined decimal floating-point add/subtract for DFP96 operands. The
// result is left unnormalized and unrounded (see DFPAddsub96nr for the
// normalizing/rounding wrapper).
//
// Ports:
//   clk - clock for all pipeline registers
//   ce  - clock enable for the pipeline (the clock #1 registers ignore it)
//   rm  - rounding mode; used here only to choose the sign of a zero result
//   op  - 0 = add, 1 = subtract
//   a,b - packed operands
//   o   - unpacked result (sign, exponent, double-width significand, flags)
// ----------------------------------------------------------------------------
module DFPAddsub96(clk, ce, rm, op, a, b, o);
input clk;
input ce;
input [2:0] rm;
input op;
input DFP96 a;
input DFP96 b;
output DFP96UD o;
localparam N=25; // number of BCD digits
localparam RIP_STAGES = 3;

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

DFP96U au;
DFP96U bu;

DFPUnpack96 u00 (a, au);
DFPUnpack96 u01 (b, bu);

// Sorted, aligned operands feeding the BCD adder and subtractor (clock #10).
reg [(N+1)*4-1:0] oaa10;
reg [(N+1)*4-1:0] obb10;
wire [(N+1)*4-1:0] oss10;
wire oss10c;

// Both the sum and the difference are always computed; realOp10 selects
// between them further down.
BCDAdd8NClk #(.N((N+2)/2)) ubcdadn1
(
  .clk(clk),
  .a({8'h00,oaa10}),
  .b({8'h00,obb10}),
  .o(oss10),
  .ci(1'b0),
  .co(oss10c)
);

wire [(N+1)*4-1:0] odd10;
wire odd10c;

BCDSub8NClk #(.N((N+2)/2)) ubcdsdn1
(
  .clk(clk),
  .a({8'h00,oaa10}),
  .b({8'h00,obb10}),
  .o(odd10),
  .ci(1'b0),
  .co(odd10c)
);

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #1
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg op1;
reg az, bz;
always_ff @(posedge clk)
  op1 <= op;
always_ff @(posedge clk)
  az <= au.sig==100'd0 && au.exp==12'd0;
always_ff @(posedge clk)
  bz <= bu.sig==100'd0 && bu.exp==12'd0;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #2
//
// Figure out which operation is really needed an add or subtract ?
// If the signs are the same, use the original op,
// otherwise flip the operation
// a + b = add,+
// a + -b = sub, so of larger
// -a + b = sub, so of larger
// -a + -b = add,-
// a - b = sub, so of larger
// a - -b = add,+
// -a - b = add,-
// -a - -b = sub, so of larger
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg realOp2;
reg op2;
reg [13:0] xa2, xb2;
reg az2, bz2;
reg xa_gt_xb2;
reg [N*4-1:0] siga2, sigb2;
reg sigeq, siga_gt_sigb;
reg expeq;
reg sxo2;

// Nonblocking: realOp2 is sampled by other clocked processes (resZero3 and
// the udly10a delay line), so a blocking assignment here would race them.
always_ff @(posedge clk)
  if (ce) realOp2 <= op1 ^ au.sign ^ bu.sign;
always_ff @(posedge clk)
  if (ce) op2 <= op1;
always_ff @(posedge clk)
  if (ce) xa2 <= au.exp;
always_ff @(posedge clk)
  if (ce) xb2 <= bu.exp;
always_ff @(posedge clk)
  if (ce) siga2 <= au.sig;
always_ff @(posedge clk)
  if (ce) sigb2 <= bu.sig;
always_ff @(posedge clk)
  if (ce) az2 <= az;
always_ff @(posedge clk)
  if (ce) bz2 <= bz;
always_ff @(posedge clk)
  if (ce)
    xa_gt_xb2 <= au.exp > bu.exp;

always_ff @(posedge clk)
  if (ce) sigeq <= au.sig==bu.sig;
always_ff @(posedge clk)
  if (ce) siga_gt_sigb <= au.sig > bu.sig;
always_ff @(posedge clk)
  if (ce) expeq <= au.exp==bu.exp;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #3
//
// Find out if the result will be zero.
// Determine which fraction to denormalize
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
//
reg [11:0] xa3, xb3;
reg resZero3;
wire aInf3, bInf3;      // delayed operand infinity flags
reg xa_gt_xb3;
reg a_gt_b3;
reg op3;
wire sa3, sb3;
wire [2:0] rm3;
reg [N*4-1:0] mfs3;

always_ff @(posedge clk)
  if (ce) resZero3 <= (realOp2 & expeq & sigeq) || // subtract, same magnitude
                      (az2 & bz2);                 // both a,b zero
always_ff @(posedge clk)
  if (ce) xa3 <= xa2;
always_ff @(posedge clk)
  if (ce) xb3 <= xb2;
always_ff @(posedge clk)
  if (ce) xa_gt_xb3 <= xa_gt_xb2;
always_ff @(posedge clk)
  if (ce) a_gt_b3 <= xa_gt_xb2 | (expeq & siga_gt_sigb);
always_ff @(posedge clk)
  if (ce) op3 <= op2;
// The significand belonging to the smaller exponent is the one that gets
// shifted. Nonblocking because mfs3 feeds the clocked delay line udly6a.
always_ff @(posedge clk)
  if (ce) mfs3 <= xa_gt_xb2 ? sigb2 : siga2;

ft_delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(au.sign), .o(sa3));
ft_delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(bu.sign), .o(sb3));
ft_delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3));
// The infinity flags come from the unpacked operands. They were previously
// taken from undeclared (and therefore undriven) nets, which left the
// inf +/- inf detection at clock #12 permanently inactive.
ft_delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(au.infinity), .o(aInf3));
ft_delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bu.infinity), .o(bInf3));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #4
//
// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

reg [11:0] xa4, xb4;
reg [11:0] xo4;
reg xa_gt_xb4;

always_ff @(posedge clk)
  if (ce) xa4 <= xa3;
always_ff @(posedge clk)
  if (ce) xb4 <= xb3;
always_ff @(posedge clk)
  if (ce) xo4 <= resZero3 ? 12'd0 : xa_gt_xb3 ? xa3 : xb3;
always_ff @(posedge clk)
  if (ce) xa_gt_xb4 <= xa_gt_xb3;

// Compute output sign
// Case selector is {result zero, sign of a, op, sign of b}.
// Combinational logic, so blocking assignments are used.
reg so4;
always_comb
  case ({resZero3,sa3,op3,sb3}) // synopsys full_case parallel_case
  4'b0000: so4 = 0;             // + + + = +
  4'b0001: so4 = !a_gt_b3;      // + + - = sign of larger
  4'b0010: so4 = !a_gt_b3;      // + - + = sign of larger
  4'b0011: so4 = 0;             // + - - = +
  4'b0100: so4 = a_gt_b3;       // - + + = sign of larger
  4'b0101: so4 = 1;             // - + - = -
  4'b0110: so4 = 1;             // - - + = -
  4'b0111: so4 = a_gt_b3;       // - - - = sign of larger
  4'b1000: so4 = 0;             // A + B, sign = +
  4'b1001: so4 = (rm3==3'd3);   // A + -B, sign = + unless rounding down
  4'b1010: so4 = (rm3==3'd3);   // A - B, sign = + unless rounding down
  4'b1011: so4 = 0;             // A - -B, sign = +
  4'b1100: so4 = (rm3==3'd3);   // -A + B, sign = + unless rounding down
  4'b1101: so4 = 1;             // -A + -B, sign = -
  4'b1110: so4 = 1;             // -A - B, sign = -
  4'b1111: so4 = (rm3==3'd3);   // -A - -B, sign = + unless rounding down
  endcase

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #5
//
// Compute the difference in exponents, provides shift amount
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [11:0] xdiff5;
always_ff @(posedge clk)
  if (ce) xdiff5 <= xa_gt_xb4 ? xa4 - xb4 : xb4 - xa4;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #6
//
// Clamp the exponent difference.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Once the difference exceeds the number of digits (N) every digit is
// shifted out, so there is no need to track a larger difference.
reg [6:0] xdif6;
wire [N*4-1:0] mfs6;
always_ff @(posedge clk)
  if (ce) xdif6 <= xdiff5 > N ? N : xdiff5[6:0];
ft_delay #(.WID(N*4), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #7
//
// Determine the sticky bit. The sticky bit is the bitwise or of all the bits
// being shifted out the right side. The sticky bit is computed here to
// reduce the number of regs required.
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg sticky6;
wire sticky7;
wire [7:0] xdif7;
wire [N*4-1:0] mfs7;
wire [8:0] xdif6a = {xdif6,2'b00}; // *4
integer n;
always @*
begin
  sticky6 = 1'b0;
  for (n = 0; n < N*4; n = n + 4)
    if (n <= xdif6a)
      sticky6 = sticky6| mfs6[n]|mfs6[n+1]|mfs6[n+2]|mfs6[n+3]; // non-zero nybble
end

// register inputs to shifter and shift
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky6), .o(sticky7) );
delay1 #(9) d15(.clk(clk), .ce(ce), .i(xdif6a), .o(xdif7) );
delay1 #(N*4) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) );

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #8
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4-1:0] md8;
wire [N*4-1:0] siga8, sigb8;
wire xa_gt_xb8;
wire a_gt_b8;
wire op8;
always_ff @(posedge clk)
  if (ce) md8 <= ({mfs7,4'b0} >> xdif7)|sticky7; // xdif7 is a multiple of four

// sync control signals
ft_delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8));
ft_delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8));
ft_delay #(.WID(N*4), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(siga2), .o(siga8));
ft_delay #(.WID(N*4), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(sigb2), .o(sigb8));
ft_delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #9
// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4-1:0] oa9, ob9;
reg a_gt_b9;
always_ff @(posedge clk)
  if (ce) oa9 <= xa_gt_xb8 ? {siga8,4'b0} : md8;
always_ff @(posedge clk)
  if (ce) ob9 <= xa_gt_xb8 ? md8 : {sigb8,4'b0};
always_ff @(posedge clk)
  if (ce) a_gt_b9 <= a_gt_b8;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #10
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire realOp10;
reg [11:0] xo10;

// Larger magnitude goes to the first adder/subtractor input.
always_ff @(posedge clk)
  if (ce) oaa10 <= a_gt_b9 ? oa9 : ob9;
always_ff @(posedge clk)
  if (ce) obb10 <= a_gt_b9 ? ob9 : oa9;
ft_delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10));
ft_delay #(.WID(12), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #11
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire [(N+1)*4-1:0] mab11;
wire mab11c;
wire [N*4-1:0] siga11, sigb11;
wire abInf11;
wire aNan11, bNan11;
wire xoinf11;
wire op11;

ft_delay #(.WID(1), .DEP(8+RIP_STAGES)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11));
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11c (.clk(clk), .ce(ce), .i(au.nan), .o(aNan11));
ft_delay #(.WID(1), .DEP(10+RIP_STAGES)) udly11d (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan11));
ft_delay #(.WID(1), .DEP(3+RIP_STAGES)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11));
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11f (.clk(clk), .ce(ce), .i(siga8), .o(siga11));
ft_delay #(.WID(N*4), .DEP(3+RIP_STAGES)) udly11g (.clk(clk), .ce(ce), .i(sigb8), .o(sigb11));
// NOTE(review): xo10 is 12 bits wide, so this comparison against a 14-bit
// constant can never be true and xoinf11 is always 0. The intended constant
// is presumably the 12-bit infinity exponent - confirm before changing.
ft_delay #(.WID(1), .DEP(1+RIP_STAGES)) udly11h (.clk(clk), .ce(ce), .i(xo10==14'h2FFF), .o(xoinf11));
ft_delay #(.WID((N+1)*4+1), .DEP(1+RIP_STAGES)) udly11i (.clk(clk), .ce(ce), .i(realOp10 ? {odd10c,odd10} : {oss10c,oss10}), .o({mab11c,mab11}));

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #12+RIP_STAGES
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reg [(N+1)*4*2-1:0] mo12; // mantissa output
reg nan12;
reg qnan12;
reg infinity12;
wire sxo11;
wire so11;
ft_delay #(.WID(1), .DEP(9)) udly12a (.clk(clk), .ce(ce), .i(sxo2), .o(sxo11));
ft_delay #(.WID(1), .DEP(7)) udly12b (.clk(clk), .ce(ce), .i(so4), .o(so11));

always_ff @(posedge clk)
  if (ce)
    nan12 <= aNan11|bNan11;

// Select the output significand, giving priority to the special cases.
always_ff @(posedge clk)
  if (ce) begin
    infinity12 <= 1'b0;
    qnan12 <= 1'b0;
    casez({abInf11,aNan11,bNan11,xoinf11})
    4'b1???: // inf +/- inf - generate QNaN on subtract, inf on add
      if (op11) begin
        mo12 <= {4'h9,{(N+1)*4*2-4{1'd0}}};
        qnan12 <= 1'b1;
      end
      else begin
        mo12 <= {(N+1)*2{4'h9}};
        infinity12 <= 1'b1;
      end
    4'b01??: mo12 <= {4'b0,siga11[87:0],{(N+1)*4{1'd0}}};
    4'b001?: mo12 <= {4'b0,sigb11[87:0],{(N+1)*4{1'd0}}};
    4'b0001: begin mo12 <= {(N+1)*4*2{1'd0}}; infinity12 <= 1'b1; end
    default: mo12 <= {3'b0,mab11c,mab11,{N*4{1'd0}}}; // mab has an extra lead bit and four trailing bits
    endcase
  end

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #13
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
wire so; // sign output
wire [13:0] xo; // de normalized exponent output
wire [(N+1)*4*2-1:0] mo; // mantissa output

ft_delay #(.WID(1), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(nan12), .o(o.nan) );
ft_delay #(.WID(1), .DEP(1)) u13d (.clk(clk), .ce(ce), .i(qnan12), .o(o.qnan) );
ft_delay #(.WID(1), .DEP(1)) u13e (.clk(clk), .ce(ce), .i(infinity12), .o(o.infinity) );
ft_delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(o.sign));
ft_delay #(.WID(12), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(o.exp));
ft_delay #(.WID((N+1)*4*2), .DEP(1)) u13f (.clk(clk), .ce(ce), .i(mo12), .o(o.sig));
ft_delay #(.WID(1), .DEP(1)) udly13g (.clk(clk), .ce(ce), .i(1'b0), .o(o.snan));

endmodule
 
 
// Add/subtract followed by normalization and rounding: chains DFPAddsub96,
// DFPNormalize96 and DFPRound96 to produce a packed DFP96 result.
module DFPAddsub96nr(clk, ce, rm, op, a, b, o);
input clk;          // system clock
input ce;           // core clock enable
input [2:0] rm;     // rounding mode
input op;           // operation 0 = add, 1 = subtract
input DFP96 a;      // operand a
input DFP96 b;      // operand b
output DFP96 o;     // output

wire DFP96UD raw_sum;   // unnormalized adder/subtractor result
wire DFP96UN norm_sum;  // normalized, not yet rounded

// Stage 1: raw add/subtract
DFPAddsub96 u1
(
  .clk(clk),
  .ce(ce),
  .rm(rm),
  .op(op),
  .a(a),
  .b(b),
  .o(raw_sum)
);

// Stage 2: normalize (the underflow input is tied low for add/subtract)
DFPNormalize96 u2
(
  .clk(clk),
  .ce(ce),
  .under_i(1'b0),
  .i(raw_sum),
  .o(norm_sum)
);

// Stage 3: round and pack
DFPRound96 u3
(
  .clk(clk),
  .ce(ce),
  .rm(rm),
  .i(norm_sum),
  .o(o)
);

endmodule
/verilog2/DFPCompare96.sv
0,0 → 1,89
// ============================================================================
// __
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPCompare96.sv
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
 
// Combinational comparator for two packed DFP96 values.
//
// Result bits of o:
//   [0]  equal (both zero, or identical bit patterns; never when unordered)
//   [1]  a less than b
//   [2]  less than or equal
//   [3]  {exp,sig} of a less than {exp,sig} of b (signs ignored)
//   [4]  unordered (either operand is a NaN)
//   [5]..[9] complements of bits [0]..[4]
//   [10] constant 0
//   [11] same as bit [1]
module DFPCompare96(a, b, o);
input DFP96 a;
input DFP96 b;
output reg [11:0] o ='d0;
localparam N=34; // number of BCD digits (not referenced in this module)

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

DFP96U au;
DFP96U bu;

DFPUnpack96 u00 (a, au);
DFPUnpack96 u01 (b, bu);

// Operand signs
wire sa;
wire sb;
assign sa = au.sign;
assign sb = bu.sign;

// Zero detection: exponent and significand both all zeros
wire az;
wire bz;
assign az = ~|{au.exp,au.sig};
assign bz = ~|{bu.exp,bu.sig};

wire unordered;
assign unordered = au.nan | bu.nan;

// Equality, with a special test so that +0 and -0 compare equal
wire eq;
assign eq = !unordered & ((az & bz) || (a==b));

// Magnitude comparison on the concatenated exponent and significand
wire gt1;
wire lt1;
assign gt1 = {au.exp,au.sig} > {bu.exp,bu.sig};
assign lt1 = {au.exp,au.sig} < {bu.exp,bu.sig};

// Signed less-than: with differing signs the negative operand is smaller
// (unless both are zero); with equal signs the magnitude test is used,
// reversed when both operands are negative.
wire lt;
assign lt = (sa ^ sb) ? (sa & !(az & bz)) : (sa ? gt1 : lt1);

always_comb
  o = {
    lt,             // [11]
    1'b0,           // [10]
    ~unordered,     // [9]
    ~lt1,           // [8]
    ~(lt|eq),       // [7]
    ~lt,            // [6]
    ~eq,            // [5]
    unordered,      // [4]
    lt1,            // [3]
    lt|eq,          // [2]
    lt,             // [1]
    eq              // [0]
  };

// an unordered comparison will signal a nan exception
//assign nanx = op!=`FCOR && op!=`FCUN && unordered;

endmodule
/verilog2/DFPDivide96.sv
0,0 → 1,255
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPDivide96.sv
// - decimal floating point divider
// - parameterized width
//
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Floating Point Divider
//
//Properties:
//+-inf / +-inf = QNaN
//finite / +-inf = 0
//+-0 / +-0 = QNaN
// ============================================================================
 
import DFPPkg::*;
 
`define QINFDIV 4'd2
`define QZEROZERO 4'd3
 
// Multi-cycle decimal floating-point divider core for DFP96 operands.
// Produces an unpacked, unnormalized quotient; see DFPDivide96nr for the
// version with normalization and rounding.
//
// Ports:
//   rst       - synchronous reset of the output-stage registers
//   clk, ce   - clock and clock enable
//   ld        - starts a divide (forwarded to the digit divider one clock later)
//   op        - not referenced inside this module
//   a, b      - packed dividend and divisor
//   o         - unpacked result
//   done      - high when the result registers have been loaded
//   sign_exe, overflow, underflow - registered status flags
module DFPDivide96(rst, clk, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow);
parameter N=25;
input rst;
input clk;
input ce;
input ld;
input op;
input DFP96 a, b;
output DFP96UD o;
output reg done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe=0;
reg inf=0;
reg overflow=0;
reg underflow=0;

reg so, sxo;
reg [11:0] xo;
reg [(N+1)*4*2-1:0] mo;

DFP96U au, bu;
DFPUnpack96 u01 (a, au);
DFPUnpack96 u02 (b, bu);

// constants
wire [11:0] infXp = 12'hBFF; // infinite / NaN - all ones
wire [11:0] bias = 12'h5FF;
// Template for a quiet nan: leading digit 1, remaining digits 0.
wire [N*4-1:0] qNaN = {4'h1,{(N-1)*4{1'b0}}};

// Raw quotient from the digit divider
wire [(N+2)*4*2-1:0] divo;

// Registered operand information
reg sa, sb;                 // sign bits
reg [N*4-1:0] siga, sigb;   // significands
reg az, bz;                 // operand is zero
reg aInf, bInf;             // operand is infinite
reg aNan,bNan;              // operand is a NaN
wire done1;
wire signed [7:0] lzcnt;

// -----------------------------------------------------------
// Clock #1
// - capture the unpacked operand fields and classification
// -----------------------------------------------------------
reg ld1;
always_ff @(posedge clk)
  if (ce) begin
    sa   <= au.sign;
    sb   <= bu.sign;
    siga <= au.sig;
    sigb <= bu.sig;
    az   <= au.exp==12'd0 && au.sig==100'd0;
    bz   <= bu.exp==12'd0 && bu.sig==100'd0;
    aInf <= au.infinity;
    bInf <= bu.infinity;
    aNan <= au.nan;
    bNan <= bu.nan;
  end
ft_delay #(.WID(1), .DEP(1)) udly1 (.clk(clk), .ce(ce), .i(ld), .o(ld1));

// -----------------------------------------------------------
// Clock #2 to N
// - significand division
// -----------------------------------------------------------
wire done3a,done3;
dfdiv #(N+2) u2 (.clk(clk), .ld(ld1), .a({siga,8'b0}), .b({sigb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
// lzcnt arrives as two BCD digits; convert it to binary.
wire [7:0] lzcnt_bin = lzcnt[3:0] + (lzcnt[7:4] * 10);
// Left-justify the quotient (shift amount is in bits, four per digit).
wire [(N+2)*4*2-1:0] divo1 = divo[(N+2)*4*2-1:0] << ({lzcnt_bin,2'b0}+N*4);//WAS FPWID=128?+44
ft_delay #(.WID(1), .DEP(3)) u3 (.clk(clk), .ce(ce), .i(done1), .o(done3a));
// done3 goes high once done1 has been continuously present for the delay depth.
assign done3 = done1&done3a;

// -----------------------------------------------------------
// Clock #N+1
// - result exponent: difference of the operand exponents plus
//   the bias, less the quotient's leading-zero count
// - decide whether a generated NaN is required
// -----------------------------------------------------------
reg [13:0] ex1;
reg qNaNOut;

always_ff @(posedge clk)
  if (ce) begin
    ex1     <= au.exp - bu.exp + bias - lzcnt_bin;
    qNaNOut <= (az&bz)|(aInf&bInf);
  end

wire over = 1'b0;
wire under = &ex1[13:12];   // both extension bits set: exponent went negative
reg [3:0] st;

// -----------------------------------------------------------
// Clock #N+3
// - output stage
// -----------------------------------------------------------
always_ff @(posedge clk)
  // Simulation likes to see these values reset to zero on reset. Otherwise the
  // values propagate in sim as X's.
  if (rst) begin
    xo <= 1'd0;
    mo <= 1'd0;
    so <= 1'd0;
    sign_exe <= 1'd0;
    overflow <= 1'd0;
    underflow <= 1'd0;
    done <= 1'b1;
  end
  else if (ce) begin
    done <= 1'b0;
    // done3 already includes done1, so it alone gates the result load.
    if (done3) begin
      done <= 1'b1;

      // Exponent selection, highest priority first.
      casez({qNaNOut|aNan|bNan,bInf,bz,over,under})
      5'b1????: xo <= infXp;  // NaN exponent value
      5'b01???: xo <= 1'd0;   // divide by inf
      5'b001??: xo <= infXp;  // divide by zero
      5'b0001?: xo <= infXp;  // overflow
      5'b00001: xo <= 1'd0;   // underflow
      default:  xo <= ex1;    // normal or underflow: passthru neg. exp. for normalization
      endcase

      // Significand selection, highest priority first.
      casez({aNan,bNan,qNaNOut,bInf,bz,over,aInf&bInf,az&bz})
      8'b1???????:
        begin
          mo <= {4'h1,au[N*4-1:0],{(N+1)*4-1{1'b0}}};
          st[3] <= 1'b1;
        end
      8'b01??????:
        begin
          mo <= {4'h1,bu[N*4-1:0],{(N+1)*4-1{1'b0}}};
          st[3] <= 1'b1;
        end
      8'b001?????:
        begin
          mo <= {4'h1,qNaN[N*4-1:0]|{aInf,1'b0}|{az,bz},{(N+1)*4-1{1'b0}}};
          st[3] <= 1'b1;
        end
      8'b0001????:  // div by inf
        begin
          mo <= {(N+1)*4*2-1{1'd0}};
          st[3] <= 1'b0;
        end
      8'b00001???:  // div by zero
        begin
          mo <= {(N+1)*4*2-1{1'd0}};
          st[3] <= 1'b0;
        end
      8'b000001??:  // Inf exponent
        begin
          mo <= {(N+1)*4*2-1{1'd0}};
          st[3] <= 1'b0;
        end
      8'b0000001?:  // infinity / infinity
        begin
          mo <= {4'h1,qNaN|`QINFDIV,{(N+1)*4-1{1'b0}}};
          st[3] <= 1'b1;
        end
      8'b00000001:  // zero / zero
        begin
          mo <= {4'h1,qNaN|`QZEROZERO,{(N+1)*4-1{1'b0}}};
          st[3] <= 1'b1;
        end
      default:      // plain div
        begin
          mo <= divo1[(N+2)*4*2-1:8];
          st[3] <= 1'b0;
        end
      endcase

      sign_exe <= sa & sb;
      overflow <= over;
      underflow <= under;

      // Result record. xo and mo are read here with the values they held
      // before this clock edge.
      o.nan <= aNan|bNan|qNaNOut;
      o.snan <= aNan|bNan|qNaNOut;
      o.qnan <= 1'b0;
      o.infinity <= over|aInf;
      o.sign <= sa ^ sb;
      o.exp <= xo;
      o.sig <= mo;
    end
  end

endmodule
 
// Divider with normalization and rounding: chains DFPDivide96,
// DFPNormalize96 and DFPRound96 to produce a packed DFP96 quotient.
//
// Ports:
//   rst, clk, ce - reset, clock, clock enable
//   ld           - start a divide
//   op           - passed through to the divider core
//   a, b         - packed dividend and divisor
//   o            - packed, rounded quotient
//   rm           - rounding mode
//   sign_exe, inf, overflow, underflow - status flags, delayed two clocks
//   done         - high once the core's done has been held for the delay depth
module DFPDivide96nr(rst, clk, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow);
parameter N=25;
input rst;
input clk;
input ce;
input ld;
input op;
input DFP96 a, b;
output DFP96 o;
input [2:0] rm;
output sign_exe;
output done;
output inf;
output overflow;
output underflow;

DFP96UD o1;
wire sign_exe1, inf1, overflow1, underflow1;
DFP96UN fpn0;
wire done1, done1a;

DFPDivide96 #(.N(N)) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1);

// The divider core has no dedicated infinity port; take the flag from its
// unpacked result. inf1 was previously left undriven, so the inf output
// was a delayed copy of a floating net.
assign inf1 = o1.infinity;

DFPNormalize96 #(.N(N)) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );
DFPRound96 #(.N(N)) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));
// Hold off done until the core's done has been present for the delay depth,
// presumably to cover the normalize/round latency - TODO confirm.
ft_delay #(.WID(1),.DEP(11)) u8(.clk(clk), .ce(ce), .i(done1), .o(done1a));
assign done = done1&done1a;

endmodule
 
/verilog2/DFPMultiply96.sv
0,0 → 1,246
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPMultiply96.sv
// - decimal floating point multiplier
// - parameterized width
//
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//
// Floating Point Multiplier
//
// Properties:
// +-inf * +-inf = -+inf (this is handled by exOver)
// +-inf * 0 = QNaN
//
// ============================================================================
 
import DFPPkg::*;
 
//`define DFPMUL_PARALLEL 1'b1
 
// Decimal floating-point multiplier core for DFP96 operands. Produces an
// unpacked, unnormalized product; see DFPMultiply96nr for the version with
// normalization and rounding.
//
// Ports:
//   clk, ce  - clock and clock enable
//   ld       - starts the sequential significand multiplier
//   a, b     - packed operands
//   o        - unpacked result
//   sign_exe - both operands negative (delayed)
//   inf, overflow - exponent overflow (delayed)
//   underflow     - exponent underflow (delayed)
//   done     - high once the multiplier's done has been held for the delay depth
module DFPMultiply96(clk, ce, ld, a, b, o, sign_exe, inf, overflow, underflow, done);
localparam N=25;
localparam DELAY = 2;
input clk;
input ce;
input ld;
input DFP96 a, b;
output DFP96UD o;
output sign_exe;
output inf;
output overflow;
output underflow;
output done;

reg [11:0] xo1; // extra bit for sign
reg [N*4*2-1:0] mo1;

// constants
wire [11:0] infXp = 12'hBFF; // infinite / NaN - all ones
wire [11:0] bias = 12'h5FF;
// The following is a template for a quiet nan: leading digit 1, remaining
// digits 0. Width is derived from N (equals the former literal 96 for N=25).
wire [N*4-1:0] qNaN = {4'h1,{(N-1)*4{1'b0}}};

// variables
reg [N*4*2-1:0] sig1;
wire [13:0] ex2;

DFP96U au, bu;
DFPUnpack96 u01 (a, au);
DFPUnpack96 u02 (b, bu);

// Decompose the operands
wire sa, sb; // sign bit
wire [14:0] xa, xb; // exponent bits
wire sxa, sxb;
wire [N*4-1:0] siga, sigb;
wire a_dn, b_dn; // a/b is denormalized
wire aNan1, bNan1;
wire az, bz;
wire aInf1, bInf1;

// sa/sb feed the sign_exe flag; they were previously declared but never
// driven, which left sign_exe undefined.
assign sa = au.sign;
assign sb = bu.sign;
assign siga = au.sig;
assign sigb = bu.sig;
assign az = au.exp==12'h0 && au.sig==100'd0;
assign bz = bu.exp==12'h0 && bu.sig==100'd0;

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Clock #1
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// -----------------------------------------------------------
// First clock
// Compute the sum of the exponents.
// -----------------------------------------------------------

wire under, over;
wire [13:0] sum_ex = au.exp + bu.exp - bias;
reg sx0;
wire done1;
assign under = &sum_ex[13:12];              // both extension bits set: went negative
assign over = sum_ex > 14'hBFF && !under;   // above the infinity exponent

wire [N*4*2-1:0] sigoo;
`ifdef DFPMUL_PARALLEL
// NOTE(review): done1 has no driver in this configuration - confirm before
// enabling DFPMUL_PARALLEL.
BCDMul32 u1f (.a({20'h0,siga}),.b({20'h0,sigb}),.o(sigoo));
`else
dfmul #(.N(N)) u1g
(
  .clk(clk),
  .ld(ld),
  .a(siga),
  .b(sigb),
  .p(sigoo),
  .done(done1)
);
`endif

always_ff @(posedge clk)
  if (ce) sig1 <= sigoo[N*4*2-1:0];

// Status
wire under1, over1;

ft_delay #(.WID(12),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(sum_ex[11:0]), .o(ex2) );
ft_delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(au.infinity), .o(aInf1) );
ft_delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bu.infinity), .o(bInf1) );
ft_delay #(.WID(1),.DEP(DELAY)) u6 (.clk(clk), .ce(ce), .i(under), .o(under1) );
ft_delay #(.WID(1),.DEP(DELAY)) u7 (.clk(clk), .ce(ce), .i(over), .o(over1) );

// determine when a NaN is output
wire qNaNOut;
wire DFP96U a1,b1;
wire asnan, bsnan, aqnan, bqnan;
ft_delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((au.infinity&bz)|(bu.infinity&az)), .o(qNaNOut) );
ft_delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(au.nan), .o(aNan1) );
ft_delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bu.nan), .o(bNan1) );
ft_delay #(.WID(1),.DEP(DELAY)) u18 (.clk(clk), .ce(ce), .i(au.snan), .o(asnan) );
ft_delay #(.WID(1),.DEP(DELAY)) u19 (.clk(clk), .ce(ce), .i(bu.snan), .o(bsnan) );
ft_delay #(.WID(1),.DEP(DELAY)) u18a (.clk(clk), .ce(ce), .i(au.qnan), .o(aqnan) );
ft_delay #(.WID(1),.DEP(DELAY)) u19a (.clk(clk), .ce(ce), .i(bu.qnan), .o(bqnan) );
ft_delay #(.WID($bits(a1)),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );
ft_delay #(.WID($bits(b1)),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );

// -----------------------------------------------------------
// Second clock
// - correct xponent and mantissa for exceptional conditions
// -----------------------------------------------------------

wire so1, sx1;
reg [3:0] st;
wire done1a;

// NOTE(review): the depth here is 1 although the other operand-derived flags
// use DELAY - confirm the intended alignment of the sign.
ft_delay #(.WID(1),.DEP(1)) u8 (.clk(clk), .ce(ce), .i(au.sign ^ bu.sign), .o(so1) );

// Exponent selection, highest priority first. Registers in always_ff are
// written with nonblocking assignments.
always_ff @(posedge clk)
  if (ce)
    casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1})
    5'b1????: xo1 <= infXp;       // qNaN - infinity * zero
    5'b01???: xo1 <= infXp;       // 'a' infinite
    5'b001??: xo1 <= infXp;       // 'b' infinite
    5'b0001?: xo1 <= infXp;       // result overflow
    5'b00001: xo1 <= ex2[11:0];   //0; // underflow
    default:  xo1 <= ex2[11:0];   // situation normal
    endcase

// Force mantissa to zero when underflow or zero exponent when not supporting denormals.
always_ff @(posedge clk)
  if (ce)
    casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1})
    6'b1?????: mo1 <= {4'h1,a1[N*4-4-1:0],{N*4{1'b0}}};
    6'b01????: mo1 <= {4'h1,b1[N*4-4-1:0],{N*4{1'b0}}};
    6'b001???: mo1 <= {4'h1,qNaN|3'd4,{N*4{1'b0}}}; // multiply inf * zero
    6'b0001??: mo1 <= 0;    // mul inf's
    6'b00001?: mo1 <= 0;    // mul inf's
    6'b000001: mo1 <= 0;    // mul overflow
    default:   mo1 <= sig1;
    endcase

ft_delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) );
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) );
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );
ft_delay #(.WID(1),.DEP(3)) u18b (.clk(clk), .ce(ce), .i(done1), .o(done1a) );

assign o.nan = aNan1|bNan1|qNaNOut;
assign o.qnan = qNaNOut|aqnan|bqnan;
assign o.snan = qNaNOut ? 1'b0 : asnan|bsnan;
// NOTE(review): this uses the undelayed 'over' next to the delayed
// aInf1/bInf1; 'over1' may have been intended - confirm.
assign o.infinity = aInf1|bInf1|over;
assign o.sign = so1;
assign o.exp = xo1;
assign o.sig = {mo1,8'h00};
assign done = done1&done1a;

endmodule
 
 
// Multiplier with normalization and rounding.
 
// ----------------------------------------------------------------------------
// DFPMultiply96nr
// 96-bit decimal floating point multiply with normalization and rounding.
// Chains the raw multiplier (DFPMultiply96), the normalizer (DFPNormalize96)
// and the rounder (DFPRound96).
//
//   clk, ce   - clock and clock enable shared by every stage
//   ld        - load / start strobe handed to the multiplier
//   a, b      - packed DFP96 operands
//   rm        - rounding mode handed to DFPRound96
//   o         - packed, rounded DFP96 product
//   sign_exe, inf, overflow, underflow
//             - multiplier status flags, each passed through a delay2 stage
//   done      - multiplier done ANDed with the same signal delayed 12 clocks
// ----------------------------------------------------------------------------
module DFPMultiply96nr(clk, ce, ld, a, b, o, rm, sign_exe, inf, overflow, underflow, done);
localparam N=25;
input clk;
input ce;
input ld;
input DFP96 a, b;
output DFP96 o;
input [2:0] rm;
output sign_exe;
output inf;
output overflow;
output underflow;
output done;

// Raw multiplier results (double width significand) and status.
DFP96UD mulResult;
wire mulSignExe;
wire mulInf;
wire mulOverflow;
wire mulUnderflow;
wire mulDone;

// Normalizer output handed to the rounder.
DFP96UN normResult;

// Multiplier done, delayed to cover the normalize and round stages.
wire mulDoneDelayed;

// Multiply -> normalize -> round.
DFPMultiply96 u1 (clk, ce, ld, a, b, mulResult, mulSignExe, mulInf, mulOverflow, mulUnderflow, mulDone);
DFPNormalize96 u2(.clk(clk), .ce(ce), .under_i(mulUnderflow), .i(mulResult), .o(normResult) );
DFPRound96 u3(.clk(clk), .ce(ce), .rm(rm), .i(normResult), .o(o) );

// Status flags follow the data through delay2 stages.
delay2 #(1) u4(.clk(clk), .ce(ce), .i(mulSignExe), .o(sign_exe));
delay2 #(1) u5(.clk(clk), .ce(ce), .i(mulInf), .o(inf));
delay2 #(1) u6(.clk(clk), .ce(ce), .i(mulOverflow), .o(overflow));
delay2 #(1) u7(.clk(clk), .ce(ce), .i(mulUnderflow), .o(underflow));

// done is reported only once the multiplier's done has been seen both now and
// twelve clocks ago.
ft_delay #(.WID(1),.DEP(12)) u10 (.clk(clk), .ce(ce), .i(mulDone), .o(mulDoneDelayed) );
assign done = mulDone & mulDoneDelayed;

endmodule
/verilog2/DFPNormalize96.sv
0,0 → 1,331
// ============================================================================
// __
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPNormalize96.sv
// - decimal floating point normalization unit
// - eight cycle latency
// - parameterized width
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// This unit takes a floating point number in an intermediate
// format and normalizes it. No normalization occurs
// for NaN's or infinities. The unit has an eight cycle latency.
//
// The mantissa is assumed to start with two whole bits on
// the left. The remaining bits are fractional.
//
// The width of the incoming format is reduced via a generation
// of sticky bit in place of the low order fractional bits.
//
// On an underflowed input, the incoming exponent is assumed
// to be negative. A right shift is needed.
// ============================================================================
 
import DFPPkg::*;
 
// ----------------------------------------------------------------------------
// DFPNormalize96
// Normalizes a decimal floating point value held in the expanded (double
// width significand) DFP96UD format and reduces it to the DFP96UN format
// consumed by the rounder. Eight pipeline stages, all gated by ce.
//
//   i         - expanded format input
//   under_i   - input exponent underflowed (is negative); forces a right shift
//   o         - normalized output
//   under_o   - under_i delayed to line up with o
//   inexact_o - set when non-zero low order digits were folded into the
//               sticky bit while narrowing the significand
//
// Changes relative to the previous revision:
//  - The right shift amount is now actually computed for negative exponents.
//    The former test "$signed(xo5) > 14'd0" was evaluated unsigned (one
//    operand unsigned makes the whole comparison unsigned), so the amount
//    was always zero. The amount is also clamped to the significand width.
//  - The exponent is decremented by the number of digits shifted left, not by
//    the number of bits (digits * 4).
//  - Implicit net under2 is declared; the undriven sx0 delay line was removed.
//  - Combinational logic uses blocking assignments and local loop variables.
// ----------------------------------------------------------------------------
module DFPNormalize96(clk, ce, i, o, under_i, under_o, inexact_o);
parameter N=25;
input clk;
input ce;
input DFP96UD i; // expanded format input
output DFP96UN o; // normalized output + guard, sticky and round bits, + 1 whole digit
input under_i;
output under_o;
output inexact_o;

// ----------------------------------------------------------------------------
// No Clock required
// ----------------------------------------------------------------------------
reg [11:0] xo0;
reg so0;
reg nan0, qnan0, snan0;
reg inf0;

always_comb
begin
	xo0 = i.exp;
	so0 = i.sign;		// sign doesn't change
	nan0 = i.nan;
	qnan0 = i.qnan;
	snan0 = i.snan;
	inf0 = i.infinity;
end

// ----------------------------------------------------------------------------
// Clock #1
// - Capture exponent information
// ----------------------------------------------------------------------------
reg xInf1a, xInf1b, xInf1c;
DFP96UD i1;
always_ff @(posedge clk)
if (ce)
	i1 <= i;

always_ff @(posedge clk)
if (ce) xInf1a <= xo0==12'hBFF & !under_i;
always_ff @(posedge clk)
if (ce) xInf1b <= xo0==12'hBFE & !under_i;
always_ff @(posedge clk)
if (ce) xInf1c <= xo0==12'hBFF;

// ----------------------------------------------------------------------------
// Clock #2
// - determine exponent increment
// Since there are *three* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the most significant
// digit is non-zero, then increment the exponent and no shift is needed.
// ----------------------------------------------------------------------------
wire xInf2c, xInf2b;
wire [11:0] xo2;
wire under2;			// previously an implicit net
reg incExpByOne2;
ft_delay #(.WID(1),.DEP(1)) u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));
ft_delay #(.WID(1),.DEP(1)) u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));
ft_delay #(.WID(12),.DEP(2)) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));
ft_delay #(.WID(1),.DEP(2)) u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));

always_ff @(posedge clk)
if (ce) incExpByOne2 <= !xInf1a & |i1.sig[207:204];

// ----------------------------------------------------------------------------
// Clock #3
// - increment exponent
// - detect a zero mantissa
// ----------------------------------------------------------------------------

wire incExpByOne3;
DFP96UD i3;
reg [11:0] xo3;
reg zeroMan3;
ft_delay #(.WID(1),.DEP(1)) u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));
ft_delay #(.WID($bits(i3)),.DEP(3)) u33 (.clk(clk), .ce(ce), .i(i), .o(i3));

wire [11:0] xo2a = xo2 + 1'd1;

always_ff @(posedge clk)
if (ce) xo3 <= (incExpByOne2 ? xo2a : xo2);

// NOTE(review): zero mantissa detection is stubbed out; zeroMan3 is always 0.
always_ff @(posedge clk)
if(ce) zeroMan3 <= 1'b0;

// ----------------------------------------------------------------------------
// Clock #4
// - Shift mantissa left
// - If infinity is reached then set the mantissa to zero
//   shift mantissa left to reduce to a single whole digit
// - create sticky bit
// ----------------------------------------------------------------------------

reg [(N+2)*4-1:0] mo4;
reg inexact4;

// i3 is indexed as a flat vector; its low 208 bits are the significand.
always_ff @(posedge clk)
if(ce)
casez({zeroMan3,incExpByOne3})
2'b1?: mo4 <= 1'd0;
2'b01: mo4 <= {i3[(N+1)*4*2-1:(N+1)*4],3'b0,|i3[(N+1)*4-1:0]};
default: mo4 <= {i3[(N+1)*4*2-1-4:N*4],3'b0,|i3[N*4-1:0]};
endcase

always_ff @(posedge clk)
if(ce)
casez({zeroMan3,incExpByOne3})
2'b1?: inexact4 <= 1'd0;
2'b01: inexact4 <= |i3[(N+1)*4-1:0];
default: inexact4 <= |i3[N*4-1:0];
endcase

// ----------------------------------------------------------------------------
// Clock edge #5
// - count leading zeros
// ----------------------------------------------------------------------------
reg [7:0] leadingZeros5;
wire [11:0] xo5;
wire xInf5;
ft_delay #(.WID(12),.DEP(2)) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));
ft_delay #(.WID(1),.DEP(3)) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );

/* Lookup table based leading zero count modules give slightly better
performance but cases must be coded.
generate
begin
if (FPWID <= 32) begin
cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,4'b0}), .o(leadingZeros5) );
assign leadingZeros5[7:6] = 2'b00;
end
else if (FPWID<=64) begin
assign leadingZeros5[7] = 1'b0;
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,7'h0}), .o(leadingZeros5) );
end
else if (FPWID<=80) begin
assign leadingZeros5[7] = 1'b0;
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) );
end
else if (FPWID<=84) begin
assign leadingZeros5[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,23'b0}), .o(leadingZeros5) );
end
else if (FPWID<=96) begin
assign leadingZeros5[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) );
end
else if (FPWID<=128)
cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,11'b0}), .o(leadingZeros5) );
end
endgenerate
*/

// Sideways add.
// Normally there would be only one to two leading zeros. It is tempting then
// to check for only one or two. But, denormalized numbers might have more
// leading zeros. If denormals were not supported this could be made smaller
// and faster.
`ifdef SUPPORT_DENORMALS
reg [7:0] lzc;
reg got_one;
// Counts leading zero *digits* (nybbles) of mo4, most significant first.
always_comb
begin
	got_one = 1'b0;
	lzc = 8'h00;
	for (int d = (N+2)*4-1; d >= 0; d = d - 4) begin
		if (!got_one) begin
			if (mo4[d]|mo4[d-1]|mo4[d-2]|mo4[d-3])
				got_one = 1'b1;
			else
				lzc = lzc + 1'b1;
		end
	end
end
always_ff @(posedge clk)
if (ce) leadingZeros5 <= lzc;
`else
wire [7:0] lead2 = mo4[(N+2)*4-1:N*4];
always_ff @(posedge clk)
if (ce)
casez(lead2)
8'b00000000: leadingZeros5 <= 8'd2;
8'b0000????: leadingZeros5 <= 8'd1;
default: leadingZeros5 <= 8'd0;
endcase
`endif


// ----------------------------------------------------------------------------
// Clock edge #6
// - Compute how much we want to decrement exponent by
// - compute amount to shift left and right
// - at infinity the exponent can't be incremented, so we can't shift right
//   otherwise it was an underflow situation so the exponent was negative
//   shift amount needs to be negated for shift register
// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
// ----------------------------------------------------------------------------
reg [7:0] lshiftAmt6;		// left shift amount in bits (digits * 4)
reg [7:0] rshiftAmt6;		// right shift amount in bits (digits * 4)
wire rightOrLeft6; // 0=left,1=right
wire xInf6;
wire [11:0] xo6;
wire [(N+2)*4-1:0] mo6;
wire zeroMan6;
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );
ft_delay #(.WID(12),.DEP(1)) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));
ft_delay #(.WID((N+2)*4),.DEP(2)) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );
ft_delay #(.WID(1),.DEP(1)) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );
ft_delay #(.WID(1),.DEP(3)) u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6));

always_ff @(posedge clk)
if (ce) lshiftAmt6 <= {leadingZeros5 > xo5 ? xo5 : leadingZeros5,2'b0};

// Magnitude of a negative (two's complement) exponent = digits to shift right.
wire [11:0] negXo5 = ~xo5 + 12'd1;
// Shifting by more digits than the mantissa holds gives zero anyway; clamping
// keeps the count within the 8-bit shift amount instead of letting it wrap.
localparam [5:0] MAX_RDIGITS = N+2;
wire [5:0] rdigits5 = negXo5 > MAX_RDIGITS ? MAX_RDIGITS : negXo5[5:0];

// Only a negative exponent (sign bit set) calls for a right shift. The sign
// bit is tested directly because a comparison mixing $signed() with an
// unsigned literal is carried out unsigned.
always_ff @(posedge clk)
if (ce) rshiftAmt6 <= (xInf5 | ~xo5[11]) ? 8'd0 : {rdigits5,2'b00};

// ----------------------------------------------------------------------------
// Clock edge #7
// - figure exponent
// - shift mantissa
// - figure sticky bit
// ----------------------------------------------------------------------------

reg [11:0] xo7;
wire rightOrLeft7;
reg [(N+2)*4-1:0] mo7l, mo7r;
reg St6,St7;
ft_delay #(.WID(1),.DEP(1)) u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));

// The exponent drops by one per *digit* shifted left; lshiftAmt6 counts bits,
// so its two low order (always zero) bits are discarded.
wire [11:0] xo7d = xo6 - lshiftAmt6[7:2];

always_ff @(posedge clk)
if (ce)
	xo7 <= zeroMan6 ? xo6 :
		xInf6 ? xo6 :	// an infinite exponent is either a NaN or infinity; no need to change
		rightOrLeft6 ? 1'd0 :	// on a right shift, the exponent was negative, it's being made to zero
		xo7d;			// on a left shift, the exponent can't be decremented below zero

always_ff @(posedge clk)
if (ce) mo7r <= mo6 >> rshiftAmt6;
always_ff @(posedge clk)
if (ce) mo7l <= mo6 << lshiftAmt6;

// The sticky bit is set if the bits shifted out on a right shift are set.
// NOTE(review): the bound "rshiftAmt6 + 1" also folds in the two bits just
// above the shifted-out range; kept as in the previous revision.
always_comb
begin
	St6 = 1'b0;
	for (int b = 0; b < (N+2)*4; b = b + 1)
		if (b <= rshiftAmt6 + 1) St6 = St6|mo6[b];
end
always_ff @(posedge clk)
if (ce) St7 <= St6;

// ----------------------------------------------------------------------------
// Clock edge #8
// - select mantissa
// ----------------------------------------------------------------------------

wire so,sxo,nano,info,qnano,snano;
wire [11:0] xo;
reg [(N+2)*4-1:0] mo;
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );
ft_delay #(.WID(12),.DEP(1)) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));
vtdl #(.WID(1)) u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));
ft_delay #(.WID(1),.DEP(1)) u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));
vtdl #(1) u86 (.clk(clk), .ce(ce), .a(4'd7), .d(nan0), .q(nano) );
vtdl #(1) u87 (.clk(clk), .ce(ce), .a(4'd7), .d(qnan0), .q(qnano) );
vtdl #(1) u88 (.clk(clk), .ce(ce), .a(4'd7), .d(snan0), .q(snano) );
vtdl #(1) u89 (.clk(clk), .ce(ce), .a(4'd7), .d(inf0), .q(info) );

// On a right shift the sticky bit is merged into bit 4, which becomes the
// least significant bit of o.sig below.
always_ff @(posedge clk)
if (ce) mo <= rightOrLeft7 ? mo7r|{St7,4'b0} : mo7l;

assign o.nan = nano;
assign o.qnan = qnano;
assign o.snan = snano;
assign o.infinity = info;
assign o.sign = so;
assign o.exp = xo;
assign o.sig = mo[(N+2)*4-1:4];

endmodule
/verilog2/DFPPack.sv
44,7 → 44,7
wire [109:0] enc_sig;
DPDEncodeN #(.N(11)) u1 (i.sig[131:0], enc_sig);
 
always @*
always_comb
begin
// sign
o.sign <= i.sign;
68,6 → 68,36
 
endmodule
 
// ----------------------------------------------------------------------------
// DFPPack96
// Converts the unpacked DFP96U representation into the packed 96-bit DFP96
// storage format. Purely combinational.
//
//   i - unpacked value (flags, 12-bit exponent, 25 BCD digit significand)
//   o - packed value (sign, combination field, exponent continuation,
//       significand continuation)
// ----------------------------------------------------------------------------
module DFPPack96(i, o);
input DFP96U i;
output DFP96 o;

// The low 24 BCD digits (96 bits) are compressed to 80 bits by DPDEncodeN.
wire [79:0] sigContinuation;
DPDEncodeN #(.N(8)) u1 (i.sig[95:0], sigContinuation);

// The two forms of the combination field for finite numbers, selected by
// whether the most significant digit is 8 or 9.
wire [4:0] comboLargeMsd = {2'b11,i.exp[11:10],i.sig[96]};
wire [4:0] comboSmallMsd = {i.exp[11:10],i.sig[98:96]};

always_comb
begin
	// sign and significand continuation pass straight through
	o.sign = i.sign;
	o.sigc = sigContinuation;

	// combination field: NaN, then infinity, then finite numbers
	if (i.qnan|i.snan)
		o.combo = 5'b11111;
	else if (i.infinity)
		o.combo = 5'b11110;
	else
		o.combo = i.sig[99:96] > 4'h7 ? comboLargeMsd : comboSmallMsd;

	// exponent continuation: for NaNs the top bit distinguishes quiet (0)
	// from signalling (1)
	if (i.qnan)
		o.expc = {1'b0,i.exp[8:0]};
	else if (i.snan)
		o.expc = {1'b1,i.exp[8:0]};
	else
		o.expc = i.exp[9:0];
end

endmodule
module DFPPack64(i, o);
input DFP64U i;
output DFP64 o;
75,7 → 105,7
wire [49:0] enc_sig;
DPDEncodeN #(.N(5)) u1 (i.sig[59:0], enc_sig);
 
always @*
always_comb
begin
// sign
o.sign <= i.sign;
/verilog2/DFPPkg.sv
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2020-2021 Robert Finch, Waterloo
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
40,6 → 40,9
 
`define SUPPORT_DENORMALS 1'b1
 
`define QINFDIV 4'd2
`define QZEROZERO 4'd3
 
typedef struct packed
{
logic sign;
57,9 → 60,33
logic [109:0] sigc; // significand continuation field
} DFP128;
 
// Packed 96 bit (storage) format
// Field widths: 1 + 5 + 10 + 80 = 96 bits.
typedef struct packed
{
logic sign;
logic [4:0] combo; // combination field: 5'b11111 = NaN, 5'b11110 = infinity, otherwise exponent MSBs + leading digit
logic [9:0] expc; // exponent continuation field
logic [79:0] sigc; // significand continuation field
} DFP96;
 
// Exponent and significand vector types for the 96-bit format:
// 12-bit exponent, 100-bit significand.
typedef logic [11:0] DFP96EXP;
typedef logic [99:0] DFP96SIG;

// Exponent and significand vector types for the 128-bit format:
// 14-bit exponent, 136-bit significand.
typedef logic [13:0] DFP128EXP;
typedef logic [135:0] DFP128SIG;
 
// Unpacked 96 bit format
// Produced by DFPUnpack96 and consumed by DFPPack96: the class flags are
// decoded into individual bits and the significand is held as BCD digits.
typedef struct packed
{
logic nan; // value is a NaN (quiet or signalling)
logic qnan; // value is a quiet NaN
logic snan; // value is a signalling NaN
logic infinity; // value is an infinity
logic sign;
logic [11:0] exp;
logic [99:0] sig; // significand 25 digits
} DFP96U;
 
// Unpacked 128 bit format
typedef struct packed
{
80,10 → 107,34
logic snan;
logic infinity;
logic sign;
logic [11:0] exp;
logic [103:0] sig; // significand 26 digits
} DFP96UN;
 
// Normalizer output to rounding, one extra digit
// (128-bit format: 14-bit exponent, 140-bit significand)
typedef struct packed
{
logic nan;
logic qnan;
logic snan;
logic infinity;
logic sign;
logic [13:0] exp;
logic [139:0] sig; // significand 35 digits
} DFP128UN;
 
// 96-bit Double width significand, normalizer input
// This is the input type of DFPNormalize96. The significand is 208 bits,
// i.e. 52 four-bit digits.
typedef struct packed
{
logic nan;
logic qnan;
logic snan;
logic infinity;
logic sign;
logic [11:0] exp;
logic [207:0] sig; // significand 50+ 1 lead, 1-trail digit
} DFP96UD;
 
// 128-bit Double width significand, normalizer input
typedef struct packed
{
/verilog2/DFPRound96.sv
0,0 → 1,194
// ============================================================================
// __
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPRound96.sv
// - decimal floating point rounding unit
// - parameterized width
//
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
 
`ifdef MIN_LATENCY
`define PIPE_ADV *
`else
`define PIPE_ADV (posedge clk)
`endif
 
// ----------------------------------------------------------------------------
// DFPRound96
// Rounds the normalizer output to 25 BCD digits and packs the result into
// the DFP96 storage format.
//
//   clk, ce - clock and clock enable
//   rm      - rounding mode (see the ROUND_* parameters)
//   i       - normalized intermediate value; i.sig[3:0] is the digit that is
//             rounded away, i.sig[7:4] the least significant digit kept
//   o       - packed, rounded result (registered)
//
// Changes relative to the previous revision:
//  - rnd is assigned with non-blocking assignments only (it used to mix "="
//    and "<=" inside one always block).
//  - The carry out of the BCD increment is registered and used to select the
//    "new most significant digit" case. It used to be taken from xo2[12],
//    which is only set when the 12-bit exponent itself overflows, so
//    rounding 99...9 upwards produced an all-zero significand.
//  - The exponent delay is fed from xo2[11:0]; the former xo2[13:0] selected
//    bits beyond the 13-bit register.
//  - Unused wires xInf and so0 were removed.
// ----------------------------------------------------------------------------
module DFPRound96(clk, ce, rm, i, o);
parameter N=25;
input clk;
input ce;
input [2:0] rm; // rounding mode
input DFP96UN i; // intermediate format input
output DFP96 o; // packed rounded output

parameter ROUND_CEILING = 3'd0;
parameter ROUND_FLOOR = 3'd1;
parameter ROUND_HALF_UP = 3'd2;
parameter ROUND_HALF_EVEN = 3'd3;
parameter ROUND_DOWN = 3'd4;

//------------------------------------------------------------
// variables
wire nano, qnano, snano;
wire infinity;
wire so;
wire [11:0] xo;
reg [N*4-1:0] mo;
reg [11:0] xo1;
reg [N*4-1:0] mo1;

wire [3:0] l = i.sig[7:4];	// least significant digit that is kept
wire [3:0] r = i.sig[3:0];	// digit that is rounded away

reg rnd;

//------------------------------------------------------------
// Clock #1
// - determine round amount (add 1 or 0)
//------------------------------------------------------------

always @`PIPE_ADV
if (ce) xo1 <= i.exp;
always @`PIPE_ADV
if (ce) mo1 <= i.sig[(N+1)*4-1:4];

// Compute the round bit
// Infinities and NaNs are not rounded!
always @`PIPE_ADV
if (ce) begin
	if (i.nan | i.infinity)
		rnd <= 1'b0;
	else
		case (rm)
		ROUND_CEILING: rnd <= (r == 4'd0 || i.sign==1'b1) ? 1'b0 : 1'b1;
		ROUND_FLOOR: rnd <= (r == 4'd0 || i.sign==1'b0) ? 1'b0 : 1'b1;
		ROUND_HALF_UP: rnd <= r >= 4'h5;
		ROUND_HALF_EVEN: rnd <= r==4'h5 ? l[0] : r > 4'h5 ? 1'b1 : 1'b0;
		ROUND_DOWN: rnd <= 1'b0;
		default: rnd <= 1'b0;
		endcase
end

//------------------------------------------------------------
// Clock #2
// round the number, check for carry
// note: inf. exponent checked above (if the exponent was infinite already, then no rounding occurs as rnd = 0)
// note: exponent increments if there is a carry (can only increment to infinity)
//------------------------------------------------------------

wire [N*4-1:0] rounded1;
wire cobcd;		// carry out of the most significant BCD digit

BCDAddN #(.N(N)) ubcdan1
(
	.ci(1'b0),
	.a(mo1),
	.b({{N*4-1{1'd0}},rnd}),
	.o(rounded1),
	.co(cobcd)
);

reg [N*4-1:0] rounded2;
reg rnd2;
reg dn2;
reg [12:0] xo2;
reg carry2;		// the increment carried out of the significand
always @`PIPE_ADV
if (ce) rounded2 <= rounded1;
always @`PIPE_ADV
if (ce) rnd2 <= rnd;
always @`PIPE_ADV
if (ce) dn2 <= !(|xo1);
always @`PIPE_ADV
if (ce) xo2 <= xo1 + cobcd;
always @`PIPE_ADV
if (ce) carry2 <= cobcd;

//------------------------------------------------------------
// Clock #3
// - shift mantissa if required.
//------------------------------------------------------------
wire infinity2;
`ifdef MIN_LATENCY
assign nano = i.nan;
assign qnano = i.qnan;
assign snano = i.snan;
assign infinity = i.infinity | (rnd2 && xo2[11:0]==12'hBFF);
assign so = i.sign;
assign xo = xo2[11:0];
`else
delay3 #(1) u21 (.clk(clk), .ce(ce), .i(i.nan), .o(nano));
delay3 #(1) u22 (.clk(clk), .ce(ce), .i(i.qnan), .o(qnano));
delay3 #(1) u23 (.clk(clk), .ce(ce), .i(i.snan), .o(snano));
delay2 #(1) u24 (.clk(clk), .ce(ce), .i(i.infinity), .o(infinity2));
delay3 #(1) u25 (.clk(clk), .ce(ce), .i(i.sign), .o(so));
delay1 #(12) u26 (.clk(clk), .ce(ce), .i(xo2[11:0]), .o(xo));
delay1 #(1) u27 (.clk(clk), .ce(ce), .i(infinity2 | (rnd2 && xo2[11:0]==12'hBFF)), .o(infinity));
`endif

// NOTE(review): the "not rounding" cases read mo1, which is one stage younger
// than rnd2/xo2. This is harmless while the input is held stable, but looks
// wrong for back-to-back operands - confirm against the callers.
always @`PIPE_ADV
if (ce)
casez({rnd2,xo2[11:0]==12'hBFF,carry2,dn2})
4'b0??0: mo <= mo1[N*4-1:0];	// not rounding, not denormalized
4'b0??1: mo <= mo1[N*4-1:0];	// not rounding, denormalized
4'b1000: mo <= rounded2[N*4-1: 0];	// exponent didn't change, number was normalized
4'b1001: mo <= rounded2[N*4-1: 0];	// exponent didn't change, but number was denormalized
// The increment carried out of the top digit (99...9 + 1): the exponent was
// incremented, so the significand becomes a leading 1 followed by the upper
// digits of the (all zero) sum. This holds whether or not the exponent was
// zero beforehand.
4'b101?: mo <= {4'h1,rounded2[N*4-1: 4]};
4'b11??: mo <= {N*4{1'd0}};	// number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite
endcase

//------------------------------------------------------------
// Clock #4
// - Pack output
//------------------------------------------------------------

DFP96U o1;
DFP96 o2;

assign o1.nan = nano;
assign o1.qnan = qnano;
assign o1.snan = snano;
assign o1.infinity = infinity;
assign o1.sign = so;
assign o1.exp = xo;
assign o1.sig = mo;

DFPPack96 u41 (o1, o2);
always_ff @(posedge clk)
if (ce) o <= o2;

endmodule
/verilog2/DFPSqrt96.sv
0,0 → 1,204
// ============================================================================
// __
// \\__/ o\ (C) 2018-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// DFPSqrt96.sv
// - decimal floating point square root
// - parameterized width
// - IEEE 754 representation
//
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
import fp::*;
 
// ----------------------------------------------------------------------------
// DFPSqrt96
// Square root of a packed DFP96 operand. The result is delivered in the
// expanded DFP96UD format, ready for DFPNormalize96.
//
//   rst, clk, ce - reset, clock and clock enable
//   ld           - start strobe; delayed one clock before reaching dfisqrt
//                  so that the registered operand is in place
//   a            - packed operand
//   o            - result in expanded format
//   done         - done output of the dfisqrt core
//   sqrinf       - operand was an infinity
//   sqrneg       - operand was negative and not zero
//
// Changes relative to the previous revision:
//  - The undeclared identifier "sign" is replaced by the registered operand
//    sign.
//  - sqrneg is derived from the operand sign. It used to test the result
//    sign, which is tied to zero, so it could never be asserted.
//  - Every field of o is driven in every case; qnan, snan and infinity used
//    to be left undriven.
//  - Unused declarations were removed, among them a localparam that needed
//    FPWID/FMSB from another package.
// ----------------------------------------------------------------------------
module DFPSqrt96(rst, clk, ce, ld, a, o, done, sqrinf, sqrneg);
parameter N=25;
input rst;
input clk;
input ce;
input ld;
input DFP96 a;
output DFP96UD o;
output done;
output sqrinf;
output sqrneg;

// The following is a template for a quiet nan. (MSB=1)
wire [N*4-1:0] qNaN = {4'h1,{N*4-4{1'b0}}};

// variables
wire [13:0] ex1;
wire [13:0] xo;
wire [(N+1)*4*2-1:0] sqrto;	// significand delivered by the dfisqrt core

// Operands
reg sa;			// sign bit
reg [11:0] xa;		// exponent bits
reg [N*4-1:0] siga;
reg az;			// operand is zero
reg aInf;
reg aNan;
reg aQnan;
reg aSnan;
DFP96U au;

// -----------------------------------------------------------
// - decode the input operand
// - derive basic information
// -----------------------------------------------------------

DFPUnpack96 u01 (a, au);
always @(posedge clk)
if (ce) sa <= au.sign;
always @(posedge clk)
if (ce) xa <= au.exp;
always @(posedge clk)
if (ce) siga <= au.sig;
always @(posedge clk)
if (ce) az <= au.exp==12'd0 && au.sig==100'd0;
always @(posedge clk)
if (ce) aInf <= au.infinity;
always @(posedge clk)
if (ce) aNan <= au.nan;
always @(posedge clk)
if (ce) aQnan <= au.qnan;
always @(posedge clk)
if (ce) aSnan <= au.snan;

// -----------------------------------------------------------
// - calculate exponent
// NOTE(review): the exponent is halved without re-applying a bias. Other
// modules in this project use 12'h5FF as the zero exponent; confirm that
// the dfisqrt core / caller accounts for this.
// -----------------------------------------------------------
assign ex1 = xa + 1'd1;
assign xo = ex1 >> 1'd1;

assign sqrinf = aInf;
assign sqrneg = !az & sa;	// a negative, non-zero operand has no real root

// -----------------------------------------------------------
// - calculate fraction
// An odd exponent moves the significand up by one digit.
// -----------------------------------------------------------
wire [(N+1)*4-1:0] siga1 = xa[0] ? {siga,4'h0} : {4'h0,siga};

wire ldd;
delay1 #(1) u3 (.clk(clk), .ce(ce), .i(ld), .o(ldd));

// Ensure an even number of digits are processed.
dfisqrt #((N+2)&-2) u2
(
	.rst(rst),
	.clk(clk),
	.ce(ce),
	.ld(ldd),
	.a({4'h0,siga1}),
	.o(sqrto),
	.done(done)
);

// -----------------------------------------------------------
// - select the result
// -----------------------------------------------------------
always_comb
begin
	o = '0;		// every field gets a defined value
	casez({aNan,sqrinf,sqrneg})
	// NaN in: hand the operand's payload and NaN kind on.
	3'b1??:
		begin
			o.sign = sa;
			o.nan = 1'b1;
			o.qnan = aQnan;
			o.snan = aSnan;
			o.exp = 12'hBFF;
			o.sig = {siga,{N*4-4{1'b0}}};
		end
	// Infinity in: quiet NaN carrying code 5.
	3'b01?:
		begin
			o.sign = sa;
			o.nan = 1'b1;
			o.qnan = 1'b1;
			o.exp = 12'hBFF;
			o.sig = {4'h1,qNaN|4'h5,{N*4-4{1'b0}}};
		end
	// Negative operand: quiet NaN carrying code 6.
	3'b001:
		begin
			o.sign = sa;
			o.nan = 1'b1;
			o.qnan = 1'b1;
			o.exp = 12'hBFF;
			o.sig = {4'h1,qNaN|4'h6,{N*4-4{1'b0}}};
		end
	// Ordinary operand: the root is never negative.
	default:
		begin
			o.sign = 1'b0;
			o.exp = xo[11:0];
			o.sig = sqrto;
		end
	endcase
end

endmodule
 
// ----------------------------------------------------------------------------
// DFPSqrt96nr
// 96-bit decimal floating point square root with normalization and rounding.
// Chains DFPSqrt96, DFPNormalize96 and DFPRound96.
//
//   rst, clk, ce - reset, clock and clock enable
//   ld           - start strobe
//   a            - packed operand
//   rm           - rounding mode handed to DFPRound96
//   o            - packed, rounded result
//   done         - result is available
//   inf          - result is infinite (always 0, see below)
//   sqrinf       - operand was an infinity (from DFPSqrt96)
//   sqrneg       - operand was negative (from DFPSqrt96)
//
// Changes relative to the previous revision:
//  - done waits twelve clocks after the core finishes, the same allowance
//    DFPMultiply96nr makes for the normalize and round stages. It used to
//    wait only two, so done was raised before o had been produced.
//  - inf1 was never driven, which left inf undefined. DFPSqrt96 never sets
//    the infinity flag of its result (an infinite operand yields a NaN), so
//    inf1 is tied low.
// ----------------------------------------------------------------------------
module DFPSqrt96nr(rst, clk, ce, ld, a, o, rm, done, inf, sqrinf, sqrneg);
parameter N=25;
input rst;
input clk;
input ce;
input ld;
input DFP96 a;
output DFP96 o;
input [2:0] rm;
output done;
output inf;
output sqrinf;
output sqrneg;

wire DFP96UD o1;
wire inf1 = 1'b0;	// the square root unit never reports an infinite result
wire DFP96UN fpn0;
wire done1;
wire done2;

DFPSqrt96 #(.N(N)) u1 (rst, clk, ce, ld, a, o1, done1, sqrinf, sqrneg);
DFPNormalize96 #(.N(N)) u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) );
DFPRound96 #(.N(N)) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));
// Cover the latency of the normalizer and the rounder before reporting done.
ft_delay #(.WID(1),.DEP(12)) u8 (.clk(clk), .ce(ce), .i(done1), .o(done2));
assign done = done1&done2;

endmodule
/verilog2/DFPUnpack.sv
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2020-2021 Robert Finch, Waterloo
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
51,6 → 51,21
assign o.sig[135:132] = i.combo[4:3]==2'b11 ? {3'b100,i.combo[0]} : {1'b0,i.combo[2:0]};
endmodule
 
// ----------------------------------------------------------------------------
// DFPUnpack96
// Expands the packed 96-bit DFP96 storage format into the DFP96U working
// format. Purely combinational.
//
//   i - packed value
//   o - unpacked value: class flags, sign, 12-bit exponent and 25 BCD digits
// ----------------------------------------------------------------------------
module DFPUnpack96(i, o);
input DFP96 i;
output DFP96U o;

// Combination field decoding.
wire comboIsNan = i.combo==5'b11111;
wire comboIsInf = i.combo==5'b11110;
// When the two top combination bits are both set the leading digit is 8 or 9
// and the exponent MSBs sit in combo[2:1].
wire msdIsLarge = i.combo[4:3]==2'b11;
wire [1:0] expMsbs = msdIsLarge ? i.combo[2:1] : i.combo[4:3];
wire [3:0] leadDigit = msdIsLarge ? {3'b100,i.combo[0]} : {1'b0,i.combo[2:0]};

// The significand continuation expands to the low 24 BCD digits.
DPDDecodeN #(.N(8)) u1 (i.sigc, o.sig[95:0]);
assign o.sig[99:96] = leadDigit;

assign o.exp = {expMsbs,i.expc};
assign o.sign = i.sign;

// Class flags; the top bit of the exponent continuation separates quiet
// (0) from signalling (1) NaNs.
assign o.infinity = comboIsInf;
assign o.nan = comboIsNan;
assign o.qnan = comboIsNan && i.expc[9]==1'b0;
assign o.snan = comboIsNan && i.expc[9]==1'b1;

endmodule
 
module DFPUnpack64(i, o);
input DFP64 i;
output DFP64U o;
/verilog2/df96Toi.sv
0,0 → 1,110
// ============================================================================
// __
// \\__/ o\ (C) 2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// df96Toi.sv
// - convert decimal floating point to integer
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
 
// ----------------------------------------------------------------------------
// df96Toi
// Converts a packed 96-bit decimal floating point value to a 96-bit integer.
//
//   rst, clk, ce - reset, clock and clock enable
//   ld           - start strobe handed to the BCD to binary converter
//   op           - 1 = signed result, 0 = unsigned result
//   i            - packed decimal float input
//   o            - integer output; negated when op is set and the input is
//                  negative
//   overflow     - the value is too large for the integer
//   done         - converter done ANDed with the same signal one clock later
//
// Changes relative to the previous revision:
//  - sgn is assigned with a non-blocking assignment inside always_ff.
//  - Zero is detected from the decoded significand. The previous test of the
//    packed bits i[94:0] missed zeros stored with a non-zero exponent field,
//    which could then be reported as overflow or rounded up to one.
//  - The unused wire "sig" was removed.
// ----------------------------------------------------------------------------
module df96Toi (rst, clk, ce, ld, op, i, o, overflow, done);
input rst;
input clk;
input ce;
input ld;
input op; // 1 = signed, 0 = unsigned
input [95:0] i; // float input
output [95:0] o; // integer output
output overflow;
output done;

wire done1;
reg done2;
assign done = done1 & done2;

DFP96U ui;
DFPUnpack96 uunpk1 (i, ui);

wire [95:0] maxInt = op ? {1'd0,{95{1'b1}}} : {96{1'b1}}; // maximum integer value
wire [11:0] zeroXp = 12'h5FF;

reg sgn; // sign
always_ff @(posedge clk)
if (ce) sgn <= ui.sign;
wire [11:0] exp = ui.exp; // exponent

wire iz = ui.sig==100'd0; // zero value (special): all significand digits are zero

wire [12:0] ovx = exp - zeroXp;
assign overflow = ovx > 23 && !ovx[12]; // lots of numbers are too big - don't forget one less bit is available due to signed values
wire underflow = exp < zeroXp - 2'd1; // value less than 1/2

wire [7:0] shamt = 8'd128 - {(exp - zeroXp),2'd0}; // exp - zeroXp will be <= MSB

wire [128:0] o1 = {ui.sig,33'b0} >> shamt; // keep an extra bit for rounding
wire [95:0] o2; // round up
reg [95:0] o3;

// NOTE(review): the rounding bit o1[0] is added to the BCD value with a
// binary add before conversion; confirm this cannot create an invalid digit.
DDBCDToBin #(.WID(96)) ub2b1
(
	.rst(rst),
	.clk(clk),
	.ld(ld),
	.bcd({o1[128:1]+o1[0]}),
	.bin(o2),
	.done(done1)
);


always @(posedge clk)
if (ce) begin
	if (underflow|iz)
		o3 <='d0;
	else if (overflow)
		o3 <= maxInt;
	// value between 1/2 and 1 - round up
	else if (exp==zeroXp-1)
		o3 <= 96'd1;
	// value > 1
	else
		o3 <= o2;
end
always @(posedge clk)
if (ce) done2 <= done1;
assign o = (op & sgn) ? -o3 : o3; // adjust output for correct signed value

endmodule
/verilog2/i2df96.sv
0,0 → 1,144
// ============================================================================
// __
// \\__/ o\ (C) 2022 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// i2df96.sv
// - convert integer to decimal floating point
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ============================================================================
 
import DFPPkg::*;
 
// ----------------------------------------------------------------------------
// i2df96
// - convert a 96-bit integer to a 96-bit decimal floating point value
//
// The (optionally negated) input is converted to BCD by DDBinToBCD, the
// leading zeros of the BCD value are counted, and the value is shifted so
// that it is aligned in the 100-bit significand field. The exponent is
// derived from the leading-zero count. When digits have to be shifted out
// a rounding increment is computed according to the rounding mode.
//
// Ports:
//   rst, clk  - reset and clock (rst is only passed to the BCD converter)
//   ce        - clock enable for the delay / leading-zero count stages
//   ld        - load, forwarded to the BCD converter to start a conversion
//   op        - 1 = treat input as signed, 0 = unsigned
//   rm        - rounding mode (see the case statement below)
//   i         - integer input
//   o         - packed decimal float output
//   done      - done1 & done2, i.e. the converter's done flag ANDed with a
//               delayed copy of itself (ft_delay, DEP=4)
//
// NOTE(review): rmd, iz, so, xo, simag and rnd are clocked unconditionally
// while u2/u4 are gated by ce; confirm ce is expected to be held high during
// a conversion.
// ----------------------------------------------------------------------------
module i2df96 (rst, clk, ce, ld, op, rm, i, o, done);
parameter FPWID=96;
input rst;
input clk;
input ce;
input ld;
input op; // 1 = signed, 0 = unsigned
input [2:0] rm; // rounding mode
input [95:0] i; // integer input
output [95:0] o; // float output
output done;

// Magnitude of the input: negate only when signed mode is selected and the
// sign bit is set.
wire [95:0] i1 = (op & i[95]) ? -i : i;
wire [127:0] bcd;
wire done1, done2;
assign done = done1 & done2;

// Binary to BCD conversion of the magnitude.
DDBinToBCD #(.WID(96)) ub2b1
(
  .rst(rst),
  .clk(clk),
  .ld(ld),
  .bin(i1),
  .bcd(bcd),
  .done(done1)
);

DFP96U ui;
wire [11:0] zeroXp = 12'h5FF;

reg iz; // zero input ?
wire [7:0] lz; // count the leading zeros in the number
reg [7:0] lz4; // leading zero count divided by four (lz >> 2)
wire [13:0] wd; // compute number of whole digits
reg so; // copy the sign of the input (easy)
reg [2:0] rmd; // registered copy of the rounding mode
wire [127:0] bcd1; // bcd passed through delay1 (u2)
reg [99:0] simag; // aligned significand magnitude

// Register the rounding mode, the zero-input flag and the input sign bit.
always_ff @(posedge clk)
  rmd <= rm;
always_ff @(posedge clk)
  iz <= i==0;
always_ff @(posedge clk)
  so <= i[95];

// bcd1 and lz are both produced from bcd through a ce-gated stage
// (delay1 / cntlz128Reg) and are used together below.
delay1 #(128) u2 (.clk(clk), .ce(ce), .i(bcd), .o(bcd1) );
cntlz128Reg u4 (.clk(clk), .ce(ce), .i(bcd), .o(lz) );

always_comb
  lz4 = lz >> 2'd2;

assign wd = zeroXp - 8'd1 + 8'd25 - lz4 + 8'd9; // constant except for lz

reg [11:0] xo; // exponent output; wd is truncated to 12 bits here

always_ff @(posedge clk)
  xo <= iz ? 'd0 : wd;

// Left align number
// The number may be too large to represent entirely precisely, in which case
// a right shift is required: the BCD value is 128 bits wide but only 100 bits
// are kept in simag.
// Rounding is required only when the number needs to be right-shifted.
// The shift amount is a count of 4-bit groups (lz4 relative to 9), hence the
// two zero bits appended to form a bit count.

// Non-blocking assignment: simag is a clocked register read by the
// continuous assignment to ui.sig below.
always_ff @(posedge clk)
  if (lz4 < 8'd9)
    simag <= bcd1 >> {8'd9 - lz4,2'd0};
  else
    simag <= bcd1 << {lz4 - 8'd9,2'd0};

// Rounding inputs. These are only meaningful when lz4 < 9 (right shift);
// in the other case rnd is forced to zero below.
wire g = bcd1[{8'd9 - lz4,2'd0}]; // guard bit (lsb)
wire r = bcd1[{8'd9 - lz4,2'd0}-1]; // rounding bit
wire s = |(bcd1 & (128'd1 << {8'd9 - lz4,2'd0}-2) - 2'd1); // "sticky" bit
reg rnd;

// Compute the round bit
// Registered on the same edge as simag so the two line up in ui.sig.
always_ff @(posedge clk)
  if (lz4 < 8'd9)
    case (rmd)
    3'd0: rnd <= (g & r) | (r & s); // round to nearest even
    3'd1: rnd <= 0; // round to zero (truncate)
    3'd2: rnd <= (r | s) & !so; // round towards +infinity
    3'd3: rnd <= (r | s) & so; // round towards -infinity
    3'd4: rnd <= (r | s); // increment whenever r or s is set
    default: rnd <= (g & r) | (r & s); // round to nearest even
    endcase
  else
    rnd <= 1'b0;

// round the result
// NOTE(review): this is a binary increment of what appears to be BCD data;
// confirm that a carry out of a digit holding 9 cannot occur or is handled
// downstream.
assign ui.sig = simag[99:0] + rnd;
assign ui.exp = xo[11:0];
assign ui.sign = op & so;
assign ui.nan = 1'b0;
assign ui.qnan = 1'b0;
assign ui.snan = 1'b0;
assign ui.infinity = 1'b0;

DFPPack96 upk1 (ui, o);

// done2 is done1 passed through ft_delay (DEP=4), always enabled.
ft_delay #(.WID(1), .DEP(4)) udly1 (.clk(clk), .ce(1'b1), .i(done1), .o(done2));

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.