OpenCores
URL https://opencores.org/ocsvn/ft816float/ft816float/trunk

Subversion Repositories ft816float

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /ft816float/trunk/rtl/verilog2
    from Rev 35 to Rev 48
    Reverse comparison

Rev 35 → Rev 48

/README.md
1,5 → 1,8
# Verilog2
 
This directory is a newer version of the cores with the 'WID' parameter renamed to 'FPWID' to avoid conflicts with other modules.
Also experimental and not completely implemented is the 'EXTRA_BITS' definition. EXTRA_BITS defines the number of extra precision bits to maintain for a given precision. Setting this to zero should generate the usual cores. It's sometimes desirable to maintain extra precision bits in registers which are trimmed off when a transfer to memory occurs. The EXTRA_BITS definition must be a multiple of four.
"EXTRA_BITS" was removed.
 
There are two versions of the cores: one in Verilog and the other in SystemVerilog. They can be distinguished by the filename extension. Include only one or the other in a project, because both versions use the same module names. Note that the Verilog versions are not likely to be updated in the future.
The SystemVerilog versions of the cores import the fp package rather than using fpConfig and fpSize. It is a little cleaner.
 
/f2i.sv
0,0 → 1,81
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// f2i.sv
// - convert floating point to integer
// - single cycle latency floating point unit
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// i2f - convert integer to floating point
// f2i - convert floating point to integer
//
// ============================================================================
 
import fp::*;
 
// f2i - convert a floating point value to an integer.
//
// Ports:
//   clk, ce   - clock and clock enable for the result registers
//   op        - 1 = signed conversion, 0 = unsigned conversion
//   i         - floating point input (sign, exponent, fraction per package fp)
//   o         - integer result; registered magnitude, negated when op & sign
//   overflow  - combinational flag: magnitude does not fit in the integer
//
// The magnitude (o3) and the sign (sgn) are registered on clk when ce is
// high; overflow is derived combinationally from the current input.
module f2i(clk, ce, op, i, o, overflow);
input clk;
input ce;
input op;						// 1 = signed, 0 = unsigned
input [MSB:0] i;
output [MSB:0] o;
output overflow;

wire [MSB:0] maxInt  = op ? {MSB{1'b1}} : {FPWID{1'b1}};	// maximum integer value
wire [EMSB:0] zeroXp = {EMSB{1'b1}};	// biased exponent of 2^0 (MSB clear, remaining bits set)

// Decompose fp value
reg sgn;									// sign, registered alongside the result
always @(posedge clk)
	if (ce) sgn <= i[MSB];					// nonblocking: this is a flip-flop
wire [EMSB:0] exp = i[MSB-1:FMSB+1];		// exponent
wire [FMSB+1:0] man = {exp!=0,i[FMSB:0]};	// mantissa with the hidden bit recreated

wire iz = i[MSB-1:0]==0;					// zero value (special)

// The exponent difference is computed unsigned, so it wraps to a huge value
// whenever exp < zeroXp. Without the exp >= zeroXp guard every input below
// 1.0 reported overflow, and inputs in [0.5, 1) were saturated to maxInt
// instead of being rounded up to 1.
// NOTE(review): the limits (MSB for signed, FPWID for unsigned) and the
// matching shift amount below are kept as in the original; the unsigned path
// shifts one place further than the signed path - confirm against a testbench.
assign overflow = (exp >= zeroXp) && (exp - zeroXp > (op ? MSB : FPWID));
wire underflow = exp < zeroXp - 1;			// value less than 1/2

wire [7:0] shamt = (op ? MSB : FPWID) - (exp - zeroXp);	// only meaningful when neither overflow nor underflow

wire [MSB+1:0] o1 = {man,{EMSB+1{1'b0}},1'b0} >> shamt;	// keep an extra bit for rounding
wire [MSB:0] o2 = o1[MSB+1:1] + o1[0];		// round up
reg [MSB:0] o3;

always @(posedge clk)
	if (ce) begin
		if (underflow|iz)
			o3 <= 0;
		else if (overflow)
			o3 <= maxInt;
		// value between 1/2 and 1 - round up
		else if (exp==zeroXp-1)
			o3 <= 1;
		// value >= 1
		else
			o3 <= o2;
	end

assign o = (op & sgn) ? -o3 : o3;			// adjust output for correct signed value

endmodule
 
/fp.sv
0,0 → 1,112
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fp.sv
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// Package fp: shared floating point configuration.
// Declares the operand width (FPWID) and the derived field positions
// (MSB, EMSB, FMSB, FX, EX) used by the cores that "import fp::*".
// NOTE: the `define macros below are preprocessor text macros; they are not
// scoped by the package and are visible to everything compiled after this file.
package fp;

// Quiet NaN payload codes (single precision fraction field and 4-bit reason codes).
`define QINFOS 23'h7FC000 // info
`define QSUBINF 4'd1
`define QINFDIV 4'd2
`define QZEROZERO 4'd3
`define QINFZERO 4'd4
`define QSQRTINF 4'd5
`define QSQRTNEG 4'd6

// Single precision NaN patterns (31 bits, sign excluded).
`define QSUBINFS 31'h7FC00001 // - infinity - infinity
`define QINFDIVS 31'h7FC00002 // - infinity / infinity
`define QZEROZEROS 31'h7FC00003 // - zero / zero
`define QINFZEROS 31'h7FC00004 // - infinity X zero
`define QSQRTINFS 31'h7FC00005 // - square root of infinity
`define QSQRTNEGS 31'h7FC00006 // - square root of negative number

// Double precision NaN patterns (63 bits, sign excluded).
`define QINFOD 52'hFF80000000000 // info
`define QSUBINFD 63'h7FF0000000000001 // - infinity - infinity
`define QINFDIVD 63'h7FF0000000000002 // - infinity / infinity
`define QZEROZEROD 63'h7FF0000000000003 // - zero / zero
`define QINFZEROD 63'h7FF0000000000004 // - infinity X zero
`define QSQRTINFD 63'h7FF0000000000005 // - square root of infinity
`define QSQRTNEGD 63'h7FF0000000000006 // - square root of negative number

// 80-bit extended NaN patterns (79 bits, sign excluded).
`define QINFODX 64'hFF800000_00000000 // info
`define QSUBINFDX 79'h7FFF000000_0000000001 // - infinity - infinity
`define QINFDIVDX 79'h7FFF000000_0000000002 // - infinity / infinity
`define QZEROZERODX 79'h7FFF000000_0000000003 // - zero / zero
`define QINFZERODX 79'h7FFF000000_0000000004 // - infinity X zero
`define QSQRTINFDX 79'h7FFF000000_0000000005 // - square root of infinity
`define QSQRTNEGDX 79'h7FFF000000_0000000006 // - square root of negative number

// 128-bit quad NaN patterns (127 bits, sign excluded).
`define QINFOQ 112'hFF800000_0000000000_0000000000 // info
`define QSUBINFQ 127'h7F_FF00000000_0000000000_0000000001 // - infinity - infinity
`define QINFDIVQ 127'h7F_FF00000000_0000000000_0000000002 // - infinity / infinity
`define QZEROZEROQ 127'h7F_FF00000000_0000000000_0000000003 // - zero / zero
`define QINFZEROQ 127'h7F_FF00000000_0000000000_0000000004 // - infinity X zero
`define QSQRTINFQ 127'h7F_FF00000000_0000000000_0000000005 // - square root of infinity
`define QSQRTNEGQ 127'h7F_FF00000000_0000000000_0000000006 // - square root of negative number

// Encoded constants 0.5 and 0.0 at several widths.
`define POINT5S 32'h3F000000
`define POINT5SX 40'h3F80000000
`define POINT5D 64'h3FE0000000000000
`define POINT5DX 80'h3FFE0000000000000000
`define ZEROS 32'h00000000
`define ZEROSX 40'h0000000000
`define ZEROD 64'h0000000000000000
`define ZERODX 80'h00000000000000000000

// 3-bit selector codes; presumably operand/result mux selects - their users
// are not in this file, confirm against the modules that reference them.
`define AIN 3'd0
`define BIN 3'd1
`define CIN 3'd2
`define RES 3'd3
`define POINT5 3'd4
`define ZERO 3'd5

`define MIN_LATENCY 1'b1
// Total width in bits of a floating point value.
parameter FPWID = 64;
// This file contains definitions for fields to ease dealing with different fp
// widths. Some of the code still needs to be modified to support widths
// other than standard 32,64 or 80 bit.
// MSB: index of the sign bit.
localparam MSB = FPWID-1;
// EMSB: index of the most significant exponent bit (exponent is EMSB+1 bits).
// Any FPWID not listed falls through to the final default value.
localparam EMSB = FPWID==128 ? 14 :
FPWID==96 ? 14 :
FPWID==80 ? 14 :
FPWID==64 ? 10 :
FPWID==52 ? 10 :
FPWID==48 ? 10 :
FPWID==44 ? 10 :
FPWID==42 ? 10 :
FPWID==40 ? 9 :
FPWID==32 ? 7 :
FPWID==24 ? 6 : 4;
// FMSB: index of the most significant fraction bit (fraction is FMSB+1 bits).
// For every listed width, 1 + (EMSB+1) + (FMSB+1) == FPWID.
// NOTE(review): FPWID==84 is tested by fpAddsub/fpDivide/fpFMA but has no
// entry here, so it would take the default values - confirm whether 84-bit
// operation is still intended to be supported.
localparam FMSB = FPWID==128 ? (111) :
FPWID==96 ? (79) :
FPWID==80 ? (63) :
FPWID==64 ? (51) :
FPWID==52 ? (39) :
FPWID==48 ? (35) :
FPWID==44 ? (31) :
FPWID==42 ? (29) :
FPWID==40 ? (28) :
FPWID==32 ? (22) :
FPWID==24 ? (15) : (9);
localparam FX = (FMSB+2)*2; // the MSB of the expanded fraction
// EX: MSB of the expanded {sign, exponent, fraction} bundle produced by the
// arithmetic cores (FX+1 fraction bits, EMSB+1 exponent bits, 1 sign bit).
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
endpackage
/fpAddsub.sv
0,0 → 1,217
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpAddsub.sv
// - floating point adder/subtracter
// - two cycle latency
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
import fp::*;
 
// fpAddsub - floating point adder / subtracter producing an un-normalized,
// expanded result {sign, exponent, fraction} of EX+1 bits.
//
// Ports:
//   clk, ce - system clock and core clock enable
//   rm      - rounding mode (only rm==3 is examined here, for the sign of an
//             exact-zero result)
//   op      - 0 = add, 1 = subtract
//   a, b    - operands, FPWID bits each
//   o       - {so, xo, mo}; the result is not normalized or rounded here
//
// Sign and exponent are delayed through delay2, the fraction path through
// delay1 stages (both modules are defined outside this file).
module fpAddsub(clk, ce, rm, op, a, b, o);
input clk;				// system clock
input ce;				// core clock enable
input [2:0] rm;			// rounding mode
input op;				// operation 0 = add, 1 = subtract
input [MSB:0] a;		// operand a
input [MSB:0] b;		// operand b
output [EX:0] o;		// output


// variables
wire so;				// sign output
wire [EMSB:0] xo;		// de normalized exponent output
reg [EMSB:0] xo1;		// de normalized exponent output (before pipeline delay)
wire [FX:0] mo;			// mantissa output
reg [FX:0] mo1;			// mantissa output (before pipeline delay)

assign o = {so,xo,mo};

// operands sign,exponent,mantissa
wire sa, sb;
wire [EMSB:0] xa, xb;
wire [FMSB:0] ma, mb;
wire [FMSB+1:0] fracta, fractb;
wire [FMSB+1:0] fracta1, fractb1;

// which has greater magnitude ? Used for sign calc
wire xa_gt_xb = xa > xb;
wire xa_gt_xb1;
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb);
wire a_gt_b1;
wire az, bz;			// operand a,b is zero

wire adn, bdn;			// a,b denormalized ?
wire xaInf, xbInf;
wire aInf, bInf, aInf1, bInf1;
wire aNan, bNan, aNan1, bNan1;

wire [EMSB:0] xad = xa|adn;	// operand a exponent, compensated for denormalized numbers
wire [EMSB:0] xbd = xb|bdn;	// operand b exponent, compensated for denormalized numbers

fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );

// Figure out which operation is really needed an add or
// subtract ?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, so of larger
// -a +  b = sub, so of larger
// -a + -b = add,-
//  a -  b = sub, so of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, so of larger
wire realOp = op ^ sa ^ sb;
wire realOp1;
wire op1;

// Find out if the result will be zero.
wire resZero = (realOp && xa==xb && ma==mb) ||	// subtract, same magnitude
			(az & bz);							// both a,b zero

// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// always @* replaces the hand-maintained sensitivity list so the list can
// never fall out of step with the expression.
always @*
	xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb;

// Compute output sign
// Blocking assignments are used because this is combinational logic;
// nonblocking assignments here defer the update and can cause simulation
// mismatches against the synthesized netlist.
reg so1;
always @*
	case ({resZero,sa,op,sb})		// synopsys full_case parallel_case
	4'b0000: so1 = 0;			// + + + = +
	4'b0001: so1 = !a_gt_b;		// + + - = sign of larger
	4'b0010: so1 = !a_gt_b;		// + - + = sign of larger
	4'b0011: so1 = 0;			// + - - = +
	4'b0100: so1 = a_gt_b;		// - + + = sign of larger
	4'b0101: so1 = 1;			// - + - = -
	4'b0110: so1 = 1;			// - - + = -
	4'b0111: so1 = a_gt_b;		// - - - = sign of larger
	4'b1000: so1 = 0;			//  A +  B, sign = +
	4'b1001: so1 = rm==3;		//  A + -B, sign = + unless rounding down
	4'b1010: so1 = rm==3;		//  A -  B, sign = + unless rounding down
	4'b1011: so1 = 0;			// +A - -B, sign = +
	4'b1100: so1 = rm==3;		// -A +  B, sign = + unless rounding down
	4'b1101: so1 = 1;			// -A + -B, sign = -
	4'b1110: so1 = 1;			// -A - +B, sign = -
	4'b1111: so1 = rm==3;		// -A - -B, sign = + unless rounding down
	endcase

delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );
delay2 #(1)      d2(.clk(clk), .ce(ce), .i(so1), .o(so) );

// Compute the difference in exponents, provides shift amount
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;	// clamp: larger shifts lose every bit anyway
wire [6:0] xdif1;

// determine which fraction to denormalize
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;
wire [FMSB+1:0] mfs1;

// Determine the sticky bit
// Only the widths listed below instantiate a reduction-or block; for any
// other FPWID nothing drives 'sticky'.
// The redundant bare begin/end wrapper was removed: a stand-alone begin/end
// is not a legal generate item in IEEE 1800.
wire sticky, sticky1;
generate
	if (FPWID==128)
		redor128 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
	else if (FPWID==96)
		redor96 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
	else if (FPWID==84)
		redor84 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
	else if (FPWID==80)
		redor80 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
	else if (FPWID==64)
		redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
	else if (FPWID==32)
		redor32 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
endgenerate

// register inputs to shifter and shift
delay1 #(1)      d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );
delay1 #(7)      d15(.clk(clk), .ce(ce), .i(xdif),   .o(xdif1) );
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs),    .o(mfs1) );

wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1;

// sync control signals
delay1 #(1)      d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );
delay1 #(1)      d17(.clk(clk), .ce(ce), .i(a_gt_b),   .o(a_gt_b1) );
delay1 #(1)      d5 (.clk(clk), .ce(ce), .i(realOp),   .o(realOp1) );
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta),   .o(fracta1) );
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb),   .o(fractb1) );
delay1 #(1)      d7 (.clk(clk), .ce(ce), .i(aInf),     .o(aInf1) );
delay1 #(1)      d8 (.clk(clk), .ce(ce), .i(bInf),     .o(bInf1) );
delay1 #(1)      d9 (.clk(clk), .ce(ce), .i(aNan),     .o(aNan1) );
delay1 #(1)      d10(.clk(clk), .ce(ce), .i(bNan),     .o(bNan1) );
delay1 #(1)      d11(.clk(clk), .ce(ce), .i(op),       .o(op1) );

// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1;
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0};
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;
// NOTE(review): xoinf is taken from xo (output of delay2) while the other
// casez selectors come from delay1 stages - confirm the stages line up.
wire xoinf = &xo;

always @*
	casez({aInf1&bInf1,aNan1,bNan1,xoinf})
	4'b1???:	mo1 = {1'b0,op1,{FMSB-1{1'b0}},op1,{FMSB{1'b0}}};	// inf +/- inf - generate QNaN on subtract, inf on add
	4'b01??:	mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};		// a is NaN: pass a's fraction
	4'b001?:	mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};		// b is NaN: pass b's fraction
	4'b0001:	mo1 = 1'd0;											// infinite exponent: zero fraction
	default:	mo1 = {mab,{FMSB-1{1'b0}}};	// mab has an extra lead bit and two trailing bits
	endcase

delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );

endmodule
 
// fpAddsubnr - floating point add/subtract followed by normalization and
// rounding: fpAddsub -> fpNormalize -> fpRound.
//
// Ports:
//   clk, ce - system clock and core clock enable
//   rm      - rounding mode, passed to fpAddsub and fpRound
//   op      - 0 = add, 1 = subtract
//   a, b    - operands
//   o       - rounded result, FPWID bits
module fpAddsubnr(clk, ce, rm, op, a, b, o);
input clk;				// system clock
input ce;				// core clock enable
input [2:0] rm;			// rounding mode
input op;				// operation 0 = add, 1 = subtract
input [MSB:0] a;		// operand a
input [MSB:0] b;		// operand b
output [MSB:0] o;		// output

wire [EX:0] o1;			// expanded, un-normalized sum from fpAddsub
wire [MSB+3:0] fpn0;	// normalized value handed to the rounder

// fpAddsub takes its width from package fp and declares no parameters, so
// no parameter override is passed to it.
fpAddsub u1 (clk, ce, rm, op, a, b, o1);
// NOTE(review): fpNormalize and fpRound are defined outside this file; if
// their SystemVerilog versions also take the width from package fp, the
// #(FPWID) overrides below should be dropped as well.
fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) );
fpRound 	#(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );

endmodule
/fpCompare.sv
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2007-2019 Robert Finch, Waterloo
// \\__/ o\ (C) 2007-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
7,7 → 7,7
//
// fpCompare.sv
// - floating point comparison unit
// - parameterized FPWIDth
// - parameterized width
// - IEEE 754 representation
//
//
26,16 → 26,14
//
// ============================================================================
 
`include "fpConfig.sv"
import fp::*;
 
module fpCompare(a, b, o, nanx);
parameter FPWID = 32;
`include "fpSize.sv"
 
input [MSB:0] a, b;
module fpCompare(a, b, o, nan, snan);
input [FPWID-1:0] a, b;
output [4:0] o;
reg [4:0] o;
output nanx;
output nan;
output snan;
 
// Decompose the operands
wire sa;
47,8 → 45,8
wire az, bz;
wire nan_a, nan_b;
 
fpDecomp #(FPWID) u1(.i(a), .sgn(sa), .exp(xa), .man(ma), .vz(az), .qnan(), .snan(), .nan(nan_a) );
fpDecomp #(FPWID) u2(.i(b), .sgn(sb), .exp(xb), .man(mb), .vz(bz), .qnan(), .snan(), .nan(nan_b) );
fpDecomp u1(.i(a), .sgn(sa), .exp(xa), .man(ma), .vz(az), .qnan(), .snan(), .nan(nan_a) );
fpDecomp u2(.i(b), .sgn(sb), .exp(xb), .man(mb), .vz(bz), .qnan(), .snan(), .nan(nan_b) );
 
wire unordered = nan_a | nan_b;
 
69,6 → 67,7
 
// an unorder comparison will signal a nan exception
//assign nanx = op!=`FCOR && op!=`FCUN && unordered;
assign nanx = 1'b0;
assign nan = nan_a|nan_b;
assign snan = (nan_a & ~ma[FMSB]) | (nan_b & ~mb[FMSB]);
 
endmodule
/fpDecompReg.sv
0,0 → 1,99
// ============================================================================
// __
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpDecompReg.sv
// - decompose floating point value with registered outputs
// - parameterized width
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
import fp::*;
 
// fpDecomp - combinational split of a floating point value into its sign,
// exponent and mantissa fields, plus classification flags.
module fpDecomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
input [MSB:0] i;			// value to decompose
output sgn;					// sign bit
output [EMSB:0] exp;		// exponent field
output [FMSB:0] man;		// mantissa (fraction) field
output [FMSB+1:0] fract;	// mantissa with hidden bit recovered
output xz;					// denormalized - exponent is zero
output mz;					// mantissa is zero
output vz;					// value is zero (both exponent and mantissa are zero)
output inf;					// all ones exponent, zero mantissa
output xinf;				// all ones exponent
output qnan;				// nan with the top mantissa bit set
output snan;				// signalling nan
output nan;					// all ones exponent, non-zero mantissa

// Field extraction
wire [EMSB:0] expField = i[MSB-1:FMSB+1];
wire [FMSB:0] manField = i[FMSB:0];

// Shared reductions over the fields
wire expIsZero  = ~|expField;
wire expAllOnes = &expField;
wire manIsZero  = ~|manField;
wire manTopBit  = manField[FMSB];

assign sgn   = i[MSB];
assign exp   = expField;
assign man   = manField;

assign xz    = expIsZero;
assign mz    = manIsZero;
assign vz    = expIsZero & manIsZero;
assign xinf  = expAllOnes;
assign inf   = expAllOnes & manIsZero;
assign nan   = expAllOnes & ~manIsZero;
assign qnan  = expAllOnes & manTopBit;
assign snan  = expAllOnes & ~manTopBit & ~manIsZero;

// Hidden bit is 1 unless the exponent is zero
assign fract = {~expIsZero, manField};

endmodule
 
 
// fpDecompReg - decompose a floating point value with registered outputs.
//
// On each rising clk edge with ce high, the input is captured in 'o' and its
// sign/exponent/mantissa fields and classification flags are registered.
// All outputs describe the same captured input value.
//
// Every register is written with a nonblocking assignment computed from
// combinational views of the input. The earlier form used blocking
// assignments inside the clocked block, which lets other posedge-clk blocks
// that read these outputs observe either the old or the new value depending
// on simulator scheduling order.
module fpDecompReg(clk, ce, i, o, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
input clk;
input ce;
input [MSB:0] i;

output reg [MSB:0] o;			// registered copy of the input
output reg sgn;					// sign bit
output reg [EMSB:0] exp;		// exponent field
output reg [FMSB:0] man;		// mantissa (fraction) field
output reg [FMSB+1:0] fract;	// mantissa with hidden bit recovered
output reg xz;					// denormalized - exponent is zero
output reg mz;					// mantissa is zero
output reg vz;					// value is zero (both exponent and mantissa are zero)
output reg inf;					// all ones exponent, zero mantissa
output reg xinf;				// all ones exponent
output reg qnan;				// nan with the top mantissa bit set
output reg snan;				// signalling nan
output reg nan;					// all ones exponent, non-zero mantissa

// Combinational decode of the current input
wire [EMSB:0] expIn = i[MSB-1:FMSB+1];
wire [FMSB:0] manIn = i[FMSB:0];
wire expZeroIn = !(|expIn);
wire expOnesIn = &expIn;
wire manZeroIn = !(|manIn);

// Register the decoded values
always @(posedge clk)
	if (ce) begin
		o     <= i;
		sgn   <= i[MSB];
		exp   <= expIn;
		man   <= manIn;
		xz    <= expZeroIn;								// denormalized - exponent is zero
		mz    <= manZeroIn;								// mantissa is zero
		vz    <= expZeroIn & manZeroIn;					// value is zero
		inf   <= expOnesIn & manZeroIn;					// all ones exponent, zero mantissa
		xinf  <= expOnesIn;
		qnan  <= expOnesIn & manIn[FMSB];
		snan  <= expOnesIn & !manIn[FMSB] & !manZeroIn;
		nan   <= expOnesIn & !manZeroIn;
		fract <= {|expIn, manIn};						// hidden bit is 1 unless exponent is zero
	end

endmodule
/fpDivide.sv
0,0 → 1,224
// ============================================================================
// __
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpDivide.sv
// - floating point divider
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Floating Point Multiplier / Divider
//
//Properties:
//+-inf * +-inf = -+inf (this is handled by exOver)
//+-inf * 0 = QNaN
//+-0 / +-0 = QNaN
// ============================================================================
 
import fp::*;
//`define GOLDSCHMIDT 1'b1
 
// fpDivide - floating point divider producing an un-normalized, expanded
// result o = {so, xo, mo} (EX+1 bits) once the fraction divider reports done.
//
// Ports:
//   rst       - synchronous reset of the result registers
//   clk       - clock
//   clk4x     - only referenced by the commented-out fpdivr2 instance below
//   ce        - clock enable for the result registers and the done delay
//   ld        - load/start strobe passed to the fraction divider
//   op        - not referenced inside this module
//   a, b      - dividend and divisor
//   o         - {sign, exponent, expanded mantissa}
//   done      - done1 from the fraction divider after a delay1 stage
//   sign_exe  - registered sa & sb
//   overflow  - registered exponent overflow flag
//   underflow - registered exponent underflow flag
//
// Define GOLDSCHMIDT to select the DivGoldschmidt divider instead of fpdivr16.
module fpDivide(rst, clk, clk4x, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow);
// FADD is a constant that makes the divider width a multiple of four and includes eight extra bits.
localparam FADD = FPWID==128 ? 9 :
FPWID==96 ? 9 :
FPWID==84 ? 9 :
FPWID==80 ? 9 :
FPWID==64 ? 13 :
FPWID==52 ? 9 :
FPWID==48 ? 10 :
FPWID==44 ? 9 :
FPWID==42 ? 11 :
FPWID==40 ? 8 :
FPWID==32 ? 10 :
FPWID==24 ? 9 : 11;
input rst;
input clk;
input clk4x;
input ce;
input ld;
input op;
input [MSB:0] a, b;
output [EX:0] o;
output done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe=0;
// 'inf' is declared here but never assigned or read elsewhere in this module.
reg inf=0;
reg overflow=0;
reg underflow=0;

// result fields: sign, exponent, expanded mantissa
reg so;
reg [EMSB:0] xo;
reg [FX:0] mo;
assign o = {so,xo,mo};

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}}; // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}}; //2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}};

// variables
wire [EMSB+2:0] ex1; // sum of exponents
// raw quotient from the fraction divider; width depends on the divider used
`ifndef GOLDSCHMIDT
wire [(FMSB+FADD)*2-1:0] divo;
`else
wire [(FMSB+5)*2-1:0] divo;
`endif

// Operands
wire sa, sb; // sign bit
wire [EMSB:0] xa, xb; // exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn; // a/b is denormalized
wire az, bz;
wire aInf, bInf;
wire aNan,bNan;
wire done1;
// leading zero count reported by the fraction divider
wire signed [7:0] lzcnt;

// -----------------------------------------------------------
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );
fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );

// Compute the exponent.
// - correct the exponent for denormalized operands
// - adjust the difference by the bias (add 127)
// - also factor in the different decimal position for division
`ifndef GOLDSCHMIDT
assign ex1 = (xa|a_dn) - (xb|b_dn) + bias + FMSB + (FADD-1) - lzcnt - 8'd1;
`else
assign ex1 = (xa|a_dn) - (xb|b_dn) + bias + FMSB - lzcnt + 8'd4;
`endif

// check for exponent underflow/overflow
wire under = ex1[EMSB+2]; // MSB set = negative exponent
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// Perform divide
// Divider width must be a multiple of four
`ifndef GOLDSCHMIDT
fpdivr16 #(FMSB+FADD) u2 (.clk(clk), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
//fpdivr2 #(FMSB+FADD) u2 (.clk4x(clk4x), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
// shift the quotient left by the leading zero count (less two) before slicing
wire [(FMSB+FADD)*2-1:0] divo1 = divo[(FMSB+FADD)*2-1:0] << (lzcnt-2);
`else
DivGoldschmidt #(.WID(FMSB+6),.WHOLE(1),.POINTS(FMSB+5))
u2 (.rst(rst), .clk(clk), .ld(ld), .a({fracta,4'b0}), .b({fractb,4'b0}), .q(divo), .done(done1), .lzcnt(lzcnt));
wire [(FMSB+6)*2+1:0] divo1 =
lzcnt > 8'd5 ? divo << (lzcnt-8'd6) :
divo >> (8'd6-lzcnt);
// NOTE(review): the lone ';' on the next line is an empty module item left
// after the declaration above; it only matters when GOLDSCHMIDT is defined.
;
`endif
delay1 #(1) u3 (.clk(clk), .ce(ce), .i(done1), .o(done));


// determine when a NaN is output
wire qNaNOut = (az&bz)|(aInf&bInf);

always @(posedge clk)
// Simulation likes to see these values reset to zero on reset. Otherwise the
// values propagate in sim as X's.
if (rst) begin
xo <= 1'd0;
mo <= 1'd0;
so <= 1'd0;
sign_exe <= 1'd0;
overflow <= 1'd0;
underflow <= 1'd0;
end
else if (ce) begin
// results are captured only while the fraction divider reports done
if (done1) begin
// Exponent select; casez arms are tested in order, first match wins.
casez({qNaNOut|aNan|bNan,bInf,bz,over,under})
5'b1????: xo <= infXp; // NaN exponent value
5'b01???: xo <= 1'd0; // divide by inf
5'b001??: xo <= infXp; // divide by zero
5'b0001?: xo <= infXp; // overflow
5'b00001: xo <= 1'd0; // underflow
// Reached only when none of the flags above is set; ex1 (EMSB+3 bits) is
// truncated to the EMSB+1 bits of xo.
default: xo <= ex1; // normal or underflow: passthru neg. exp. for normalization
endcase

// Mantissa select; arms are tested in order, first match wins.
// The 'infinity / infinity' and 'zero / zero' arms cannot be reached:
// either condition also sets qNaNOut, which is matched by the third arm.
casez({aNan,bNan,qNaNOut,bInf,bz,over,aInf&bInf,az&bz})
8'b1???????: mo <= {1'b1,a[FMSB:0],{FMSB+1{1'b0}}};
8'b01??????: mo <= {1'b1,b[FMSB:0],{FMSB+1{1'b0}}};
8'b001?????: mo <= {1'b1,qNaN[FMSB:0]|{aInf,1'b0}|{az,bz},{FMSB+1{1'b0}}};
8'b0001????: mo <= 1'd0; // div by inf
8'b00001???: mo <= 1'd0; // div by zero
8'b000001??: mo <= 1'd0; // Inf exponent
8'b0000001?: mo <= {1'b1,qNaN|`QINFDIV,{FMSB+1{1'b0}}}; // infinity / infinity
8'b00000001: mo <= {1'b1,qNaN|`QZEROZERO,{FMSB+1{1'b0}}}; // zero / zero
`ifndef GOLDSCHMIDT
default: mo <= divo1[(FMSB+FADD)*2-1:(FADD-2)*2-2]; // plain div
`else
default: mo <= divo1[(FMSB+6)*2+1:2]; // plain div
`endif
endcase

so <= sa ^ sb;
sign_exe <= sa & sb;
overflow <= over;
underflow <= under;
end
end

endmodule
 
// fpDividenr - floating point divide followed by normalization and rounding:
// fpDivide -> fpNormalize -> fpRound. The status flags from the divider are
// passed through delay2 stages alongside the normalize/round path.
//
// Ports:
//   rst, clk, clk4x, ce, ld, op - passed straight to fpDivide
//   a, b      - dividend and divisor
//   o         - rounded quotient, FPWID bits
//   rm        - rounding mode for fpRound
//   sign_exe, overflow, underflow, done - delayed copies of the divider flags
//   inf       - delayed flag: divider result is infinite
module fpDividenr(rst, clk, clk4x, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow);
input rst;
input clk;
input clk4x;
input ce;
input ld;
input op;
input  [MSB:0] a, b;
output [MSB:0] o;
input [2:0] rm;
output sign_exe;
output done;
output inf;
output overflow;
output underflow;

wire [EX:0] o1;			// expanded divider result {sign, exponent, mantissa}
wire sign_exe1, inf1, overflow1, underflow1;
wire [MSB+3:0] fpn0;	// normalized value handed to the rounder
wire done1;

// fpDivide takes its width from package fp and declares no parameters, so
// no parameter override is passed to it.
fpDivide u1 (rst, clk, clk4x, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1);

// fpDivide has no 'inf' port, so inf1 previously had no driver and the 'inf'
// output floated. Derive it from the divider result instead: the exponent
// field o1[EX-1:FX+1] is all ones and the mantissa field o1[FX:0] is zero,
// which is what fpDivide produces for divide-by-zero and overflow (NaN
// results always carry a non-zero mantissa).
assign inf1 = (&o1[EX-1:FX+1]) & ~(|o1[FX:0]);

// NOTE(review): fpNormalize and fpRound are defined outside this file; if
// their SystemVerilog versions also take the width from package fp, the
// #(FPWID) overrides below should be dropped as well.
fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );
fpRound     #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );

// Keep the status flags in step with the normalize/round path.
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1),  .o(sign_exe));
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1),       .o(inf));
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1),  .o(overflow));
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));
delay2 #(1) u8(.clk(clk), .ce(ce), .i(done1),      .o(done));
endmodule
 
/fpFMA.sv
0,0 → 1,808
// ============================================================================
// __
// \\__/ o\ (C) 2019-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpFMA.sv
// - floating point fused multiplier + adder
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
import fp::*;
 
module fpFMA (clk, ce, op, rm, a, b, c, o, under, over, inf, zero);
input clk;
input ce;
input op; // operation 0 = add, 1 = subtract
input [2:0] rm;
input [MSB:0] a, b, c;
output [EX:0] o;
output under;
output over;
output inf;
output zero;
 
// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}}; // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}}; //2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}};
 
// -----------------------------------------------------------
// Clock #1
// - decode the input operands
// - derive basic information
// -----------------------------------------------------------
 
wire sa1, sb1, sc1; // sign bit
wire [EMSB:0] xa1, xb1, xc1; // exponent bits
wire [FMSB+1:0] fracta1, fractb1, fractc1; // includes unhidden bit
wire a_dn1, b_dn1, c_dn1; // a/b is denormalized
wire aNan1, bNan1, cNan1;
wire az1, bz1, cz1;
wire aInf1, bInf1, cInf1;
reg op1;
 
fpDecompReg u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa1), .exp(xa1), .fract(fracta1), .xz(a_dn1), .vz(az1), .inf(aInf1), .nan(aNan1) );
fpDecompReg u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb1), .exp(xb1), .fract(fractb1), .xz(b_dn1), .vz(bz1), .inf(bInf1), .nan(bNan1) );
fpDecompReg u1c (.clk(clk), .ce(ce), .i(c), .sgn(sc1), .exp(xc1), .fract(fractc1), .xz(c_dn1), .vz(cz1), .inf(cInf1), .nan(cNan1) );
 
always @(posedge clk)
if (ce) op1 <= op;
 
// -----------------------------------------------------------
// Clock #2
// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract 127)
// mul: ex1 = xa + xb, result should always be < 1ffh
// Form partial products (clocks 2 to 5)
// -----------------------------------------------------------
 
reg abz2;
reg [EMSB+2:0] ex2;
reg [EMSB:0] xc2;
reg realOp2;
reg xcInf2;
 
always @(posedge clk)
if (ce) abz2 <= az1|bz1;
always @(posedge clk)
if (ce) ex2 <= (xa1|a_dn1) + (xb1|b_dn1) - bias;
always @(posedge clk)
if (ce) xc2 <= (xc1|c_dn1);
always @(posedge clk)
if (ce) xcInf2 = &xc1;
 
// Figure out which operation is really needed an add or
// subtract ?
// If the signs are the same, use the orignal op,
// otherwise flip the operation
// a + b = add,+
// a + -b = sub, so of larger
// -a + b = sub, so of larger
// -a + -b = add,-
// a - b = sub, so of larger
// a - -b = add,+
// -a - b = add,-
// -a - -b = sub, so of larger
always @(posedge clk)
if (ce) realOp2 <= op1 ^ (sa1 ^ sb1) ^ sc1;
 
 
reg [FX:0] fract5;
generate
if (FPWID==84) begin
reg [33:0] p00,p01,p02,p03;
reg [33:0] p10,p11,p12,p13;
reg [33:0] p20,p21,p22,p23;
reg [33:0] p30,p31,p32,p33;
reg [135:0] fract3a;
reg [135:0] fract3b;
reg [135:0] fract3c;
reg [135:0] fract3d;
reg [135:0] fract4a;
reg [135:0] fract4b;
 
always @(posedge clk)
if (ce) begin
p00 <= fracta1[16: 0] * fractb1[16: 0];
p01 <= fracta1[33:17] * fractb1[16: 0];
p02 <= fracta1[50:34] * fractb1[16: 0];
p03 <= fracta1[67:51] * fractb1[16: 0];
p10 <= fracta1[16: 0] * fractb1[33:17];
p11 <= fracta1[33:17] * fractb1[33:17];
p12 <= fracta1[50:34] * fractb1[33:17];
p13 <= fracta1[67:51] * fractb1[33:17];
 
p20 <= fracta1[16: 0] * fractb1[50:34];
p21 <= fracta1[33:17] * fractb1[50:34];
p22 <= fracta1[50:34] * fractb1[50:34];
p23 <= fracta1[67:51] * fractb1[50:34];
 
p30 <= fracta1[15: 0] * fractb1[67:51];
p31 <= fracta1[31:16] * fractb1[67:51];
p32 <= fracta1[47:32] * fractb1[67:51];
p33 <= fracta1[63:48] * fractb1[67:51];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p33,p31,p20,p00};
fract3b <= {p32,p12,p10,17'b0} + {p23,p03,p01,17'b0};
fract3c <= {p22,p11,34'b0} + {p13,p02,34'b0};
fract3d <= {p12,51'b0} + {p03,51'b0};
end
always @(posedge clk)
if (ce) begin
fract4a <= fract3a + fract3b;
fract4b <= fract3c + fract3d;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4a + fract4b;
end
end
else if (FPWID==80) begin
reg [31:0] p00,p01,p02,p03;
reg [31:0] p10,p11,p12,p13;
reg [31:0] p20,p21,p22,p23;
reg [31:0] p30,p31,p32,p33;
reg [127:0] fract3a;
reg [127:0] fract3b;
reg [127:0] fract3c;
reg [127:0] fract3d;
reg [127:0] fract4a;
reg [127:0] fract4b;
 
always @(posedge clk)
if (ce) begin
p00 <= fracta1[15: 0] * fractb1[15: 0];
p01 <= fracta1[31:16] * fractb1[15: 0];
p02 <= fracta1[47:32] * fractb1[15: 0];
p03 <= fracta1[63:48] * fractb1[15: 0];
p10 <= fracta1[15: 0] * fractb1[31:16];
p11 <= fracta1[31:16] * fractb1[31:16];
p12 <= fracta1[47:32] * fractb1[31:16];
p13 <= fracta1[63:48] * fractb1[31:16];
 
p20 <= fracta1[15: 0] * fractb1[47:32];
p21 <= fracta1[31:16] * fractb1[47:32];
p22 <= fracta1[47:32] * fractb1[47:32];
p23 <= fracta1[63:48] * fractb1[47:32];
 
p30 <= fracta1[15: 0] * fractb1[63:48];
p31 <= fracta1[31:16] * fractb1[63:48];
p32 <= fracta1[47:32] * fractb1[63:48];
p33 <= fracta1[63:48] * fractb1[63:48];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p33,p31,p20,p00};
fract3b <= {p32,p12,p10,16'b0} + {p23,p03,p01,16'b0};
fract3c <= {p22,p11,32'b0} + {p13,p02,32'b0};
fract3d <= {p12,48'b0} + {p03,48'b0};
end
always @(posedge clk)
if (ce) begin
fract4a <= fract3a + fract3b;
fract4b <= fract3c + fract3d;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4a + fract4b;
end
end
else if (FPWID==64) begin
reg [35:0] p00,p01,p02;
reg [35:0] p10,p11,p12;
reg [35:0] p20,p21,p22;
reg [71:0] fract3a;
reg [89:0] fract3b;
reg [107:0] fract3c;
reg [108:0] fract4a;
reg [108:0] fract4b;
 
always @(posedge clk)
if (ce) begin
p00 <= fracta1[17: 0] * fractb1[17: 0];
p01 <= fracta1[35:18] * fractb1[17: 0];
p02 <= fracta1[52:36] * fractb1[17: 0];
p10 <= fracta1[17: 0] * fractb1[35:18];
p11 <= fracta1[35:18] * fractb1[35:18];
p12 <= fracta1[52:36] * fractb1[35:18];
p20 <= fracta1[17: 0] * fractb1[52:36];
p21 <= fracta1[35:18] * fractb1[52:36];
p22 <= fracta1[52:36] * fractb1[52:36];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p02,p00};
fract3b <= {p21,p10,18'b0} + {p12,p01,18'b0};
fract3c <= {p22,p20,36'b0} + {p11,36'b0};
end
always @(posedge clk)
if (ce) begin
fract4a <= fract3a + fract3b;
fract4b <= fract3c;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4a + fract4b;
end
end
else if (FPWID==40) begin
reg [27:0] p00,p01,p02;
reg [27:0] p10,p11,p12;
reg [27:0] p20,p21,p22;
reg [79:0] fract3a;
reg [79:0] fract3b;
reg [79:0] fract3c;
reg [79:0] fract4a;
reg [79:0] fract4b;
always @(posedge clk)
if (ce) begin
p00 <= fracta1[13: 0] * fractb1[13: 0];
p01 <= fracta1[27:14] * fractb1[13: 0];
p02 <= fracta1[39:28] * fractb1[13: 0];
p10 <= fracta1[13: 0] * fractb1[27:14];
p11 <= fracta1[27:14] * fractb1[27:14];
p12 <= fracta1[39:28] * fractb1[27:14];
p20 <= fracta1[13: 0] * fractb1[39:28];
p21 <= fracta1[27:14] * fractb1[39:28];
p22 <= fracta1[39:28] * fractb1[39:28];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p02,p00};
fract3b <= {p21,p10,18'b0} + {p12,p01,18'b0};
fract3c <= {p22,p20,36'b0} + {p11,36'b0};
end
always @(posedge clk)
if (ce) begin
fract4a <= fract3a + fract3b;
fract4b <= fract3c;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4a + fract4b;
end
end
else if (FPWID==32) begin
reg [23:0] p00,p01,p02;
reg [23:0] p10,p11,p12;
reg [23:0] p20,p21,p22;
reg [63:0] fract3a;
reg [63:0] fract3b;
reg [63:0] fract4;
 
always @(posedge clk)
if (ce) begin
p00 <= fracta1[11: 0] * fractb1[11: 0];
p01 <= fracta1[23:12] * fractb1[11: 0];
p10 <= fracta1[11: 0] * fractb1[23:12];
p11 <= fracta1[23:12] * fractb1[23:12];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p11,p00};
fract3b <= {p01,12'b0} + {p10,12'b0};
end
always @(posedge clk)
if (ce) begin
fract4 <= fract3a + fract3b;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4;
end
end
else begin
reg [FX:0] p00;
reg [FX:0] fract3;
reg [FX:0] fract4;
always @(posedge clk)
if (ce) begin
p00 <= fracta1 * fractb1;
end
always @(posedge clk)
if (ce)
fract3 <= p00;
always @(posedge clk)
if (ce)
fract4 <= fract3;
always @(posedge clk)
if (ce)
fract5 <= fract4;
end
endgenerate
 
// -----------------------------------------------------------
// Clock #3
// Select zero exponent
// -----------------------------------------------------------
 
reg [EMSB+2:0] ex3;
reg [EMSB:0] xc3;
always @(posedge clk)
if (ce) ex3 <= abz2 ? 1'd0 : ex2;
always @(posedge clk)
if (ce) xc3 <= xc2;
 
// -----------------------------------------------------------
// Clock #4
// Generate partial products.
// -----------------------------------------------------------
 
reg [EMSB+2:0] ex4;
reg [EMSB:0] xc4;
 
always @(posedge clk)
if (ce) ex4 <= ex3;
always @(posedge clk)
if (ce) xc4 <= xc3;
 
// -----------------------------------------------------------
// Clock #5
// Sum partial products (above)
// compute multiplier overflow and underflow
// -----------------------------------------------------------
 
// Status
reg under5;
reg over5;
reg [EMSB+2:0] ex5;
reg [EMSB:0] xc5;
wire aInf5, bInf5;
wire aNan5, bNan5;
wire qNaNOut5;
 
always @(posedge clk)
if (ce) under5 <= ex4[EMSB+2];
always @(posedge clk)
if (ce) over5 <= (&ex4[EMSB:0] | ex4[EMSB+1]) & !ex4[EMSB+2];
always @(posedge clk)
if (ce) ex5 <= ex4;
always @(posedge clk)
if (ce) xc5 <= xc4;
 
delay4 u2a (.clk(clk), .ce(ce), .i(aInf1), .o(aInf5) );
delay4 u2b (.clk(clk), .ce(ce), .i(bInf1), .o(bInf5) );
 
// determine when a NaN is output
wire [MSB:0] a5,b5;
delay4 u5 (.clk(clk), .ce(ce), .i((aInf1&bz1)|(bInf1&az1)), .o(qNaNOut5) );
delay4 u14 (.clk(clk), .ce(ce), .i(aNan1), .o(aNan5) );
delay4 u15 (.clk(clk), .ce(ce), .i(bNan1), .o(bNan5) );
delay5 #(MSB+1) u16 (.clk(clk), .ce(ce), .i(a), .o(a5) );
delay5 #(MSB+1) u17 (.clk(clk), .ce(ce), .i(b), .o(b5) );
 
// -----------------------------------------------------------
// Clock #6
// - figure multiplier mantissa output
// - figure multiplier exponent output
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------
 
reg [FX:0] mo6;
reg [EMSB+2:0] ex6;
reg [EMSB:0] xc6;
wire [FMSB+1:0] fractc6;
vtdl #(FMSB+2) u61 (.clk(clk), .ce(ce), .a(4'd4), .d(fractc1), .q(fractc6) );
delay1 u62 (.clk(clk), .ce(ce), .i(under5), .o(under6));
 
always @(posedge clk)
if (ce) xc6 <= xc5;
 
always @(posedge clk)
if (ce)
casez({aNan5,bNan5,qNaNOut5,aInf5,bInf5,over5})
6'b1?????: mo6 <= {1'b1,1'b1,a5[FMSB-1:0],{FMSB+1{1'b0}}};
6'b01????: mo6 <= {1'b1,1'b1,b5[FMSB-1:0],{FMSB+1{1'b0}}};
6'b001???: mo6 <= {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero
6'b0001??: mo6 <= 0; // mul inf's
6'b00001?: mo6 <= 0; // mul inf's
6'b000001: mo6 <= 0; // mul overflow
default: mo6 <= fract5;
endcase
 
always @(posedge clk)
if (ce)
casez({qNaNOut5|aNan5|bNan5,aInf5,bInf5,over5,under5})
5'b1????: ex6 <= infXp; // qNaN - infinity * zero
5'b01???: ex6 <= infXp; // 'a' infinite
5'b001??: ex6 <= infXp; // 'b' infinite
5'b0001?: ex6 <= infXp; // result overflow
5'b00001: ex6 <= ex5; //0; // underflow
default: ex6 <= ex5; // situation normal
endcase
 
// -----------------------------------------------------------
// Clock #7
// - prep for addition, determine greater operand
// -----------------------------------------------------------
reg ex_gt_xc7;
reg xeq7;
reg ma_gt_mc7;
reg meq7;
wire az7, bz7, cz7;
wire realOp7;
 
// which has greater magnitude ? Used for sign calc
always @(posedge clk)
if (ce) ex_gt_xc7 <= $signed(ex6) > $signed({2'b0,xc6});
always @(posedge clk)
if (ce) xeq7 <= (ex6=={2'b0,xc6});
always @(posedge clk)
if (ce) ma_gt_mc7 <= mo6 > {fractc6,{FMSB+1{1'b0}}};
always @(posedge clk)
if (ce) meq7 <= mo6 == {fractc6,{FMSB+1{1'b0}}};
vtdl #(1) u71 (.clk(clk), .ce(ce), .a(4'd5), .d(az1), .q(az7));
vtdl #(1) u72 (.clk(clk), .ce(ce), .a(4'd5), .d(bz1), .q(bz7));
vtdl #(1) u73 (.clk(clk), .ce(ce), .a(4'd5), .d(cz1), .q(cz7));
vtdl #(1) u74 (.clk(clk), .ce(ce), .a(4'd4), .d(realOp2), .q(realOp7));
 
// -----------------------------------------------------------
// Clock #8
// - prep for addition, determine greater operand
// - determine if result will be zero
// -----------------------------------------------------------
 
reg a_gt_b8;
reg resZero8;
reg ex_gt_xc8;
wire [EMSB+2:0] ex8;
wire [EMSB:0] xc8;
wire xcInf8;
wire [2:0] rm8;
wire op8;
wire sa8, sc8;
 
delay2 #(EMSB+3) u81 (.clk(clk), .ce(ce), .i(ex6), .o(ex8));
delay2 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xc6), .o(xc8));
vtdl #(1) u83 (.clk(clk), .ce(ce), .a(4'd5), .d(xcInf2), .q(xcInf8));
vtdl #(3) u84 (.clk(clk), .ce(ce), .a(4'd7), .d(rm), .q(rm8));
vtdl #(1) u85 (.clk(clk), .ce(ce), .a(4'd6), .d(op1), .q(op8));
vtdl #(1) u86 (.clk(clk), .ce(ce), .a(4'd6), .d(sa1 ^ sb1), .q(sa8));
vtdl #(1) u87 (.clk(clk), .ce(ce), .a(4'd6), .d(sc1), .q(sc8));
 
always @(posedge clk)
if (ce) ex_gt_xc8 <= ex_gt_xc7;
always @(posedge clk)
if (ce)
a_gt_b8 <= ex_gt_xc7 || (xeq7 && ma_gt_mc7);
 
// Find out if the result will be zero.
always @(posedge clk)
if (ce)
resZero8 <= (realOp7 & xeq7 & meq7) || // subtract, same magnitude
((az7 | bz7) & cz7); // a or b zero and c zero
 
// -----------------------------------------------------------
// Clock #9
// Compute output exponent and sign
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// -----------------------------------------------------------
 
reg so9;
reg [EMSB+2:0] ex9;
reg [EMSB+2:0] ex9a;
reg ex_gt_xc9;
reg [EMSB:0] xc9;
reg a_gt_c9;
wire [FX:0] mo9;
wire [FMSB+1:0] fractc9;
wire under9;
wire xeq9;
 
always @(posedge clk)
if (ce) ex_gt_xc9 <= ex_gt_xc8;
always @(posedge clk)
if (ce) a_gt_c9 <= a_gt_b8;
always @(posedge clk)
if (ce) xc9 <= xc8;
always @(posedge clk)
if (ce) ex9a <= ex8;
 
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9));
 
always @(posedge clk)
if (ce) ex9 <= resZero8 ? 1'd0 : ex_gt_xc8 ? ex8 : {2'b0,xc8};
 
// Compute output sign
always @(posedge clk)
if (ce)
case ({resZero8,sa8,op8,sc8}) // synopsys full_case parallel_case
4'b0000: so9 <= 0; // + + + = +
4'b0001: so9 <= !a_gt_b8; // + + - = sign of larger
4'b0010: so9 <= !a_gt_b8; // + - + = sign of larger
4'b0011: so9 <= 0; // + - - = +
4'b0100: so9 <= a_gt_b8; // - + + = sign of larger
4'b0101: so9 <= 1; // - + - = -
4'b0110: so9 <= 1; // - - + = -
4'b0111: so9 <= a_gt_b8; // - - - = sign of larger
4'b1000: so9 <= 0; // A + B, sign = +
4'b1001: so9 <= rm8==3; // A + -B, sign = + unless rounding down
4'b1010: so9 <= rm8==3; // A - B, sign = + unless rounding down
4'b1011: so9 <= 0; // +A - -B, sign = +
4'b1100: so9 <= rm8==3; // -A + B, sign = + unless rounding down
4'b1101: so9 <= 1; // -A + -B, sign = -
4'b1110: so9 <= 1; // -A - +B, sign = -
4'b1111: so9 <= rm8==3; // -A - -B, sign = + unless rounding down
endcase
 
// -----------------------------------------------------------
// Clock #10
// Compute the difference in exponents, provides shift amount
// Note that ex9a will be negative for an underflow condition
// so it's added rather than subtracted from xc9 as -(-num)
// is the same as an add. The underflow is tracked rather than
// using extra bits in the exponent.
// -----------------------------------------------------------
reg [EMSB+2:0] xdiff10;
reg [FX:0] mfs;
reg ops10;
 
// If the multiplier exponent was negative (underflowed) then
// the mantissa needs to be shifted right even more (until
// the exponent is zero). The total shift would be xc9 - 0 -
// (amount of underflow), which is xc9 + -ex9a.
 
always @(posedge clk)
if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
: ex9a[EMSB+2] ? xc9 + (~ex9a+2'd1)
: xc9 - ex9a;
 
// Determine which fraction to denormalize (the one with the
// smaller exponent is denormalized). If the exponents are equal
// denormalize the smaller fraction.
always @(posedge clk)
if (ce) mfs <=
xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9)
: ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
 
always @(posedge clk)
if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
: (ex_gt_xc9 ? 1'b1 : 1'b0);
 
// -----------------------------------------------------------
// Clock #11
// Limit the size of the shifter to only bits needed.
// -----------------------------------------------------------
reg [7:0] xdif11;
 
always @(posedge clk)
if (ce) xdif11 <= xdiff10 > FX+3 ? FX+3 : xdiff10;
 
// -----------------------------------------------------------
// Clock #12
// Determine the sticky bit
// -----------------------------------------------------------
 
wire sticky, sticky12;
wire [FX:0] mfs12;
wire [7:0] xdif12;
 
generate
begin
if (FPWID==128)
redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==96)
redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==84)
redor84 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==80)
redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==64)
redor64 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==32)
redor32 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else begin
always @* begin
$display("redor operation needed in fpFMA");
$finish;
end
end
end
endgenerate
 
// register inputs to shifter and shift
delay1 #(1) u122(.clk(clk), .ce(ce), .i(sticky), .o(sticky12) );
delay1 #(8) u123(.clk(clk), .ce(ce), .i(xdif11), .o(xdif12) );
delay2 #(FX+1) u124(.clk(clk), .ce(ce), .i(mfs), .o(mfs12) );
 
// -----------------------------------------------------------
// Clock #13
// - denormalize operand (shift right)
// -----------------------------------------------------------
reg [FX+2:0] mfs13;
wire [FX:0] mo13;
wire ex_gt_xc13;
wire [FMSB+1:0] fractc13;
wire ops13;
 
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13));
 
always @(posedge clk)
if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;
 
// -----------------------------------------------------------
// Clock #14
// Sort operands
// -----------------------------------------------------------
reg [FX+2:0] oa, ob;
wire a_gt_b14;
 
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));
 
always @(posedge clk)
if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13;
always @(posedge clk)
if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
 
// -----------------------------------------------------------
// Clock #15
// - Sort operands
// -----------------------------------------------------------
reg [FX+2:0] oaa, obb;
wire realOp15;
wire [EMSB:0] ex15;
wire [EMSB:0] ex9c = ex9[EMSB+1] ? infXp : ex9[EMSB:0];
wire overflow15;
vtdl #(1) u151 (.clk(clk), .ce(ce), .a(4'd7), .d(realOp7), .q(realOp15));
vtdl #(EMSB+1) u152 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9c), .q(ex15));
vtdl #(EMSB+1) u153 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9[EMSB+1]| &ex9[EMSB:0]), .q(overflow15));
 
always @(posedge clk)
if (ce) oaa <= a_gt_b14 ? oa : ob;
always @(posedge clk)
if (ce) obb <= a_gt_b14 ? ob : oa;
 
// -----------------------------------------------------------
// Clock #16
// - perform add/subtract
// - addition can generate an extra bit, subtract can't go negative
// -----------------------------------------------------------
reg [FX+3:0] mab;
wire [FX:0] mo16;
wire [FMSB+1:0] fractc16;
wire Nan16;
wire cNan16;
wire aInf16, cInf16;
wire op16;
wire exinf16;
 
vtdl #(1) u161 (.clk(clk), .ce(ce), .a(4'd10), .d(qNaNOut5|aNan5|bNan5), .q(Nan16));
vtdl #(1) u162 (.clk(clk), .ce(ce), .a(4'd14), .d(cNan1), .q(cNan16));
vtdl #(1) u163 (.clk(clk), .ce(ce), .a(4'd9), .d(&ex6), .q(aInf16));
vtdl #(1) u164 (.clk(clk), .ce(ce), .a(4'd14), .d(cInf1), .q(cInf16));
vtdl #(1) u165 (.clk(clk), .ce(ce), .a(4'd14), .d(op1), .q(op16));
delay3 #(FX+1) u166 (.clk(clk), .ce(ce), .i(mo13), .o(mo16));
vtdl #(FMSB+2) u167 (.clk(clk), .ce(ce), .a(4'd6), .d(fractc9), .q(fractc16));
delay1 u169 (.clk(clk), .ce(ce), .i(&ex15), .o(exinf16));
 
always @(posedge clk)
if (ce) mab <= realOp15 ? oaa - obb : oaa + obb;
 
// -----------------------------------------------------------
// Clock #17
// - adjust for Nans
// -----------------------------------------------------------
wire [EMSB:0] ex17;
reg [FX:0] mo17;
wire so17;
wire exinf17;
wire overflow17;
 
vtdl #(1) u171 (.clk(clk), .ce(ce), .a(4'd7), .d(so9), .q(so17));
delay2 #(EMSB+1) u172 (.clk(clk), .ce(ce), .i(ex15), .o(ex17));
delay1 #(1) u173 (.clk(clk), .ce(ce), .i(exinf16), .o(exinf17));
delay2 u174 (.clk(clk), .ce(ce), .i(overflow15), .o(overflow17));
 
always @(posedge clk)
casez({aInf16&cInf16,Nan16,cNan16,exinf16})
4'b1???: mo17 <= {1'b0,op16,{FMSB-1{1'b0}},op16,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add
4'b01??: mo17 <= {1'b0,mo16};
4'b001?: mo17 <= {1'b1,1'b1,fractc16[FMSB-1:0],{FMSB+1{1'b0}}};
4'b0001: mo17 <= 1'd0;
default: mo17 <= mab[FX+3:2]; // mab has two extra lead bits and two trailing bits
endcase
 
assign o = {so17,ex17,mo17};
assign zero = {ex17,mo17}==1'd0;
assign inf = exinf17;
assign under = ex17==1'd0;
assign over = overflow17;
 
endmodule
 
 
// Fused multiply-add with normalization and rounding.
 
// fpFMAnr chains fpFMA -> fpNormalize -> fpRound and decodes status flags.
//
// Ports:
//   clk, ce    clock and clock enable, passed to every stage
//   op         operation select, forwarded to fpFMA
//              (NOTE(review): presumably 0 = add c, 1 = subtract c - confirm)
//   rm         rounding mode, forwarded to fpFMA and fpRound
//   a, b, c    operands in packed floating point format
//   o          rounded result in packed floating point format
//   zero, inf  decoded combinationally from 'o' by fpDecomp
//   overflow   same signal as 'inf' (see note at the bottom of the module)
//   underflow  fpFMA underflow flag passed through a vtdl delay line (tap 11)
//   inexact    fpNormalize inexact flag passed through delay3
module fpFMAnr(clk, ce, op, rm, a, b, c, o, inf, zero, overflow, underflow, inexact);
input clk;
input ce;
input op;
input [2:0] rm;
input [MSB:0] a, b, c;
output [MSB:0] o;
output zero;
output inf;
output overflow;
output underflow;
output inexact;

wire [EX:0] fma_o;        // fpFMA result in expanded internal format
wire fma_underflow;
wire fma_overflow;        // still produced by fpFMA, no longer drives 'overflow'
wire norm_underflow;      // fpNormalize underflow output, currently unused
wire norm_inexact;
wire [MSB+3:0] fpn0;      // normalized value fed to the rounder

fpFMA #(FPWID) u1
(
  .clk(clk),
  .ce(ce),
  .op(op),
  .rm(rm),
  .a(a),
  .b(b),
  .c(c),
  .o(fma_o),
  .under(fma_underflow),
  .over(fma_overflow),
  .zero(),
  .inf()
);
fpNormalize #(FPWID) u2
(
  .clk(clk),
  .ce(ce),
  .i(fma_o),
  .o(fpn0),
  .under_i(fma_underflow),
  .under_o(norm_underflow),
  .inexact_o(norm_inexact)
);
fpRound #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
fpDecomp #(FPWID) u4(.i(o), .xz(), .vz(zero), .inf(inf));
vtdl u5 (.clk(clk), .ce(ce), .a(4'd11), .d(fma_underflow), .q(underflow));
delay3 #(1) u7 (.clk(clk), .ce(ce), .i(norm_inexact), .o(inexact));

// 'overflow' used to be driven twice: by a vtdl delay of fma_overflow and by
// this continuous assignment. Two drivers on one net resolve to X whenever
// they disagree and are rejected by synthesis as a multi-driven net. Only this
// assignment is kept because 'inf' is decoded from 'o' itself, so the flag is
// always aligned with the result it describes.
assign overflow = inf;

endmodule
 
/fpMultiply.sv
0,0 → 1,270
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpMultiply.sv
// - floating point multiplier
// - three cycle latency (partial products, sum, exception select)
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Floating Point Multiplier
//
// This multiplier handles denormalized numbers.
// The output format is of an internal expanded representation
// in preparation to be fed into a normalization unit, then
// rounding. Basically, it's the same as the regular format
// except the mantissa is doubled in size, the leading two
// bits of which are assumed to be whole bits.
//
//
// Floating Point Multiplier
//
// Properties:
// +-inf * +-inf = -+inf (this is handled by exOver)
// +-inf * 0 = QNaN
//
// 1 sign number
// 8 exponent
// 48 mantissa
//
// ============================================================================
 
import fp::*;
 
// fpMultiply - pipelined floating point multiplier producing the expanded
// internal format {sign, exponent, double-width mantissa} consumed by
// fpNormalize.
//
// Ports:
//   clk, ce    clock and clock enable for every register in the pipeline
//   a, b       operands in packed floating point format
//   o          product in expanded format: {so1, xo1, mo1}
//   sign_exe   delayed (sa & sb), i.e. both operand sign bits were set
//   overflow   exponent sum overflowed
//   inf        driven from the same overflow condition as 'overflow'
//   underflow  exponent sum went negative
//
// Pipeline: partial products are registered, then summed into fract1, then the
// exception select registers xo1/mo1 - three register stages in total. The
// side-band signals use delay2 + one register, or delay3, to line up with that
// (NOTE(review): assumes delayN delays by N enabled clocks - confirm).
module fpMultiply(clk, ce, a, b, o, sign_exe, inf, overflow, underflow);
input clk;
input ce;
input [MSB:0] a, b;
output [EX:0] o;
output sign_exe;
output inf;
output overflow;
output underflow;

reg [EMSB:0] xo1;       // registered output exponent
reg [FX:0] mo1;         // registered output mantissa

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};	// infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};	//2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};

// variables
reg [FX:0] fract1,fract1a;  // summed product; fract1a is only used by the generic branch
wire [EMSB+2:0] ex1;	// sum of exponents (two extra bits: overflow and sign)
wire [EMSB  :0] ex2;    // ex1 truncated and delayed to the select stage

// Decompose the operands
wire sa, sb;			// sign bit
wire [EMSB:0] xa, xb;	// exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;			// a/b is denormalized
wire aNan, bNan, aNan1, bNan1;
wire az, bz;
wire aInf, bInf, aInf1, bInf1;


// -----------------------------------------------------------
// Front end (combinational) and product pipeline
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction (partial products, then their sum)
// -----------------------------------------------------------

fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );
fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );

// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract 127)
// mul: ex1 = xa + xb,	result should always be < 1ffh
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;

generate
if (FPWID==80) begin
  // 4 x 4 grid of 16 x 16 partial products
  reg [31:0] p00,p01,p02,p03;
  reg [31:0] p10,p11,p12,p13;
  reg [31:0] p20,p21,p22,p23;
  reg [31:0] p30,p31,p32,p33;
  always @(posedge clk)
  if (ce) begin
    p00 <= fracta[15: 0] * fractb[15: 0];
    p01 <= fracta[31:16] * fractb[15: 0];
    p02 <= fracta[47:32] * fractb[15: 0];
    p03 <= fracta[63:48] * fractb[15: 0];

    p10 <= fracta[15: 0] * fractb[31:16];
    p11 <= fracta[31:16] * fractb[31:16];
    p12 <= fracta[47:32] * fractb[31:16];
    p13 <= fracta[63:48] * fractb[31:16];

    p20 <= fracta[15: 0] * fractb[47:32];
    p21 <= fracta[31:16] * fractb[47:32];
    p22 <= fracta[47:32] * fractb[47:32];
    p23 <= fracta[63:48] * fractb[47:32];

    p30 <= fracta[15: 0] * fractb[63:48];
    p31 <= fracta[31:16] * fractb[63:48];
    p32 <= fracta[47:32] * fractb[63:48];
    p33 <= fracta[63:48] * fractb[63:48];

    // Uses the partial products registered on the previous clock.
    fract1 <= {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 +
              {p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} +
              {p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} +
              {p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0}
              ;
  end
end
else if (FPWID==64) begin
  // 3 x 3 grid of (up to) 18 x 18 partial products
  reg [35:0] p00,p01,p02;
  reg [35:0] p10,p11,p12;
  reg [35:0] p20,p21,p22;
  always @(posedge clk)
  if (ce) begin
    p00 <= fracta[17: 0] * fractb[17: 0];
    p01 <= fracta[35:18] * fractb[17: 0];
    p02 <= fracta[52:36] * fractb[17: 0];
    p10 <= fracta[17: 0] * fractb[35:18];
    p11 <= fracta[35:18] * fractb[35:18];
    p12 <= fracta[52:36] * fractb[35:18];
    p20 <= fracta[17: 0] * fractb[52:36];
    p21 <= fracta[35:18] * fractb[52:36];
    p22 <= fracta[52:36] * fractb[52:36];
    // Uses the partial products registered on the previous clock.
    fract1 <= {p02,36'b0} + {p01,18'b0} + p00 +
              {p12,54'b0} + {p11,36'b0} + {p10,18'b0} +
              {p22,72'b0} + {p21,54'b0} + {p20,36'b0}
              ;
  end
end
else if (FPWID==32) begin
  // 2 x 2 grid of 12 x 12 partial products
  reg [23:0] p00,p01;
  reg [23:0] p10,p11;
  always @(posedge clk)
  if (ce) begin
    p00 <= fracta[11: 0] * fractb[11: 0];
    p01 <= fracta[23:12] * fractb[11: 0];
    p10 <= fracta[11: 0] * fractb[23:12];
    p11 <= fracta[23:12] * fractb[23:12];
    // Uses the partial products registered on the previous clock.
    fract1 <= {p11,p00} + {p01,12'b0} + {p10,12'b0};
  end
end
else begin
  // Generic width: let the tool infer the multiplier, keep the same
  // two register stages as the hand-split versions.
  always @(posedge clk)
  if (ce) begin
    fract1a <= fracta * fractb;
    fract1 <= fract1a;
  end
end
endgenerate

// Status
wire under1, over1;
wire under = ex1[EMSB+2];	// exponent underflow
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// Side-band signals delayed to line up with fract1.
delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );
delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay2 u6  (.clk(clk), .ce(ce), .i(under), .o(under1) );
delay2 u7  (.clk(clk), .ce(ce), .i(over), .o(over1) );

// determine when a NaN is output
wire qNaNOut;
wire [FPWID-1:0] a1,b1;
delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) );
delay2 u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );
delay2 u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );
delay2 #(FPWID) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );
delay2 #(FPWID) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );

// -----------------------------------------------------------
// Output stage
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------

// The sign uses delay3 so it lines up with the registered xo1/mo1
// (delay2 side-band signals plus the register below).
wire so1;
delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );

// Nonblocking assignments are used for these clocked registers so that any
// flop sampling 'o' on the same clock edge sees the previous value; the
// former blocking assignments made that simulation-order dependent.
always @(posedge clk)
if (ce)
  casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1})
  5'b1????:	xo1 <= infXp;	// qNaN - infinity * zero
  5'b01???:	xo1 <= infXp;	// 'a' infinite
  5'b001??:	xo1 <= infXp;	// 'b' infinite
  5'b0001?:	xo1 <= infXp;	// result overflow
  5'b00001:	xo1 <= ex2[EMSB:0];//0;		// underflow
  default:	xo1 <= ex2[EMSB:0];	// situation normal
  endcase

always @(posedge clk)
if (ce)
  casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1})
  6'b1?????:  mo1 <= {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}};   // propagate NaN payload of 'a'
  6'b01????:  mo1 <= {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}};   // propagate NaN payload of 'b'
  6'b001???:	mo1 <= {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}};	// multiply inf * zero
  6'b0001??:	mo1 <= 0;	// mul inf's
  6'b00001?:	mo1 <= 0;	// mul inf's
  6'b000001:	mo1 <= 0;	// mul overflow
  default:	mo1 <= fract1;
  endcase

delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );
delay1 u11 (.clk(clk), .ce(ce), .i(over1),  .o(overflow) );
delay1 u12 (.clk(clk), .ce(ce), .i(over1),  .o(inf) );
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );

assign o = {so1,xo1,mo1};

endmodule
 
 
// Multiplier with normalization and rounding.
 
// fpMulnr chains fpMultiply -> fpNormalize -> fpRound.
//
// Ports:
//   clk, ce    clock and clock enable, passed to every stage
//   a, b       operands in packed floating point format
//   o          rounded product in packed floating point format
//   rm         rounding mode, forwarded to fpRound
//   sign_exe, inf, overflow, underflow
//              status outputs of the multiplier, each passed through delay2
//              (NOTE(review): fpNormalize's header states a longer latency
//              than two cycles - confirm these flags line up with 'o')
module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);
input clk;
input ce;
input  [MSB:0] a, b;
output [MSB:0] o;
input [2:0] rm;
output sign_exe;
output inf;
output overflow;
output underflow;

wire [EX:0] o1;           // product in expanded internal format
wire sign_exe1, inf1, overflow1, underflow1;
wire [MSB+3:0] fpn0;      // normalized value fed to the rounder

// The multiplier defined in this file is 'fpMultiply' and it takes its sizing
// from the fp package (it has no parameters), so it is instantiated by that
// name and without a parameter override. The previous reference to
// 'fpMul #(FPWID)' does not resolve against the SystemVerilog sources.
fpMultiply u1
(
  .clk(clk),
  .ce(ce),
  .a(a),
  .b(b),
  .o(o1),
  .sign_exe(sign_exe1),
  .inf(inf1),
  .overflow(overflow1),
  .underflow(underflow1)
);
fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );
fpRound     #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));
endmodule
/fpNormalize.sv
0,0 → 1,264
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpNormalize.sv
// - floating point normalization unit
// - eight cycle latency
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// This unit takes a floating point number in an intermediate
// format and normalizes it. No normalization occurs
// for NaN's or infinities. The unit has a two cycle latency.
//
// The mantissa is assumed to start with two whole bits on
// the left. The remaining bits are fractional.
//
// The width of the incoming format is reduced via a generation
// of sticky bit in place of the low order fractional bits.
//
// On an underflowed input, the incoming exponent is assumed
// to be negative. A right shift is needed.
// ============================================================================
 
import fp::*;
 
module fpNormalize(clk, ce, i, o, under_i, under_o, inexact_o);
input clk;
input ce;
input [EX:0] i; // expanded format input
output [MSB+3:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit
input under_i;
output under_o;
output inexact_o;
 
 
// ----------------------------------------------------------------------------
// No Clock required
// ----------------------------------------------------------------------------
reg [EMSB:0] xo0;
reg so0;
 
always @*
xo0 <= i[EX-1:FX+1];
always @*
so0 <= i[EX]; // sign doesn't change
 
// ----------------------------------------------------------------------------
// Clock #1
// - Capture exponent information
// ----------------------------------------------------------------------------
reg xInf1a, xInf1b, xInf1c;
wire [FX:0] i1;
delay1 #(FX+1) u11 (.clk(clk), .ce(ce), .i(i), .o(i1));
 
always @(posedge clk)
if (ce) xInf1a <= &xo0 & !under_i;
always @(posedge clk)
if (ce) xInf1b <= &xo0[EMSB:1] & !under_i;
always @(posedge clk)
if (ce) xInf1c = &xo0;
 
// ----------------------------------------------------------------------------
// Clock #2
// - determine exponent increment
// Since there are *three* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
// ----------------------------------------------------------------------------
wire xInf2c, xInf2b;
wire [EMSB:0] xo2;
reg incExpByOne2, incExpByTwo2;
delay1 u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));
delay1 u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));
delay2 #(EMSB+1) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));
delay2 u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));
 
always @(posedge clk)
if (ce) incExpByTwo2 <= !xInf1b & i1[FX];
always @(posedge clk)
if (ce) incExpByOne2 <= !xInf1a & i1[FX-1];
 
// ----------------------------------------------------------------------------
// Clock #3
// - increment exponent
// - detect a zero mantissa
// ----------------------------------------------------------------------------
 
wire incExpByTwo3;
wire incExpByOne3;
wire [FX:0] i3;
reg [EMSB:0] xo3;
reg zeroMan3;
delay1 u31 (.clk(clk), .ce(ce), .i(incExpByTwo2), .o(incExpByTwo3));
delay1 u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));
delay3 #(FX+1) u33 (.clk(clk), .ce(ce), .i(i[FX:0]), .o(i3));
wire [EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};
wire [EMSB+1:0] xv3b = xo2 + incExpByOne2;
 
always @(posedge clk)
if (ce) xo3 <= xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);
 
always @(posedge clk)
if(ce) zeroMan3 <= ((xv3b[EMSB+1]|| &xv3b[EMSB:0])||(xv3a[EMSB+1]| &xv3a[EMSB:0]))
&& !under2 && !xInf2c;
 
// ----------------------------------------------------------------------------
// Clock #4
// - Shift mantissa left
// - If infinity is reached then set the mantissa to zero
// shift mantissa left to reduce to a single whole digit
// - create sticky bit
// ----------------------------------------------------------------------------
 
reg [FMSB+4:0] mo4;
reg inexact4;
 
always @(posedge clk)
if(ce)
casez({zeroMan3,incExpByTwo3,incExpByOne3})
3'b1??: mo4 <= 1'd0;
3'b01?: mo4 <= {i3[FX:FMSB+1],|i3[FMSB:0]};
3'b001: mo4 <= {i3[FX-1:FMSB],|i3[FMSB-1:0]};
default: mo4 <= {i3[FX-2:FMSB-1],|i3[FMSB-2:0]};
endcase
 
always @(posedge clk)
if(ce)
casez({zeroMan3,incExpByTwo3,incExpByOne3})
3'b1??: inexact4 <= 1'd0;
3'b01?: inexact4 <= |i3[FMSB:0];
3'b001: inexact4 <= |i3[FMSB-1:0];
default: inexact4 <= |i3[FMSB-2:0];
endcase
 
// ----------------------------------------------------------------------------
// Clock edge #5
// - count leading zeros
// ----------------------------------------------------------------------------
wire [7:0] leadingZeros5;
wire [EMSB:0] xo5;
wire xInf5;
delay2 #(EMSB+1) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));
delay3 #(1) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );
 
generate
begin
if (FPWID <= 32) begin
cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,5'b0}), .o(leadingZeros5) );
assign leadingZeros5[7:6] = 2'b00;
end
else if (FPWID<=64) begin
assign leadingZeros5[7] = 1'b0;
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,8'h0}), .o(leadingZeros5) );
end
else if (FPWID<=80) begin
assign leadingZeros5[7] = 1'b0;
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );
end
else if (FPWID<=84) begin
assign leadingZeros5[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,24'b0}), .o(leadingZeros5) );
end
else if (FPWID<=96) begin
assign leadingZeros5[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );
end
else if (FPWID<=128)
cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );
end
endgenerate
 
 
// ----------------------------------------------------------------------------
// Clock edge #6
// - Compute how much we want to decrement exponent by
// - compute amount to shift left and right
// - at infinity the exponent can't be incremented, so we can't shift right
// otherwise it was an underflow situation so the exponent was negative
// shift amount needs to be negated for shift register
// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
// ----------------------------------------------------------------------------
reg [7:0] lshiftAmt6;
reg [7:0] rshiftAmt6;
wire rightOrLeft6; // 0=left,1=right
wire xInf6;
wire [EMSB:0] xo6;
wire [FMSB+4:0] mo6;
wire zeroMan6;
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );
delay1 #(EMSB+1) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));
delay2 #(FMSB+5) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );
delay1 #(1) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );
delay3 u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6));
 
always @(posedge clk)
if (ce) lshiftAmt6 <= leadingZeros5 > xo5 ? xo5 : leadingZeros5;
 
always @(posedge clk)
if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1; // xo2 is negative !
 
// ----------------------------------------------------------------------------
// Clock edge #7
// - fogure exponent
// - shift mantissa
// ----------------------------------------------------------------------------
 
reg [EMSB:0] xo7;
wire rightOrLeft7;
reg [FMSB+4:0] mo7l, mo7r;
delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));
 
always @(posedge clk)
if (ce)
xo7 <= zeroMan6 ? xo6 :
xInf6 ? xo6 : // an infinite exponent is either a NaN or infinity; no need to change
rightOrLeft6 ? 1'd0 : // on a right shift, the exponent was negative, it's being made to zero
xo6 - lshiftAmt6; // on a left shift, the exponent can't be decremented below zero
 
always @(posedge clk)
if (ce) mo7r <= mo6 >> rshiftAmt6;
always @(posedge clk)
if (ce) mo7l <= mo6 << lshiftAmt6;
 
 
// ----------------------------------------------------------------------------
// Clock edge #8
// - select mantissa
// ----------------------------------------------------------------------------
 
wire so;
wire [EMSB:0] xo;
reg [FMSB+4:0] mo;
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );
delay1 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));
vtdl u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));
delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));
 
always @(posedge clk)
if (ce) mo <= rightOrLeft7 ? mo7r : mo7l;
 
assign o = {so,xo,mo[FMSB+4:1]};
 
endmodule
/fpRound.sv
0,0 → 1,179
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpRound.sv
// - floating point rounding unit
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
import fp::*;
 
// ----------------------------------------------------------------------------
// fpRound
// - rounds the intermediate format produced by the normalizer down to the
//   storage format
//
// Ports:
//   clk, ce  clock and clock enable
//   rm       rounding mode: 0 = nearest even, 1 = truncate, 2 = toward
//            +infinity, 3 = toward -infinity, 4 = nearest away from zero;
//            any other value does not round
//   i        {sign, exponent, mantissa + extra low-order bits}
//   o        {sign, exponent, mantissa} rounded result
//
// With MIN_LATENCY defined the stages are combinational; otherwise each
// stage is registered on clk.
// ----------------------------------------------------------------------------
module fpRound(clk, ce, rm, i, o);
input clk;
input ce;
input [2:0] rm;			// rounding mode
input [MSB+3:0] i;		// intermediate format input
output [MSB:0] o;		// rounded output

//------------------------------------------------------------
// variables
wire so;
wire [EMSB:0] xo;
reg [FMSB:0] mo;
reg [EMSB:0] xo1;
reg [FMSB+3:0] mo1;
wire xInf = &i[MSB+2:FMSB+4];	// exponent field is all ones
wire so0 = i[MSB+3];
assign o = {so,xo,mo};

wire g = i[2];	// guard bit: always the same bit for all operations
wire r = i[1];	// rounding bit
wire s = i[0];	// sticky bit
reg rnd;

//------------------------------------------------------------
// Clock #1
// - determine round amount (add 1 or 0)
//------------------------------------------------------------

`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) xo1 <= i[MSB+2:FMSB+4];
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) mo1 <= i[FMSB+3:0];

// Compute the round bit
// Infinities and NaNs are not rounded!
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce)
	casez ({xInf,rm})
	4'b0000:	rnd <= (g & r) | (r & s);	// round to nearest even
	4'b0001:	rnd <= 1'd0;				// round to zero (truncate)
	4'b0010:	rnd <= (r | s) & !so0;		// round towards +infinity
	4'b0011:	rnd <= (r | s) & so0;		// round towards -infinity
	4'b0100:	rnd <= (r | s);				// round to nearest away from zero
	4'b1???:	rnd <= 1'd0;				// no rounding if exponent indicates infinite or NaN
	default:	rnd <= 0;
	endcase

//------------------------------------------------------------
// Clock #2
// round the number, check for carry
// note: inf. exponent checked above (if the exponent was infinite already, then no rounding occurs as rnd = 0)
// note: exponent increments if there is a carry (can only increment to infinity)
//------------------------------------------------------------

reg [MSB:0] rounded2;
reg carry2;
reg rnd2;
reg dn2;
wire [EMSB:0] xo2;
// Exponent and mantissa are incremented as one value so that a mantissa
// carry ripples into the exponent.
wire [MSB:0] rounded1 = {xo1,mo1[FMSB+3:2]} + rnd;
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) rounded2 <= rounded1;
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) carry2 <= mo1[FMSB+3] & !rounded1[FMSB+1];
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) rnd2 <= rnd;
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
`endif
	if (ce) dn2 <= !(|xo1);
assign xo2 = rounded2[MSB:FMSB+2];

//------------------------------------------------------------
// Clock #3
// - shift mantissa if required.
//------------------------------------------------------------
`ifdef MIN_LATENCY
assign so = i[MSB+3];
assign xo = xo2;
`else
delay3 #(1) u21 (.clk(clk), .ce(ce), .i(i[MSB+3]), .o(so));
delay1 #(EMSB+1) u22 (.clk(clk), .ce(ce), .i(xo2), .o(xo));
`endif

// The mantissa is always taken from the stage-2 register rounded2. When
// rnd2 is zero, rounded2[FMSB+1:0] equals the mo1[FMSB+3:2] value it was
// computed from, so the non-rounding cases select the same bits as before
// without reading the stage-1 register mo1, which in clocked mode may
// already hold the next operand. In clocked mode the register is also
// gated by ce like every other stage so that a stall holds mo together
// with so and xo.
`ifdef MIN_LATENCY
always @*
`else
always @(posedge clk)
	if (ce)
`endif
	casez({rnd2,&xo2,carry2,dn2})
	4'b0??0:	mo <= rounded2[FMSB  :0];	// not rounding, not denormalized, => hide MSB
	4'b0??1:	mo <= rounded2[FMSB+1:1];	// not rounding, denormalized
	4'b1000:	mo <= rounded2[FMSB  :0];	// exponent didn't change, number was normalized, => hide MSB,
	4'b1001:	mo <= rounded2[FMSB+1:1];	// exponent didn't change, but number was denormalized, => retain MSB
	4'b1010:	mo <= rounded2[FMSB+1:1];	// exponent incremented (new MSB generated), number was normalized, => hide 'extra (FMSB+2)' MSB
	4'b1011:	mo <= rounded2[FMSB+1:1];	// exponent incremented (new MSB generated), number was denormalized, number became normalized, => hide 'extra (FMSB+2)' MSB
	4'b11??:	mo <= 1'd0;					// number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite
	endcase

endmodule
 
 
// Round and register the output
/*
module fpRoundReg(clk, ce, rm, i, o);
parameter WID = 128;
`include "fpSize.sv"
 
input clk;
input ce;
input [2:0] rm; // rounding mode
input [MSB+3:0] i; // expanded format input
output reg [WID-1:0] o; // rounded output
 
wire [WID-1:0] o1;
fpRound #(WID) u1 (.rm(rm), .i(i), .o(o1) );
 
always @(posedge clk)
if (ce)
o <= o1;
 
endmodule
*/
/fpSqrt.sv
0,0 → 1,162
// ============================================================================
// __
// \\__/ o\ (C) 2018-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpSqrt.sv
// - floating point square root
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Floating Point Square Root
//
// ============================================================================
 
import fp::*;
 
// ----------------------------------------------------------------------------
// fpSqrt
// - floating point square root producing an expanded-format result
//
// Ports:
//   rst, clk, ce  reset, clock and clock enable
//   ld            load strobe; delayed one clock before it starts the
//                 integer square root unit
//   a             operand
//   o             expanded-format result {sign, exponent, mantissa}
//   done          completion flag from the integer square root unit
//   sqrinf        operand was infinite
//   sqrneg        operand was negative and not zero
// ----------------------------------------------------------------------------
module fpSqrt(rst, clk, ce, ld, a, o, done, sqrinf, sqrneg);
// Left shift that aligns the integer square root result in the mantissa field.
localparam pShiftAmt =
	FPWID==80 ? 48 :
	FPWID==64 ? 36 :
	FPWID==32 ? 7 : (FMSB+1-16);
input rst;
input clk;
input ce;
input ld;
input [MSB:0] a;
output reg [EX:0] o;
output done;
output sqrinf;
output sqrneg;

// registered outputs
reg sign_exe;
reg inf;
reg	overflow;
reg	underflow;

wire so;
wire [EMSB:0] xo;
wire [FX:0] mo;

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};	// infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};	//2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};

// variables
wire [EMSB+2:0] ex1;	// sum of exponents
wire [FX:0] sqrto;

// Operands
wire sa;			// sign bit
wire [EMSB:0] xa;	// exponent bits
wire [FMSB+1:0] fracta;
wire a_dn;			// a/b is denormalized
wire az;
wire aInf;
wire aNan;
wire done1;
wire [7:0] lzcnt;
wire [MSB:0] aa;

// -----------------------------------------------------------
// - decode the input operand
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecompReg u1
(
	.clk(clk),
	.ce(ce),
	.i(a),
	.o(aa),
	.sgn(sa),
	.exp(xa),
	.fract(fracta),
	.xz(a_dn),
	.vz(az),
	.inf(aInf),
	.nan(aNan)
);

assign ex1 = xa + 8'd1;
assign so = 1'b0;				// square root of positive numbers only
assign xo = (ex1 >> 1) + (bias >> 1);	// divide by 2 cuts the bias in half, so 1/2 of it is added back in.
assign mo = aNan ? {1'b1,aa[FMSB:0],{FMSB+1{1'b0}}} : (sqrto << pShiftAmt);
assign sqrinf = aInf;
// Test the sign of the *operand* (sa). The result sign so is tied to zero,
// so testing it would mean a negative operand is never reported.
assign sqrneg = !az & sa;

// Pre-shift the fraction by one bit when ex1 is odd so the exponent can be
// halved exactly.
wire [FMSB+2:0] fracta1 = ex1[0] ? {1'b0,fracta} << 1 : {2'b0,fracta};

wire ldd;
delay1 #(1) u3 (.clk(clk), .ce(ce), .i(ld), .o(ldd));

isqrt #(FX+1) u2
(
	.rst(rst),
	.clk(clk),
	.ce(ce),
	.ld(ldd),
	.a({1'b0,fracta1,{FMSB+1{1'b0}}}),
	.o(sqrto),
	.done(done)
);

// Result selection: a NaN operand passes through; infinite and negative
// operands produce the corresponding quiet NaN codes.
// NOTE(review): the two NaN-code concatenations below are narrower than
// {sa,xa,mo} and infXp is never used - confirm the exponent field of these
// results is what was intended.
always @*
casez({aNan,sqrinf,sqrneg})
3'b1??:	o <= {sa,xa,mo};
3'b01?:	o <= {sa,1'b1,qNaN|`QSQRTINF,{FMSB+1{1'b0}}};
3'b001:	o <= {sa,1'b1,qNaN|`QSQRTNEG,{FMSB+1{1'b0}}};
default:	o <= {so,xo,mo};
endcase

endmodule
 
// ----------------------------------------------------------------------------
// fpSqrtnr
// - square root followed by normalization and rounding
//
// Ports:
//   rst, clk, ce  reset, clock and clock enable
//   ld            load strobe passed to fpSqrt
//   a             operand
//   o             rounded result
//   rm            rounding mode passed to fpRound
//   done          fpSqrt done flag delayed by two clocks
//   inf           see the note on inf1 below
//   sqrinf        operand was infinite (from fpSqrt)
//   sqrneg        operand was negative (from fpSqrt)
// ----------------------------------------------------------------------------
module fpSqrtnr(rst, clk, ce, ld, a, o, rm, done, inf, sqrinf, sqrneg);
input rst;
input clk;
input ce;
input ld;
input  [MSB:0] a;
output [MSB:0] o;
input [2:0] rm;
output done;
output inf;
output sqrinf;
output sqrneg;

wire [EX:0] o1;
// NOTE(review): inf1 has no driver in this module and fpSqrt has no
// matching output, so the inf port carries an undriven value. It is left
// as found because the intended source is not visible here.
wire inf1;
wire [MSB+3:0] fpn0;
wire done1;

// fpSqrt, fpNormalize and fpRound declare no parameters (their widths come
// from the imported package), so they are instantiated without a parameter
// override list.
fpSqrt      u1 (rst, clk, ce, ld, a, o1, done1, sqrinf, sqrneg);
fpNormalize u2 (.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) );
fpRound     u3 (.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u5 (.clk(clk), .ce(ce), .i(inf1), .o(inf));
delay2 #(1) u8 (.clk(clk), .ce(ce), .i(done1), .o(done));
endmodule
 
/fpToPosit.sv
0,0 → 1,114
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpToPosit.sv
// - floating point to posit number converter
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
// Parts of this code originated from FP_to_Posit.v by Manish Kumar Jaiswal
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
`include "positConfig.sv"
`include "fpConfig.sv"
`include "fpTypes.sv"
 
// ----------------------------------------------------------------------------
// fpToPosit
// - combinational conversion of a floating point number to a posit
//
// Parameters:
//   FPWID  width of both the floating point input and the posit output
//   BIAS   exponent bias removed from the floating point exponent
// Ports:
//   i      floating point input
//   o      posit output
// ----------------------------------------------------------------------------
module fpToPosit(i, o);
parameter FPWID = 32;
`include "fpSize.sv"
`include "positSize.sv"
input [FPWID-1:0] i;
output reg [FPWID-1:0] o;

parameter BIAS = {1'b0,{EMSB{1'b1}}};
localparam N = FPWID;
localparam E = EMSB+1;
localparam Bs = $clog2(FPWID-1);

// operands sign,exponent,significand
wire sa;
wire [EMSB:0] xa;
wire [FMSB:0] ma;
wire [FMSB+1:0] fracta;
wire adn;
wire az;
wire xaInf;			// declared with the same spelling as its use below
wire aInf;
wire aNan;
wire sgno;			// explicit declaration instead of an implicit net

fpDecomp #(FPWID) u1 (.i(i), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
assign sgno = sa;
// Count the leading zeros of the fraction; the counter input is padded on
// the right with ones so the padding is never counted.
wire [$clog2(FMSB+1):0] lzcnt;
generate begin : gCntlz
  case(FPWID)
  16: begin cntlz16 u2 ({fracta,5'h1f},lzcnt); end  //1-5-10
  20: begin cntlz16 u2 ({fracta,2'h3},lzcnt); end   //1-6-13
  32: begin cntlz32 u2 ({fracta,8'hFF},lzcnt); end  // 1-8-23
  40: begin cntlz32 u2 ({fracta,2'h3},lzcnt); end   // 1-10-29
  52: begin cntlz48 u2 ({fracta,7'h7F},lzcnt); end  // 1-11-40
  64: begin cntlz64 u2 ({fracta,11'h7FF},lzcnt); end  // 1-11-52
  80: begin cntlz80 u2 ({fracta,15'h7FFF},lzcnt); end // 1-15-64
  default:
    always @*
    begin
      $display("fpToPosit: Unsupported size");
      $finish;
    end
  endcase
end
endgenerate

// Left align the significand.
wire [N-1:0] sig_tmp = {fracta,{E{1'b0}}} << lzcnt;

// Convert exponent to twos complement from BIAS offset
wire [E:0] exp = xa - BIAS - lzcnt;
wire sxp = exp[E];  // get exponent sign
wire [E:0] absexp = sxp ? -exp : exp; // get absolute value
wire [es-1:0] e_o = (sxp & |absexp[es-1:0]) ? exp[es-1:0] : absexp[es-1:0];
wire [E-es-1:0] r_o = (~sxp || (sxp & |absexp[es-1:0])) ? {{Bs{1'b0}},absexp[E-1:es]} + 1'b1 : {{Bs{1'b0}},absexp[E-1:es]};
// Exponent and Significand Packing
wire [2*N-1:0] tmp = {{N{~sxp}},sxp,e_o,sig_tmp[N-2:es]};

// Including Regime bits in Exponent-Significand Packing
wire [Bs-1:0] diff_b;

generate begin : gDiffb
  if (E-es > Bs)
    assign diff_b = |r_o[E-es-1:Bs] ? {{(Bs-2){1'b1}},2'b01} : r_o[Bs-1:0];
  else
    assign diff_b = r_o;
end
endgenerate

// Shifting right by the regime length pulls the regime bits down from the
// upper half of tmp.
wire [2*N-1:0] tmp1 = tmp >> diff_b;
wire [N-1:0] tmp1s = sa ? -tmp1[N-1:0] : tmp1[N-1:0];

// Zero maps to the all-zero code. Infinity, NaN and an input whose aligned
// significand has no leading one all map to the code with only the MSB set.
always @*
casez({az,aInf,aNan,~sig_tmp[N-1]})
4'b1???:  o = {FPWID{1'b0}};
4'b01??:  o = {1'b1,{FPWID-1{1'b0}}};
4'b001?:  o = {1'b1,{FPWID-1{1'b0}}};
4'b0001:  o = {1'b1,{FPWID-1{1'b0}}};
default:  o = {sa,tmp1s[N-1:1]};
endcase

endmodule
/i2f.sv
0,0 → 1,107
// ============================================================================
// __
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// i2f.sv
// - convert integer to floating point
// - parameterized width
// - IEEE 754 representation
// - pipelineable
// - single cycle latency
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
import fp::*;
 
// ----------------------------------------------------------------------------
// i2f
// - convert an integer to floating point with one registered stage
//
// Ports:
//   clk, ce  clock and clock enable
//   op       1 = treat i as signed, 0 = unsigned
//   rm       rounding mode: 0 = nearest even, 1 = truncate, 2 = toward
//            +infinity, 3 = toward -infinity, 4 = nearest away from zero;
//            other values round to nearest even
//   i        integer input
//   o        floating point output {sign, exponent, mantissa}
// ----------------------------------------------------------------------------
module i2f (clk, ce, op, rm, i, o);
input clk;
input ce;
input op;						// 1 = signed, 0 = unsigned
input [2:0] rm;			// rounding mode
input [FPWID-1:0] i;		// integer input
output [FPWID-1:0] o;		// float output

// Exponent of 2^0: EMSB ones zero-extended to the exponent width.
wire [EMSB:0] zeroXp = {EMSB{1'b1}};

wire iz;			// zero input ?
wire [MSB:0] imag;	// get magnitude of i
// The value is negative only for a signed conversion with the MSB set.
wire isNeg = op & i[MSB];
wire [MSB:0] imag1 = isNeg ? -i : i;
wire [7:0] lz;		// count the leading zeros in the number
wire [EMSB:0] wd;	// compute number of whole digits
wire so;			// sign of the result
wire [2:0] rmd;

delay1 #(3)   u0 (.clk(clk), .ce(ce), .i(rm),     .o(rmd) );
delay1 #(1)   u1 (.clk(clk), .ce(ce), .i(i==0),   .o(iz) );
delay1 #(FPWID) u2 (.clk(clk), .ce(ce), .i(imag1),  .o(imag) );
// Register the effective sign together with the data. Qualifying with op
// here keeps the output sign aligned with the operand that produced it and
// stops an unsigned input with its MSB set from being rounded as negative.
delay1 #(1)   u3 (.clk(clk), .ce(ce), .i(isNeg),  .o(so) );
generate
if (FPWID==128) begin
cntlz128Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
end else if (FPWID==96) begin
cntlz96Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (FPWID==84) begin
cntlz96Reg    u4 (.clk(clk), .ce(ce), .i({imag1,12'hfff}), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (FPWID==80) begin
cntlz80Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (FPWID==64) begin
cntlz64Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (FPWID==32) begin
cntlz32Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[5:0]) );
assign lz[7:6]=2'b00;
end else begin
	always @* begin
		$display("Uncoded leading zero count in i2f");
		$finish;
	end
end
endgenerate

assign wd = zeroXp - 1 + FPWID - lz;	// constant except for lz

wire [EMSB:0] xo = iz ? 0 : wd;
wire [MSB:0] simag = imag << lz;		// left align number

// After left alignment simag[MSB] is the hidden one and the stored
// mantissa is simag[MSB-1:EMSB+1]. The rounding decision therefore uses
// the lowest kept bit and the bits immediately below it.
wire g =  simag[EMSB+1];	// guard bit (lsb of the kept mantissa)
wire r =  simag[EMSB];		// rounding bit (first discarded bit)
wire s = |simag[EMSB-1:0];	// "sticky" bit (OR of the remaining bits)
reg rnd;

// Compute the round bit
always @(rmd,g,r,s,so)
	case (rmd)
	3'd0:	rnd = (g & r) | (r & s);	// round to nearest even
	3'd1:	rnd = 0;					// round to zero (truncate)
	3'd2:	rnd = (r | s) & !so;		// round towards +infinity
	3'd3:	rnd = (r | s) & so;			// round towards -infinity
	3'd4:	rnd = (r | s);				// round to nearest away from zero
	default:	rnd = (g & r) | (r & s);	// round to nearest even
	endcase

// "hide" the leading one bit = MSB-1
// round the result; exponent and mantissa are incremented as one value so
// that a carry out of an all-ones mantissa bumps the exponent instead of
// being lost.
wire [MSB-1:0] xm = {xo,simag[MSB-1:EMSB+1]} + rnd;

assign o = {so,xm};

endmodule
/positCntlo.sv
0,0 → 1,45
`include "positConfig.sv"
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// positCntlo.sv
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// Count the leading ones of the PSTWID-1 low bits of a posit.
//   i  bits to examine
//   o  count of leading ones
// The input is padded on the right up to the width of the selected counter.
// The padding is zero so that a run of ones which fills i stops at the end
// of i; padding with ones would extend the run and over-count.
// Widths without a case below leave o undriven.
module positCntlo(i, o);
parameter PSTWID = `PSTWID;
input [PSTWID-2:0] i;
output [$clog2(PSTWID-2):0] o;

generate begin : gClz
  case(PSTWID)
  16: cntlo16 u1 (.i({i,1'b0}), .o(o));
  20: cntlo24 u1 (.i({i,1'b0,4'h0}), .o(o));
  32: cntlo32 u1 (.i({i,1'b0}), .o(o));
  40: cntlo48 u1 (.i({i,1'b0,8'h00}), .o(o));
  52: cntlo64 u1 (.i({i,1'b0,12'h000}), .o(o));
  64: cntlo64 u1 (.i({i,1'b0}), .o(o));
  80: cntlo80 u1 (.i({i,1'b0}), .o(o));
  default:  ;
  endcase
end
endgenerate

endmodule
/positCntlz.sv
0,0 → 1,45
`include "positConfig.sv"
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// positCntlz.sv
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// Count the leading zeros of the PSTWID-1 low bits of a posit.
//   i  bits to examine
//   o  count of leading zeros
// The input is padded on the right with ones up to the width of the
// selected counter so the padding is never counted. Widths that are not
// listed instantiate nothing and leave o undriven.
module positCntlz(i, o);
parameter PSTWID = `PSTWID;
input [PSTWID-2:0] i;
output [$clog2(PSTWID-2):0] o;

generate begin : gClz
  if (PSTWID == 16)
    cntlz16 u1 (.i({i,1'b1}), .o(o));
  else if (PSTWID == 20)
    cntlz24 u1 (.i({i,5'h1F}), .o(o));
  else if (PSTWID == 32)
    cntlz32 u1 (.i({i,1'b1}), .o(o));
  else if (PSTWID == 40)
    cntlz48 u1 (.i({i,9'h1FF}), .o(o));
  else if (PSTWID == 52)
    cntlz64 u1 (.i({i,13'h1FFF}), .o(o));
  else if (PSTWID == 64)
    cntlz64 u1 (.i({i,1'b1}), .o(o));
  else if (PSTWID == 80)
    cntlz80 u1 (.i({i,1'b1}), .o(o));
end
endgenerate

endmodule
/positConfig.sv
0,0 → 1,30
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// positConfig.sv
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// Include guard: the definitions below are made only once per compilation.
`ifndef POSIT_CONFIG_SV
`define POSIT_CONFIG_SV 1

// Default posit width in bits; used as the default value of the PSTWID
// parameter in the posit modules.
`define PSTWID 32

`endif
/positDecompose.sv
0,0 → 1,96
`include "positConfig.sv"
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// positDecompose.sv
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
`include "positConfig.sv"
 
// Decompose a posit number.
// Decompose a posit number.
//   i    posit input
//   sgn  sign bit of i
//   rgs  first regime bit of the magnitude (1 = run of ones)
//   rgm  regime magnitude: run length minus one for a run of ones, run
//        length for a run of zeros
//   exp  exponent field
//   sig  significand with a leading one prepended
//   zer  i is all zeros
//   inf  only the sign bit of i is set
module positDecompose(i, sgn, rgs, rgm, exp, sig, zer, inf);
`include "positSize.sv"
input [PSTWID-1:0] i;
output sgn;                       // sign of number
output rgs;                       // sign of regime
output [$clog2(PSTWID)-1:0] rgm;  // regime (absolute value)
output [es-1:0] exp;              // exponent
output [PSTWID-es-1:0] sig;       // significand
output zer;                       // number is zero
output inf;                       // number is infinite

wire [$clog2(PSTWID-2):0] lzcnt;
wire [$clog2(PSTWID-2):0] locnt;


assign sgn = i[PSTWID-1];
assign inf = ~|i[PSTWID-2:0] & i[PSTWID-1];
assign zer = ~|i;
// Work on the magnitude: negative posits are negated first.
wire [PSTWID-1:0] ii = sgn ? -i : i;
assign rgs = ii[PSTWID-2];

// The counters declare a single parameter (PSTWID), so only that one is
// overridden here.
positCntlz #(PSTWID) u1 (.i(ii[PSTWID-2:0]), .o(lzcnt));
positCntlo #(PSTWID) u2 (.i(ii[PSTWID-2:0]), .o(locnt));

assign rgm = rgs ? locnt - 1 : lzcnt;
// Shift out the sign and the regime run so the regime terminator lands in
// the MSB. The shift amount is one bit wider than $clog2(PSTWID) so that a
// regime filling the whole word gives a shift of PSTWID or more (clearing
// tmp) instead of wrapping to a small shift.
wire [$clog2(PSTWID):0] shamt = (rgs ? locnt : lzcnt) + 2'd1;
wire [PSTWID-1:0] tmp = ii << shamt;
assign exp = |es ? tmp[PSTWID-2:PSTWID-1-es] : 0;
assign sig = {1'b1,tmp[PSTWID-2-es:0]};

endmodule
 
// Decompose posit number and register outputs.
// Decompose posit number and register outputs.
//   clk, ce  clock and clock enable for the output registers
//   i        posit input
//   sgn, rgs, rgm, exp, sig, zer, inf
//            registered copies of the positDecompose outputs of the same
//            names
module positDecomposeReg(clk, ce, i, sgn, rgs, rgm, exp, sig, zer, inf);
`include "positSize.sv"
input clk;
input ce;
input [PSTWID-1:0] i;
output reg sgn;
output reg rgs;
output reg [$clog2(PSTWID)-1:0] rgm;
output reg [es-1:0] exp;
output reg [PSTWID-es-1:0] sig;
output reg zer;
output reg inf;

wire isgn;
wire irgs;
wire [$clog2(PSTWID)-1:0] irgm;
wire [es-1:0] iexp;
wire [PSTWID-es-1:0] isig;
wire izer;
wire iinf;

// Ports are connected by name. A positional list one entry short would tie
// iinf to the zer port and leave the inf port unconnected.
positDecompose #(PSTWID) u1 (
  .i(i),
  .sgn(isgn),
  .rgs(irgs),
  .rgm(irgm),
  .exp(iexp),
  .sig(isig),
  .zer(izer),
  .inf(iinf)
);

// Non-blocking assignments: these are clocked registers.
always @(posedge clk)
if (ce) begin
  sgn <= isgn;
  rgs <= irgs;
  rgm <= irgm;
  exp <= iexp;
  sig <= isig;
  zer <= izer;
  inf <= iinf;
end

endmodule
 
/positSize.sv
0,0 → 1,12
// Include fragment that declares the posit size parameters inside a module.
// PSTWID defaults to the `PSTWID macro.
parameter PSTWID = `PSTWID;
// es is selected from PSTWID; the first matching threshold below wins.
parameter es =
PSTWID >= 80 ? 4 :
PSTWID >= 64 ? 3 :
PSTWID >= 52 ? 3 :
PSTWID >= 40 ? 3 :
PSTWID >= 32 ? 2 :
PSTWID >= 24 ? 2 :
PSTWID >= 16 ? 1 :
PSTWID >= 8 ? 1 :
0 ;
 
/positToFp.sv
0,0 → 1,75
// ============================================================================
// __
// \\__/ o\ (C) 2020 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// positToFp.sv
// - posit number to floating point converter
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
// Parts of this code originated from Posit_to_FP.v by Manish Kumar Jaiswal
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
 
`include "positConfig.sv"
`include "fpConfig.sv"
`include "fpTypes.sv"
 
// ----------------------------------------------------------------------------
// positToFp
// - combinational conversion of a posit to a floating point number
//
// Parameters:
//   FPWID  width of both the posit input and the floating point output
//   BIAS   exponent bias added to the decoded posit scale
// Ports:
//   i      posit input
//   o      floating point output
// ----------------------------------------------------------------------------
module positToFp(i, o);
parameter FPWID = 32;
`include "fpSize.sv"
`include "positSize.sv"
input [FPWID-1:0] i;
output reg [FPWID-1:0] o;

parameter BIAS = {1'b0,{EMSB{1'b1}}};
localparam N = FPWID;
localparam E = EMSB+1;
localparam M = FMSB+1;
localparam Bs = $clog2(FPWID-1);
localparam EO = E > es+Bs ? E : es+Bs;

// Fields produced by the posit decomposer.
wire sgn;
wire rgs;
wire [Bs-1:0] rgm;
wire [es-1:0] exp;
wire [N-es-1:0] sig;
wire zer;
wire inf;

positDecompose #(.PSTWID(PSTWID), .es(es)) u1 (
  .i(i),
  .sgn(sgn),
  .rgs(rgs),
  .rgm(rgm),
  .exp(exp),
  .sig(sig),
  .zer(zer),
  .inf(inf)
);

// Significand padded on the right back to the full word width.
wire [N-1:0] m = {sig,{es{1'b0}}};

// Regime magnitude zero-extended, then negated when the regime is a run of
// zeros.
wire [EO-es:0] rgmx = {{EO-es-Bs+1{1'b0}},rgm};
wire [EO-es:0] rgmv = rgs ? rgmx : -rgmx;
// The scale is the regime value with the exponent field appended below it.
wire [EO:0] scale = {rgmv,exp};

// Biased floating point exponent, with one spare bit at the top.
wire [EO+1:0] e;
assign e = scale + BIAS;

wire exv = |e[EO:E];      // bits set above the exponent field
wire exinf = &e[E-1:0];   // exponent field is all ones
wire special = inf|exv|exinf;

always @*
casez({zer,special})  // exponent all ones or exponent overflow?
// zero in: sign bit followed by all zeros
2'b1?: o = {sgn,{FPWID-1{1'b0}}};
// Infinity in or exponent overflow in conversion = infinity out
2'b01: o = {sgn,{E-1{1'b1}},{M{1'b0}}};
// Other numbers
default:  o = {sgn,e[E-1:0],m[N-2:E]};
endcase

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.