URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk/rtl/verilog2
- from Rev 48 to Rev 49
- ↔ Reverse comparison
Rev 48 → Rev 49
/fp.sv
24,6 → 24,10
// |
package fp; |
|
`ifndef FPWID |
`define FPWID 64 |
`endif |
|
`define QINFOS 23'h7FC000 // info |
`define QSUBINF 4'd1 |
`define QINFDIV 4'd2 |
79,8 → 83,11
`define POINT5 3'd4 |
`define ZERO 3'd5 |
|
`define SUPPORT_DENORMALS 1'b1 |
`define MIN_LATENCY 1'b1 |
parameter FPWID = 64; |
|
parameter FPWID = `FPWID; |
|
// This file contains definitions for fields to ease dealing with different fp |
// widths. Some of the code still needs to be modified to support widths |
// other than standard 32,64 or 80 bit. |
/fpAddsub.sv
7,25 → 7,37
// |
// fpAddsub.sv |
// - floating point adder/subtracter |
// - two cycle latency |
// - can issue every clock cycle |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import fp::*; |
41,41 → 53,36
|
|
// variables |
wire so; // sign output |
wire [EMSB:0] xo; // de normalized exponent output |
reg [EMSB:0] xo1; // de normalized exponent output |
wire [FX:0] mo; // mantissa output |
reg [FX:0] mo1; // mantissa output |
|
assign o = {so,xo,mo}; |
|
// operands sign,exponent,mantissa |
wire sa, sb; |
wire [EMSB:0] xa, xb; |
wire [FMSB:0] ma, mb; |
wire [FMSB+1:0] fracta, fractb; |
wire [FMSB+1:0] fracta1, fractb1; |
|
// which has greater magnitude ? Used for sign calc |
wire xa_gt_xb = xa > xb; |
wire xa_gt_xb1; |
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb); |
wire a_gt_b1; |
wire az, bz; // operand a,b is zero |
|
wire adn, bdn; // a,b denormalized ? |
wire xaInf, xbInf; |
wire aInf, bInf, aInf1, bInf1; |
wire aNan, bNan, aNan1, bNan1; |
wire aInf, bInf; |
wire aNan, bNan; |
|
wire [EMSB:0] xad = xa|adn; // operand a exponent, compensated for denormalized numbers |
wire [EMSB:0] xbd = xb|bdn; // operand b exponent, compensated for denormalized numbers |
|
fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) ); |
fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) ); |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #1 |
// - decode the input operands |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg op1; |
|
// Figure out which operation is really needed an add or |
// subtract ? |
fpDecompReg u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) ); |
fpDecompReg u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) ); |
always @(posedge clk) |
if (ce) op1 <= op; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #2 |
// |
// Figure out which operation is really needed: an add or a subtract? |
// If the signs are the same, use the original op, |
// otherwise flip the operation |
// a + b = add,+ |
86,14 → 93,90
// a - -b = add,+ |
// -a - b = add,- |
// -a - -b = sub, so of larger |
wire realOp = op ^ sa ^ sb; |
wire realOp1; |
wire op1; |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg realOp2; |
reg op2; |
reg [EMSB:0] xa2, xb2; |
reg [FMSB:0] ma2, mb2; |
reg az2, bz2; |
reg xa_gt_xb2; |
reg [FMSB+1:0] fracta2, fractb2; |
reg maneq, ma_gt_mb; |
reg expeq; |
|
always @(posedge clk) |
if (ce) realOp2 = op1 ^ sa ^ sb; |
always @(posedge clk) |
if (ce) op2 <= op1; |
always @(posedge clk) |
if (ce) xa2 <= xad; |
always @(posedge clk) |
if (ce) xb2 <= xbd; |
always @(posedge clk) |
if (ce) ma2 <= ma; |
always @(posedge clk) |
if (ce) mb2 <= mb; |
always @(posedge clk) |
if (ce) fracta2 <= fracta; |
always @(posedge clk) |
if (ce) fractb2 <= fractb; |
always @(posedge clk) |
if (ce) az2 <= az; |
always @(posedge clk) |
if (ce) bz2 <= bz; |
always @(posedge clk) |
if (ce) xa_gt_xb2 <= xad > xbd; |
always @(posedge clk) |
if (ce) maneq <= ma==mb; |
always @(posedge clk) |
if (ce) ma_gt_mb <= ma > mb; |
always @(posedge clk) |
if (ce) expeq <= xad==xbd; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #3 |
// |
// Find out if the result will be zero. |
wire resZero = (realOp && xa==xb && ma==mb) || // subtract, same magnitude |
(az & bz); // both a,b zero |
// Determine which fraction to denormalize |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// |
reg [EMSB:0] xa3, xb3; |
reg resZero3; |
wire xaInf3, xbInf3; |
reg xa_gt_xb3; |
reg a_gt_b3; |
reg op3; |
wire sa3, sb3; |
wire [2:0] rm3; |
reg [FMSB+1:0] mfs3; |
|
always @(posedge clk) |
if (ce) resZero3 <= (realOp2 & expeq & maneq) || // subtract, same magnitude |
(az2 & bz2); // both a,b zero |
always @(posedge clk) |
if (ce) xa3 <= xa2; |
always @(posedge clk) |
if (ce) xb3 <= xb2; |
always @(posedge clk) |
if (ce) xa_gt_xb3 <= xa_gt_xb2; |
always @(posedge clk) |
if (ce) a_gt_b3 <= xa_gt_xb2 | (expeq & ma_gt_mb); |
always @(posedge clk) |
if (ce) op3 <= op2; |
always @(posedge clk) |
if (ce) mfs3 = xa_gt_xb2 ? fractb2 : fracta2; |
|
delay #(.WID(1), .DEP(2)) udly3a (.clk(clk), .ce(ce), .i(xaInf), .o(xaInf3)); |
delay #(.WID(1), .DEP(2)) udly3b (.clk(clk), .ce(ce), .i(xbInf), .o(xbInf3)); |
delay #(.WID(1), .DEP(2)) udly3c (.clk(clk), .ce(ce), .i(sa), .o(sa3)); |
delay #(.WID(1), .DEP(2)) udly3d (.clk(clk), .ce(ce), .i(sb), .o(sb3)); |
delay #(.WID(3), .DEP(3)) udly3e (.clk(clk), .ce(ce), .i(rm), .o(rm3)); |
delay #(.WID(1), .DEP(2)) udly3f (.clk(clk), .ce(ce), .i(aInf), .o(aInf3)); |
delay #(.WID(1), .DEP(2)) udly3g (.clk(clk), .ce(ce), .i(bInf), .o(bInf3)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #4 |
// |
// Compute output exponent |
// |
// The output exponent is the larger of the two exponents, |
100,102 → 183,187
// unless a subtract operation is in progress and the two |
// numbers are equal, in which case the exponent should be |
// zero. |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
always @(xaInf,xbInf,resZero,xa,xb,xa_gt_xb) |
xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb; |
reg [EMSB:0] xa4, xb4; |
reg [EMSB:0] xo4; |
reg xa_gt_xb4; |
reg xa4,xb4; |
|
always @(posedge clk) |
if (ce) xa4 <= xa3; |
always @(posedge clk) |
if (ce) xb4 <= xb3; |
always @(posedge clk) |
if (ce) xo4 <= (xaInf3&xbInf3) ? {EMSB+1{1'b1}} : resZero3 ? 0 : xa_gt_xb3 ? xa3 : xb3; |
always @(posedge clk) |
if (ce) xa_gt_xb4 <= xa_gt_xb3; |
|
// Compute output sign |
reg so1; |
reg so4; |
always @* |
case ({resZero,sa,op,sb}) // synopsys full_case parallel_case |
4'b0000: so1 <= 0; // + + + = + |
4'b0001: so1 <= !a_gt_b; // + + - = sign of larger |
4'b0010: so1 <= !a_gt_b; // + - + = sign of larger |
4'b0011: so1 <= 0; // + - - = + |
4'b0100: so1 <= a_gt_b; // - + + = sign of larger |
4'b0101: so1 <= 1; // - + - = - |
4'b0110: so1 <= 1; // - - + = - |
4'b0111: so1 <= a_gt_b; // - - - = sign of larger |
4'b1000: so1 <= 0; // A + B, sign = + |
4'b1001: so1 <= rm==3; // A + -B, sign = + unless rounding down |
4'b1010: so1 <= rm==3; // A - B, sign = + unless rounding down |
4'b1011: so1 <= 0; // +A - -B, sign = + |
4'b1100: so1 <= rm==3; // -A + B, sign = + unless rounding down |
4'b1101: so1 <= 1; // -A + -B, sign = - |
4'b1110: so1 <= 1; // -A - +B, sign = - |
4'b1111: so1 <= rm==3; // -A - -B, sign = + unless rounding down |
case ({resZero3,sa3,op3,sb3}) // synopsys full_case parallel_case |
4'b0000: so4 <= 0; // + + + = + |
4'b0001: so4 <= !a_gt_b3; // + + - = sign of larger |
4'b0010: so4 <= !a_gt_b3; // + - + = sign of larger |
4'b0011: so4 <= 0; // + - - = + |
4'b0100: so4 <= a_gt_b3; // - + + = sign of larger |
4'b0101: so4 <= 1; // - + - = - |
4'b0110: so4 <= 1; // - - + = - |
4'b0111: so4 <= a_gt_b3; // - - - = sign of larger |
4'b1000: so4 <= 0; // A + B, sign = + |
4'b1001: so4 <= rm3==3'd3; // A + -B, sign = + unless rounding down |
4'b1010: so4 <= rm3==3'd3; // A - B, sign = + unless rounding down |
4'b1011: so4 <= 0; // +A - -B, sign = + |
4'b1100: so4 <= rm3==3'd3; // -A + B, sign = + unless rounding down |
4'b1101: so4 <= 1; // -A + -B, sign = - |
4'b1110: so4 <= 1; // -A - +B, sign = - |
4'b1111: so4 <= rm3==3'd3; // -A - -B, sign = + unless rounding down |
endcase |
|
delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) ); |
delay2 #(1) d2(.clk(clk), .ce(ce), .i(so1), .o(so) ); |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #5 |
// |
// Compute the difference in exponents, provides shift amount |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [EMSB+1:0] xdiff5; |
always @(posedge clk) |
if (ce) xdiff5 <= xa_gt_xb4 ? xa4 - xb4 : xb4 - xa4; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #6 |
// |
// Compute the difference in exponents, provides shift amount |
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad; |
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff; |
wire [6:0] xdif1; |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// If the difference in the exponent is 128 or greater (assuming 128 bit fp or |
// less) then all of the bits will be shifted out to zero. There is no need to |
// keep track of a difference more than 128. |
reg [7:0] xdif6; |
wire [FMSB+1:0] mfs6; |
always @(posedge clk) |
if (ce) xdif6 <= xdiff5 > FMSB+4 ? FMSB+4 : xdiff5; |
delay #(.WID(FMSB+2), .DEP(3)) udly6a (.clk(clk), .ce(ce), .i(mfs3), .o(mfs6)); |
|
// determine which fraction to denormalize |
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta; |
wire [FMSB+1:0] mfs1; |
|
// Determine the sticky bit |
wire sticky, sticky1; |
generate |
begin |
if (FPWID==128) |
redor128 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (FPWID==96) |
redor96 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (FPWID==84) |
redor84 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (FPWID==80) |
redor80 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (FPWID==64) |
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (FPWID==32) |
redor32 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #7 |
// |
// Determine the sticky bit. The sticky bit is the bitwise or of all the bits |
// being shifted out the right side. The sticky bit is computed here to |
// reduce the number of regs required. |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg sticky6; |
wire sticky7; |
wire [7:0] xdif7; |
wire [FMSB+1:0] mfs7; |
integer n; |
always @* begin |
sticky6 = 1'b0; |
for (n = 0; n < FMSB+2; n = n + 1) |
if (n <= xdif6) |
sticky6 = sticky6|mfs6[n]; |
end |
endgenerate |
|
// register inputs to shifter and shift |
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) ); |
delay1 #(7) d15(.clk(clk), .ce(ce), .i(xdif), .o(xdif1) ); |
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs), .o(mfs1) ); |
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky6), .o(sticky7) ); |
delay1 #(8) d15(.clk(clk), .ce(ce), .i(xdif6), .o(xdif7) ); |
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs6), .o(mfs7) ); |
|
wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1; |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #8 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [FMSB+4:0] md8; |
wire [FMSB+1:0] fracta8, fractb8; |
wire xa_gt_xb8; |
wire a_gt_b8; |
always @(posedge clk) |
if (ce) md8 <= ({mfs7,3'b0} >> xdif7)|sticky7; |
|
// sync control signals |
delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) ); |
delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) ); |
delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) ); |
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) ); |
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) ); |
delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) ); |
delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) ); |
delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) ); |
delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) ); |
delay1 #(1) d11(.clk(clk), .ce(ce), .i(op), .o(op1) ); |
delay #(.WID(1), .DEP(4)) udly8a (.clk(clk), .ce(ce), .i(xa_gt_xb4), .o(xa_gt_xb8)); |
delay #(.WID(1), .DEP(5)) udly8b (.clk(clk), .ce(ce), .i(a_gt_b3), .o(a_gt_b8)); |
delay #(.WID(FMSB+2), .DEP(6)) udly8d (.clk(clk), .ce(ce), .i(fracta2), .o(fracta8)); |
delay #(.WID(FMSB+2), .DEP(6)) udly8e (.clk(clk), .ce(ce), .i(fractb2), .o(fractb8)); |
delay #(.WID(1), .DEP(5)) udly8j (.clk(clk), .ce(ce), .i(op3), .o(op8)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #9 |
// Sort operands and perform add/subtract |
// addition can generate an extra bit, subtract can't go negative |
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1; |
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0}; |
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob; |
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa; |
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb; |
wire xoinf = &xo; |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [FMSB+4:0] oa9, ob9; |
reg a_gt_b9; |
always @(posedge clk) |
if (ce) oa9 <= xa_gt_xb8 ? {fracta8,3'b0} : md8; |
always @(posedge clk) |
if (ce) ob9 <= xa_gt_xb8 ? md8 : {fractb8,3'b0}; |
always @(posedge clk) |
if (ce) a_gt_b9 <= a_gt_b8; |
|
always @* |
casez({aInf1&bInf1,aNan1,bNan1,xoinf}) |
4'b1???: mo1 = {1'b0,op1,{FMSB-1{1'b0}},op1,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add |
4'b01??: mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}}; |
4'b001?: mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}}; |
4'b0001: mo1 = 1'd0; |
default: mo1 = {mab,{FMSB-1{1'b0}}}; // mab has an extra lead bit and two trailing bits |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #10 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [FMSB+4:0] oaa10; |
reg [FMSB+4:0] obb10; |
wire realOp10; |
reg [EMSB:0] xo10; |
|
always @(posedge clk) |
if (ce) oaa10 <= a_gt_b9 ? oa9 : ob9; |
always @(posedge clk) |
if (ce) obb10 <= a_gt_b9 ? ob9 : oa9; |
delay #(.WID(1), .DEP(8)) udly10a (.clk(clk), .ce(ce), .i(realOp2), .o(realOp10)); |
delay #(.WID(EMSB+1), .DEP(6)) udly10b (.clk(clk), .ce(ce), .i(xo4), .o(xo10)); |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #11 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [FMSB+5:0] mab11; |
wire [FMSB+1:0] fracta11, fractb11; |
wire abInf11; |
wire aNan11, bNan11; |
reg xoinf11; |
wire op11; |
|
always @(posedge clk) |
if (ce) mab11 <= realOp10 ? oaa10 - obb10 : oaa10 + obb10; |
delay #(.WID(1), .DEP(8)) udly11a (.clk(clk), .ce(ce), .i(aInf3&bInf3), .o(abInf11)); |
delay #(.WID(1), .DEP(10)) udly11c (.clk(clk), .ce(ce), .i(aNan), .o(aNan11)); |
delay #(.WID(1), .DEP(10)) udly11d (.clk(clk), .ce(ce), .i(bNan), .o(bNan11)); |
delay #(.WID(1), .DEP(3)) udly11e (.clk(clk), .ce(ce), .i(op8), .o(op11)); |
delay #(.WID(FMSB+2), .DEP(3)) udly11f (.clk(clk), .ce(ce), .i(fracta8), .o(fracta11)); |
delay #(.WID(FMSB+2), .DEP(3)) udly11g (.clk(clk), .ce(ce), .i(fractb8), .o(fractb11)); |
|
always @(posedge clk) |
if (ce) xoinf11 <= &xo10; |
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #12 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
reg [FX:0] mo12; // mantissa output |
|
always @(posedge clk) |
if (ce) |
casez({abInf11,aNan11,bNan11,xoinf11}) |
4'b1???: mo12 <= {1'b0,op11,{FMSB-1{1'b0}},op11,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add |
4'b01??: mo12 <= {1'b0,fracta11[FMSB+1:0],{FMSB{1'b0}}}; |
4'b001?: mo12 <= {1'b0,fractb11[FMSB+1:0],{FMSB{1'b0}}}; |
4'b0001: mo12 <= 1'd0; |
default: mo12 <= {mab11,{FMSB-2{1'b0}}}; // mab has an extra lead bit and three trailing bits |
endcase |
|
delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) ); |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #13 |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
wire so; // sign output |
wire [EMSB:0] xo; // de normalized exponent output |
wire [FX:0] mo; // mantissa output |
|
delay #(.WID(1), .DEP(9)) udly13a (.clk(clk), .ce(ce), .i(so4), .o(so)); |
delay #(.WID(EMSB+1), .DEP(3)) udly13b (.clk(clk), .ce(ce), .i(xo10), .o(xo)); |
delay #(.WID(FX+1), .DEP(1)) u13c (.clk(clk), .ce(ce), .i(mo12), .o(mo) ); |
|
assign o = {so,xo,mo}; |
|
endmodule |
|
module fpAddsubnr(clk, ce, rm, op, a, b, o); |
210,8 → 378,8
wire [EX:0] o1; |
wire [MSB+3:0] fpn0; |
|
fpAddsub #(FPWID) u1 (clk, ce, rm, op, a, b, o1); |
fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) ); |
fpRound #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
fpAddsub u1 (clk, ce, rm, op, a, b, o1); |
fpNormalize u2(.clk(clk), .ce(ce), .under_i(1'b0), .i(o1), .o(fpn0) ); |
fpRound u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
|
endmodule |
/fpDivide.sv
1,31 → 1,44
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpDiv.v |
// fpDivide.sv |
// - floating point divider |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// Floating Point Multiplier / Divider |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// Floating Point Divider |
// |
//Properties: |
//+-inf * +-inf = -+inf (this is handled by exOver) |
//+-inf * 0 = QNaN |
/fpMultiply.sv
7,28 → 7,42
// |
// fpMultiply.sv |
// - floating point multiplier |
// - two cycle latency |
// - two cycle latency minimum (latency depends on precision) |
// - can issue every clock cycle |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// Floating Point Multiplier / Divider |
// |
// This multiplier/divider handles denormalized numbers. |
// Floating Point Multiplier |
// |
// This multiplier handles denormalized numbers. |
// The output format is of an internal expanded representation |
// in preparation to be fed into a normalization unit, then |
// rounding. Basically, it's the same as the regular format |
42,10 → 56,6
// +-inf * +-inf = -+inf (this is handled by exOver) |
// +-inf * 0 = QNaN |
// |
// 1 sign number |
// 8 exponent |
// 48 mantissa |
// |
// ============================================================================ |
|
import fp::*; |
59,6 → 69,13
output inf; |
output overflow; |
output underflow; |
parameter DELAY = |
(FPWID == 128 ? 17 : |
FPWID == 80 ? 17 : |
FPWID == 64 ? 13 : |
FPWID == 40 ? 8 : |
FPWID == 32 ? 2 : |
FPWID == 16 ? 2 : 2); |
|
reg [EMSB:0] xo1; // extra bit for sign |
reg [FX:0] mo1; |
87,13 → 104,17
wire aInf, bInf, aInf1, bInf1; |
|
|
// ----------------------------------------------------------- |
// First clock |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Clock #1 |
// - decode the input operands |
// - derive basic information |
// - calculate exponent |
// - calculate fraction |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
// ----------------------------------------------------------- |
// First clock |
// ----------------------------------------------------------- |
|
fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) ); |
fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) ); |
102,82 → 123,51
// correct the exponent for denormalized operands |
// adjust the sum by the exponent offset (subtract 127) |
// mul: ex1 = xa + xb, result should always be < 1ffh |
`ifdef SUPPORT_DENORMALS |
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias; |
`else |
assign ex1 = (az|bz) ? 0 : xa + xb - bias; |
`endif |
|
generate |
if (FPWID==80) begin |
reg [31:0] p00,p01,p02,p03; |
reg [31:0] p10,p11,p12,p13; |
reg [31:0] p20,p21,p22,p23; |
reg [31:0] p30,p31,p32,p33; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[15: 0] * fractb[15: 0]; |
p01 <= fracta[31:16] * fractb[15: 0]; |
p02 <= fracta[47:32] * fractb[15: 0]; |
p03 <= fracta[63:48] * fractb[15: 0]; |
|
p10 <= fracta[15: 0] * fractb[31:16]; |
p11 <= fracta[31:16] * fractb[31:16]; |
p12 <= fracta[47:32] * fractb[31:16]; |
p13 <= fracta[63:48] * fractb[31:16]; |
|
p20 <= fracta[15: 0] * fractb[47:32]; |
p21 <= fracta[31:16] * fractb[47:32]; |
p22 <= fracta[47:32] * fractb[47:32]; |
p23 <= fracta[63:48] * fractb[47:32]; |
|
p30 <= fracta[15: 0] * fractb[63:48]; |
p31 <= fracta[31:16] * fractb[63:48]; |
p32 <= fracta[47:32] * fractb[63:48]; |
p33 <= fracta[63:48] * fractb[63:48]; |
|
fract1 <= {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 + |
{p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} + |
{p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} + |
{p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0} |
; |
end |
if (FPWID==128) begin |
wire [255:0] fractoo; |
mult128x128 umul1 (.clk(clk), .ce(ce), .a({16'b0,fracta}), .b({16'b0,fractb}), .o(fractoo)); |
always @(posedge clk) |
if (ce) fract1 <= fractoo[224:0]; |
end |
else if (FPWID==80) begin |
wire [255:0] fractoo; |
mult128x128 umul1 (.clk(clk), .ce(ce), .a({63'd0,fracta}), .b({63'd0,fractb}), .o(fractoo)); |
always @(posedge clk) |
if (ce) fract1 <= fractoo[130:0]; |
end |
else if (FPWID==64) begin |
reg [35:0] p00,p01,p02; |
reg [35:0] p10,p11,p12; |
reg [35:0] p20,p21,p22; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[17: 0] * fractb[17: 0]; |
p01 <= fracta[35:18] * fractb[17: 0]; |
p02 <= fracta[52:36] * fractb[17: 0]; |
p10 <= fracta[17: 0] * fractb[35:18]; |
p11 <= fracta[35:18] * fractb[35:18]; |
p12 <= fracta[52:36] * fractb[35:18]; |
p20 <= fracta[17: 0] * fractb[52:36]; |
p21 <= fracta[35:18] * fractb[52:36]; |
p22 <= fracta[52:36] * fractb[52:36]; |
fract1 <= {p02,36'b0} + {p01,18'b0} + p00 + |
{p12,54'b0} + {p11,36'b0} + {p10,18'b0} + |
{p22,72'b0} + {p21,54'b0} + {p20,36'b0} |
; |
end |
wire [127:0] fractoo; |
mult64x64 umul1 (.clk(clk), .ce(ce), .a({11'd0,fracta}), .b({11'd0,fractb}), .o(fractoo)); |
always @(posedge clk) |
if (ce) fract1 <= fractoo[106:0]; |
end |
else if (FPWID==40) begin |
wire [63:0] fractoo; |
mult32x32 umul1 (.clk(clk), .ce(ce), .a({3'd0,fracta}), .b({3'd0,fractb}), .o(fractoo)); |
always @(posedge clk) |
if (ce) fract1 <= fractoo[58:0]; |
end |
else if (FPWID==32) begin |
reg [23:0] p00,p01,p02; |
reg [23:0] p10,p11,p12; |
reg [23:0] p20,p21,p22; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[11: 0] * fractb[11: 0]; |
p01 <= fracta[23:12] * fractb[11: 0]; |
p10 <= fracta[11: 0] * fractb[23:12]; |
p11 <= fracta[23:12] * fractb[23:12]; |
fract1 <= {p11,p00} + {p01,12'b0} + {p10,12'b0}; |
end |
reg [23:0] p00,p11; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[23: 0] * fractb[11: 0]; |
p11 <= fracta[23: 0] * fractb[23:12]; |
fract1 <= {p11,12'b0} + p00; |
end |
end |
else begin |
always @(posedge clk) |
if (ce) begin |
fract1a <= fracta * fractb; |
fract1 <= fract1a; |
fract1 <= fract1a; |
end |
end |
endgenerate |
187,20 → 177,20
wire under = ex1[EMSB+2]; // exponent underflow |
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2]; |
|
delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) ); |
delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) ); |
delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) ); |
delay2 u6 (.clk(clk), .ce(ce), .i(under), .o(under1) ); |
delay2 u7 (.clk(clk), .ce(ce), .i(over), .o(over1) ); |
delay #(.WID(EMSB+1),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) ); |
delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) ); |
delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) ); |
delay #(.WID(1),.DEP(DELAY)) u6 (.clk(clk), .ce(ce), .i(under), .o(under1) ); |
delay #(.WID(1),.DEP(DELAY)) u7 (.clk(clk), .ce(ce), .i(over), .o(over1) ); |
|
// determine when a NaN is output |
wire qNaNOut; |
wire [FPWID-1:0] a1,b1; |
delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) ); |
delay2 u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) ); |
delay2 u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) ); |
delay2 #(FPWID) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
delay2 #(FPWID) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) ); |
delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) ); |
delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) ); |
delay #(.WID(FPWID),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) ); |
delay #(.WID(FPWID),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) ); |
|
// ----------------------------------------------------------- |
// Second clock |
208,7 → 198,7
// ----------------------------------------------------------- |
|
wire so1; |
delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay! |
delay #(.WID(1),.DEP(DELAY+1)) u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay! |
|
always @(posedge clk) |
if (ce) |
221,9 → 211,14
default: xo1 = ex2[EMSB:0]; // situation normal |
endcase |
|
// Force mantissa to zero when underflow or zero exponent when not supporting denormals. |
always @(posedge clk) |
if (ce) |
`ifdef SUPPORT_DENORMALS |
casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1}) |
`else |
casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1}) |
`endif |
6'b1?????: mo1 = {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}}; |
6'b01????: mo1 = {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}}; |
6'b001???: mo1 = {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero |
233,7 → 228,7
default: mo1 = fract1; |
endcase |
|
delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) ); |
delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) ); |
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) ); |
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) ); |
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) ); |
245,7 → 240,7
|
// Multiplier with normalization and rounding. |
|
module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow); |
module fpMultiplynr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow); |
input clk; |
input ce; |
input [MSB:0] a, b; |
260,9 → 255,9
wire sign_exe1, inf1, overflow1, underflow1; |
wire [MSB+3:0] fpn0; |
|
fpMul #(FPWID) u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1); |
fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
fpRound #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
fpMultiply u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1); |
fpNormalize u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) ); |
fpRound u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe)); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow)); |
/fpNormalize.sv
50,7 → 50,7
output under_o; |
output inexact_o; |
|
|
integer n; |
// ---------------------------------------------------------------------------- |
// No Clock required |
// ---------------------------------------------------------------------------- |
220,13 → 220,15
|
// ---------------------------------------------------------------------------- |
// Clock edge #7 |
// - fogure exponent |
// - figure exponent |
// - shift mantissa |
// - figure sticky bit |
// ---------------------------------------------------------------------------- |
|
reg [EMSB:0] xo7; |
wire rightOrLeft7; |
reg [FMSB+4:0] mo7l, mo7r; |
reg St6,St7; |
delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7)); |
|
always @(posedge clk) |
241,6 → 243,15
always @(posedge clk) |
if (ce) mo7l <= mo6 << lshiftAmt6; |
|
// The sticky bit is set if any of the bits shifted out by a right shift is set. |
always @* |
begin |
St6 = 1'b0; |
for (n = 0; n < FMSB+5; n = n + 1) |
if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n]; |
end |
always @(posedge clk) |
if (ce) St7 <= St6; |
|
// ---------------------------------------------------------------------------- |
// Clock edge #8 |
256,7 → 267,7
delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o)); |
|
always @(posedge clk) |
if (ce) mo <= rightOrLeft7 ? mo7r : mo7l; |
if (ce) mo <= rightOrLeft7 ? mo7r|{St7,1'b0} : mo7l; |
|
assign o = {so,xo,mo[FMSB+4:1]}; |
|
/fpRound.sv
11,23 → 11,42
// - IEEE 754 representation |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// ============================================================================ |
|
import fp::*; |
|
`ifdef MIN_LATENCY |
`define PIPE_ADV * |
`else |
`define PIPE_ADV (posedge clk) |
`endif |
|
module fpRound(clk, ce, rm, i, o); |
input clk; |
input ce; |
46,6 → 65,7
wire so0 = i[MSB+3]; |
assign o = {so,xo,mo}; |
|
wire l = i[3]; |
wire g = i[2]; // guard bit: always the same bit for all operations |
wire r = i[1]; // rounding bit |
wire s = i[0]; // sticky bit |
56,33 → 76,22
// - determine round amount (add 1 or 0) |
//------------------------------------------------------------ |
|
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) xo1 <= i[MSB+2:FMSB+4]; |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) mo1 <= i[FMSB+3:0]; |
|
wire tie = g & ~(r|s); |
// Compute the round bit |
// Infinities and NaNs are not rounded! |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) |
casez ({xInf,rm}) |
4'b0000: rnd <= (g & r) | (r & s); // round to nearest even |
4'b0001: rnd <= 1'd0; // round to zero (truncate) |
4'b0010: rnd <= (r | s) & !so0; // round towards +infinity |
4'b0011: rnd <= (r | s) & so0; // round towards -infinity |
4'b0100: rnd <= (r | s); // round to nearest away from zero |
4'b0000: rnd <= (g & (r|s)) | (l & tie); // round to nearest ties to even |
4'b0001: rnd <= 1'd0; // round to zero (truncate) |
4'b0010: rnd <= g & !so0; // round towards +infinity |
4'b0011: rnd <= g & so0; // round towards -infinity |
4'b0100: rnd <= (g & (r|s)) | tie; // round to nearest ties away from zero |
4'b1???: rnd <= 1'd0; // no rounding if exponent indicates infinite or NaN |
default: rnd <= 0; |
endcase |
99,30 → 108,14
reg rnd2; |
reg dn2; |
wire [EMSB:0] xo2; |
wire [MSB:0] rounded1 = {xo1,mo1[FMSB+3:2]} + rnd; |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
wire [MSB:0] rounded1 = {xo1,mo1[FMSB+3:3],1'b0} + {rnd,1'b0}; // Add onto LSB, GRS=0 |
always @`PIPE_ADV |
if (ce) rounded2 <= rounded1; |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) carry2 <= mo1[FMSB+3] & !rounded1[FMSB+1]; |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) rnd2 <= rnd; |
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) dn2 <= !(|xo1); |
assign xo2 = rounded2[MSB:FMSB+2]; |
|
138,11 → 131,8
delay1 #(EMSB+1) u22 (.clk(clk), .ce(ce), .i(xo2), .o(xo)); |
`endif |
|
`ifdef MIN_LATENCY |
always @* |
`else |
always @(posedge clk) |
`endif |
always @`PIPE_ADV |
if (ce) |
casez({rnd2,&xo2,carry2,dn2}) |
4'b0??0: mo <= mo1[FMSB+2:2]; // not rounding, not denormalized, => hide MSB |
4'b0??1: mo <= mo1[FMSB+3:3]; // not rounding, denormalized |
154,26 → 144,3
endcase |
|
endmodule |
|
|
// Round and register the output |
/* |
module fpRoundReg(clk, ce, rm, i, o); |
parameter WID = 128; |
`include "fpSize.sv" |
|
input clk; |
input ce; |
input [2:0] rm; // rounding mode |
input [MSB+3:0] i; // expanded format input |
output reg [WID-1:0] o; // rounded output |
|
wire [WID-1:0] o1; |
fpRound #(WID) u1 (.rm(rm), .i(i), .o(o1) ); |
|
always @(posedge clk) |
if (ce) |
o <= o1; |
|
endmodule |
*/ |
/mult128x128.sv
0,0 → 1,119
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult128x128.sv |
// - Karatsuba multiply |
//  - 16 cycle latency (11 for the 64x64 products + 5 recombination stages) |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Pipelined 128x128 -> 256 bit unsigned multiplier built from three 64x64
// products (Karatsuba).
//
// With a = {x1,x0} and b = {y1,y0} split into 64-bit halves:
//   z2 = x1*y1
//   z0 = x0*y0
//   z1 = (x0-x1)*(y1-y0) + z2 + z0      (equals x1*y0 + x0*y1)
//   o  = {z2,z0} + (z1 << 64)
//
// Ports:
//   clk  - clock
//   ce   - clock enable; every register in this module holds while ce is low
//   a, b - operands
//   o    - registered product. Counting the stages below, it appears 16
//          enabled clock edges after the operands, taking mult64x64 as an
//          11-cycle block (as noted at its instantiation).
module mult128x128(clk, ce, a, b, o);
input clk;
input ce;
input [127:0] a;
input [127:0] b;
output reg [255:0] o;

reg [64:0] a1, b1;				// half differences, bit 64 is the sign (borrow)
reg [63:0] a2, b2;				// magnitudes of the half differences
wire [127:0] z0, z2, p3;		// outputs of the three 64x64 multipliers
reg [127:0] z0a, z2a, z0b, z2b, z0c, z2c, z0d, z2d;
reg [128:0] p4;					// signed (two's complement) middle product
reg [128:0] z1;					// extra bit for carry
reg sgn2;
wire sgn9;

// x0-x1 and y1-y0, computed one bit wider so the borrow is kept as a sign.
always @(posedge clk)
	if (ce) begin
		a1 <= a[63: 0] - a[127:64];
		b1 <= b[127:64] - b[63: 0];
	end

// Absolute values of the differences plus the sign of their product.
always @(posedge clk)
	if (ce) begin
		a2 <= a1[64] ? -a1 : a1;
		b2 <= b1[64] ? -b1 : b1;
		sgn2 <= a1[64] ^ b1[64];
	end

// Carries the product sign alongside the u3 multiplier.
// NOTE(review): p4 below must sample the sign that belongs to the p3 value
// leaving u3 in the same cycle. Whether DEP(12) achieves that depends on how
// many clocks the `delay` module (defined outside this file) adds - confirm.
delay #(.WID(1), .DEP(12)) udl1 (.clk(clk), .ce(ce), .i(sgn2), .o(sgn9));

// 11 cycle latency
mult64x64 u1 (
	.clk(clk),
	.ce(ce),
	.a(a[127:64]),
	.b(b[127:64]),
	.o(z2)			// z2 = x1 * y1
);

mult64x64 u2 (
	.clk(clk),
	.ce(ce),
	.a(a[63:0]),
	.b(b[63:0]),
	.o(z0)			// z0 = x0 * y0
);

mult64x64 u3 (
	.clk(clk),
	.ce(ce),
	.a(a2[63:0]),
	.b(b2[63:0]),
	.o(p3)			// p3 = abs(x0-x1) * abs(y1-y0)
);

// Apply the sign to the middle product. The value is held one bit wider than
// p3 so that the top bit is a true sign bit: a zero product stays +0 even when
// the sign flag is set. (Replicating the bare sign flag above a zero product
// would add a spurious 2^128 to z1.)
always @(posedge clk)
	if (ce) p4 <= sgn9 ? -{1'b0,p3} : {1'b0,p3};

// Hold z2/z0 while the middle product is finished.
always @(posedge clk)
	if (ce) begin
		z2a <= z2;
		z0a <= z0;
		z2b <= z2a;
		z0b <= z0a;
		z2c <= z2b;
		z0c <= z0b;
	end

// z1 = x1*y0 + x0*y1; arithmetic is modulo 2^129, which the true sum fits in.
always @(posedge clk)
	if (ce) z1 <= p4 + z2c + z0c;

always @(posedge clk)
	if (ce) begin
		z2d <= z2c;
		z0d <= z0c;
	end

// Final recombination.
always @(posedge clk)
	if (ce) o <= {z2d,z0d} + {z1,64'd0};

endmodule
/mult16x16.sv
0,0 → 1,49
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult16x16.sv |
// - this should typically synthesize to a single DSP multiplier |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Registered 16x16 -> 32 bit unsigned multiply.
//   clk - clock
//   ce  - clock enable; o keeps its previous value while ce is low
//   a,b - operands
//   o   - a*b, updated on the rising clock edge when ce is high
module mult16x16(
	input clk,
	input ce,
	input [15:0] a,
	input [15:0] b,
	output reg [31:0] o
);

// Full-width product of the two operands.
wire [31:0] product = a * b;

always @(posedge clk)
begin
	if (ce)
		o <= product;
end

endmodule
/mult32x32.sv
0,0 → 1,117
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult32x32.sv |
// - Karatsuba multiply |
// - six clock cycles |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Pipelined 32x32 -> 64 bit unsigned multiplier built from three 16x16
// products (Karatsuba).
//
// With a = {x1,x0} and b = {y1,y0} split into 16-bit halves:
//   z2 = x1*y1
//   z0 = x0*y0
//   z1 = (x0-x1)*(y1-y0) + z2 + z0      (equals x1*y0 + x0*y1)
//   o  = {z2,z0} + (z1 << 16)
//
// Ports:
//   clk  - clock
//   ce   - clock enable; every register in this module holds while ce is low
//   a, b - operands
//   o    - registered product, six enabled clock edges after the operands
module mult32x32(clk, ce, a, b, o);
input clk;
input ce;
input [31:0] a;
input [31:0] b;
output reg [63:0] o;

reg [16:0] a1, b1;				// half differences, bit 16 is the sign (borrow)
reg [15:0] a2, b2;				// magnitudes of the half differences
wire [31:0] z0, z2, p3;			// outputs of the three 16x16 multipliers
reg [31:0] z0a, z2a, z0b, z2b, z0c, z2c, z0d, z2d;
reg [32:0] p4;					// signed (two's complement) middle product
reg [32:0] z1;					// extra bit for carry
reg sgn2, sgn3;

// Edge 1: x0-x1 and y1-y0, one bit wider so the borrow is kept as a sign.
always @(posedge clk)
	if (ce) begin
		a1 <= a[15: 0] - a[31:16];
		b1 <= b[31:16] - b[15: 0];
	end

// Edge 2: absolute values of the differences and the sign of their product.
always @(posedge clk)
	if (ce) begin
		a2 <= a1[16] ? -a1 : a1;
		b2 <= b1[16] ? -b1 : b1;
		sgn2 <= a1[16] ^ b1[16];
	end

// Edge 3: the sign travels alongside the u3 multiplier register.
always @(posedge clk)
	if (ce) sgn3 <= sgn2;

mult16x16 u1 (
	.clk(clk),
	.ce(ce),
	.a(a[31:16]),
	.b(b[31:16]),
	.o(z2)			// z2 = x1 * y1
);

mult16x16 u2 (
	.clk(clk),
	.ce(ce),
	.a(a[15:0]),
	.b(b[15:0]),
	.o(z0)			// z0 = x0 * y0
);

mult16x16 u3 (
	.clk(clk),
	.ce(ce),
	.a(a2[15:0]),
	.b(b2[15:0]),
	.o(p3)			// p3 = abs(x0-x1) * abs(y1-y0)
);

// Edge 4: apply the sign to the middle product. The value is held one bit
// wider than p3 so that the top bit is a true sign bit: a zero product stays
// +0 even when the sign flag is set. (Replicating the bare sign flag above a
// zero product would add a spurious 2^32 to z1, e.g. 0 * 1 would give 2^48.)
always @(posedge clk)
	if (ce) p4 <= sgn3 ? -{1'b0,p3} : {1'b0,p3};

// Edges 2..4: hold z2/z0 while the middle product is finished.
always @(posedge clk)
	if (ce) begin
		z2a <= z2;
		z0a <= z0;
		z2b <= z2a;
		z0b <= z0a;
		z2c <= z2b;
		z0c <= z0b;
	end

// Edge 5: z1 = x1*y0 + x0*y1; arithmetic is modulo 2^33, which the true sum
// fits in.
always @(posedge clk)
	if (ce) z1 <= p4 + z2c + z0c;

always @(posedge clk)
	if (ce) begin
		z2d <= z2c;
		z0d <= z0c;
	end

// Edge 6: final recombination.
always @(posedge clk)
	if (ce) o <= {z2d,z0d} + {z1,16'd0};

endmodule
/mult64x64.sv
0,0 → 1,119
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult64x64.sv |
// - Karatsuba multiply |
// - 11 cycle latency |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// Pipelined 64x64 -> 128 bit unsigned multiplier built from three 32x32
// products (Karatsuba).
//
// With a = {x1,x0} and b = {y1,y0} split into 32-bit halves:
//   z2 = x1*y1
//   z0 = x0*y0
//   z1 = (x0-x1)*(y1-y0) + z2 + z0      (equals x1*y0 + x0*y1)
//   o  = {z2,z0} + (z1 << 32)
//
// Ports:
//   clk  - clock
//   ce   - clock enable; every register in this module holds while ce is low
//   a, b - operands
//   o    - registered product. Counting the stages below, it appears 11
//          enabled clock edges after the operands, taking mult32x32 as a
//          6-cycle block (as noted at its instantiation).
module mult64x64(clk, ce, a, b, o);
input clk;
input ce;
input [63:0] a;
input [63:0] b;
output reg [127:0] o;

reg [32:0] a1, b1;				// half differences, bit 32 is the sign (borrow)
reg [31:0] a2, b2;				// magnitudes of the half differences
wire [63:0] z0, z2, p3;			// outputs of the three 32x32 multipliers
reg [63:0] z0a, z2a, z0b, z2b, z0c, z2c, z0d, z2d;
reg [64:0] p4;					// signed (two's complement) middle product
reg [64:0] z1;					// extra bit for carry
reg sgn2;
wire sgn9;

// x0-x1 and y1-y0, computed one bit wider so the borrow is kept as a sign.
always @(posedge clk)
	if (ce) begin
		a1 <= a[31: 0] - a[63:32];
		b1 <= b[63:32] - b[31: 0];
	end

// Absolute values of the differences plus the sign of their product.
always @(posedge clk)
	if (ce) begin
		a2 <= a1[32] ? -a1 : a1;
		b2 <= b1[32] ? -b1 : b1;
		sgn2 <= a1[32] ^ b1[32];
	end

// Carries the product sign alongside the u3 multiplier.
// NOTE(review): p4 below must sample the sign that belongs to the p3 value
// leaving u3 in the same cycle. Whether DEP(7) achieves that depends on how
// many clocks the `delay` module (defined outside this file) adds - confirm.
delay #(.WID(1), .DEP(7)) udl1 (.clk(clk), .ce(ce), .i(sgn2), .o(sgn9));

// 6 cycle latency
mult32x32 u1 (
	.clk(clk),
	.ce(ce),
	.a(a[63:32]),
	.b(b[63:32]),
	.o(z2)			// z2 = x1 * y1
);

mult32x32 u2 (
	.clk(clk),
	.ce(ce),
	.a(a[31:0]),
	.b(b[31:0]),
	.o(z0)			// z0 = x0 * y0
);

mult32x32 u3 (
	.clk(clk),
	.ce(ce),
	.a(a2[31:0]),
	.b(b2[31:0]),
	.o(p3)			// p3 = abs(x0-x1) * abs(y1-y0)
);

// Apply the sign to the middle product. The value is held one bit wider than
// p3 so that the top bit is a true sign bit: a zero product stays +0 even when
// the sign flag is set. (Replicating the bare sign flag above a zero product
// would add a spurious 2^64 to z1.)
always @(posedge clk)
	if (ce) p4 <= sgn9 ? -{1'b0,p3} : {1'b0,p3};

// Hold z2/z0 while the middle product is finished.
always @(posedge clk)
	if (ce) begin
		z2a <= z2;
		z0a <= z0;
		z2b <= z2a;
		z0b <= z0a;
		z2c <= z2b;
		z0c <= z0b;
	end

// z1 = x1*y0 + x0*y1; arithmetic is modulo 2^65, which the true sum fits in.
always @(posedge clk)
	if (ce) z1 <= p4 + z2c + z0c;

always @(posedge clk)
	if (ce) begin
		z2d <= z2c;
		z0d <= z0c;
	end

// Final recombination.
always @(posedge clk)
	if (ce) o <= {z2d,z0d} + {z1,32'd0};

endmodule