URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk/rtl/verilog2
- from Rev 73 to Rev 74
- ↔ Reverse comparison
Rev 73 → Rev 74
/fpFMA32combo.sv
0,0 → 1,644
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2019-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpFMA32combo.sv |
// - floating point fused multiplier + adder |
// - combinational logic only |
// - IEEE 754 representation |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import fp32Pkg::*; |
|
// ----------------------------------------------------------------------------
// fpFMA32combo
//   Purely combinational fused multiply-add: o = (a * b) +/- c.
//
//   op     0 = add c to the product, 1 = subtract c from the product
//   rm     rounding mode; used here only to choose the sign of an exact zero
//          result (rm == 3 yields a negative zero on cancellation)
//   a,b,c  operands
//   o      un-normalized, un-rounded result in the expanded format
//   under  set when the selected result exponent is negative
//   over   set when the selected result exponent saturated to all ones
//   inf    set when the output exponent field is all ones
//   zero   set when both the output exponent and significand are zero
//
//   The "Clock #n" headings are stage labels retained from the stage-by-stage
//   structure of the code; no clock is present in this module and every stage
//   is a simple combinational assignment. Blocking assignments are used
//   throughout, as is conventional for combinational logic.
// ----------------------------------------------------------------------------
module fpFMA32combo (op, rm, a, b, c, o, under, over, inf, zero);
input op;                 // operation 0 = add, 1 = subtract
input [2:0] rm;           // rounding mode
input FP32 a, b, c;
output FP32X o;
output under;
output over;
output inf;
output zero;

// constants
wire [fp32Pkg::EMSB:0] infXp = {fp32Pkg::EMSB+1{1'b1}};       // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [fp32Pkg::EMSB:0] bias = {1'b0,{fp32Pkg::EMSB{1'b1}}};   // 2^0 exponent
// The following is a template for a quiet NaN. (MSB=1)
wire [fp32Pkg::FMSB:0] qNaN = {1'b1,{fp32Pkg::FMSB{1'b0}}};

// -----------------------------------------------------------
// Clock #1
// - decode the input operands
// - derive basic information
// -----------------------------------------------------------

wire sa1, sb1, sc1;                                     // sign bits
wire [fp32Pkg::EMSB:0] xa1, xb1, xc1;                   // exponent bits
wire [fp32Pkg::FMSB+1:0] fracta1, fractb1, fractc1;     // includes unhidden bit
wire a_dn1, b_dn1, c_dn1;                               // operand is denormalized
wire aNan1, bNan1, cNan1;
wire az1, bz1, cz1;
wire aInf1, bInf1, cInf1;
reg op1;

fpDecomp32 u1a (.i(a), .sgn(sa1), .exp(xa1), .fract(fracta1), .xz(a_dn1), .vz(az1), .inf(aInf1), .nan(aNan1) );
fpDecomp32 u1b (.i(b), .sgn(sb1), .exp(xb1), .fract(fractb1), .xz(b_dn1), .vz(bz1), .inf(bInf1), .nan(bNan1) );
fpDecomp32 u1c (.i(c), .sgn(sc1), .exp(xc1), .fract(fractc1), .xz(c_dn1), .vz(cz1), .inf(cInf1), .nan(cNan1) );

always_comb
  op1 = op;

// -----------------------------------------------------------
// Clock #2
// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract the bias)
// mul: ex1 = xa + xb, result should always be < 1ffh
// Form partial products (clocks 2 to 5)
// -----------------------------------------------------------

reg abz2;
reg [fp32Pkg::EMSB+2:0] ex2;
reg [fp32Pkg::EMSB:0] xc2;
reg realOp2;
reg xcInf2;

always_comb
  abz2 = az1|bz1;
// A non-zero denormal has its exponent LSB forced to one before summing.
always_comb
  ex2 = (xa1|(a_dn1&~az1)) + (xb1|(b_dn1&~bz1)) - bias;
always_comb
  xc2 = (xc1|(c_dn1&~cz1));
always_comb
  xcInf2 = &xc1;

// Figure out which operation is really needed, an add or a
// subtract?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, sign of larger
// -a +  b = sub, sign of larger
// -a + -b = add,-
//  a -  b = sub, sign of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, sign of larger
always_comb
  realOp2 = (sa1 ^ sb1) ^ sc1 ? ~op1 : op1;

reg [fp32Pkg::FX:0] fract5;
wire [63:0] fractoo;
mult32x32combo umul1 (.a({9'd0,fracta1}), .b({9'd0,fractb1}), .o(fractoo));
always_comb
  fract5 = fractoo[fp32Pkg::FX:0];

// -----------------------------------------------------------
// Clock #3
// Select zero exponent
// -----------------------------------------------------------

reg [fp32Pkg::EMSB+2:0] ex3;
reg [fp32Pkg::EMSB:0] xc3;
always_comb
  ex3 = abz2 ? 1'd0 : ex2;
always_comb
  xc3 = xc2;

// -----------------------------------------------------------
// Clock #4
// Generate partial products.
// -----------------------------------------------------------

reg [fp32Pkg::EMSB+2:0] ex4;
reg [fp32Pkg::EMSB:0] xc4;

always_comb
  ex4 = ex3;
always_comb
  xc4 = xc3;

// -----------------------------------------------------------
// Clock #5
// Sum partial products (above)
// compute multiplier overflow and underflow
// -----------------------------------------------------------

// Status
reg under5;
reg over5;
reg [fp32Pkg::EMSB+2:0] ex5;
reg [fp32Pkg::EMSB:0] xc5;
reg aInf5, bInf5;
reg aNan5, bNan5;
reg qNaNOut5;

// The top bit of the extended exponent is its sign: negative means underflow.
always_comb
  under5 = ex4[fp32Pkg::EMSB+2];
always_comb
  over5 = (&ex4[fp32Pkg::EMSB:0] | ex4[fp32Pkg::EMSB+1]) & !ex4[fp32Pkg::EMSB+2];
always_comb
  ex5 = ex4;
always_comb
  xc5 = xc4;
always_comb
  aInf5 = aInf1;
always_comb
  bInf5 = bInf1;

// determine when a NaN is output
reg [fp32Pkg::MSB:0] a5,b5;
always_comb
  qNaNOut5 = (aInf1&bz1)|(bInf1&az1);   // infinity * zero
always_comb
  aNan5 = aNan1;
always_comb
  bNan5 = bNan1;
always_comb
  a5 = a;
always_comb
  b5 = b;

// -----------------------------------------------------------
// Clock #6
// - figure multiplier mantissa output
// - figure multiplier exponent output
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------

reg [fp32Pkg::FX:0] mo6;
reg [fp32Pkg::EMSB+2:0] ex6;
reg [fp32Pkg::EMSB:0] xc6;
reg [fp32Pkg::FMSB+1:0] fractc6;
reg under6;
always_comb
  fractc6 = fractc1;
always_comb
  under6 = under5;

always_comb
  xc6 = xc5;

always_comb
  casez({aNan5,bNan5,qNaNOut5,aInf5,bInf5,over5})
  6'b1?????:  mo6 = {1'b1,1'b1,a5[fp32Pkg::FMSB-1:0],{fp32Pkg::FMSB+1{1'b0}}};
  6'b01????:  mo6 = {1'b1,1'b1,b5[fp32Pkg::FMSB-1:0],{fp32Pkg::FMSB+1{1'b0}}};
  6'b001???:  mo6 = {1'b1,qNaN|3'd4,{fp32Pkg::FMSB+1{1'b0}}};   // multiply inf * zero
  6'b0001??:  mo6 = 0;    // mul inf's
  6'b00001?:  mo6 = 0;    // mul inf's
  6'b000001:  mo6 = 0;    // mul overflow
  default:    mo6 = fract5;
  endcase

// Note: infXp is narrower than ex6, so an infinite/NaN exponent is stored
// with the two upper guard bits clear.
always_comb
  casez({qNaNOut5|aNan5|bNan5,aInf5,bInf5,over5,under5})
  5'b1????:   ex6 = infXp;    // qNaN - infinity * zero
  5'b01???:   ex6 = infXp;    // 'a' infinite
  5'b001??:   ex6 = infXp;    // 'b' infinite
  5'b0001?:   ex6 = infXp;    // result overflow
  5'b00001:   ex6 = ex5;      //0; // underflow
  default:    ex6 = ex5;      // situation normal
  endcase

// -----------------------------------------------------------
// Clock #7
// - prep for addition, determine greater operand
// -----------------------------------------------------------
reg ex_gt_xc7;
reg xeq7;
reg ma_gt_mc7;
reg meq7;
reg az7, bz7, cz7;
reg realOp7;

// which has greater magnitude ? Used for sign calc
always_comb
  ex_gt_xc7 = xc6=='d0 ? |ex6 : $signed(ex6) > $signed({2'b0,xc6});
always_comb
  xeq7 = (ex6=={2'b0,xc6});
always_comb
  ma_gt_mc7 = mo6 > {fractc6,{fp32Pkg::FMSB+1{1'b0}}};
always_comb
  meq7 = mo6 == {fractc6,{fp32Pkg::FMSB+1{1'b0}}};
always_comb
  az7 = az1;
always_comb
  bz7 = bz1;
always_comb
  cz7 = cz1;
always_comb
  realOp7 = realOp2;

// -----------------------------------------------------------
// Clock #8
// - prep for addition, determine greater operand
// - determine if result will be zero
// -----------------------------------------------------------

reg a_gt_b8;
reg resZero8;
reg ex_gt_xc8;
reg [fp32Pkg::EMSB+2:0] ex8;
reg [fp32Pkg::EMSB:0] xc8;
reg xcInf8;
reg [2:0] rm8;
reg op8;
reg sa8, sc8;

always_comb
  ex8 = ex6;
always_comb
  xc8 = xc6;
always_comb
  xcInf8 = xcInf2;
always_comb
  rm8 = rm;
always_comb
  op8 = op1;
always_comb
  sa8 = sa1 ^ sb1;    // sign of the product
always_comb
  sc8 = sc1;

always_comb
  ex_gt_xc8 = ex_gt_xc7;
always_comb
  a_gt_b8 = ex_gt_xc7 || (xeq7 && ma_gt_mc7);

// Find out if the result will be zero.
always_comb
  resZero8 = (realOp7 & xeq7 & meq7) ||   // subtract, same magnitude
             ((az7 | bz7) & cz7);         // a or b zero and c zero

// -----------------------------------------------------------
// Clock #9
// Compute output exponent and sign
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// -----------------------------------------------------------

reg so9;
reg [fp32Pkg::EMSB+2:0] ex9;
reg [fp32Pkg::EMSB+2:0] ex9a;
reg ex_gt_xc9;
reg [fp32Pkg::EMSB:0] xc9;
reg a_gt_c9;
reg [fp32Pkg::FX:0] mo9;
reg [fp32Pkg::FMSB+1:0] fractc9;
reg under9;
reg xeq9;

always_comb
  ex_gt_xc9 = ex_gt_xc8;
always_comb
  a_gt_c9 = a_gt_b8;
always_comb
  xc9 = xc8;
always_comb
  ex9a = ex8;
always_comb
  mo9 = mo6;
always_comb
  fractc9 = fractc6;
always_comb
  under9 = under6;
always_comb
  xeq9 = xeq7;

always_comb
  ex9 = resZero8 ? 1'd0 : ex_gt_xc8 ? ex8 : {2'b0,xc8};

// Compute output sign
always_comb
  case ({resZero8,sa8,op8,sc8})   // synopsys full_case parallel_case
  4'b0000: so9 = 0;           // + + + = +
  4'b0001: so9 = !a_gt_b8;    // + + - = sign of larger
  4'b0010: so9 = !a_gt_b8;    // + - + = sign of larger
  4'b0011: so9 = 0;           // + - - = +
  4'b0100: so9 = a_gt_b8;     // - + + = sign of larger
  4'b0101: so9 = 1;           // - + - = -
  4'b0110: so9 = 1;           // - - + = -
  4'b0111: so9 = a_gt_b8;     // - - - = sign of larger
  4'b1000: so9 = 0;           //  A +  B, sign = +
  4'b1001: so9 = rm8==3;      //  A + -B, sign = + unless rounding down
  4'b1010: so9 = rm8==3;      //  A -  B, sign = + unless rounding down
  4'b1011: so9 = 0;           // +A - -B, sign = +
  4'b1100: so9 = rm8==3;      // -A +  B, sign = + unless rounding down
  4'b1101: so9 = 1;           // -A + -B, sign = -
  4'b1110: so9 = 1;           // -A - +B, sign = -
  4'b1111: so9 = rm8==3;      // -A - -B, sign = + unless rounding down
  endcase

// -----------------------------------------------------------
// Clock #10
// Compute the difference in exponents, provides shift amount
// Note that ex9a will be negative for an underflow condition
// so it's added rather than subtracted from xc9 as -(-num)
// is the same as an add. The underflow is tracked rather than
// using extra bits in the exponent.
// -----------------------------------------------------------
reg [fp32Pkg::EMSB+2:0] xdiff10;
reg [fp32Pkg::FX:0] mfs;
reg ops10;

// If the multiplier exponent was negative (underflowed) then
// the mantissa needs to be shifted right even more (until
// the exponent is zero). The total shift would be xc9-0-
// amount underflows which is xc9 + -ex9a.

always_comb
  xdiff10 = ex_gt_xc9 ? ex9a - xc9
          : ex9a[fp32Pkg::EMSB+2] ? xc9 + (~ex9a+2'd1)
          : xc9 - ex9a;

// Determine which fraction to denormalize (the one with the
// smaller exponent is denormalized). If the exponents are equal
// denormalize the smaller fraction.
always_comb
  mfs =
    xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{fp32Pkg::FMSB+1{1'b0}}} : mo9)
    : ex_gt_xc9 ? {4'b0,fractc9,{fp32Pkg::FMSB+1{1'b0}}} : mo9;

// ops10 = 1 when the c fraction is the one being shifted.
always_comb
  ops10 = xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
        : (ex_gt_xc9 ? 1'b1 : 1'b0);

// -----------------------------------------------------------
// Clock #11
// Limit the size of the shifter to only bits needed.
// -----------------------------------------------------------
reg [7:0] xdif11;

always_comb
  xdif11 = xdiff10 > fp32Pkg::FX+3 ? fp32Pkg::FX+3 : xdiff10;

// -----------------------------------------------------------
// Clock #12
// Determine the sticky bit
// -----------------------------------------------------------

wire sticky;
reg sticky12;
reg [fp32Pkg::FX:0] mfs12;
reg [7:0] xdif12;

redorN #(.BSIZE(fp32Pkg::FX+1)) uredor1 (.a({1'b0,xdif11+fp32Pkg::FMSB}), .b(mfs), .o(sticky));

// inputs to shifter and shift
always_comb
  sticky12 = sticky;
always_comb
  xdif12 = xdif11;
always_comb
  mfs12 = mfs;

// -----------------------------------------------------------
// Clock #13
// - denormalize operand (shift right)
// -----------------------------------------------------------
reg [fp32Pkg::FX+2:0] mfs13;
reg [fp32Pkg::FX:0] mo13;
reg ex_gt_xc13;
reg [fp32Pkg::FMSB+1:0] fractc13;
reg ops13;

always_comb
  mo13 = mo9;
always_comb
  ex_gt_xc13 = ex_gt_xc9;
always_comb
  fractc13 = fractc9;
always_comb
  ops13 = ops10;

// Two extra low-order bits are appended before shifting; the sticky bit is
// OR'ed into the least significant position.
always_comb
  mfs13 = ({mfs12,2'b0} >> xdif12)|sticky12;

// -----------------------------------------------------------
// Clock #14
// Sort operands
// -----------------------------------------------------------
reg [fp32Pkg::FX+2:0] oa, ob;
reg a_gt_b14;

always_comb
  a_gt_b14 = a_gt_b8;

always_comb
  oa = ops13 ? {mo13,2'b00} : mfs13;
always_comb
  ob = ops13 ? mfs13 : {fractc13,{fp32Pkg::FMSB+1{1'b0}},2'b00};

// -----------------------------------------------------------
// Clock #15
// - Sort operands
// -----------------------------------------------------------
reg [fp32Pkg::FX+2:0] oaa, obb;
reg realOp15;
reg [fp32Pkg::EMSB:0] ex15;
reg underflow15;

// Saturate the exponent to all ones on overflow.
//wire [fp32Pkg::EMSB:0] ex9c = ex9[fp32Pkg::EMSB+1] ? infXp : ex9[fp32Pkg::EMSB:0];
wire [fp32Pkg::EMSB:0] ex9c = (&ex9[fp32Pkg::EMSB:0] | ex9[fp32Pkg::EMSB+1]) & !ex9[fp32Pkg::EMSB+2] ? infXp : ex9[fp32Pkg::EMSB:0];
reg overflow15;
always_comb
  realOp15 = realOp7;
always_comb
  ex15 = ex9c;
always_comb
  overflow15 = (ex9[fp32Pkg::EMSB+1]| &ex9[fp32Pkg::EMSB:0]) & !ex9[fp32Pkg::EMSB+2];
always_comb
  underflow15 = ex9[fp32Pkg::EMSB+2];
// Larger magnitude goes first so the subtract below cannot go negative.
always_comb
  oaa = a_gt_b14 ? oa : ob;
always_comb
  obb = a_gt_b14 ? ob : oa;

// -----------------------------------------------------------
// Clock #16
// - perform add/subtract
// - addition can generate an extra bit, subtract can't go negative
// -----------------------------------------------------------
reg [fp32Pkg::FX+3:0] mab;
reg [fp32Pkg::FX:0] mo16;
reg [fp32Pkg::FMSB+1:0] fractc16;
reg Nan16;
reg cNan16;
reg aInf16, cInf16;
reg realOp16;
reg exinf16;

always_comb
  Nan16 = qNaNOut5|aNan5|bNan5;
always_comb
  cNan16 = cNan1;
// The product is infinite when the multiplier stage forced its exponent to
// infXp. ex6 is two bits wider than infXp and those guard bits are zero in
// that case, so a reduction-AND across all of ex6 can never detect it (and
// would instead match an exponent of -1). Compare against the zero-extended
// constant, and exclude NaN products, which use the same exponent encoding.
always_comb
  aInf16 = (ex6 == {2'b00,infXp}) & ~(qNaNOut5|aNan5|bNan5);
always_comb
  cInf16 = cInf1;
// Effective operation (signs taken into account): 1 = magnitudes subtract.
always_comb
  realOp16 = realOp15;
always_comb
  mo16 = mo13;
always_comb
  fractc16 = fractc9;
always_comb
  exinf16 = &ex15;

always_comb
  mab = realOp15 ? oaa - obb : oaa + obb;

// -----------------------------------------------------------
// Clock #17
// - adjust for NaNs
// -----------------------------------------------------------
reg [fp32Pkg::EMSB:0] ex17;
reg [fp32Pkg::FX:0] mo17;
reg so17;
reg exinf17;
reg overflow17;

always_comb
  so17 = so9;
always_comb
  ex17 = ex15;
always_comb
  exinf17 = exinf16;
always_comb
  overflow17 = overflow15;

// Infinity combined with infinity: an effective subtraction (inf - inf) is an
// invalid operation and yields a quiet NaN (non-zero significand); an
// effective addition leaves the significand zero, i.e. infinity. The
// effective operation is used rather than the raw op input so that operand
// signs are honoured (e.g. +inf - (-inf) remains infinite).
always_comb
  casez({aInf16&cInf16,Nan16,cNan16,exinf16})
  4'b1???:  mo17 = {1'b0,realOp16,{fp32Pkg::FMSB-1{1'b0}},realOp16,{fp32Pkg::FMSB{1'b0}}};
  4'b01??:  mo17 = {1'b0,mo16};
  4'b001?:  mo17 = {1'b1,1'b1,fractc16[fp32Pkg::FMSB-1:0],{fp32Pkg::FMSB+1{1'b0}}};
  4'b0001:  mo17 = 1'd0;
  default:  mo17 = mab[fp32Pkg::FX+3:2];    // mab has two extra lead bits and two trailing bits
  endcase

assign o.sign = so17;
assign o.exp = ex17;
assign o.sig = mo17;

assign zero = {ex17,mo17}==1'd0;
assign inf = exinf17;
assign under = underflow15;   //ex17==1'd0;
assign over = overflow17;

endmodule
|
|
// Fused multiplier + adder with normalization and rounding. |
|
// ----------------------------------------------------------------------------
// fpFMA32nrCombo
//   Combinational fused multiply-add followed by normalization and rounding:
//   fpFMA32combo -> fpNormalize32combo -> fpRound32combo.
//
//   op         0 = add, 1 = subtract (passed to fpFMA32combo)
//   rm         rounding mode (passed to both the FMA and the rounder)
//   a,b,c      operands
//   o          rounded result
//   zero, inf  classification of the rounded result, taken from fpDecomp32
//   overflow   overflow flag reported by the FMA stage
//   underflow  underflow flag reported by the FMA stage
//   inexact    inexact flag reported by the normalizer
// ----------------------------------------------------------------------------
module fpFMA32nrCombo(op, rm, a, b, c, o, inf, zero, overflow, underflow, inexact);
input op;
input [2:0] rm;
input FP32 a, b, c;
output FP32 o;
output zero;
output inf;
output reg overflow;
output reg underflow;
output reg inexact;

wire FP32X fma_o;         // un-normalized FMA result
wire fma_underflow;
wire fma_overflow;
wire norm_underflow;      // produced by the normalizer; not reported by this module
wire norm_inexact;
wire FP32N fpn0;          // normalized, not yet rounded

// The FMA core's own zero / inf outputs are left unconnected; the flags are
// derived from the final rounded value instead (u4 below).
fpFMA32combo u1
(
  .op(op),
  .rm(rm),
  .a(a),
  .b(b),
  .c(c),
  .o(fma_o),
  .under(fma_underflow),
  .over(fma_overflow),
  .zero(),
  .inf()
);
fpNormalize32combo u2
(
  .i(fma_o),
  .o(fpn0),
  .under_i(fma_underflow),
  .under_o(norm_underflow),
  .inexact_o(norm_inexact)
);
fpRound32combo u3(.rm(rm), .i(fpn0), .o(o) );
fpDecomp32 u4(.i(o), .xz(), .vz(zero), .inf(inf));

// Status flags. Blocking assignments are used since this is pure
// combinational logic.
always_comb
  underflow = fma_underflow;
always_comb
  overflow = fma_overflow;
always_comb
  inexact = norm_inexact;
//assign overflow = inf;

endmodule
|
/fpNormalize32combo.sv
0,0 → 1,339
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpNormalize32combo.sv |
// - floating point normalization unit |
// - combinational logic only |
// - IEEE 754 representation |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// This unit takes a floating point number in an intermediate |
// format and normalizes it. No normalization occurs |
// for NaN's or infinities. This combinational version adds no clock latency. |
// |
// The mantissa is assumed to start with two whole bits on |
// the left. The remaining bits are fractional. |
// |
// The width of the incoming format is reduced via a generation |
// of sticky bit in place of the low order fractional bits. |
// |
// On an underflowed input, the incoming exponent is assumed |
// to be negative. A right shift is needed. |
// ============================================================================ |
|
import fp32Pkg::*; |
|
// ----------------------------------------------------------------------------
// fpNormalize32combo
//   Combinational normalizer for the expanded intermediate format.
//
//   i          expanded format input
//   o          normalized output + guard, sticky and round bits, + 1 whole digit
//   under_i    input exponent underflowed (treated as the exponent's top bit)
//   under_o    set when the right-shift (denormalizing) path was selected
//   inexact_o  set when non-zero low-order bits were folded away
//
//   The "Clock #n" headings are stage labels only; the module contains no
//   clocked logic. Blocking assignments are used throughout, as is
//   conventional for combinational logic.
// ----------------------------------------------------------------------------
module fpNormalize32combo(i, o, under_i, under_o, inexact_o);
input FP32X i;          // expanded format input
output FP32N o;         // normalized output + guard, sticky and round bits, + 1 whole digit
input under_i;
output reg under_o;
output reg inexact_o;

integer n;
// ----------------------------------------------------------------------------
// No Clock required
// ----------------------------------------------------------------------------
reg [fp32Pkg::EMSB+1:0] xo0;
reg so0;

always_comb
  xo0 = {under_i,i.exp};
always_comb
  so0 = i.sign;         // sign doesn't change

// ----------------------------------------------------------------------------
// Clock #1
// - Capture exponent information
// ----------------------------------------------------------------------------
reg xInf1a, xInf1b, xInf1c;
FP32X i1;

always_comb
  i1 = i;
always_comb
  xInf1a = &xo0 & !under_i;
always_comb
  xInf1b = &xo0[fp32Pkg::EMSB:1] & !under_i;
always_comb
  xInf1c = &xo0[fp32Pkg::EMSB:0] & !under_i;

// ----------------------------------------------------------------------------
// Clock #2
// - determine exponent increment
// Since there are *three* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
// ----------------------------------------------------------------------------
reg xInf2c, xInf2b;
reg [fp32Pkg::EMSB:0] xo2;
reg incExpByOne2, incExpByTwo2;
reg under2;
always_comb
  xInf2c = xInf1c;
always_comb
  xInf2b = xInf1b;
// NOTE(review): xo2 is one bit narrower than xo0, so the under_i bit held in
// xo0's MSB is dropped by this assignment - confirm this is intended.
always_comb
  xo2 = xo0;
always_comb
  under2 = under_i;
always_comb
  incExpByTwo2 = !xInf1b & i1[fp32Pkg::FX];
always_comb
  incExpByOne2 = !xInf1a & i1[fp32Pkg::FX-1];

// ----------------------------------------------------------------------------
// Clock #3
// - increment exponent
// - detect a zero mantissa
// ----------------------------------------------------------------------------

reg incExpByTwo3;
reg incExpByOne3;
FP32X i3;
reg [fp32Pkg::EMSB+1:0] xo3;
reg zeroMan3;
always_comb
  incExpByTwo3 = incExpByTwo2;
always_comb
  incExpByOne3 = incExpByOne2;
always_comb
  i3 = i;
wire [fp32Pkg::EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};
wire [fp32Pkg::EMSB+1:0] xv3b = xo2 + incExpByOne2;

always_comb
  xo3 = xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);

// The mantissa is forced to zero when incrementing the exponent reaches (or
// carries past) the all-ones value, unless the input was already infinite
// or had underflowed.
always_comb
  zeroMan3 = ((xv3b[fp32Pkg::EMSB+1]|| &xv3b[fp32Pkg::EMSB:0])||(xv3a[fp32Pkg::EMSB+1]| &xv3a[fp32Pkg::EMSB:0]))
             && !under2 && !xInf2c;

// ----------------------------------------------------------------------------
// Clock #4
// - Shift mantissa left
// - If infinity is reached then set the mantissa to zero
//   shift mantissa left to reduce to a single whole digit
// - create sticky bit
// ----------------------------------------------------------------------------

reg [fp32Pkg::FMSB+5:0] mo4;
reg inexact4;

// Select the mantissa window according to where the leading whole digit
// sits; the bits below the window are OR-reduced into the LSB (sticky).
always_comb
  casez({zeroMan3,incExpByTwo3,incExpByOne3})
  3'b1??:   mo4 = 1'd0;
  3'b01?:   mo4 = {i3[fp32Pkg::FX:fp32Pkg::FMSB],|i3[fp32Pkg::FMSB-1:0]};
  3'b001:   mo4 = {i3[fp32Pkg::FX-1:fp32Pkg::FMSB-1],|i3[fp32Pkg::FMSB-2:0]};
  default:  mo4 = {i3[fp32Pkg::FX-2:fp32Pkg::FMSB-2],|i3[fp32Pkg::FMSB-3:0]};
  endcase

always_comb
  casez({zeroMan3,incExpByTwo3,incExpByOne3})
  3'b1??:   inexact4 = 1'd0;
  3'b01?:   inexact4 = |i3[fp32Pkg::FMSB+1:0];
  3'b001:   inexact4 = |i3[fp32Pkg::FMSB:0];
  default:  inexact4 = |i3[fp32Pkg::FMSB-1:0];
  endcase

// ----------------------------------------------------------------------------
// Clock edge #5
// - count leading zeros
// ----------------------------------------------------------------------------
reg [7:0] leadingZeros5;
reg [fp32Pkg::EMSB+1:0] xo5;
reg xInf5;
always_comb
  xo5 = xo3;
always_comb
  xInf5 = xInf2c;

// Sideways add.
// Normally there would be only one to two leading zeros. It is tempting then
// to check for only one or two. But, denormalized numbers might have more
// leading zeros. If denormals were not supported this could be made smaller
// and faster.
`ifdef SUPPORT_DENORMALS
reg [7:0] lzc;
reg got_one;
always_comb
begin
  got_one = 1'b0;
  lzc = 8'h00;
  // Scan from the MSB down, counting zeros until the first one bit is seen.
  for (n = fp32Pkg::FMSB+5; n >= 0; n = n - 1) begin
    if (!got_one) begin
      if (mo4[n])
        got_one = 1'b1;
      else
        lzc = lzc + 1'b1;
    end
  end
end
always_comb
  leadingZeros5 = lzc;
`else
always_comb
  casez(mo4[fp32Pkg::FMSB+5:fp32Pkg::FMSB+4])
  2'b1?:  leadingZeros5 = 8'd0;
  2'b01:  leadingZeros5 = 8'd1;
  2'b00:  leadingZeros5 = 8'd2;
  endcase
`endif


// ----------------------------------------------------------------------------
// Clock edge #6
// - Compute how much we want to decrement exponent by
// - compute amount to shift left and right
// - at infinity the exponent can't be incremented, so we can't shift right
//   otherwise it was an underflow situation so the exponent was negative
//   shift amount needs to be negated for shift register
// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
// ----------------------------------------------------------------------------
reg [7:0] lshiftAmt6;
reg [7:0] rshiftAmt6;
reg rightOrLeft6;     // 0=left,1=right
reg xInf6;
reg [fp32Pkg::EMSB+1:0] xo6;
reg [fp32Pkg::FMSB+5:0] mo6;
reg zeroMan6;
always_comb
  rightOrLeft6 = under_i;
always_comb
  xo6 = xo5;
always_comb
  mo6 = mo4;
always_comb
  xInf6 = xInf5;
always_comb
  zeroMan6 = zeroMan3;

// Never shift left by more than the exponent can absorb.
always_comb
  lshiftAmt6 = leadingZeros5 > xo5 ? xo5 : leadingZeros5;

// NOTE(review): 1'd0 is an unsigned literal, so under the standard expression
// signedness rules this comparison is evaluated unsigned despite the $signed
// cast - confirm the intended behaviour for negative exponents.
always_comb
  rshiftAmt6 = xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1;   // xo2 is negative !

// ----------------------------------------------------------------------------
// Clock edge #7
// - figure exponent
// - shift mantissa
// - figure sticky bit
// ----------------------------------------------------------------------------

reg [fp32Pkg::EMSB:0] xo7;
reg rightOrLeft7;
reg [fp32Pkg::FMSB+5:0] mo7l, mo7r;
reg St6,St7;
always_comb
  rightOrLeft7 = rightOrLeft6;

always_comb
  xo7 = zeroMan6 ? xo6 :
        xInf6 ? xo6 :               // an infinite exponent is either a NaN or infinity; no need to change
        rightOrLeft6 ? 1'd0 :       // on a right shift, the exponent was negative, it's being made to zero
        xo6 - lshiftAmt6;           // on a left shift, the exponent can't be decremented below zero

always_comb
  mo7r = mo6 >> rshiftAmt6;
always_comb
  mo7l = mo6 << lshiftAmt6;

// The sticky bit is set if the bits shifted out on a right shift are set.
always_comb
begin
  St6 = 1'b0;
  for (n = 0; n < fp32Pkg::FMSB+5; n = n + 1)
    if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n];
end
always_comb
  St7 = St6;

// ----------------------------------------------------------------------------
// Clock edge #8
// - select mantissa
// ----------------------------------------------------------------------------

reg so;
reg [fp32Pkg::EMSB:0] xo;
reg [fp32Pkg::FMSB+5:0] mo;
always_comb
  so = so0;
always_comb
  xo = xo7;
always_comb
  inexact_o = inexact4;
always_comb
  under_o = rightOrLeft7;

// On the right-shift path the sticky bit is merged in at bit 2, which becomes
// the LSB of the output significand once the two low bits are dropped.
always_comb
  mo = rightOrLeft7 ? mo7r|{St7,2'b0} : mo7l;

assign o.sign = so;
assign o.exp = xo;
assign o.sig = mo[fp32Pkg::FMSB+5:2];

endmodule
|
/fpRound32combo.sv
0,0 → 1,135
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpRound32combo.sv |
// - floating point rounding unit |
// - IEEE 754 representation |
// - combinational logic only |
// |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
import fp32Pkg::*; |
|
// ----------------------------------------------------------------------------
// fpRound32combo
//
// Purely combinational rounding stage: takes a value in the intermediate
// format (FP32N) and produces a rounded FP32 result.
//
// Ports:
//   rm - rounding mode
//          0 = round to nearest, ties to even
//          1 = round to zero (truncate)
//          2 = round towards +infinity
//          3 = round towards -infinity
//          4 = round to nearest, ties away from zero
//          5..7 = no rounding increment is applied
//   i  - intermediate format input; this module reads it as
//          i[MSB+3]          sign
//          i[MSB+2:FMSB+4]   exponent
//          i[FMSB+3:0]       mantissa including the low-order rounding bits
//   o  - rounded output (fields sign / exp / sig)
//
// All procedural blocks use blocking assignments, as is appropriate for
// combinational logic; there is no clock and no state in this module.
// ----------------------------------------------------------------------------
module fpRound32combo(rm, i, o);
input [2:0] rm;			// rounding mode
input FP32N i;			// intermediate format input
output FP32 o;			// rounded output

//------------------------------------------------------------
// Field extraction
//------------------------------------------------------------
wire so0 = i[fp32Pkg::MSB+3];										// sign of the input
wire [fp32Pkg::EMSB:0] xo1 = i[fp32Pkg::MSB+2:fp32Pkg::FMSB+4];	// exponent of the input
wire [fp32Pkg::FMSB+3:0] mo1 = i[fp32Pkg::FMSB+3:0];				// mantissa of the input
wire xInf = &xo1;			// exponent all ones: infinity or NaN
wire dn = !(|xo1);			// exponent all zeros: denormalized input

wire l = i[3];				// bit position the rounding increment is added to
wire g = i[2];				// guard bit: always the same bit for all operations
wire r = i[1];				// rounding bit
wire s = i[0];				// sticky bit
wire tie = g & ~(r|s);		// guard set and nothing below it

//------------------------------------------------------------
// Stage 1
// - determine round amount (add 1 or 0)
// Infinities and NaNs are not rounded!
//------------------------------------------------------------
reg rnd;

// NOTE(review): the two directed modes (2 and 3) test only the guard bit,
// so an input with g=0 and r|s=1 is truncated rather than rounded away
// from zero. Behaviour is kept exactly as it was; confirm against the
// FP32N layout whether (g|r|s) was intended.
always_comb
	casez ({xInf,rm})
	4'b0000:	rnd = (g & (r|s)) | (l & tie);	// round to nearest ties to even
	4'b0001:	rnd = 1'b0;						// round to zero (truncate)
	4'b0010:	rnd = g & !so0;					// round towards +infinity
	4'b0011:	rnd = g & so0;					// round towards -infinity
	4'b0100:	rnd = (g & (r|s)) | tie;		// round to nearest ties away from zero
	4'b1???:	rnd = 1'b0;						// no rounding if exponent indicates infinite or NaN
	default:	rnd = 1'b0;						// modes 5..7
	endcase

//------------------------------------------------------------
// Stage 2
// - round the number, check for carry
// note: if the exponent was all ones already, rnd = 0 and no
//       rounding occurs
// note: exponent and mantissa are added as one vector, so a
//       carry out of the mantissa increments the exponent
//------------------------------------------------------------

// {exponent, mantissa[FMSB+3:3], 0} + {rnd, 0}: the increment lands on
// the bit that came from i[3]; bit 0 of the sum is always zero.
wire [fp32Pkg::MSB:0] rounded1 = {xo1,mo1[fp32Pkg::FMSB+3:3],1'b0} + {rnd,1'b0};
// The top mantissa bit was set before the add and is clear afterwards.
wire carry = mo1[fp32Pkg::FMSB+3] & !rounded1[fp32Pkg::FMSB+1];
// Exponent after rounding.
wire [fp32Pkg::EMSB:0] xo2 = rounded1[fp32Pkg::MSB:fp32Pkg::FMSB+2];

//------------------------------------------------------------
// Stage 3
// - select the output mantissa bits
//------------------------------------------------------------
reg [fp32Pkg::FMSB:0] mo;

// NOTE(review): in the 4'b1000 case the output LSB is rounded1[0], which
// is the padded zero, whereas the unrounded normalized case (4'b0??0)
// keeps i[2] as the LSB. Kept as-is; verify that this is intentional.
always_comb
	casez ({rnd,&xo2,carry,dn})
	4'b0??0:	mo = mo1[fp32Pkg::FMSB+2:2];		// not rounding, not denormalized, => hide MSB
	4'b0??1:	mo = mo1[fp32Pkg::FMSB+3:3];		// not rounding, denormalized
	4'b1000:	mo = rounded1[fp32Pkg::FMSB :0];	// exponent didn't change, number was normalized, => hide MSB
	4'b1001,										// exponent didn't change, but number was denormalized, => retain MSB
	4'b1010,										// carry, number was normalized
	4'b1011:	mo = rounded1[fp32Pkg::FMSB+1:1];	// carry, number was denormalized
	4'b11??:	mo = '0;							// exponent became all ones (infinity); rnd is zero for Inf/NaN inputs
	endcase

assign o.sign = so0;
assign o.exp = xo2;
assign o.sig = mo;

endmodule
/mult16x16combo.sv
0,0 → 1,47
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult16x16combo.sv |
// - this should typically synthesize to a single DSP multiplier |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
// ----------------------------------------------------------------------------
// mult16x16combo
//
// Unsigned 16 x 16 -> 32 bit combinational multiplier.
//
// Ports:
//   a, b - 16-bit unsigned operands
//   o    - 32-bit product a * b
//
// The output carries no declaration initializer: it is driven continuously
// by the always_comb below, which must be its only writer. A blocking
// assignment is used because this is combinational logic.
// ----------------------------------------------------------------------------
module mult16x16combo(a, b, o);
input [15:0] a;
input [15:0] b;
output reg [31:0] o;

always_comb
	o = a * b;		// evaluated at 32 bits (width of o), so no product bits are lost

endmodule
/mult32x32combo.sv
0,0 → 1,145
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2020-2022 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// mult32x32combo.sv |
// - Karatsuba multiply |
// - combinational logic only |
// |
// BSD 3-Clause License |
// Redistribution and use in source and binary forms, with or without |
// modification, are permitted provided that the following conditions are met: |
// |
// 1. Redistributions of source code must retain the above copyright notice, this |
// list of conditions and the following disclaimer. |
// |
// 2. Redistributions in binary form must reproduce the above copyright notice, |
// this list of conditions and the following disclaimer in the documentation |
// and/or other materials provided with the distribution. |
// |
// 3. Neither the name of the copyright holder nor the names of its |
// contributors may be used to endorse or promote products derived from |
// this software without specific prior written permission. |
// |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
// |
// ============================================================================ |
|
`define KARATSUBA 1 |
|
`ifdef KARATSUBA |
|
// ----------------------------------------------------------------------------
// mult32x32combo (Karatsuba variant)
//
// Unsigned 32 x 32 -> 64 bit combinational multiplier built from three
// 16 x 16 multipliers.
//
// With a = {x1,x0} and b = {y1,y0} (16-bit halves):
//   z2 = x1*y1
//   z0 = x0*y0
//   z1 = z2 + z0 + (x0-x1)*(y1-y0)      (= x0*y1 + x1*y0)
//   o  = {z2,z0} + (z1 << 16)
//
// Ports:
//   a, b - 32-bit unsigned operands
//   o    - 64-bit product
//
// All procedural logic uses blocking assignments and no variable written by
// an always_comb has a declaration initializer.
// ----------------------------------------------------------------------------
module mult32x32combo(a, b, o);
input [31:0] a;
input [31:0] b;
output reg [63:0] o;

reg [16:0] a1, b1;			// signed half differences; bit 16 is the sign
reg [15:0] a2, b2;			// magnitudes of the half differences
reg sgn;					// sign of (x0-x1)*(y1-y0)
wire [31:0] z0, z2;			// low and high partial products
wire [31:0] p3;				// abs(x0-x1) * abs(y1-y0)
reg [32:0] p4;				// p3 with its sign applied (two's complement)
reg [32:0] z1;				// middle term; extra bit for carry

// Half differences, their magnitudes and the sign of their product.
always_comb begin
	a1 = a[15: 0] - a[31:16];		// x0-x1
	b1 = b[31:16] - b[15: 0];		// y1-y0
	a2 = a1[16] ? -a1 : a1;			// magnitude always fits in 16 bits
	b2 = b1[16] ? -b1 : b1;
	sgn = a1[16] ^ b1[16];
end

mult16x16combo u1 (
	.a(a[31:16]),
	.b(b[31:16]),
	.o(z2)			// z2 = x1 * y1
);

mult16x16combo u2 (
	.a(a[15:0]),
	.b(b[15:0]),
	.o(z0)			// z0 = x0 * y0
);

mult16x16combo u3 (
	.a(a2),
	.b(b2),
	.o(p3)			// p3 = abs(x0-x1) * abs(y1-y0)
);

// Combine the three partial products.
always_comb begin
	// Negation is evaluated at 33 bits (width of p4), so p4[32] is the sign.
	p4 = sgn ? -p3 : p3;
	// Sign-extend p4 before adding the two unsigned partial products.
	z1 = {{32{p4[32]}},p4} + z2 + z0;
	o = {z2,z0} + {z1,16'd0};
end

endmodule
|
`else |
|
// This version of the multiply lets the tools implement the multiplier
// directly from the '*' operator. It is purely combinational.
//
// Parameters:
//   DEP  - retained only so existing instantiations that override it keep
//          compiling; it has no effect on the result.
// Ports:
//   a, b - 32-bit unsigned operands
//   o    - 64-bit product a * b
//
// The product is assigned in a single always_comb with a blocking
// assignment, so the output has exactly one writer.

module mult32x32combo(a, b, o);
parameter DEP = 6;
input [31:0] a;
input [31:0] b;
output reg [63:0] o;

always_comb
	o = a * b;		// evaluated at 64 bits (width of o), so the full product is kept

endmodule
|
`endif |