URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk/rtl/verilog
- from Rev 5 to Rev 6
- ↔ Reverse comparison
Rev 5 → Rev 6
/fpCompare.v
0,0 → 1,105
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2007-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpCompare.v |
// - floating point comparison unit |
// - parameterized width |
// - IEEE 754 representation |
// |
// Compares two floating point numbers and returns a status output. |
// Bit: |
// 0: 1 = equal, 0=not equal |
// 1: 1 = less than, |
// 2: 1 = magnitude less than |
// 3: 1 = unordered (nan in compare) |
// ============================================================================ |
// |
// fpCompare - combinational floating point comparator.
//
// Parameters:
//   WID  - total operand width in bits; selects the exponent/fraction split
// Ports:
//   a, b - operands to compare
//   o    - status bits:
//            [0] equal
//            [1] a less than b (sign aware)
//            [2] magnitude of a less than magnitude of b
//            [3] unordered (at least one operand is a NaN)
//   nanx - NaN exception output; currently tied low
//
// Note: o[0] is raised for bit-identical operands even when they are NaNs;
// o[3] is the only indication that a NaN took part in the comparison.
module fpCompare(a, b, o, nanx);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

input [WID-1:0] a, b;
output [3:0] o;
reg [3:0] o;
output nanx;

// Decompose the operands
wire sa;
wire sb;
wire [EMSB:0] xa;
wire [EMSB:0] xb;
wire [FMSB:0] ma;
wire [FMSB:0] mb;
wire az, bz;
wire nan_a, nan_b;

fpDecompose #(WID) u1(.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(), .xz(), .mz(), .vz(az), .inf(), .xinf(), .qnan(), .snan(), .nan(nan_a) );
fpDecompose #(WID) u2(.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(), .xz(), .mz(), .vz(bz), .inf(), .xinf(), .qnan(), .snan(), .nan(nan_b) );

wire unordered = nan_a | nan_b;

// +0 and -0 have different bit patterns but must compare equal
wire eq = (az & bz) || (a==b);
// Magnitude comparisons: exponent concatenated with mantissa orders
// like an unsigned integer.
wire gt1 = {xa,ma} > {xb,mb};
wire lt1 = {xa,ma} < {xb,mb};

// Signs differ : a < b exactly when a is negative, unless both are zero.
// Both negative: a < b when a has the larger magnitude.
// Both positive: a < b when a has the smaller magnitude.
wire lt = sa ^ sb ? sa & !(az & bz): sa ? gt1 : lt1;

// The previous explicit sensitivity list omitted lt1, so o[2] could go
// stale in simulation when only the magnitude relation changed.
// always @* tracks every signal that is read.
always @*
begin
	o[0] = eq;
	o[1] = lt;
	o[2] = lt1;
	o[3] = unordered;
end

// an unordered comparison would signal a NaN exception; disabled for now
//assign nanx = op!=`FCOR && op!=`FCUN && unordered;
assign nanx = 1'b0;

endmodule
|
// Static stimulus for fpCompare: four single precision operand pairs are
// wired to four comparator instances; results appear on o1..o4.
module fpCompare_tb();

// IEEE-754 single precision bit patterns used as stimulus
localparam [31:0] NEG_ZERO    = 32'h80000000;	// -0
localparam [31:0] POS_ZERO    = 32'h00000000;	// +0
localparam [31:0] POS_ONE     = 32'h3F800000;	// 1
localparam [31:0] NEG_ONE     = 32'hBF800000;	// -1
localparam [31:0] NEG_TEN     = 32'hC1200000;	// -10
localparam [31:0] POS_25      = 32'h41C80000;	// 25
localparam [31:0] POS_100     = 32'h42C80000;	// 100
localparam [31:0] POS_210     = 32'h43520000;	// 210

wire [3:0] o1,o2,o3,o4;

// -0 to +0
fpCompare #(32) u1 (
	.a(NEG_ZERO),
	.b(POS_ZERO),
	.o(o1),
	.nanx()
);

// 1 to -1
fpCompare #(32) u2 (
	.a(POS_ONE),
	.b(NEG_ONE),
	.o(o2),
	.nanx()
);

// -10 to 25
fpCompare #(32) u3 (
	.a(NEG_TEN),
	.b(POS_25),
	.o(o3),
	.nanx()
);

// 100 to 210
fpCompare #(32) u4 (
	.a(POS_100),
	.b(POS_210),
	.o(o4),
	.nanx()
);

endmodule
/fpdivr2.v
0,0 → 1,124
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpdivr2.v |
// Radix 2 floating point divider primitive |
// |
// ============================================================================ |
// |
// fpdivr2 - radix 2 restoring divider primitive.
//
// Parameters:
//   WID  - width of the dividend and divisor
// Ports:
//   clk  - clock
//   ld   - load: captures 'a' into the upper half of q and restarts the count
//   a    - dividend
//   b    - divisor
//   q    - quotient; {a, WID zeros} is shifted left one bit per clock
//          while quotient bits enter at bit 0
//   r    - remainder of the last completed step
//   done - high once the iteration counter has wrapped (cnt[7] set)
//
// WID*2-1 steps are performed after a load (cnt runs WID*2-2 down to 0 and
// then wraps). cnt is only 8 bits wide, so WID*2-2 must stay below 128 or
// 'done' is asserted immediately. There is no reset: cnt, and therefore
// 'done', is unknown until the first load.
module fpdivr2
#(	parameter WID = 24 )
(
	input clk,
	input ld,
	input [WID-1:0] a,
	input [WID-1:0] b,
	output reg [WID*2-1:0] q,
	output [WID-1:0] r,
	output done
);
localparam DMSB = WID-1;

reg [DMSB:0] rxx;		// running partial remainder
reg [7:0] cnt;			// iteration count
// Outputs of the (currently disabled) special case divider. sdval is tied
// low, so sdq / sdr are never selected.
wire [DMSB:0] sdq;
wire [DMSB:0] sdr;
wire sdval = 1'b0;
wire sdbz;

//specialCaseDivider #(WID) u1 (.a(a), .b(b), .q(sdq), .r(sdr), .val(sdval), .dbz(sdbz) );

// One restoring division step, computed combinationally:
// shift the next dividend bit into the partial remainder and subtract the
// divisor when it fits. The comparison must be >= (not >): when the trial
// value equals the divisor the subtraction has to happen, otherwise exact
// quotients come out one count low and the remainder can exceed the divisor.
wire [WID:0] trial   = {rxx, q[WID*2-1]};
wire         willGo0 = trial >= b;
wire [WID:0] diff    = trial - b;
wire [DMSB:0] rx0    = willGo0 ? diff[DMSB:0] : trial[DMSB:0];

always @(posedge clk)
if (ld)
	cnt <= sdval ? 8'b10000000 : WID*2-2;
else if (!done)
	cnt <= cnt - 1;

always @(posedge clk)
if (ld) begin
	rxx <= 0;
	if (sdval)
		q <= {sdq,{WID{1'b0}}};
	else
		q <= {a,{WID{1'b0}}};
end
else if (!done) begin
	q[WID*2-1:1] <= q[WID*2-1-1:0];
	q[0] <= willGo0;
	rxx <= rx0;
end

// The remainder used to be taken from rx[2], a register that was never
// written, so r was always unknown. With restoring division the partial
// remainder is never negative and needs no correction.
assign r = sdval ? sdr : rxx;
assign done = cnt[7];

endmodule
|
/* |
module fpdivr2_tb(); |
|
reg rst; |
reg clk; |
reg ld; |
reg [6:0] cnt; |
|
wire ce = 1'b1; |
wire [23:0] a = 24'h0_4000; |
wire [23:0] b = 24'd101; |
wire [45:0] q; |
wire [23:0] r; |
wire done; |
|
initial begin |
clk = 1; |
rst = 0; |
#100 rst = 1; |
#100 rst = 0; |
end |
|
always #20 clk = ~clk; // 25 MHz |
|
always @(posedge clk) |
if (rst) |
cnt <= 0; |
else begin |
ld <= 0; |
cnt <= cnt + 1; |
if (cnt == 3) |
ld <= 1; |
$display("ld=%b q=%h r=%h done=%b", ld, q, r, done); |
end |
|
|
fpdivr2 #(24) divu0(.clk(clk), .ld(ld), .a(a), .b(b), .q(q), .r(r), .done(done) ); |
|
endmodule |
*/ |
|
|
/fpRound.v
0,0 → 1,162
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpRound.v |
// - floating point rounding unit |
// - parameterized width |
// - IEEE 754 representation |
// |
// This unit takes a normalized floating point number in an |
// expanded format and rounds it according to the IEEE-754 |
// standard. NaN's and infinities are not rounded. |
// This module has a single cycle latency. |
// |
// Mode |
// 0: round to nearest even |
// 1: round to zero (truncate) |
// 2: round towards +infinity |
// 3: round towards -infinity |
// ============================================================================ |
// |
// fpRound - combinational rounding of a normalized, expanded-format value.
//
// Parameters:
//   WID - width of the rounded result
// Ports:
//   rm  - rounding mode (0 nearest even, 1 truncate, 2 toward +inf,
//         3 toward -inf; any other value disables rounding)
//   i   - input: sign, exponent, then a mantissa of FMSB+5 bits whose three
//         lowest bits are guard, round and sticky
//   o   - rounded result {sign, exponent, mantissa}
module fpRound(rm, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB =
	WID==80 ? 14 :
	WID==64 ? 10 :
	WID==52 ? 10 :
	WID==48 ? 10 :
	WID==44 ? 10 :
	WID==42 ? 10 :
	WID==40 ?  9 :
	WID==32 ?  7 :
	WID==24 ?  6 : 4;
localparam FMSB =
	WID==80 ? 63 :
	WID==64 ? 51 :
	WID==52 ? 39 :
	WID==48 ? 35 :
	WID==44 ? 31 :
	WID==42 ? 29 :
	WID==40 ? 28 :
	WID==32 ? 22 :
	WID==24 ? 15 : 9;

input  [2:0]     rm;	// rounding mode
input  [WID+3:0] i;		// intermediate format input
output [WID-1:0] o;		// rounded output

//------------------------------------------------------------
// Field extraction
wire              so;
wire [EMSB:0]     xo;
reg  [FMSB:0]     mo;
wire [EMSB:0]     xo1  = i[EMSB+FMSB+5:FMSB+5];	// incoming exponent
wire [FMSB+4:0]   mo1  = i[FMSB+4:0];			// incoming mantissa + GRS
wire              xInf = &xo1;					// exponent all ones
wire              dn   = !(|xo1);				// exponent all zeros
assign o = {so,xo,mo};

wire g = i[2];	// guard bit
wire r = i[1];	// rounding bit
wire s = i[0];	// sticky bit
reg rnd;		// 1 = add one at the guard position

// Decide whether to round up. An all-ones exponent (infinity / NaN)
// or an unlisted mode falls into the default and never rounds.
always @*
	case ({xInf,rm})
	4'b0000:	rnd = r & (g | s);		// nearest even
	4'b0001:	rnd = 1'b0;				// truncate
	4'b0010:	rnd = (r | s) & ~so;	// toward +infinity
	4'b0011:	rnd = (r | s) & so;		// toward -infinity
	default:	rnd = 1'b0;
	endcase

// Exponent and mantissa (sticky and round bits dropped) share one adder,
// so a mantissa overflow ripples straight into the exponent.
wire [MSB+2:0] rounded = {xo1,mo1[FMSB+4:2]} + rnd;
// The leading mantissa bit was set before the add and is clear after it:
// the increment carried out of the mantissa.
wire carry = mo1[FMSB+4] & !rounded[FMSB+2];

assign so = i[WID+3];
assign xo = rounded[MSB+2:FMSB+3];

// Pick the mantissa field of the result.
always @*
	casex({rnd,&xo,carry,dn})
	// no rounding: take the input mantissa directly
	4'b0xx0:	mo = mo1[FMSB+3:3];		// normalized => hide MSB
	4'b0xx1:	mo = mo1[FMSB+4:4];		// denormalized
	// rounded, exponent field not all ones
	4'b1000:	mo = rounded[FMSB+1:1];	// no carry, normalized => hide MSB
	4'b1001,							// no carry, denormalized => retain MSB
	4'b1010,							// carry, was normalized
	4'b1011:	mo = rounded[FMSB+2:2];	// carry, was denormalized
	// rounded up into an all-ones exponent: result is infinity
	4'b11xx:	mo = 0;
	endcase

endmodule
|
|
// Round and register the output |
|
// fpRoundReg - fpRound followed by an output register.
//
// Parameters:
//   WID - width of the rounded result
// Ports:
//   clk - clock
//   ce  - clock enable for the output register
//   rm  - rounding mode, passed straight to fpRound
//   i   - expanded format input, passed straight to fpRound
//   o   - rounded result, registered on clk when ce is high
//
// The rm and i ports were each one bit narrower than the fpRound ports they
// feed (rm[1:0] / i[WID+2:0]). That dropped i[WID+3], which fpRound uses as
// the sign, so every result came out positive. Both ports now match fpRound;
// narrower connections are still accepted and zero extended as before.
module fpRoundReg(clk, ce, rm, i, o);
parameter WID = 32;

input clk;
input ce;
input [2:0] rm;			// rounding mode
input [WID+3:0] i;		// expanded format input
output reg [WID-1:0] o;	// rounded output

wire [WID-1:0] o1;		// combinational result from the rounder
fpRound #(WID) u1 (.rm(rm), .i(i), .o(o1) );

always @(posedge clk)
	if (ce)
		o <= o1;

endmodule
|
// Static stimulus for fpRound at its default width. All three instances use
// rounding mode 1 (truncate); results appear on o1..o3.
module fpRound_tb();

wire [31:0] o1,o2,o3,o4,o5,o6;

// zero for zero
fpRound u1 (
	.rm(3'd1),
	.i(36'h0),
	.o(o1)
);

fpRound u2 (
	.rm(3'd1),
	.i(36'h444444444),
	.o(o2)
);

fpRound u3 (
	.rm(3'd1),
	.i(36'h444444444),
	.o(o3)
);

endmodule
/fpMul.v
0,0 → 1,239
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpMul.v |
// - floating point multiplier |
// - two cycle latency |
// - can issue every clock cycle |
// - parameterized width |
// - IEEE 754 representation |
// |
// Floating Point Multiplier |
// |
// Properties: |
// +-inf * +-inf = -+inf (this is handled by exOver) |
// +-inf * 0 = QNaN |
// |
// ============================================================================ |
// |
// fpMul - pipelined floating point multiplier.
//
// Parameters:
//   WID - operand width; the fraction multiplier is only generated for
//         WID==64 and WID==32 (fract1 is left undriven for other widths)
// Ports:
//   clk, ce   - clock and clock enable
//   a, b      - operands
//   o         - expanded format product {sign, exponent, mantissa}
//   sign_exe  - delayed (sa & sb)
//   inf       - delayed exponent overflow flag (same source as overflow)
//   overflow  - delayed exponent overflow flag
//   underflow - delayed exponent underflow flag
//
// NOTE(review): NaN operands are not detected here (only the .inf output of
// fpDecompose is used), so a NaN input is presumably handled like a large
// finite value - confirm against the intended behaviour.
// NOTE(review): the fraction path is registered in the partial products, in
// fract1, in delay2 and in mo1, while the sign uses delay3; the alignment of
// the fields of 'o' depends on the delayN modules, which are not visible here.
module fpMul (clk, ce, a, b, o, sign_exe, inf, overflow, underflow);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB =
	WID==80 ? 14 :
	WID==64 ? 10 :
	WID==52 ? 10 :
	WID==48 ? 10 :
	WID==44 ? 10 :
	WID==42 ? 10 :
	WID==40 ?  9 :
	WID==32 ?  7 :
	WID==24 ?  6 : 4;
localparam FMSB =
	WID==80 ? 63 :
	WID==64 ? 51 :
	WID==52 ? 39 :
	WID==48 ? 35 :
	WID==44 ? 31 :
	WID==42 ? 29 :
	WID==40 ? 28 :
	WID==32 ? 22 :
	WID==24 ? 15 : 9;

localparam WX = 3;
localparam FX = (FMSB+1)*2-1;	// the MSB of the expanded fraction
localparam EX = FX + WX + EMSB + 1;

input clk;
input ce;
input  [WID:1] a, b;
output [EX+1:0] o;
output sign_exe;
output inf;
output overflow;
output underflow;

reg [EMSB:0] xo1;		// output exponent
reg [FX+WX:0] mo1;		// output mantissa

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};	// infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h3ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};	//2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};

// variables
reg [FX+WX:0] fract1;
wire [FX+WX:0] fracto;
wire [EMSB+2:0] ex1;	// sum of exponents
wire [EMSB  :0] ex2;

// Decompose the operands
wire sa, sb;			// sign bit
wire [EMSB:0] xa, xb;	// exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;		// a/b is denormalized
wire az, bz;
wire aInf, bInf, aInf1, bInf1;


// -----------------------------------------------------------
// First clock
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecompose #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) );
fpDecompose #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) );

// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract the bias)
// A zero operand forces the sum to zero.
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;

// Fraction product, built from 18 bit wide partial products that are
// registered and then summed with their positional weights.
generate
if (WID==64) begin
reg [35:0] p00,p01,p02;
reg [35:0] p10,p11,p12;
reg [35:0] p20,p21,p22;
always @(posedge clk)
if (ce) begin
	p00 <= fracta[17: 0] * fractb[17: 0];
	p01 <= fracta[35:18] * fractb[17: 0];
	p02 <= fracta[52:36] * fractb[17: 0];
	p10 <= fracta[17: 0] * fractb[35:18];
	p11 <= fracta[35:18] * fractb[35:18];
	p12 <= fracta[52:36] * fractb[35:18];
	p20 <= fracta[17: 0] * fractb[52:36];
	p21 <= fracta[35:18] * fractb[52:36];
	p22 <= fracta[52:36] * fractb[52:36];
	fract1 <=                                  {p02,36'b0} + {p01,18'b0} + p00 +
	                            {p12,54'b0} + {p11,36'b0} + {p10,18'b0} +
	              {p22,72'b0} + {p21,54'b0} + {p20,36'b0}
	        ;
end
end
else if (WID==32) begin
reg [35:0] p00,p01;
reg [35:0] p10,p11;
always @(posedge clk)
if (ce) begin
	p00 <= fracta[17: 0] * fractb[17: 0];
	p01 <= fracta[23:18] * fractb[17: 0];
	p10 <= fracta[17: 0] * fractb[23:18];
	p11 <= fracta[23:18] * fractb[23:18];
	fract1 <= {p11,p00} + {p01,18'b0} + {p10,18'b0};
end
end
endgenerate

// Status
wire under1, over1;
wire under = ex1[EMSB+2];			// exponent underflow (sum went negative)
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );
delay2 #(FX+WX+1) u4 (.clk(clk), .ce(ce), .i(fract1), .o(fracto) );
delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay2 u6  (.clk(clk), .ce(ce), .i(under), .o(under1) );
delay2 u7  (.clk(clk), .ce(ce), .i(over), .o(over1) );

// determine when a NaN is output
wire qNaNOut;
delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) );


// -----------------------------------------------------------
// Second clock
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------

wire so1;
delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );

// Nonblocking assignments are used in the clocked blocks below so that
// logic sampling 'o' on the same clock edge sees the old value regardless
// of simulator scheduling order (the original used blocking assignments).
always @(posedge clk)
	if (ce)
		casex({qNaNOut,aInf1,bInf1,over1,under1})
		5'b1xxxx:	xo1 <= infXp;	// qNaN - infinity * zero
		5'b01xxx:	xo1 <= infXp;	// 'a' infinite
		5'b001xx:	xo1 <= infXp;	// 'b' infinite
		5'b0001x:	xo1 <= infXp;	// result overflow
		5'b00001:	xo1 <= 0;		// underflow
		default:	xo1 <= ex2[EMSB:0];	// situation normal
		endcase

always @(posedge clk)
	if (ce)
		casex({qNaNOut,aInf1,bInf1,over1})
		4'b1xxx:	mo1 <= {1'b0,qNaN|3'd4,{FMSB+1{1'b0}}};	// multiply inf * zero
		4'b01xx:	mo1 <= 0;	// mul inf's
		4'b001x:	mo1 <= 0;	// mul inf's
		4'b0001:	mo1 <= 0;	// mul overflow
		default:	mo1 <= fracto;
		endcase

delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );
delay1 u11 (.clk(clk), .ce(ce), .i(over1),  .o(overflow) );
delay1 u12 (.clk(clk), .ce(ce), .i(over1),  .o(inf) );
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );

assign o = {so1,xo1,mo1};

endmodule
|
// Test harness for fpMul: five multipliers with constant operands, three of
// which are followed by fpNormalize and fpRound.
//
// Fixes relative to the original harness:
//  - u3..u5 drove the same status nets as u2 (sgnx2/inf2/of2/uf2), giving
//    multiple drivers; every instance now has its own set.
//  - the product wires were 58 bits wide, but for WID=32 the 'o' port of
//    fpMul (and the 'i' port of fpNormalize) is EX+2 = 60 bits wide, so the
//    sign and the top exponent bit were being cut off.
module fpMul_tb();
reg clk;
wire ce = 1'b1;
wire sgnx1,sgnx2,sgnx3,sgnx4,sgnx5,sgnx6;
wire inf1,inf2,inf3,inf4,inf5,inf6;
wire of1,of2,of3,of4,of5,of6;
wire uf1,uf2,uf3,uf4,uf5,uf6;
wire [59:0] o1,o2,o3,o4,o5,o6;	// fpMul expanded format products
wire [35:0] o11,o12,o13;		// normalized
wire [31:0] o21,o22,o23;		// rounded

initial begin
	clk = 0;
end
always #10 clk <= ~clk;

// 0*0
fpMul u1 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1));
fpMul u2 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o2), .sign_exe(sgnx2), .inf(inf2), .overflow(of2), .underflow(uf2));
// 10x10
fpMul u3 (.clk(clk), .ce(1'b1), .a(32'h41200000), .b(32'h41200000), .o(o3), .sign_exe(sgnx3), .inf(inf3), .overflow(of3), .underflow(uf3));
// 21*-17
fpMul u4 (.clk(clk), .ce(1'b1), .a(32'h41a80000), .b(32'hc1880000), .o(o4), .sign_exe(sgnx4), .inf(inf4), .overflow(of4), .underflow(uf4));
// -17*-15
fpMul u5 (.clk(clk), .ce(1'b1), .a(32'hc1880000), .b(32'hc1700000), .o(o5), .sign_exe(sgnx5), .inf(inf5), .overflow(of5), .underflow(uf5));

// Normalize the three non-trivial products ('under' held low)
fpNormalize u11 (clk, ce, 1'b0, o3, o11);
fpNormalize u12 (clk, ce, 1'b0, o4, o12);
fpNormalize u13 (clk, ce, 1'b0, o5, o13);

// Round with mode 1 (truncate)
fpRound u21 (3'd1, o11, o21);
fpRound u22 (3'd1, o12, o22);
fpRound u23 (3'd1, o13, o23);

endmodule
/fpDecompose.v
0,0 → 1,81
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpDecompose.v |
// - decompose floating point value |
// - parameterized width |
// - IEEE 754 representation |
// |
// ============================================================================ |
// |
// fpDecompose - split a floating point word into its fields and classify it.
//
// Parameters:
//   WID   - total width of the input word
// Ports:
//   i     - floating point word
//   sgn   - sign bit
//   exp   - exponent field
//   man   - mantissa field as stored
//   fract - mantissa with the hidden bit prepended (0 when exponent is zero)
//   xz    - exponent field is zero (denormal or zero)
//   mz    - mantissa field is zero
//   vz    - value is zero (xz and mz)
//   inf   - exponent all ones and mantissa zero
//   xinf  - exponent all ones
//   qnan  - exponent all ones and mantissa MSB set
//   snan  - exponent all ones, mantissa MSB clear, mantissa non-zero
//   nan   - exponent all ones and mantissa non-zero
module fpDecompose(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
parameter WID=32;
localparam MSB = WID-1;
localparam EMSB =
	WID==80 ? 14 :
	WID==64 ? 10 :
	WID==52 ? 10 :
	WID==48 ? 10 :
	WID==44 ? 10 :
	WID==42 ? 10 :
	WID==40 ?  9 :
	WID==32 ?  7 :
	WID==24 ?  6 : 4;
localparam FMSB =
	WID==80 ? 63 :
	WID==64 ? 51 :
	WID==52 ? 39 :
	WID==48 ? 35 :
	WID==44 ? 31 :
	WID==42 ? 29 :
	WID==40 ? 28 :
	WID==32 ? 22 :
	WID==24 ? 15 : 9;

input  [MSB:0]    i;

output            sgn;
output [EMSB:0]   exp;
output [FMSB:0]   man;
output [FMSB+1:0] fract;
output            xz;
output            mz;
output            vz;
output            inf;
output            xinf;
output            qnan;
output            snan;
output            nan;

// Field extraction
assign sgn   = i[MSB];
assign exp   = i[MSB-1:FMSB+1];
assign man   = i[FMSB:0];
assign fract = {~xz, i[FMSB:0]};

// Classification
assign xz    = ~(|exp);
assign mz    = ~(|man);
assign xinf  = &exp;
assign vz    = xz & mz;
assign inf   = (&exp) & mz;
assign nan   = (&exp) & ~mz;
assign qnan  = (&exp) & man[FMSB];
assign snan  = (&exp) & ~man[FMSB] & ~mz;

endmodule
|
|
/fpNormalize.v
0,0 → 1,208
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpNormalize.v |
// - floating point normalization unit |
// - two cycle latency |
// - parameterized width |
// |
// This unit takes a floating point number in an intermediate |
// format and normalizes it. No normalization occurs |
// for NaN's or infinities. The unit has a two cycle latency. |
// |
// The mantissa is assumed to start with three whole bits on |
// the left. The remaining bits are fractional. The three whole bits |
// result from a MAC (multiply accumulate) operation. The result from |
// a MAC can vary from 0 to 8 which requires three whole digits. |
// |
// The width of the incoming format is reduced via a generation |
// of sticky bit in place of the low order fractional bits. |
// |
// On an underflowed input, the incoming exponent is assumed |
// to be negative. A right shift is needed. |
// ============================================================================ |
// |
// fpNormalize - normalize an expanded format floating point value.
//
// Parameters:
//   WID   - width of the target floating point format
// Ports:
//   clk   - clock
//   ce    - clock enable
//   under - input exponent has underflowed
//   i     - expanded format input {sign, exponent, mantissa[FX+WX:0]}
//   o     - {sign, exponent, mantissa} with guard, round and sticky as
//           the three lowest mantissa bits
module fpNormalize(clk, ce, under, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB =
	WID==80 ? 14 :
	WID==64 ? 10 :
	WID==52 ? 10 :
	WID==48 ? 10 :
	WID==44 ? 10 :
	WID==42 ? 10 :
	WID==40 ?  9 :
	WID==32 ?  7 :
	WID==24 ?  6 : 4;
localparam FMSB =
	WID==80 ? 63 :
	WID==64 ? 51 :
	WID==52 ? 39 :
	WID==48 ? 35 :
	WID==44 ? 31 :
	WID==42 ? 29 :
	WID==40 ? 28 :
	WID==32 ? 22 :
	WID==24 ? 15 : 9;

localparam WX = 3;						// whole digits in the incoming mantissa
localparam FX = (FMSB+1)*2-1;			// MSB of the expanded fraction
localparam EX = FX + WX + EMSB + 1;		// MSB of the exponent field

input              clk;
input              ce;
input              under;
input  [EX+1:0]    i;		// expanded format input
output [WID+3:0]   o;		// normalized output + guard, round and sticky bits

// exponent pattern for infinity / NaN
wire [EMSB:0] infXp = {EMSB+1{1'b1}};

//------------------------------------------------------------
// Sign: passed through unchanged (delayed further down)
wire so;
wire so1 = i[EX+1];

//------------------------------------------------------------
// Exponent pre-adjustment.
// Two is added to the incoming exponent to account for the whole digits
// carried in the mantissa; xo1b has one extra bit to catch the carry.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX:FX+WX+1];

wire incExp2 = i[FX+WX-1]|i[FX+WX-2];
wire [EMSB+1:0] xo1b = xo1a + 2;
wire [EMSB:0] xo1;
wire [EMSB:0] xo2;
wire xInf1a = &xo1a[EMSB:0];

// under & carry     : the value became normal again, drop the carry bit
// already all ones  : stay at infinity (when not underflowed)
// carry             : overflowed to infinity
// otherwise         : keep the adjusted exponent
assign xo1 =
	(under & xo1b[EMSB+1]) ? xo1b[EMSB:0] :
	((xInf1a & !under)     ? infXp :
	 (xo1b[EMSB+1]         ? infXp : xo1b));
wire xInf = &xo1 & !under;
wire under1 = under & !xo1b[EMSB+1];	// still underflowed after the adjustment

//------------------------------------------------------------
// Mantissa, forced to zero when the result is infinite
wire [FMSB+1+3:0] mo;		// GRS + 1 whole digit
wire [FX+WX:0] mo1 = xInf & incExp2 ? 0 :
	i[FX+WX:0];
wire [FX+WX:0] mo2;
wire [7:0] leadingZeros2;

// Left align the mantissa in the leading zero counter input by padding
// with trailing zeros; this is a constant shift.
generate
begin
if (WID==64) begin
	wire [127:0] mo1a = {mo1,{127-(FX+3){1'b0}}};
	cntlz128Reg clz0 (.clk(clk), .ce(ce), .i(mo1a), .o(leadingZeros2) );
end
else begin	// 32 bits
	wire [63:0] mo1a = {mo1,{63-(FX+3){1'b0}}};
	cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1a), .o(leadingZeros2) );
	assign leadingZeros2[7] = 1'b0;
end
end
endgenerate

//------------------------------------------------------------
// Delay the remaining signals alongside the leading zero counter
wire xInf2;
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );
delay1 #(1) d3(.clk(clk), .ce(ce), .i(xInf), .o(xInf2) );

// Shift direction: 0 = left, 1 = right. A still-underflowed exponent
// selects a right shift.
wire rightOrLeft2;
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under1), .o(rightOrLeft2) );

// Left shift amount: the leading zero count, limited to the exponent value
// so the exponent is never decremented below zero.
wire [7:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;

// Right shift amount: the negated exponent, capped at FMSB+WX+1;
// no shift at all for an infinite exponent.
wire [EMSB:0] nxo2 = -xo2;
wire [7:0] rshiftAmt2 = xInf2 ? 0 : nxo2 > FMSB+WX ? FMSB+WX+1 : nxo2;

//------------------------------------------------------------
// Outputs

// sign
delay1 #(1) d7(.clk(clk), .ce(ce), .i(so1), .o(so) );

// exponent
assign xo =
	xInf2        ? xo2 :			// infinity / NaN exponent is left alone
	rightOrLeft2 ? 0 :				// right shift: exponent becomes zero
	               xo2 - lshiftAmt2;	// left shift: reduce by the shift amount

// mantissa
delay1 #(FX+WX+1) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );

wire [FX+WX:0] mo2a;
assign mo2a = rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2;

// Reduce the low order bits to guard, round and sticky
wire gbit = mo2a[FMSB+3];
wire rbit = mo2a[FMSB+2];
wire sbit = |mo2a[FMSB+1:0];
assign mo = {mo2a[FX+WX:FMSB+3],gbit,rbit,sbit};

assign o = {so,xo,mo};

endmodule
|
// Static stimulus for fpNormalize (WID=32); results appear on o1..o6.
//
// NOTE(review): the stimulus constants are sized as 58 bits, but with the
// localparams in fpNormalize the 'i' port is EX+2 = 60 bits wide for WID=32,
// so each constant is zero extended and its fields may not land where the
// per-line remarks suggest - confirm the intended bit layout.
module fpNormalize_tb();
reg clk;
wire [35:0] o1,o2,o3,o4,o5,o6;

initial begin
	clk = 0;
end

always #10 clk = ~clk;

// zero should result in a zero
fpNormalize #(32) u1 (
	.clk(clk), .ce(1'b1), .under(1'b0),
	.i(58'h0),
	.o(o1)
);

// NaN should be a NaN
fpNormalize #(32) u2 (
	.clk(clk), .ce(1'b1), .under(1'b0),
	.i(58'h1FE123456781234),
	.o(o2)
);

// denormal should be denormal
fpNormalize #(32) u3 (
	.clk(clk), .ce(1'b1), .under(1'b1),
	.i(58'h000001234567890),
	.o(o3)
);

// denormal should be denormal (underflow exp is neg)
fpNormalize #(32) u4 (
	.clk(clk), .ce(1'b1), .under(1'b1),
	.i(58'h1F0001234567890),
	.o(o4)
);

// the value 4
fpNormalize #(32) u5 (
	.clk(clk), .ce(1'b1), .under(1'b0),
	.i(58'h0FF000000000000),
	.o(o5)
);

// the value 100
fpNormalize #(32) u6 (
	.clk(clk), .ce(1'b1), .under(1'b0),
	.i(58'h104900000000000),
	.o(o6)
);

endmodule
/fpAddsub.v
0,0 → 1,243
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpAddsub.v |
// - floating point adder/subtracter |
// - two cycle latency |
// - can issue every clock cycle |
// - parameterized width |
// - IEEE 754 representation |
// |
// This adder/subtractor handles denormalized numbers. |
// It has a two cycle latency. |
// The output format is of an internal expanded representation |
// in preparation to be fed into a normalization unit, then |
// rounding. Basically, it's the same as the regular format |
// except the mantissa is doubled in size, the leading two |
// bits of which are assumed to be whole bits. |
// ============================================================================ |
// |
// fpAddsub
//
// Pipelined floating point adder/subtracter. The result is emitted in the
// expanded, not-yet-normalized format {sign, exponent, mantissa}, where the
// mantissa is FX+WX+1 bits wide.
//
// Parameters:
//   WID   total operand width in bits; selects the exponent (EMSB) and
//         fraction (FMSB) field sizes from the tables below
//
// Ports:
//   clk   clock
//   ce    clock enable, passed to every pipeline register instance
//   rm    rounding mode; only the comparison rm==3 is used here, to pick the
//         sign of an exactly-zero result
//   op    0 = add, 1 = subtract
//   a, b  operands, WID bits each
//   o     {so, xo, mo}
//
// Pipeline: the sign and exponent pass through delay2 instances; the mantissa
// path is registered once before the add/subtract (delay1) and once after it
// (delay1), matching the two cycle latency stated in the file header.
module fpAddsub(clk, ce, rm, op, a, b, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam WX = 3;
localparam FX = (FMSB+1)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + WX + EMSB + 1;

input clk;              // system clock
input ce;               // core clock enable
input [2:0] rm;         // rounding mode
input op;               // operation 0 = add, 1 = subtract
input [WID-1:0] a;      // operand a
input [WID-1:0] b;      // operand b
output [EX+1:0] o;      // output

// variables
wire so;                // sign output
wire [EMSB:0] xo;       // de normalized exponent output
reg  [EMSB:0] xo1;      // exponent before the output delay
wire [FX+WX:0] mo;      // mantissa output
reg  [FX+WX:0] mo1;     // mantissa before the output register

// There's an extra bit output in the mantissa to allow for three whole
// digits which the normalizer uses.
assign o = {so,xo,mo};

// operands sign,exponent,mantissa
wire sa, sb;
wire [EMSB:0] xa, xb;
wire [FMSB:0] ma, mb;
wire [FMSB+1:0] fracta, fractb;
wire [FMSB+1:0] fracta1, fractb1;

// which has greater magnitude ? Used for sign calc
wire xa_gt_xb = xa > xb;
wire xa_gt_xb1;
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb);
wire a_gt_b1;
wire az, bz;            // operand a,b is zero

wire adn, bdn;          // a,b denormalized ?
wire xaInf, xbInf;
wire aInf, bInf, aInf1, bInf1;
wire aNan, bNan, aNan1, bNan1;

// Exponents compensated for denormalized numbers: ORing in the denormal flag
// turns a zero exponent into one.
wire [EMSB:0] xad = xa|adn;
wire [EMSB:0] xbd = xb|bdn;

fpDecompose #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
fpDecompose #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );

// Figure out which operation is really needed, an add or a subtract.
// If the signs are the same, use the original op, otherwise flip it.
//  a +  b = add,+
//  a + -b = sub, sign of larger
// -a +  b = sub, sign of larger
// -a + -b = add,-
//  a -  b = sub, sign of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, sign of larger
wire realOp = op ^ sa ^ sb;
wire realOp1;

// Find out if the result will be zero.
wire resZero = (realOp && xa==xb && ma==mb) ||  // subtract, same magnitude
               (az & bz);                       // both a,b zero

// Compute output exponent
//
// The output exponent is the larger of the two exponents, unless a subtract
// operation is in progress and the two numbers are equal, in which case the
// exponent is zero. When both xinf flags are set, a's exponent is passed
// through unchanged, ahead of the zero-result check.
always @*
    xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb;

// Compute output sign.
// All sixteen selector values are listed, so no synthesis pragma is needed.
// Blocking assignments are used because this is combinational logic.
reg so1;
always @*
    case ({resZero,sa,op,sb})
    4'b0000: so1 = 0;           // + + + = +
    4'b0001: so1 = !a_gt_b;     // + + - = sign of larger
    4'b0010: so1 = !a_gt_b;     // + - + = sign of larger
    4'b0011: so1 = 0;           // + - - = +
    4'b0100: so1 = a_gt_b;      // - + + = sign of larger
    4'b0101: so1 = 1;           // - + - = -
    4'b0110: so1 = 1;           // - - + = -
    4'b0111: so1 = a_gt_b;      // - - - = sign of larger
    4'b1000: so1 = 0;           //  A +  B, sign = +
    4'b1001: so1 = rm==3;       //  A + -B, sign = + unless rounding down
    4'b1010: so1 = rm==3;       //  A -  B, sign = + unless rounding down
    4'b1011: so1 = 0;           // +A - -B, sign = +
    4'b1100: so1 = rm==3;       // -A +  B, sign = + unless rounding down
    4'b1101: so1 = 1;           // -A + -B, sign = -
    4'b1110: so1 = 1;           // -A - +B, sign = -
    4'b1111: so1 = rm==3;       // -A - -B, sign = + unless rounding down
    endcase

delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );
delay2 #(1)      d2(.clk(clk), .ce(ce), .i(so1), .o(so) );

// Compute the difference in exponents, provides shift amount.
// The amount is clamped to FMSB+3 before being narrowed to seven bits.
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;
wire [6:0] xdif1;

// determine which fraction to denormalize
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;
wire [FMSB+1:0] mfs1;

// Determine the sticky bit
wire sticky, sticky1;
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );

// register inputs to shifter and shift
delay1 #(1)      d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );
delay1 #(7)      d15(.clk(clk), .ce(ce), .i(xdif), .o(xdif1) );
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs), .o(mfs1) );

// Aligned smaller operand; the sticky bit is ORed into bit 0.
wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1;

// sync control signals
delay1 #(1)      d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );
delay1 #(1)      d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) );
delay1 #(1)      d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) );
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) );
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) );
delay1 #(1)      d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay1 #(1)      d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay1 #(1)      d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );
delay1 #(1)      d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );

// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
wire [FMSB+3:0] oa  = xa_gt_xb1 ? {fracta1,2'b0} : md1;
wire [FMSB+3:0] ob  = xa_gt_xb1 ? md1 : {fractb1,2'b0};
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;

// Select the mantissa, special cases first.
// For two infinities the decision uses the *effective* operation (realOp1),
// not the requested one: inf - inf and inf + -inf are invalid and produce a
// QNaN pattern, while inf + inf and inf - -inf leave the mantissa zero so
// the result stays infinity.
always @*
    casez ({aInf1&bInf1,aNan1,bNan1})
    3'b1??:  mo1 = {1'b0,realOp1,{FMSB-1{1'b0}},realOp1,{FMSB{1'b0}}};
    3'b?1?:  mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};    // a is NaN: pass a's fraction
    3'b??1:  mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};    // b is NaN: pass b's fraction
    default: mo1 = {mab,{FMSB-1{1'b0}}};                     // mab has an extra lead bit
    endcase

delay1 #(FX+WX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );

endmodule
|
// fpAddsub_tb
//
// Stimulus-only test bench for fpAddsub at the default WID of 32.
// Five adders are fed constant operands; the last three results are passed
// on to fpNormalize and then fpRound. Nothing is checked automatically and
// the simulation is never stopped from here; results are meant to be
// inspected in a waveform viewer.
module fpAddsub_tb();

reg         clk;
wire        ce;
wire [2:0]  rm;
wire [57:0] o1,o2,o3,o4,o5,o6;  // raw adder outputs
wire [35:0] o11,o12,o13;        // normalizer outputs
wire [31:0] o21,o22,o23;        // rounder outputs

assign ce = 1'b1;               // always enabled
assign rm = 3'b0;               // rounding mode held at zero

// Free running clock with a period of 20 time units.
initial
    clk = 1'b0;
always
    #10 clk = ~clk;

// Units under test.
fpAddsub u1 (.clk(clk), .ce(ce), .rm(rm), .op(1'b0), .a(32'h0),        .b(32'h0),        .o(o1));  // zero plus zero
fpAddsub u2 (.clk(clk), .ce(ce), .rm(rm), .op(1'b1), .a(32'h0),        .b(32'h0),        .o(o2));  // zero minus zero
fpAddsub u3 (.clk(clk), .ce(ce), .rm(rm), .op(1'b0), .a(32'h3F000000), .b(32'h3F000000), .o(o3));  // .5 + .5
fpAddsub u4 (.clk(clk), .ce(ce), .rm(rm), .op(1'b0), .a(32'h43520000), .b(32'h41700000), .o(o4));  // 210+15
fpAddsub u5 (.clk(clk), .ce(ce), .rm(rm), .op(1'b1), .a(32'hC3520000), .b(32'hC1700000), .o(o5));  // -210- -15

// Normalize the results of u3..u5.
fpNormalize u11 (clk, ce, 1'b0, o3, o11);
fpNormalize u12 (clk, ce, 1'b0, o4, o12);
fpNormalize u13 (clk, ce, 1'b0, o5, o13);

// Round the normalized results.
fpRound u21 (3'd1, o11, o21);   // from u3
fpRound u22 (3'd1, o12, o22);   // from u4
fpRound u23 (3'd1, o13, o23);   // from u5

endmodule
|
/fpDiv.v
0,0 → 1,194
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpDiv.v |
// - floating point divider |
// - parameterized width |
// - IEEE 754 representation |
// |
// Floating Point Divider |
// |
// Properties: |
// +-0 / +-0 = QNaN |
// |
// ============================================================================ |
// |
// fpDiv
//
// Floating point divider. The quotient is emitted in the expanded,
// not-yet-normalized format {sign, exponent, mantissa}.
//
// Parameters:
//   WID        total operand width in bits; selects the exponent (EMSB) and
//              fraction (FMSB) field sizes from the tables below
//
// Ports:
//   clk        clock
//   ce         clock enable for the output registers (it is not connected
//              to the fpdivr2 instance below)
//   ld         load/start strobe passed to the fraction divider
//   a, b       dividend and divisor
//   o          {so, xo, mo}; updated on a clock edge where ce and done are set
//   done       completion flag driven by the fraction divider
//   sign_exe   registered; set when both operands are negative
//   overflow   registered exponent overflow flag
//   underflow  registered exponent underflow flag
module fpDiv(clk, ce, ld, a, b, o, done, sign_exe, overflow, underflow);

parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam WX = 3;
localparam FX = (FMSB+1)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + WX + EMSB + 1;

input clk;
input ce;
input ld;
input [MSB:0] a, b;
output [EX+1:0] o;
output done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe;
reg overflow;
reg underflow;

reg so;
reg [EMSB:0] xo;
reg [FX+WX:0] mo;
assign o = {so,xo,mo};

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};       // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h3ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};   // 2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}};

// variables
wire [EMSB+2:0] ex1;    // difference of exponents, rebiased
wire [FX+WX:0] divo;

// Operands
wire sa, sb;            // sign bit
wire [EMSB:0] xa, xb;   // exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;        // a/b is denormalized
wire az, bz;
wire aInf, bInf;

// -----------------------------------------------------------
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecompose #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) );
fpDecompose #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) );

// Compute the exponent.
// - correct the exponent for denormalized operands
// - adjust the difference by the bias (add 127)
// - also factor in the different decimal position for division
assign ex1 = (xa|a_dn) - (xb|b_dn) + bias + FMSB + 1;

// check for exponent underflow/overflow
wire under = ex1[EMSB+2];   // MSB set = negative exponent
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// Perform divide
// could take either 1 or 16 clock cycles
fpdivr2 #(FMSB+2) u2 (.clk(clk), .ld(ld), .a(fracta), .b(fractb), .q(divo[(FMSB+1)*2-1:0]), .r(), .done(done));
assign divo[FX+WX:(FMSB+1)*2] = 0;  // the extra whole bits are always zero

// determine when a NaN is output: 0/0 or inf/inf
wire qNaNOut = (az&bz)|(aInf&bInf);

// Output registers, loaded when the divider reports done.
// Non-blocking assignments are used so that logic sampling these registers
// on the same clock edge sees the previous value without a simulation race.
always @(posedge clk)
    if (ce) begin
        if (done) begin
            casez ({qNaNOut,bInf,bz})
            3'b1??:  xo <= infXp;           // NaN exponent value
            3'b?1?:  xo <= 0;               // divide by inf
            3'b??1:  xo <= infXp;           // divide by zero
            default: xo <= ex1[EMSB:0];     // normal or underflow: passthru neg. exp. for normalization
            endcase

            casez ({qNaNOut,bInf,bz})
            // Quiet NaN; the two low bits record the cause:
            // {aInf,1'b0} marks inf/inf, {az,bz} marks 0/0.
            3'b1??:  mo <= {1'b0,qNaN[FMSB:0]|{aInf,1'b0}|{az,bz},{FMSB+1{1'b0}}};
            3'b?1?:  mo <= 0;               // div by inf
            3'b??1:  mo <= 0;               // div by zero
            default: mo <= divo;            // plain div
            endcase

            so        <= sa ^ sb;
            sign_exe  <= sa & sb;
            overflow  <= over;
            underflow <= under;
        end
    end

endmodule
|
// fpDiv_tb
//
// Stimulus-only test bench for fpDiv at the default WID of 32.
// Five dividers are fed constant operands and started by a single ld pulse;
// the last three quotients are passed on to fpNormalize and then fpRound.
// Nothing is checked automatically and the simulation is never stopped from
// here; results are meant to be inspected in a waveform viewer.
//
// Every divider instance has its own status wires. Sharing one set between
// several instances would put multiple drivers on the same nets.
module fpDiv_tb();
reg clk;
reg ld;
wire ce = 1'b1;
wire sgnx1,sgnx2,sgnx3,sgnx4,sgnx5,sgnx6;
wire inf1,inf2,inf3,inf4,inf5,inf6;
wire of1,of2,of3,of4,of5,of6;
wire uf1,uf2,uf3,uf4,uf5,uf6;
wire [57:0] o1,o2,o3,o4,o5,o6;
wire [35:0] o11,o12,o13;
wire [31:0] o21,o22,o23;
wire done0,done1,done2,done3,done4,done5,done6;

// Clock starts low; ld is raised at time 20 and dropped again at time 60.
initial begin
    clk = 0;
    ld = 0;
    #20 ld = 1;
    #40 ld = 0;
end
always #10 clk = ~clk;

// 0/0
fpDiv u1 (.clk(clk), .ce(1'b1), .ld(ld), .a(0), .b(0), .o(o1), .done(done1), .sign_exe(sgnx1), .overflow(of1), .underflow(uf1));
fpDiv u2 (.clk(clk), .ce(1'b1), .ld(ld), .a(0), .b(0), .o(o2), .done(done2), .sign_exe(sgnx2), .overflow(of2), .underflow(uf2));
// 10/10
fpDiv u3 (.clk(clk), .ce(1'b1), .ld(ld), .a(32'h41200000), .b(32'h41200000), .done(done3), .o(o3), .sign_exe(sgnx3), .overflow(of3), .underflow(uf3));
// 21/-17
fpDiv u4 (.clk(clk), .ce(1'b1), .ld(ld), .a(32'h41a80000), .b(32'hc1880000), .done(done4), .o(o4), .sign_exe(sgnx4), .overflow(of4), .underflow(uf4));
// -17/-15
fpDiv u5 (.clk(clk), .ce(1'b1), .ld(ld), .a(32'hc1880000), .b(32'hc1700000), .done(done5), .o(o5), .sign_exe(sgnx5), .overflow(of5), .underflow(uf5));

// Normalize the quotients of u3..u5.
fpNormalize u11 (clk, ce, 1'b0, o3, o11);
fpNormalize u12 (clk, ce, 1'b0, o4, o12);
fpNormalize u13 (clk, ce, 1'b0, o5, o13);

// Round the normalized quotients.
fpRound u21 (3'd1, o11, o21);   // 10/10
fpRound u22 (3'd1, o12, o22);   // 21/-17
fpRound u23 (3'd1, o13, o23);   // -17/-15

endmodule