URL
https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor
Compare Revisions
- This comparison shows the changes necessary to convert path
/thor/trunk/rtl
- from Rev 3 to Rev 6
- ↔ Reverse comparison
Rev 3 → Rev 6
/verilog/fpUnit/fpDecompReg.v
0,0 → 1,157
/* ============================================================================ |
(C) 2006, 2007 Robert T Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpDecompReg.v |
- decompose floating point value with registered outputs |
- parameterized width |
|
Verilog 1995 |
|
This source code is free for use and modification for non-commercial or |
evaluation purposes, provided this copyright statement and disclaimer |
remains present in the file. |
|
If the code is modified, please state the origin and note that the code |
has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF ANY KIND, WHETHER |
EXPRESS OR IMPLIED. The user must assume the entire risk of using the |
Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY |
INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES WHATSOEVER RELATING TO |
THE USE OF THIS WORK, OR YOUR RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU TO USE THE WORK |
IN APPLICATIONS OR SYSTEMS WHERE THE WORK'S FAILURE TO PERFORM CAN |
REASONABLY BE EXPECTED TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN |
LOSS OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, AND YOU |
AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS FROM ANY CLAIMS OR |
LOSSES RELATING TO SUCH UNAUTHORIZED USE. |
|
|
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256 |
10 slices / 20 LUTs / 12 ns (32 bits) |
|
============================================================================ */ |
|
// fpDecomp
// Combinational decomposition of a floating point value (parameterized width).
// Splits i into its sign / exponent / mantissa fields and derives the
// classification flags from those fields. No clock, no state.
module fpDecomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

parameter WID=32;

localparam MSB = WID-1;
// index of the exponent msb for each listed width (4 for any other width)
localparam EMSB =
    (WID==80) ? 14 :
    (WID==64 || WID==52 || WID==48 || WID==44 || WID==42) ? 10 :
    (WID==40) ? 9 :
    (WID==32) ? 7 :
    (WID==24) ? 6 : 4;
// index of the mantissa msb for each listed width (9 for any other width)
localparam FMSB =
    (WID==80) ? 63 :
    (WID==64) ? 51 :
    (WID==52) ? 39 :
    (WID==48) ? 35 :
    (WID==44) ? 31 :
    (WID==42) ? 29 :
    (WID==40) ? 28 :
    (WID==32) ? 22 :
    (WID==24) ? 15 : 9;

input [MSB:0] i;            // packed floating point value

output sgn;                 // sign bit
output [EMSB:0] exp;        // exponent field
output [FMSB:0] man;        // mantissa field (hidden bit not included)
output [FMSB+1:0] fract;    // mantissa with hidden bit recovered
output xz;                  // exponent field is all zeros (denormal or zero)
output mz;                  // mantissa field is all zeros
output vz;                  // exponent and mantissa are both zero
output inf;                 // exponent all ones, mantissa zero
output xinf;                // exponent all ones
output qnan;                // exponent all ones, mantissa msb set
output snan;                // exponent all ones, mantissa msb clear, mantissa non-zero
output nan;                 // exponent all ones, mantissa non-zero

// field extraction
assign sgn = i[MSB];
assign exp = i[MSB-1:FMSB+1];
assign man = i[FMSB:0];

// shared reductions over the extracted fields
wire expOnes = &exp;
wire expZero = ~|exp;
wire manZero = ~|man;

// classification
assign xz   = expZero;
assign mz   = manZero;
assign vz   = expZero & manZero;
assign xinf = expOnes;
assign inf  = expOnes & manZero;
assign nan  = expOnes & ~manZero;
assign qnan = expOnes & man[FMSB];
assign snan = expOnes & ~man[FMSB] & ~manZero;

// hidden bit is 1 whenever the exponent field is non-zero
assign fract = {~expZero, man};

endmodule
|
|
// fpDecompReg
// Decompose a floating point value and register every output.
// On each rising clk edge with ce high, o captures i and the remaining
// outputs capture the fields / classification flags decoded from that same i.
// With ce low all outputs hold their previous value.
//
// The decode is done on wires and every register is written with a
// nonblocking assignment, so other clocked logic sampling these outputs on
// the same edge sees the pre-edge values deterministically.
module fpDecompReg(clk, ce, i, o, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

parameter WID=32;

localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

input clk;
input ce;                       // clock enable
input [MSB:0] i;                // packed floating point value

output reg [MSB:0] o;           // registered copy of i
output reg sgn;                 // sign bit
output reg [EMSB:0] exp;        // exponent field
output reg [FMSB:0] man;        // mantissa field
output reg [FMSB+1:0] fract;    // mantissa with hidden bit recovered
output reg xz;                  // denormalized - exponent is zero
output reg mz;                  // mantissa is zero
output reg vz;                  // value is zero (both exponent and mantissa are zero)
output reg inf;                 // all ones exponent, zero mantissa
output reg xinf;                // all ones exponent
output reg qnan;                // all ones exponent, mantissa msb set
output reg snan;                // all ones exponent, mantissa msb clear, mantissa non-zero
output reg nan;                 // all ones exponent, mantissa non-zero

// Combinational decode of the incoming value
wire [EMSB:0] xi = i[MSB-1:FMSB+1];
wire [FMSB:0] mi = i[FMSB:0];
wire xOnes = &xi;
wire xZero = ~|xi;
wire mZero = ~|mi;

// Register the decoded value
always @(posedge clk)
    if (ce) begin
        o     <= i;
        sgn   <= i[MSB];
        exp   <= xi;
        man   <= mi;
        xz    <= xZero;
        mz    <= mZero;
        vz    <= xZero & mZero;
        inf   <= xOnes & mZero;
        xinf  <= xOnes;
        qnan  <= xOnes & mi[FMSB];
        snan  <= xOnes & ~mi[FMSB] & ~mZero;
        nan   <= xOnes & ~mZero;
        fract <= {~xZero, mi};      // hidden bit is 1 for a non-zero exponent
    end

endmodule
/verilog/fpUnit/fpZLUnit.v
0,0 → 1,113
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2007,2014,2015 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpZLUnit.v |
// - zero latency floating point unit |
// - instructions can execute in a single cycle without |
// a clock |
// - parameterized width |
// - IEEE 754 representation |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fabs - get absolute value of number |
// fnabs - get negative absolute value of number |
// fneg - negate number |
// fmov - copy input to output |
// fsign - get sign of number (set number to +1,0, or -1) |
// fman - get mantissa (set exponent to zero) |
// fcmp |
// |
// ============================================================================ |
|
`include "..\Thor_defines.v" |
|
// fpZLUnit
// Zero latency (purely combinational) floating point unit.
//   op  - major opcode; selects the DOUBLE_R, SINGLE_R or FLOAT group
//   fn  - function code within the selected group
//   a   - operand, bits numbered WID..1
//   b   - second operand, used only by the compare units
//   o   - result; 0 for any op/fn combination not listed below
//   nanx- NaN indication from the 32 bit comparator when op/fn select FCMPS,
//         otherwise from the 64 bit comparator
//
// NOTE(review): the double precision paths select a[64:1] and u1 is
// instantiated at width 64 regardless of WID, so the module presumably
// expects WID==64 in practice - confirm against the instantiating code.
module fpZLUnit
#(parameter WID=32)
(
    input [7:0] op,
    input [5:0] fn,
    input [WID:1] a,
    input [WID:1] b,    // for fcmp
    output reg [WID:1] o,
    output nanx
);

wire nanxd,nanxs;
wire single = op==`SINGLE_R;
// operand magnitude is zero (sign bit ignored)
wire az = single ? a[31:1]==0 : WID==64 ? a[63:1]==0 : 0;
wire [3:0] cmp_o,cmps_o;
assign nanx = op==`FLOAT && fn==`FCMPS ? nanxs : nanxd;

fp_cmp_unit #(64) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanxd) );
fp_cmp_unit #(32) u2 (.a(a[32:1]), .b(b[32:1]), .o(cmps_o), .nanx(nanxs) );

// Result selection. Written as a complete combinational block: every input
// (including fn) is in the implicit sensitivity list and o is given a value
// on every path, so no latch is inferred when WID!=64.
always @*
begin
    o = 0;
    case (op)
    `DOUBLE_R:
        if (WID==64)
            case(fn)
            `FABS:  o = {1'b0,a[63:1]};      // fabs
            `FNABS: o = {1'b1,a[63:1]};      // fnabs
            `FNEG:  o = {~a[64],a[63:1]};    // fneg
            `FMOV:  o = a;                   // fmov
            `FSIGN: o = az ? 0 : {a[64],1'b0,{10{1'b1}},{52{1'b0}}};  // fsign
            // sign, exponent forced to 0_1111111111, full 52 bit mantissa a[52:1]
            `FMAN:  o = {a[64],1'b0,{10{1'b1}},a[52:1]};              // fman
            default: o = 0;
            endcase
    `SINGLE_R:
        case(fn)
        `FABSS:  o = {1'b0,a[31:1]};         // fabs
        `FNABSS: o = {1'b1,a[31:1]};         // fnabs
        `FNEGS:  o = {~a[32],a[31:1]};       // fneg
        `FMOVS:  o = a;                      // fmov
        `FSIGNS: o = az ? 0 : {a[32],1'b0,{7{1'b1}},{23{1'b0}}};     // fsign
        `FMANS:  o = {a[32],1'b0,{7{1'b1}},a[23:1]};                 // fman
        default: o = 0;
        endcase
    `FLOAT:
        case(fn)
        `FCMP:  o = cmp_o;
        `FCMPS: o = cmps_o;
        default: o = 0;
        endcase
    default: o = 0;
    endcase
end

endmodule
/verilog/fpUnit/fp_decomp.v
0,0 → 1,97
/* ============================================================================ |
(C) 2006, 2007 Robert T Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fp_decomp.v |
- decompose floating point value |
- parameterized width |
|
|
Verilog 1995 |
|
This source code is free for use and modification for non-commercial or |
evaluation purposes, provided this copyright statement and disclaimer |
remains present in the file. |
|
If the code is modified, please state the origin and note that the code |
has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF ANY KIND, WHETHER |
EXPRESS OR IMPLIED. The user must assume the entire risk of using the |
Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY |
INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES WHATSOEVER RELATING TO |
THE USE OF THIS WORK, OR YOUR RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU TO USE THE WORK |
IN APPLICATIONS OR SYSTEMS WHERE THE WORK'S FAILURE TO PERFORM CAN |
REASONABLY BE EXPECTED TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN |
LOSS OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, AND YOU |
AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS FROM ANY CLAIMS OR |
LOSSES RELATING TO SUCH UNAUTHORIZED USE. |
|
|
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256 |
10 slices / 20 LUTs / 12 ns (32 bits) |
|
============================================================================ */ |
|
// fp_decomp
// Decompose a floating point value into sign, exponent and mantissa and
// classify it. Entirely combinational; width selected by WID.
module fp_decomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

parameter WID=32;

localparam MSB = WID-1;
// exponent msb index: listed widths only, every other width gets 4
localparam EMSB =
    (WID==80) ? 14 :
    (WID==64 || WID==52 || WID==48 || WID==44 || WID==42) ? 10 :
    (WID==40) ? 9 :
    (WID==32) ? 7 :
    (WID==24) ? 6 : 4;
// mantissa msb index: listed widths only, every other width gets 9
localparam FMSB =
    (WID==80) ? 63 :
    (WID==64) ? 51 :
    (WID==52) ? 39 :
    (WID==48) ? 35 :
    (WID==44) ? 31 :
    (WID==42) ? 29 :
    (WID==40) ? 28 :
    (WID==32) ? 22 :
    (WID==24) ? 15 : 9;

input  [MSB:0] i;           // packed floating point value

output sgn;                 // sign bit
output [EMSB:0] exp;        // exponent field
output [FMSB:0] man;        // mantissa field
output [FMSB+1:0] fract;    // mantissa with hidden bit recovered
output xz;                  // exponent is zero (denormalized)
output mz;                  // mantissa is zero
output vz;                  // value is zero (exponent and mantissa both zero)
output inf;                 // all ones exponent, zero mantissa
output xinf;                // all ones exponent
output qnan;                // all ones exponent, mantissa msb set
output snan;                // all ones exponent, mantissa msb clear, mantissa non-zero
output nan;                 // all ones exponent, non-zero mantissa

// reductions computed once and reused by the flag equations
wire allOnesX = &i[MSB-1:FMSB+1];
wire anyX     = |i[MSB-1:FMSB+1];
wire anyM     = |i[FMSB:0];

assign sgn   = i[MSB];
assign exp   = i[MSB-1:FMSB+1];
assign man   = i[FMSB:0];
assign fract = {anyX, i[FMSB:0]};

assign xz    = ~anyX;
assign mz    = ~anyM;
assign vz    = ~(anyX | anyM);
assign xinf  = allOnesX;
assign inf   = allOnesX & ~anyM;
assign nan   = allOnesX & anyM;
assign qnan  = allOnesX & i[FMSB];
assign snan  = allOnesX & anyM & ~i[FMSB];

endmodule
|
|
/verilog/fpUnit/fpRound.v
0,0 → 1,168
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpRound.v |
- floating point rounding unit |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If the code is modified, please state the origin and |
note that the code has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
|
This unit takes a normalized floating point number in an |
expanded format and rounds it according to the IEEE-754 |
standard. NaN's and infinities are not rounded. |
This module has a single cycle latency. |
|
Mode |
0: round to nearest even |
1: round to zero (truncate) |
2: round towards +infinity |
3: round towards -infinity |
|
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256 |
69 slices / 129 LUTS / 21.3 ns (32 bit) |
=============================================================== */ |
|
// fpRound
// Combinational rounding of an expanded-format floating point value.
//   rm - rounding mode: 0 nearest even, 1 toward zero, 2 toward +inf, 3 toward -inf
//   i  - expanded format input: sign in i[MSB+2], exponent slice i[MSB+1:FMSB+4],
//        mantissa (with leading bit) plus rounding bits in i[FMSB+3:0]
//   o  - rounded result {sign, exponent, mantissa}
module fpRound(rm, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

input [1:0] rm;         // rounding mode
input [MSB+2:0] i;      // intermediate format input
output [WID-1:0] o;     // rounded output

//------------------------------------------------------------
// variables
wire so;
wire [EMSB:0] xo;
reg  [FMSB:0] mo;
// NOTE(review): for every width listed above the slice i[MSB+1:FMSB+4] is one
// bit narrower than xo1, so it is zero extended and xInf below can then never
// be 1. Confirm the intended expanded-format layout against the unit that
// produces i before changing anything here.
wire [EMSB:0] xo1 = i[MSB+1:FMSB+4];
wire [FMSB+3:0] mo1 = i[FMSB+3:0];
wire xInf = &xo1;
wire dn = !(|xo1);      // denormalized input
assign o = {so,xo,mo};

wire g = i[2];  // guard bit: always the same bit for all operations
wire r = i[1];  // rounding bit
wire s = i[0];  // sticky bit
reg rnd;

// Compute the round bit
// Infinities and NaNs are not rounded!
always @(xInf,rm,g,r,s,so)
    case ({xInf,rm})
    3'd0:   rnd = (g & r) | (r & s);    // round to nearest even
    3'd1:   rnd = 0;                    // round to zero (truncate)
    3'd2:   rnd = (r | s) & !so;        // round towards +infinity
    3'd3:   rnd = (r | s) & so;         // round towards -infinity
    default:    rnd = 0;                // no rounding if exponent indicates infinite or NaN
    endcase

// round the number, check for carry
// note: inf. exponent checked above (if the exponent was infinite already, then no rounding occurs as rnd = 0)
// note: exponent increments if there is a carry (can only increment to infinity)
// performance note: use the carry chain to increment the exponent
wire [MSB:0] rounded = {xo1,mo1[FMSB+3:2]} + rnd;
wire carry = mo1[FMSB+3] & !rounded[FMSB+1];

assign so = i[MSB+2];
assign xo = rounded[MSB:FMSB+2];

// Mantissa selection.
// casez with '?' wildcards is used instead of casex so that an unknown (X)
// selector bit is not silently treated as a match in simulation; the default
// arm then propagates X to the mantissa. All sixteen known selector values
// are covered by the explicit arms.
// NOTE(review): the first two arms select one more bit than mo holds, so the
// upper bit of each slice is dropped by the assignment - confirm intent.
always @(rnd or xo or carry or dn or rounded or mo1)
    casez({rnd,&xo,carry,dn})
    4'b0??0:    mo = mo1[FMSB+2:1];     // not rounding, not denormalized, => hide MSB
    4'b0??1:    mo = mo1[FMSB+3:2];     // not rounding, denormalized
    4'b1000:    mo = rounded[FMSB  :0]; // exponent didn't change, number was normalized, => hide MSB
    4'b1001:    mo = rounded[FMSB+1:1]; // exponent didn't change, but number was denormalized, => retain MSB
    4'b1010:    mo = rounded[FMSB+1:1]; // exponent incremented (new MSB generated), number was normalized, => hide 'extra (FMSB+2)' MSB
    4'b1011:    mo = rounded[FMSB+1:1]; // exponent incremented (new MSB generated), number was denormalized, number became normalized, => hide 'extra (FMSB+2)' MSB
    4'b11??:    mo = 0;                 // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite
    default:    mo = {FMSB+1{1'bx}};    // unknown selector: propagate X
    endcase

endmodule
|
|
// Round and register the output |
|
// fpRoundReg
// fpRound followed by an output register: o takes the rounded value of i on
// each rising clk edge while ce is high, and holds otherwise.
module fpRoundReg(clk, ce, rm, i, o);
parameter WID = 32;

input clk;
input ce;                   // clock enable for the output register
input [1:0] rm;             // rounding mode, passed straight to fpRound
input [WID+1:0] i;          // expanded format input
output [WID-1:0] o;         // rounded, registered output

reg  [WID-1:0] o;
wire [WID-1:0] o1;          // combinational result from the rounder

fpRound #(.WID(WID)) u1 (.i(i), .rm(rm), .o(o1));

always @(posedge clk)
begin
    if (ce) begin
        o <= o1;
    end
end

endmodule
/verilog/fpUnit/fpLOOUnit.v
0,0 → 1,110
/* =============================================================== |
(C) 2006,2015 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpLOOUnit.v |
- 'latency of one' floating point unit |
- instructions can execute using a single cycle |
- issue rate is one per clock cycle |
- latency is one clock cycle |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If the code is modified, please state the origin and |
note that the code has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
|
i2f - convert integer to floating point |
f2i - convert floating point to integer |
|
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256 |
61 LUTS / 34 slices / 16 ns |
=============================================================== */ |
`include "..\Thor_defines.v" |
|
// fpLOOUnit
// 'Latency of one' floating point unit: integer <-> float conversions.
//   clk, ce - clock and clock enable forwarded to the converters
//   rm      - rounding mode forwarded to the i2f converters
//   op, fn  - opcode group and function code selecting the result
//   a       - operand, bits numbered WID..1
//   o       - selected converter output; 0 for unlisted op/fn combinations
//   done    - output of the delay1 instance u1
module fpLOOUnit
#(parameter WID=32)
(
    input clk,
    input ce,
    input [1:0] rm,
    input [7:0] op,
    input [5:0] fn,
    input [WID:1] a,
    output reg [WID:1] o,
    output done
);

wire [64:1] i2f_o;
wire [64:1] f2i_o;
wire [32:1] i2fs_o;
wire [32:1] f2is_o;

// NOTE(review): done is derived from op==`ITOF||op==`FTOI, while the result
// mux below compares those same macros against fn. Confirm whether op or fn
// is the intended operand here.
delay1 u1 (.clk(clk), .ce(ce), .i(op==`ITOF||op==`FTOI), .o(done) );
i2f #(64)  i2f0 (.clk(clk), .ce(ce), .rm(rm), .i(a), .o(i2f_o) );
f2i #(64)  f2i0 (.clk(clk), .ce(ce), .i(a), .o(f2i_o) );
i2f #(32)  i2fs (.clk(clk), .ce(ce), .rm(rm), .i(a[32:1]), .o(i2fs_o) );
f2i #(32)  f2is (.clk(clk), .ce(ce), .i(a[32:1]), .o(f2is_o) );

// Result mux. Combinational: implicit sensitivity list so that changes on fn
// and on the single precision converter outputs are seen in simulation, and
// blocking assignments as appropriate for combinational logic.
always @*
    case (op)
    `DOUBLE_R:
        case(fn)
        `ITOF:   o = i2f_o;
        `FTOI:   o = f2i_o;
        default: o = 0;
        endcase
    `SINGLE_R:
        case(fn)
        `ITOFS:  o = i2fs_o;
        `FTOIS:  o = f2is_o;
        default: o = 0;
        endcase
    default: o = 0;
    endcase

endmodule
/verilog/fpUnit/f2i.v
0,0 → 1,129
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
f2i.v |
- convert floating point to integer |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If the code is modified, please state the origin and |
note that the code has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
- pipelinable |
- one cycle latency |
|
Ref: Spartan3-4 |
212 LUTs / 135 slices / (28.2 ns no clock) |
=============================================================== */ |
|
// f2i
// Convert a floating point value to a signed integer of the same width.
//   clk, ce  - clock and clock enable for the result registers
//   i        - floating point input
//   o        - integer result, valid one enabled clock after i
//   overflow - combinational flag from the current i: exponent too large
//
// Magnitudes below 1/2 and zero give 0, magnitudes in [1/2,1) give 1,
// overflowing magnitudes saturate to maxInt; other values are shifted into
// place and rounded by adding the first discarded bit. The sign is applied
// by negating the registered magnitude.
module f2i
#(  parameter WID = 32)
(
    input clk,
    input ce,
    input [WID-1:0] i,
    output [WID-1:0] o,
    output overflow
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;


wire [MSB:0] maxInt  = {MSB{1'b1}};     // largest positive signed value (msb clear, rest ones)
wire [EMSB:0] zeroXp = {EMSB{1'b1}};    // simple constant - value of exp for zero

// Decompose fp value
reg sgn;                                    // sign, registered alongside o3
always @(posedge clk)
    if (ce) sgn <= i[MSB];                  // nonblocking: no race with other clocked readers
wire [EMSB:0] exp = i[MSB-1:FMSB+1];        // exponent
wire [FMSB+1:0] man = {exp!=0,i[FMSB:0]};   // mantissa including recreate hidden bit

wire iz = i[MSB-1:0]==0;                    // zero value (special)

// Compare against zeroXp+MSB rather than testing exp-zeroXp: the subtraction
// is unsigned and wraps to a huge value whenever exp < zeroXp, which would
// flag overflow for every magnitude below one.
// NOTE(review): an unbiased exponent equal to MSB is not flagged; the shifted
// magnitude then has its msb set. Confirm whether that case should saturate.
assign overflow = exp > zeroXp + MSB;
wire underflow = exp < zeroXp - 1;          // value less than 1/2

wire [6:0] shamt = MSB - (exp - zeroXp);    // exp - zeroXp will be <= MSB

wire [MSB+1:0] o1 = {man,{EMSB+1{1'b0}},1'b0} >> shamt; // keep an extra bit for rounding
wire [MSB:0] o2 = o1[MSB+1:1] + o1[0];                  // round up
reg [MSB:0] o3;

always @(posedge clk)
    if (ce) begin
        if (underflow|iz)
            o3 <= 0;
        else if (overflow)
            o3 <= maxInt;
        // value between 1/2 and 1 - round up
        else if (exp==zeroXp-1)
            o3 <= 1;
        // value > 1
        else
            o3 <= o2;
    end

assign o = sgn ? -o3 : o3;      // adjust output for correct signed value

endmodule
|
// f2i_tb
// Minimal bench for f2i: a free running clock (period 20 time units) and two
// 32 bit converters fed constant inputs. Results are only observable in a
// waveform viewer; nothing is checked or printed.
module f2i_tb();

reg clk;
wire [31:0] io0, io1;   // integer results of u2 / u1
wire ov0, ov1;          // overflow flags of u2 / u1

initial
    clk = 0;

always
    #10 clk = ~clk;

// u1 converts 32'h3F800000, u2 converts 32'h00000000
f2i #(32) u1 (.clk(clk), .ce(1'b1), .i(32'h3F800000), .o(io1), .overflow(ov1) );
f2i #(32) u2 (.clk(clk), .ce(1'b1), .i(32'h00000000), .o(io0), .overflow(ov0) );

endmodule
/verilog/fpUnit/i2f.v
0,0 → 1,148
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
i2f.v |
- convert integer to floating point |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If the code is modified, please state the origin and |
note that the code has been modified. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
- pipelinable |
- single stage latency |
|
Ref: Spartan3-4 |
267 LUTs / 167 slices / 20? ns (32 bits) |
=============================================================== */ |
|
// i2f
// Convert a signed integer to floating point.
//   clk, ce - clock and clock enable for the single pipeline stage
//   rm      - rounding mode: 0 nearest even, 1 toward zero, 2 toward +inf, 3 toward -inf
//   i       - integer input
//   o       - floating point result {sign, exponent, mantissa}
//
// The magnitude of i, its sign, its zero flag, the rounding mode and the
// leading zero count are registered; normalization and rounding of the
// registered values are combinational.
// NOTE(review): for any WID other than 64 the 32 bit leading zero counter is
// instantiated - confirm behaviour of widths other than 32 and 64.
module i2f
#(  parameter WID = 32)
(
    input clk,
    input ce,
    input [1:0] rm,         // rounding mode
    input [WID-1:0] i,      // integer input
    output [WID-1:0] o      // float output
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

wire [EMSB:0] zeroXp = {EMSB{1'b1}};    // exponent value representing 2^0

wire iz;                // zero input ?
wire [MSB:0] imag;      // get magnitude of i
wire [MSB:0] imag1 = i[MSB] ? -i : i;
wire [6:0] lz;          // count the leading zeros in the number
wire [EMSB:0] wd;       // compute number of whole digits
wire so;                // copy the sign of the input (easy)
wire [1:0] rmd;

delay1 #(2)   u0 (.clk(clk), .ce(ce), .i(rm),     .o(rmd) );
delay1 #(1)   u1 (.clk(clk), .ce(ce), .i(i==0),   .o(iz) );
delay1 #(WID) u2 (.clk(clk), .ce(ce), .i(imag1),  .o(imag) );
delay1 #(1)   u3 (.clk(clk), .ce(ce), .i(i[MSB]), .o(so) );
generate
if (WID==64) begin
cntlz64Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
end else begin
cntlz32Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
assign lz[6]=1'b0;
end
endgenerate

assign wd = zeroXp - 1 + WID - lz;  // constant except for lz

wire [EMSB:0] xo = iz ? 0 : wd;
wire [MSB:0] simag = imag << lz;    // left align number

// The kept mantissa is simag[MSB-1:EMSB+1], so its lsb is simag[EMSB+1], the
// first discarded bit is simag[EMSB] and everything below that is sticky.
// Taking these taps one position higher would round integers that are
// exactly representable.
wire g =  simag[EMSB+1];    // guard bit (lsb of the kept mantissa)
wire r =  simag[EMSB];      // rounding bit (first discarded bit)
wire s = |simag[EMSB-1:0];  // "sticky" bit (all remaining discarded bits)
reg rnd;

// Compute the round bit
always @(rmd,g,r,s,so)
    case (rmd)
    2'd0:   rnd = (g & r) | (r & s);    // round to nearest even
    2'd1:   rnd = 0;                    // round to zero (truncate)
    2'd2:   rnd = (r | s) & !so;        // round towards +infinity
    2'd3:   rnd = (r | s) & so;         // round towards -infinity
    endcase

// "hide" the leading one bit = MSB-1
// round the result; exponent and mantissa are incremented as one value so a
// carry out of an all-ones mantissa bumps the exponent instead of being lost
wire [FMSB:0] mant = simag[MSB-1:EMSB+1];
wire [EMSB+FMSB+1:0] xmo = {xo,mant} + rnd;

assign o = {so,xmo};

endmodule
|
|
// i2f_tb
// Minimal bench for i2f: a free running clock (period 20 time units), an
// 8 bit counter incremented on every rising edge, and a stimulus value i
// that is updated when the counter is 0 or 1 and left unchanged otherwise.
// The converter output fo is only observable in a waveform viewer.
module i2f_tb();

reg clk;
reg [7:0] cnt;          // stimulus step counter
reg [31:0] i;           // integer fed to the converter
wire [31:0] fo;         // converter output

initial begin
    cnt = 0;
    clk = 1'b0;
end

always
    #10 clk = ~clk;

always @(posedge clk)
    cnt = cnt + 1;

// select the stimulus for the current step
always @(cnt)
    if (cnt == 8'd0)
        i <= 32'd0;
    else if (cnt == 8'd1)
        i <= 32'd16777226;

i2f #(32) u1 (.clk(clk), .ce(1'b1), .rm(2'd0), .i(i), .o(fo) );

endmodule
/verilog/fpUnit/fpdivr8.v
0,0 → 1,147
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpdivr8.v |
Radix 8 floating point divider primitive |
|
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If you do modify the code, please state the origin and |
note that you have modified the code. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
|
Performance |
Webpack 7.1i xc3s1000-4ft256 |
222 slices / 410 LUTs / 51.5 MHz |
=============================================================== */ |
|
// fpdivr8
// Divider primitive producing three quotient bits per clock.
//   clk  - clock
//   ld   - load: captures the operands and restarts the iteration counter
//   a, b - dividend and divisor
//   q    - quotient register; loaded with {a,0...} (or {sdq,0...} when the
//          special case unit reports a valid result) and shifted left by
//          three each iteration
//   r    - remainder, corrected by adding b back when the last partial
//          remainder has its msb set
//   done - high once the iteration counter's bit 5 is set
//
// Each iteration chains three add/subtract steps: a step adds b when the
// previous partial remainder has its msb set and subtracts b otherwise, and
// the complement of each step's msb becomes a quotient bit.
module fpdivr8
#(  parameter WID = 24 )
(
    input clk,
    input ld,
    input [WID-1:0] a,
    input [WID-1:0] b,
    output reg [WID*2-1:0] q,
    output [WID-1:0] r,
    output done
);
localparam DMSB = WID-1;

wire [DMSB:0] rx [2:0];     // remainder holds
reg [DMSB:0] rxx;
reg [5:0] cnt;              // iteration count
wire [DMSB:0] sdq;
// NOTE(review): sdr is never driven in this module, yet r selects it when
// sdval is high - confirm where the special case remainder should come from.
wire [DMSB:0] sdr;
wire sdval;
wire sddbz;

// dbz is connected to the declared wire sddbz (not to an implicit net)
specialCaseDivider #(WID) u1 (.a(a), .b(b), .q(sdq), .val(sdval), .dbz(sddbz) );


assign rx[0] = rxx  [DMSB] ? {rxx  ,q[WID*2-1  ]} + b : {rxx  ,q[WID*2-1  ]} - b;
assign rx[1] = rx[0][DMSB] ? {rx[0],q[WID*2-1-1]} + b : {rx[0],q[WID*2-1-1]} - b;
assign rx[2] = rx[1][DMSB] ? {rx[1],q[WID*2-1-2]} + b : {rx[1],q[WID*2-1-2]} - b;


// Iteration counter. Loading 6'b100000 sets bit 5 at once, so a special case
// result is reported done immediately; otherwise the count runs down from
// WID*2/3 until it wraps and sets bit 5.
// NOTE(review): WID*2/3 must stay below 32 or done is asserted immediately.
always @(posedge clk)
    if (ld)
        cnt <= sdval ? 6'b100000 : WID*2/3;
    else if (!done)
        cnt <= cnt - 1;


// Partial remainder register
always @(posedge clk)
    if (ld)
        rxx <= 0;
    else if (!done)
        rxx <= rx[2];


// Quotient register: shift left by three and insert the new quotient bits
always @(posedge clk)
    if (ld) begin
        if (sdval)
            q <= {sdq,{WID{1'b0}}};
        else
            q <= {a,{WID{1'b0}}};
    end
    else if (!done) begin
        q[WID*2-1:3] <= q[WID*2-1-3:0];
        q[0] <= ~rx[2][DMSB];
        q[1] <= ~rx[1][DMSB];
        q[2] <= ~rx[0][DMSB];
    end

// correct remainder
assign r = sdval ? sdr : rx[2][DMSB] ? rx[2] + b : rx[2];
assign done = cnt[5];

endmodule
|
/* |
module fpdiv_tb(); |
|
reg rst; |
reg clk; |
reg ld; |
reg [6:0] cnt; |
|
wire ce = 1'b1; |
wire [49:0] a = 50'h0_0000_0400_0000; |
wire [23:0] b = 24'd101; |
wire [49:0] q; |
wire [49:0] r; |
wire done; |
|
initial begin |
clk = 1; |
rst = 0; |
#100 rst = 1; |
#100 rst = 0; |
end |
|
always #20 clk = ~clk; // 25 MHz |
|
always @(posedge clk) |
if (rst) |
cnt <= 0; |
else begin |
ld <= 0; |
cnt <= cnt + 1; |
if (cnt == 3) |
ld <= 1; |
$display("ld=%b q=%h r=%h done=%b", ld, q, r, done); |
end |
|
|
fpdivr8 divu0(.clk(clk), .ce(ce), .ld(ld), .a(a), .b(b), .q(q), .r(r), .done(done) ); |
|
endmodule |
|
*/ |
|
/verilog/fpUnit/fpMul.v
0,0 → 1,241
// =============================================================== |
// (C) 2006 Robert Finch |
// All rights reserved. |
// rob@birdcomputer.ca |
// |
// fpMul.v |
// - floating point multiplier |
// - two cycle latency |
// - can issue every clock cycle |
// - parameterized width |
// - IEEE 754 representation |
// |
// This source code is free for use and modification for |
// non-commercial or evaluation purposes, provided this |
// copyright statement and disclaimer remains present in |
// the file. |
// |
// If the code is modified, please state the origin and |
// note that the code has been modified. |
// |
// NO WARRANTY. |
// THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
// ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
// the entire risk of using the Work. |
// |
// IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
// ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
// WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
// RELATIONSHIP WITH THE AUTHOR. |
// |
// IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
// TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
// WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
// TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
// OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
// AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
// FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
// USE. |
// |
// This multiplier/divider handles denormalized numbers. |
// The output format is of an internal expanded representation |
// in preparation to be fed into a normalization unit, then |
// rounding. Basically, it's the same as the regular format |
// except the mantissa is doubled in size, the leading two |
// bits of which are assumed to be whole bits. |
// |
// |
// Floating Point Multiplier |
// |
// Properties: |
// +-inf * +-inf = -+inf (this is handled by exOver) |
// +-inf * 0 = QNaN |
// |
// 1 sign number |
// 8 exponent |
// 48 mantissa |
// |
// Ref: Webpack8.1i Spartan3-4 xc3s1000-4ft256 |
// 174 LUTS / 113 slices / 24.7 ns |
// 4 Mults |
//=============================================================== |
|
module fpMul (clk, ce, a, b, o, sign_exe, inf, overflow, underflow); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; |
input ce; |
input [WID:1] a, b; |
output [EX:0] o; |
output sign_exe; |
output inf; |
output overflow; |
output underflow; |
|
reg [EMSB:0] xo1; // extra bit for sign |
reg [FX:0] mo1; |
|
// constants |
wire [EMSB:0] infXp = {EMSB+1{1'b1}}; // infinite / NaN - all ones |
// The following is the value for an exponent of zero, with the offset |
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc. |
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}}; //2^0 exponent |
// The following is a template for a quiet nan. (MSB=1) |
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}}; |
|
// variables |
reg [FX:0] fract1,fract1a; |
wire [FX:0] fracto; |
wire [EMSB+2:0] ex1; // sum of exponents |
wire [EMSB :0] ex2; |
|
// Decompose the operands |
wire sa, sb; // sign bit |
wire [EMSB:0] xa, xb; // exponent bits |
wire [FMSB+1:0] fracta, fractb; |
wire a_dn, b_dn; // a/b is denormalized |
wire az, bz; |
wire aInf, bInf, aInf1, bInf1; |
|
|
// ----------------------------------------------------------- |
// First clock |
// - decode the input operands |
// - derive basic information |
// - calculate exponent |
// - calculate fraction |
// ----------------------------------------------------------- |
|
fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) ); |
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) ); |
|
// Compute the sum of the exponents. |
// correct the exponent for denormalized operands |
// adjust the sum by the exponent offset (subtract 127) |
// mul: ex1 = xa + xb, result should always be < 1ffh |
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias; |
generate |
if (WID==64) begin |
reg [35:0] p00,p01,p02; |
reg [35:0] p10,p11,p12; |
reg [35:0] p20,p21,p22; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[17: 0] * fractb[17: 0]; |
p01 <= fracta[35:18] * fractb[17: 0]; |
p02 <= fracta[52:36] * fractb[17: 0]; |
p10 <= fracta[17: 0] * fractb[35:18]; |
p11 <= fracta[35:18] * fractb[35:18]; |
p12 <= fracta[52:36] * fractb[35:18]; |
p20 <= fracta[17: 0] * fractb[52:36]; |
p21 <= fracta[35:18] * fractb[52:36]; |
p22 <= fracta[52:36] * fractb[52:36]; |
fract1 <= {p02,36'b0} + {p01,18'b0} + p00 + |
{p12,54'b0} + {p11,36'b0} + {p10,18'b0} + |
{p22,72'b0} + {p21,54'b0} + {p20,36'b0} |
; |
end |
end |
else if (WID==32) begin |
reg [35:0] p00,p01; |
reg [35:0] p10,p11; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[17: 0] * fractb[17: 0]; |
p01 <= fracta[23:18] * fractb[17: 0]; |
p10 <= fracta[17: 0] * fractb[23:18]; |
p11 <= fracta[23:18] * fractb[23:18]; |
fract1 <= {p11,p00} + {p01,18'b0} + {p10,18'b0}; |
end |
end |
endgenerate |
|
// Status |
wire under1, over1; |
wire under = ex1[EMSB+2]; // exponent underflow |
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2]; |
|
delay2 #(EMSB) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) ); |
delay2 #(FX+1) u4 (.clk(clk), .ce(ce), .i(fract1), .o(fracto) ); |
delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) ); |
delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) ); |
delay2 u6 (.clk(clk), .ce(ce), .i(under), .o(under1) ); |
delay2 u7 (.clk(clk), .ce(ce), .i(over), .o(over1) ); |
|
// determine when a NaN is output |
wire qNaNOut; |
delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) ); |
|
|
// ----------------------------------------------------------- |
// Second clock |
// - correct xponent and mantissa for exceptional conditions |
// ----------------------------------------------------------- |
|
wire so1; |
delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay! |
|
always @(posedge clk) |
if (ce) |
casex({qNaNOut,aInf1,bInf1,over1,under1}) |
5'b1xxxx: xo1 = infXp; // qNaN - infinity * zero |
5'b01xxx: xo1 = infXp; // 'a' infinite |
5'b001xx: xo1 = infXp; // 'b' infinite |
5'b0001x: xo1 = infXp; // result overflow |
5'b00001: xo1 = 0; // underflow |
default: xo1 = ex2[EMSB:0]; // situation normal |
endcase |
|
always @(posedge clk) |
if (ce) |
casex({qNaNOut,aInf1,bInf1,over1}) |
4'b1xxx: mo1 = {1'b0,qNaN|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero |
4'b01xx: mo1 = 0; // mul inf's |
4'b001x: mo1 = 0; // mul inf's |
4'b0001: mo1 = 0; // mul overflow |
default: mo1 = fracto; |
endcase |
|
delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) ); |
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) ); |
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) ); |
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) ); |
|
assign o = {so1,xo1,mo1}; |
|
endmodule |
|
module fpMul_tb(); |
reg clk; |
|
initial begin |
clk = 0; |
end |
always #10 clk <= ~clk; |
|
fpMul u1 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1)); |
fpMul u2 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1)); |
|
endmodule |
/verilog/fpUnit/fp_cmp_unit.v
0,0 → 1,84
/* ============================================================================ |
(C) 2007,2015 Robert T Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fp_cmp_unit.v |
- floating point comparison unit |
- parameterized width |
- IEEE 754 representation |
|
Verilog 2001 |
|
Notice of Confidentiality |
|
http://en.wikipedia.org/wiki/IEEE_754 |
|
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256 |
111 LUTS / 58 slices / 16 ns |
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256 |
109 LUTS / 58 slices / 16.4 ns |
|
============================================================================ */ |
|
module fp_cmp_unit(a, b, o, nanx); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
input [WID-1:0] a, b; |
output [3:0] o; |
reg [3:0] o; |
output nanx; |
|
// Decompose the operands |
wire sa; |
wire sb; |
wire [EMSB:0] xa; |
wire [EMSB:0] xb; |
wire [FMSB:0] ma; |
wire [FMSB:0] mb; |
wire az, bz; |
wire nan_a, nan_b; |
|
fp_decomp #(WID) u1(.i(a), .sgn(sa), .exp(xa), .man(ma), .vz(az), .qnan(), .snan(), .nan(nan_a) ); |
fp_decomp #(WID) u2(.i(b), .sgn(sb), .exp(xb), .man(mb), .vz(bz), .qnan(), .snan(), .nan(nan_b) ); |
|
wire unordered = nan_a | nan_b; |
|
wire eq = (az & bz) || (a==b); // special test for zero |
wire gt1 = {xa,ma} > {xb,mb}; |
wire lt1 = {xa,ma} < {xb,mb}; |
|
wire lt = sa ^ sb ? sa & !(az & bz): sa ? gt1 : lt1; |
|
always @(unordered or eq or lt) |
begin |
o[0] = eq; |
o[1] = lt; |
o[2] = lt1; |
o[3] = unordered; |
end |
|
// an unorder comparison will signal a nan exception |
//assign nanx = op!=`FCOR && op!=`FCUN && unordered; |
assign nanx = 1'b0; |
|
endmodule |
/verilog/fpUnit/fpUnit.v
0,0 → 1,455
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006,2015 Robert Finch, Stratford |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
// (at your option) any later version. |
// |
// This source file is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// GNU General Public License for more details. |
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// |
// Thor SuperScalar |
// fpUnit.v |
// - floating point unit |
// - parameterized width |
// - IEEE 754 representation |
// |
// NaN Value Origin |
// 31'h7FC00001 - infinity - infinity |
// 31'h7FC00002 - infinity / infinity |
// 31'h7FC00003 - zero / zero |
// 31'h7FC00004 - infinity X zero |
// |
// Whenever the fpu encounters a NaN input, the NaN is |
// passed through to the output. |
// |
// Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256 |
// 2335 LUTS / 1260 slices / 43.4 MHz |
// Ref: Webpack 13.1 Spartan3e xc3s1200e-4fg320 |
// 2433 LUTs / 1301 slices / 51.6 MHz |
// |
// Instr. Cyc Lat |
// fc__ ; 1 0 compare, lt le gt ge eq ne or un |
// fabs ; 1 0 absolute value |
// fnabs ; 1 0 negative absolute value |
// fneg ; 1 0 negate |
// fmov ; 1 0 move |
// fman ; 1 0 get mantissa |
// fsign ; 1 0 get sign |
// |
// f2i ; 1 1 convert float to integer |
// i2f ; 1 1 convert integer to float |
// |
// fadd ; 1 4 addition |
// fsub ; 1 4 subtraction |
// fmul ; 1 4 multiplication |
// |
// fdiv ; 16 4 division |
// |
// ftx ; 1 0 trigger fp exception |
// fcx ; 1 0 clear fp exception |
// fex ; 1 0 enable fp exception |
// fdx ; 1 0 disable fp exception |
// frm ; 1 0 set rounding mode |
// fstat ; 1 0 get status register |
// |
// related integer: |
// graf ; 1 0 get random float (0,1] |
// |
// ============================================================================ |
// |
`include "..\Thor_defines.v" |
|
`define QINFOS 23'h7FC000 // info |
`define QSUBINFS 31'h7FC00001 // - infinity - infinity |
`define QINFDIVS 31'h7FC00002 // - infinity / infinity |
`define QZEROZEROS 31'h7FC00003 // - zero / zero |
`define QINFZEROS 31'h7FC00004 // - infinity X zero |
|
`define QINFO 52'h7FC000 // info |
`define QSUBINF 62'h7FF0000000000001 // - infinity - infinity |
`define QINFDIV 62'h7FF0000000000002 // - infinity / infinity |
`define QZEROZERO 62'h7FF0000000000003 // - zero / zero |
`define QINFZERO 62'h7FF0000000000004 // - infinity X zero |
|
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception); |
|
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
localparam EMSBS = 7; |
localparam FMSBS = 22; |
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction |
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1; |
|
input rst; |
input clk; |
input ce; |
input [7:0] op; |
input [5:0] fn; |
input ld; |
input [MSB:0] a; |
input [MSB:0] b; |
output tri [MSB:0] o; |
output exception; |
|
|
//------------------------------------------------------------ |
// constants |
wire infXp = {11{1'b1}}; // value for infinite exponent / nan |
wire infXps = {8{1'b1}}; |
|
// Variables |
wire divByZero; // attempt to divide by zero |
wire inf; // result is infinite (+ or -) |
wire zero; // result is zero (+ or -) |
wire ns; // nan sign |
wire nss; |
wire nso; |
wire nsos; |
wire isNan,isNans; |
wire nanx,nanxs; |
|
// Decode fp operation |
wire fstat = op==`FLOAT && fn==`FSTAT; // get status |
wire fdiv = op==`FLOAT && fn==`FDIV; |
wire fdivs = op==`FLOAT && fn==`FDIVS; |
wire ftx = op==`FLOAT && fn==`FTX; // trigger exception |
wire fcx = op==`FLOAT && fn==`FCX; // clear exception |
wire fex = op==`FLOAT && fn==`FEX; // enable exception |
wire fdx = op==`FLOAT && fn==`FDX; // disable exception |
wire fcmp = op==`FLOAT && (fn==`FCMP || fn==`FCMPS); |
wire frm = op==`FLOAT && fn==`FRM; // set rounding mode |
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R; |
wire zl_op = (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) || |
(op==`FLOAT && fn==`FCMP) || |
(op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) || |
(op==`FLOAT && (fn==`FCMPS)) |
; |
wire loo_op = (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) || |
(op==`SINGLE_R && (fn==`FTOIS || op==`ITOFS)); |
wire loo_done; |
|
wire subinf; |
wire zerozero; |
wire infzero; |
wire infdiv; |
|
// floating point control and status |
reg [1:0] rm; // rounding mode |
reg inexe; // inexact exception enable |
reg dbzxe; // divide by zero exception enable |
reg underxe; // underflow exception enable |
reg overxe; // overflow exception enable |
reg invopxe; // invalid operation exception enable |
|
reg nsfp; // non-standard floating point indicator |
|
reg fractie; // fraction inexact |
reg raz; // rounded away from zero |
|
reg inex; // inexact exception |
reg dbzx; // divide by zero exception |
reg underx; // underflow exception |
reg overx; // overflow exception |
reg giopx; // global invalid operation exception |
reg sx; // summary exception |
|
reg swtx; // software triggered exception indicator |
|
wire gx = swtx|inex|dbzx|underx|overx|giopx; // global exception indicator |
|
// breakdown of invalid operation exceptions |
reg cvtx; // conversion exception |
reg sqrtx; // squareroot exception |
reg NaNCmpx; // NaN comparison exception |
reg infzerox; // multiply infinity by zero |
reg zerozerox; // division of zero by zero |
reg infdivx; // division of infinities |
reg subinfx; // subtraction of infinities |
reg snanx; // signalling nan |
|
wire divDone; |
wire pipe_ce = ce & divDone; // divide must be done in order for pipe to clock |
|
always @(posedge clk) |
// reset: disable and clear all exceptions and status |
if (rst) begin |
rm <= 2'b0; // round nearest even - default rounding mode |
inex <= 1'b0; |
dbzx <= 1'b0; |
underx <= 1'b0; |
overx <= 1'b0; |
giopx <= 1'b0; |
swtx <= 1'b0; |
sx <= 1'b0; |
NaNCmpx <= 1'b0; |
|
inexe <= 1'b0; |
dbzxe <= 1'b0; |
underxe <= 1'b0; |
overxe <= 1'b0; |
invopxe <= 1'b0; |
|
nsfp <= 1'b0; |
|
end |
else if (pipe_ce) begin |
if (ftx) begin |
inex <= inex | (a[4]|b[4]); |
dbzx <= dbzx | (a[3]|b[3]); |
underx <= underx | (a[2]|b[2]); |
overx <= overx | (a[1]|b[1]); |
giopx <= giopx | (a[0]|b[0]); |
swtx <= 1'b1; |
sx <= 1'b1; |
end |
else if (fcx) begin |
sx <= sx & !(a[5]|b[5]); |
inex <= inex & !(a[4]|b[4]); |
dbzx <= dbzx & !(a[3]|b[3]); |
underx <= underx & !(a[2]|b[2]); |
overx <= overx & !(a[1]|b[1]); |
giopx <= giopx & !(a[0]|b[0]); |
// clear exception type when global invalid operation is cleared |
infdivx <= infdivx & !(a[0]|b[0]); |
zerozerox <= zerozerox & !(a[0]|b[0]); |
subinfx <= subinfx & !(a[0]|b[0]); |
infzerox <= infzerox & !(a[0]|b[0]); |
NaNCmpx <= NaNCmpx & !(a[0]|b[0]); |
dbzx <= dbzx & !(a[0]|b[0]); |
swtx <= 1'b1; |
end |
else if (fex) begin |
inexe <= inexe | (a[4]|b[4]); |
dbzxe <= dbzxe | (a[3]|b[3]); |
underxe <= underxe | (a[2]|b[2]); |
overxe <= overxe | (a[1]|b[1]); |
invopxe <= invopxe | (a[0]|b[0]); |
end |
else if (fdx) begin |
inexe <= inexe & !(a[4]|b[4]); |
dbzxe <= dbzxe & !(a[3]|b[3]); |
underxe <= underxe & !(a[2]|b[2]); |
overxe <= overxe & !(a[1]|b[1]); |
invopxe <= invopxe & !(a[0]|b[0]); |
end |
else if (frm) |
rm <= a[1:0]|b[1:0]; |
|
infzerox <= infzerox | (invopxe & infzero); |
zerozerox <= zerozerox | (invopxe & zerozero); |
subinfx <= subinfx | (invopxe & subinf); |
infdivx <= infdivx | (invopxe & infdiv); |
dbzx <= dbzx | (dbzxe & divByZero); |
NaNCmpx <= NaNCmpx | (invopxe & nanx & fcmp); // must be a compare |
sx <= sx | |
(invopxe & nanx & fcmp) | |
(invopxe & (infzero|zerozero|subinf|infdiv)) | |
(dbzxe & divByZero); |
end |
|
// Decompose operands into sign,exponent,mantissa |
wire sa, sb, sas, sbs; |
wire [FMSB:0] ma, mb; |
wire [22:0] mas, mbs; |
|
wire aInf, bInf, aInfs, bInfs; |
wire aNan, bNan, aNans, bNans; |
wire az, bz, azs, bzs; |
wire [1:0] rmd4; // 1st stage delayed |
wire [7:0] op1, op2; |
wire [5:0] fn1,fn2; |
|
wire [MSB:0] zld_o,lood_o; |
wire [31:0] zls_o,loos_o; |
fpZLUnit #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) ); |
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(loo_o), .done(loos_done) ); |
fpZLUnit #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) ); |
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() ); |
assign loo_o = single ? loos_o : lood_o; |
assign zl_o = single ? zls_o : zld_o; |
fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) ); |
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) ); |
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) ); |
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) ); |
|
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rmd), .o(rmd4) ); |
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) ); |
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) ); |
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) ); |
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) ); |
|
delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) ); |
|
// Compute NaN output sign |
wire aob_nan = aNan|bNan; // one of the operands is a nan |
wire bothNan = aNan&bNan; // both of the operands are nans |
wire aob_nans = aNans|bNans; // one of the operands is a nan |
wire bothNans = aNans&bNans; // both of the operands are nans |
|
assign ns = bothNan ? |
(ma==mb ? sa & sb : ma < mb ? sb : sa) : |
aNan ? sa : sb; |
assign nss = bothNans ? |
(mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) : |
aNans ? sas : sbs; |
|
delay5 u8(.clk(clk), .ce(ce), .i(ns), .o(nso) ); |
delay5 u9(.clk(clk), .ce(ce), .i(aob_nan), .o(isNan) ); |
delay5 u8s(.clk(clk), .ce(ce), .i(nss), .o(nsos) ); |
delay5 u9s(.clk(clk), .ce(ce), .i(aob_nans), .o(isNans) ); |
|
wire [MSB:0] fpu_o; |
wire [MSB+3:0] fpn_o; |
wire [EX:0] fdiv_o; |
wire [EX:0] fmul_o; |
wire [EX:0] fas_o; |
reg [EX:0] fres; |
wire [31:0] fpus_o; |
wire [31+3:0] fpns_o; |
wire [EXS:0] fdivs_o; |
wire [EXS:0] fmuls_o; |
wire [EXS:0] fass_o; |
reg [EXS:0] fress; |
wire divUnder,divUnders; |
wire mulUnder,mulUnders; |
reg under,unders; |
|
// These units have a two clock cycle latency |
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) ); |
fpDiv #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) ); |
fpMul #(64) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) ); |
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) ); |
fpDiv #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() ); |
fpMul #(32) u12s(.clk(clk), .ce(pipe_ce), .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) ); |
|
always @(op2,fn2,mulUnder,divUnder,mulUnders,divUnders) |
case (op2) |
`FLOAT: |
case (fn2) |
`FMUL: under = mulUnder; |
`FDIV: under = divUnder; |
`FMULS: unders = mulUnders; |
`FDIVS: unders = divUnders; |
default: begin under = 0; unders = 0; end |
endcase |
default: begin under = 0; unders = 0; end |
endcase |
|
always @(op2,fn2,fas_o,fmul_o,fdiv_o,fass_o,fmuls_o,fdivs_o) |
case (op2) |
`FLOAT: |
case(fn2) |
`FADD: fres <= fas_o; |
`FSUB: fres <= fas_o; |
`FMUL: fres <= fmul_o; |
`FDIV: fres <= fdiv_o; |
`FADDS: fress <= fass_o; |
`FSUBS: fress <= fass_o; |
`FMULS: fress <= fmuls_o; |
`FDIVS: fress <= fdivs_o; |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
|
// pipeline stage |
// one cycle latency |
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) ); |
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) ); |
|
// pipeline stage |
// one cycle latency |
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rm4), .i(fpn_o), .o(fpu_o) ); |
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rm4), .i(fpns_o), .o(fpus_o) ); |
|
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[63]); |
|
//fix: status should be registered |
assign o = fstat ? { |
rm, |
inexe, |
dbzxe, |
underxe, |
overxe, |
invopxe, |
nsfp, |
|
fractie, |
raz, |
1'b0, |
so & !zero, |
!so & !zero, |
zero, |
inf, |
|
swtx, |
inex, |
dbzx, |
underx, |
overx, |
giopx, |
gx, |
sx, |
|
cvtx, |
sqrtx, |
NaNCmpx, |
infzerox, |
zerozerox, |
infdivx, |
subinfx, |
snanx |
} : 'bz; |
|
assign o = (!fstat & !single) ? |
zl_op ? zld_o : |
loo_op ? lood_o : |
{so,fpu_o[MSB-1:0]} : 'bz; |
assign o = (!fstat & single)? |
zl_op ? zls_o : |
loo_op ? loos_o : |
{so,fpus_o[MSB-1:0]} : 'bz; |
assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0; |
assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 : WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0; |
|
assign subinf = single ? fpus_o[31:0]==`QSUBINFS : WID==64 ? fpu_o[63:0]==`QSUBINF : 0; |
assign infdiv = single ? fpus_o[31:0]==`QINFDIVS : WID==64 ? fpu_o[63:0]==`QINFDIV : 0; |
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0; |
assign infzero = single ? fpus_o[31:0]==`QINFZEROS : WID==64 ? fpu_o[31:0]==`QINFZERO : 0; |
|
assign exception = gx; |
|
endmodule |
|
/verilog/fpUnit/fpNormalize.v
0,0 → 1,168
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpNormalize.v |
- floating point normalization unit |
- one cycle latency (outputs are taken one delay1 stage after the inputs) |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If you do modify the code, please state the origin and |
note that you have modified the code. |
|
NO WARRANTY. |
THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
|
This unit takes a floating point number in an intermediate |
format and normalizes it. No normalization occurs |
for NaN's or infinities. The unit has a one cycle latency. |
|
The mantissa is assumed to start with two whole bits on |
the left. The remaining bits are fractional. |
|
The width of the incoming format is reduced via a generation |
of sticky bit in place of the low order fractional bits. |
|
On an underflowed input, the incoming exponent is assumed |
to be negative. A right shift is needed. |
|
Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256 |
302 LUTs / 166 slices / |
550 LUTs / 291 slices / 89 MHz |
163 LUTs / 93 slices / 113.6 MHz? |
=============================================================== */ |
|
module fpNormalize(clk, ce, under, i, o); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; |
input ce; |
input under; |
input [EX:0] i; // expanded format input |
output [WID+2:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit |
|
// variables |
wire so; |
|
wire so1 = i[EX]; // sign doesn't change |
|
// Since the there are *two* whole digits in the incoming format |
// the number of whole digits needs to be reduced. If the MSB is |
// set, then increment the exponent and no shift is needed. |
wire [EMSB:0] xo; |
wire [EMSB:0] xo1a = i[EX-1:FX+1]; |
wire xInf = &xo1a & !under; |
wire incExp1 = !xInf & i[FX]; |
wire [EMSB:0] xo1 = xo1a + incExp1; |
wire [EMSB:0] xo2; |
wire xInf1 = &xo1; |
|
// If infinity is reached then set the mantissa to zero |
wire gbit = i[FMSB]; |
wire rbit = i[FMSB-1]; |
wire sbit = |i[FMSB-2:0]; |
// shift mantissa left by one to reduce to a single whole digit |
// if there is no exponent increment |
wire [FMSB+3:0] mo; |
wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 : |
incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} : // reduce mantissa size |
{i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0}; // reduce mantissa size |
wire [FMSB+3:0] mo2; |
wire [6:0] leadingZeros2; |
|
|
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) ); |
|
// compensate for leadingZeros delay |
wire xInf2; |
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) ); |
delay1 #(1) d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) ); |
|
// If the exponent underflowed, then the shift direction must be to the |
// right regardless of mantissa bits; the number is denormalized. |
// Otherwise the shift direction must be to the left. |
wire rightOrLeft2; // 0=left,1=right |
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) ); |
|
// Compute how much we want to decrement by |
wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2; |
|
// compute amount to shift right |
// at infinity the exponent can't be incremented, so we can't shift right |
// otherwise it was an underflow situation so the exponent was negative |
// shift amount needs to be negated for shift register |
wire [6:0] rshiftAmt2 = xInf2 ? 0 : -xo2 > FMSB+3 ? FMSB+4 : FMSB+4+xo2; // xo2 is negative ! |
|
|
// sign |
// the output sign is the same as the input sign |
delay1 #(1) d7(.clk(clk), .ce(ce), .i(so1), .o(so) ); |
|
// exponent |
// always @(posedge clk) |
// if (ce) |
assign xo = |
xInf2 ? xo2 : // an infinite exponent is either a NaN or infinity; no need to change |
rightOrLeft2 ? 0 : // on a right shift, the exponent was negative, it's being made to zero |
xo2 - lshiftAmt2; // on a left shift, the exponent can't be decremented below zero |
|
// mantissa |
delay1 #(FMSB+3) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) ); |
|
wire [FMSB+3:0] mo2a; |
shiftAndMask #(FMSB+4) u1 (.op({rightOrLeft2,1'b0}), .a(mo2), .b(rightOrLeft2 ? lshiftAmt2 : rshiftAmt2), .mb(6'd0), .me(FMSB+3), .o(mo2a) ); |
|
// always @(posedge clk) |
// if (ce) |
assign mo = mo2a;//rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2; |
|
assign o = {so,xo,mo}; |
|
endmodule |
|
/verilog/fpUnit/fpAddsub.v
0,0 → 1,226
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpAddsub.v |
- floating point adder/subtracter |
- two cycle latency |
- can issue every clock cycle |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If you do modify the code, please state the origin and |
note that you have modified the code. |
|
NO WARRANTY. |
THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
This adder/subtractor handles denormalized numbers. |
It has a two cycle latency. |
The output format is of an internal expanded representation |
in preparation to be fed into a normalization unit, then |
rounding. Basically, it's the same as the regular format |
except the mantissa is doubled in size, the leading two |
bits of which are assumed to be whole bits. |
|
Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256 |
580 LUTS / 315 slices / 74 MHz |
=============================================================== */ |
|
module fpAddsub(clk, ce, rm, op, a, b, o); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; // system clock |
input ce; // core clock enable |
input [1:0] rm; // rounding mode |
input op; // operation 0 = add, 1 = subtract |
input [WID-1:0] a; // operand a |
input [WID-1:0] b; // operand b |
output [EX:0] o; // output |
|
|
// variables |
wire so; // sign output |
wire [EMSB:0] xo; // de normalized exponent output |
reg [EMSB:0] xo1; // de normalized exponent output |
wire [FX:0] mo; // mantissa output |
reg [FX:0] mo1; // mantissa output |
|
assign o = {so,xo,mo}; |
|
// operands sign,exponent,mantissa |
wire sa, sb; |
wire [EMSB:0] xa, xb; |
wire [FMSB:0] ma, mb; |
wire [FMSB+1:0] fracta, fractb; |
wire [FMSB+1:0] fracta1, fractb1; |
|
// which has greater magnitude ? Used for sign calc |
wire xa_gt_xb = xa > xb; |
wire xa_gt_xb1; |
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb); |
wire a_gt_b1; |
wire az, bz; // operand a,b is zero |
|
wire adn, bdn; // a,b denormalized ? |
wire xaInf, xbInf; |
wire aInf, bInf, aInf1, bInf1; |
wire aNan, bNan, aNan1, bNan1; |
|
wire [EMSB:0] xad = xa|adn; // operand a exponent, compensated for denormalized numbers |
wire [EMSB:0] xbd = xb|bdn; // operand b exponent, compensated for denormalized numbers |
|
fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) ); |
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) ); |
|
// Figure out which operation is really needed: an add or a |
// subtract? |
// If the signs are the same, use the original op; |
// otherwise flip the operation. |
// a + b = add,+ |
// a + -b = sub, so of larger |
// -a + b = sub, so of larger |
// -a + -b = add,- |
// a - b = sub, so of larger |
// a - -b = add,+ |
// -a - b = add,- |
// -a - -b = sub, so of larger |
wire realOp = op ^ sa ^ sb; |
wire realOp1; |
wire op1; |
|
// Find out if the result will be zero. |
wire resZero = (realOp && xa==xb && ma==mb) || // subtract, same magnitude |
(az & bz); // both a,b zero |
|
// Compute output exponent |
// |
// The output exponent is the larger of the two exponents, |
// unless a subtract operation is in progress and the two |
// numbers are equal, in which case the exponent should be |
// zero. |
|
always @(xaInf,xbInf,resZero,xa,xb,xa_gt_xb) |
xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb; |
|
// Compute output sign |
reg so1; |
always @* |
case ({resZero,sa,op,sb}) // synopsys full_case parallel_case |
4'b0000: so1 <= 0; // + + + = + |
4'b0001: so1 <= !a_gt_b; // + + - = sign of larger |
4'b0010: so1 <= !a_gt_b; // + - + = sign of larger |
4'b0011: so1 <= 0; // + - - = + |
4'b0100: so1 <= a_gt_b; // - + + = sign of larger |
4'b0101: so1 <= 1; // - + - = - |
4'b0110: so1 <= 1; // - - + = - |
4'b0111: so1 <= a_gt_b; // - - - = sign of larger |
4'b1000: so1 <= 0; // A + B, sign = + |
4'b1001: so1 <= rm==3; // A + -B, sign = + unless rounding down |
4'b1010: so1 <= rm==3; // A - B, sign = + unless rounding down |
4'b1011: so1 <= 0; // +A - -B, sign = + |
4'b1100: so1 <= rm==3; // -A + B, sign = + unless rounding down |
4'b1101: so1 <= 1; // -A + -B, sign = - |
4'b1110: so1 <= 1; // -A - +B, sign = - |
4'b1111: so1 <= rm==3; // -A - -B, sign = + unless rounding down |
endcase |
|
delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) ); |
delay2 #(1) d2(.clk(clk), .ce(ce), .i(so1), .o(so) ); |
|
// Compute the difference in exponents, provides shift amount |
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad; |
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff; |
wire [6:0] xdif1; |
|
// determine which fraction to denormalize |
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta; |
wire [FMSB+1:0] mfs1; |
|
// Determine the sticky bit |
wire sticky, sticky1; |
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
|
// register inputs to shifter and shift |
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) ); |
delay1 #(7) d15(.clk(clk), .ce(ce), .i(xdif), .o(xdif1) ); |
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs), .o(mfs1) ); |
|
wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1; |
|
// sync control signals |
delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) ); |
delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) ); |
delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) ); |
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) ); |
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) ); |
delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) ); |
delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) ); |
delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) ); |
delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) ); |
delay1 #(1) d11(.clk(clk), .ce(ce), .i(op), .o(op1) ); |
|
// Sort operands and perform add/subtract |
// addition can generate an extra bit, subtract can't go negative |
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1; |
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0}; |
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob; |
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa; |
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb; |
|
always @* |
casex({aInf1&bInf1,aNan1,bNan1}) |
3'b1xx: mo1 = {1'b0,op1,{FMSB-1{1'b0}},op1,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add |
3'bx1x: mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}}; |
3'bxx1: mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}}; |
default: mo1 = {mab,{FMSB-2{1'b0}}}; // mab has an extra lead bit and two trailing bits |
endcase |
|
delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) ); |
|
endmodule |
|
/verilog/fpUnit/fpDiv.v
0,0 → 1,176
/* =============================================================== |
(C) 2006 Robert Finch |
All rights reserved. |
rob@birdcomputer.ca |
|
fpDiv.v |
- floating point divider |
- parameterized width |
- IEEE 754 representation |
|
This source code is free for use and modification for |
non-commercial or evaluation purposes, provided this |
copyright statement and disclaimer remains present in |
the file. |
|
If you do modify the code, please state the origin and |
note that you have modified the code. |
|
NO WARRANTY. |
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF |
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume |
the entire risk of using the Work. |
|
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES |
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR |
RELATIONSHIP WITH THE AUTHOR. |
|
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU |
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE |
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED |
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS |
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, |
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS |
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED |
USE. |
|
This multiplier/divider handles denormalized numbers. |
The output format is of an internal expanded representation |
in preparation to be fed into a normalization unit, then |
rounding. Basically, it's the same as the regular format |
except the mantissa is doubled in size, the leading two |
bits of which are assumed to be whole bits. |
|
|
Floating Point Multiplier / Divider |
|
Properties: |
+-inf * +-inf = -+inf (this is handled by exOver) |
+-inf * 0 = QNaN |
+-0 / +-0 = QNaN |
|
Ref: Webpack8.2i Spartan3-4 xc3s1000-4ft256 |
316 LUTS / 174 slices / 49.7 MHz |
=============================================================== */ |
|
// ----------------------------------------------------------------------------
// fpDiv - floating point divider (a / b), expanded-format output.
//
// Parameters:
//   WID       - width in bits of the operands a and b (default 32).
//
// Ports:
//   clk       - clock; the output registers update on the rising edge.
//   ce        - clock enable for the output registers in this module.
//   ld        - forwarded to the ld input of the fpdivr8 fraction divider
//               (presumably starts a divide - confirm against fpdivr8).
//   a, b      - operands; a supplies the dividend fraction and b the divisor
//               fraction, and the result exponent is derived from xa - xb.
//   o         - {sign, exponent[EMSB:0], mantissa[FX:0]} in the expanded
//               internal format intended for a later normalize/round stage.
//   done      - driven by fpdivr8; outputs are captured only while ce and
//               done are both high.
//   sign_exe  - registered (sign of a) AND (sign of b).
//   overflow  - registered exponent overflow indication.
//   underflow - registered exponent underflow (negative exponent) indication.
// ----------------------------------------------------------------------------
module fpDiv(clk, ce, ld, a, b, o, done, sign_exe, overflow, underflow);

parameter WID = 32;
localparam MSB = WID-1;
// MSB index of the exponent field for each supported operand width
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
// MSB index of the stored fraction field for each supported operand width
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;	// the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;	// the MSB of the expanded output {so,xo,mo}

input clk;
input ce;
input ld;
input [MSB:0] a, b;
output [EX:0] o;
output done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe;
reg overflow;
reg underflow;

reg so;				// result sign
reg [EMSB:0] xo;	// result exponent
reg [FX:0] mo;		// result mantissa (expanded)
assign o = {so,xo,mo};

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};	// infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h3ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};	//2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};

// variables
wire [EMSB+2:0] ex1;	// biased difference of exponents, two guard bits on top
wire [FX:0] divo;		// quotient from the fraction divider

// Operands
wire sa, sb;			// sign bit
wire [EMSB:0] xa, xb;	// exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;		// a/b is denormalized
wire az, bz;			// a/b is zero
wire aInf, bInf;		// a/b is infinite


// -----------------------------------------------------------
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) );

// Compute the exponent.
//	- correct the exponent for denormalized operands
//	- adjust the difference by the bias (add 127 for an eight bit exponent)
//	- also factor in the different decimal position for division
assign ex1 = (xa|a_dn) - (xb|b_dn) + bias + FMSB-1;

// check for exponent underflow/overflow
wire under = ex1[EMSB+2];	// MSB set = negative exponent
// overflow: non-negative and either all ones in the exponent field or a
// carry into the first guard bit
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// Perform divide
// could take either 1 or 16 clock cycles
// NOTE(review): ce is not connected to fpdivr8 here - confirm that is intended.
fpdivr8 #(WID) u2 (.clk(clk), .ld(ld), .a(fracta), .b(fractb), .q(divo), .r(), .done(done));

// determine when a NaN is output: 0/0 or inf/inf
wire qNaNOut = (az&bz)|(aInf&bInf);

// Capture the result when the divider reports done.
// Nonblocking assignments are used because these are clocked registers;
// none of them is read back inside this block, so the captured values are
// the same as with the previous blocking assignments.
// casez with '?' keeps the priority decode (NaN, then divide by infinity,
// then divide by zero) without letting an unknown selector bit act as a
// wildcard the way casex does.
always @(posedge clk)
	if (ce) begin
		if (done) begin
			casez({qNaNOut,bInf,bz})
			3'b1??:		xo <= infXp;	// NaN exponent value
			3'b?1?:		xo <= 0;		// divide by inf
			3'b??1:		xo <= infXp;	// divide by zero
			default:	xo <= ex1;		// normal or underflow: passthru neg. exp. for normalization
			endcase

			casez({qNaNOut,bInf,bz})
			// quiet NaN; the low two fraction bits record the cause:
			// {aInf,1'b0} for inf/inf, {az,bz} for 0/0
			3'b1??:		mo <= {1'b0,qNaN[FMSB:0]|{aInf,1'b0}|{az,bz},{FMSB+1{1'b0}}};
			3'b?1?:		mo <= 0;	// div by inf
			3'b??1:		mo <= 0;	// div by zero
			default:	mo <= divo;	// plain div
			endcase

			so  		<= sa ^ sb;
			sign_exe 	<= sa & sb;
			overflow	<= over;
			underflow 	<= under;
		end
	end

endmodule