OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /thor/trunk
    from Rev 5 to Rev 6
    Reverse comparison

Rev 5 → Rev 6

/rtl/verilog/fpUnit/fpDecompReg.v
0,0 → 1,157
/* ============================================================================
(C) 2006, 2007 Robert T Finch
All rights reserved.
rob@birdcomputer.ca
 
fpDecompReg.v
- decompose floating point value with registered outputs
- parameterized width
 
Verilog 1995
 
This source code is free for use and modification for non-commercial or
evaluation purposes, provided this copyright statement and disclaimer
remains present in the file.
 
If the code is modified, please state the origin and note that the code
has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF ANY KIND, WHETHER
EXPRESS OR IMPLIED. The user must assume the entire risk of using the
Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES WHATSOEVER RELATING TO
THE USE OF THIS WORK, OR YOUR RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU TO USE THE WORK
IN APPLICATIONS OR SYSTEMS WHERE THE WORK'S FAILURE TO PERFORM CAN
REASONABLY BE EXPECTED TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN
LOSS OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, AND YOU
AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS FROM ANY CLAIMS OR
LOSSES RELATING TO SUCH UNAUTHORIZED USE.
 
 
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256
10 slices / 20 LUTs / 12 ns (32 bits)
 
============================================================================ */
 
// ----------------------------------------------------------------------------
// fpDecomp
//   Purely combinational splitter for a WID-bit floating point word.
//
//   i      packed input: sign at bit MSB, exponent in [MSB-1:FMSB+1],
//          mantissa in [FMSB:0]
//   sgn    sign bit
//   exp    exponent field
//   man    mantissa field as stored
//   fract  mantissa with a leading bit prepended; that bit is 1 whenever the
//          exponent field is non-zero
//   xz     exponent field is all zeros
//   mz     mantissa field is all zeros
//   vz     exponent and mantissa are both all zeros
//   xinf   exponent field is all ones
//   inf    exponent all ones and mantissa zero
//   nan    exponent all ones and mantissa non-zero
//   qnan   exponent all ones and top mantissa bit set
//   snan   exponent all ones, top mantissa bit clear, mantissa non-zero
// ----------------------------------------------------------------------------
module fpDecomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

    parameter WID=32;

    // Field boundaries derived from the total width.
    localparam MSB = WID-1;
    localparam EMSB =
        (WID==80) ? 14 :
        (WID==64) ? 10 :
        (WID==52) ? 10 :
        (WID==48) ? 10 :
        (WID==44) ? 10 :
        (WID==42) ? 10 :
        (WID==40) ?  9 :
        (WID==32) ?  7 :
        (WID==24) ?  6 : 4;
    localparam FMSB =
        (WID==80) ? 63 :
        (WID==64) ? 51 :
        (WID==52) ? 39 :
        (WID==48) ? 35 :
        (WID==44) ? 31 :
        (WID==42) ? 29 :
        (WID==40) ? 28 :
        (WID==32) ? 22 :
        (WID==24) ? 15 : 9;

    input  [MSB:0]    i;

    output            sgn;
    output [EMSB:0]   exp;
    output [FMSB:0]   man;
    output [FMSB+1:0] fract;
    output            xz;
    output            mz;
    output            vz;
    output            inf;
    output            xinf;
    output            qnan;
    output            snan;
    output            nan;

    // Raw field extraction.
    assign sgn = i[MSB];
    assign exp = i[MSB-1:FMSB+1];
    assign man = i[FMSB:0];

    // Reductions shared by the classification flags below.
    wire expAllOnes = &exp;
    wire expAnyOne  = |exp;
    wire manAnyOne  = |man;

    // Zero detection.
    assign xz = ~expAnyOne;
    assign mz = ~manAnyOne;
    assign vz = ~(expAnyOne | manAnyOne);

    // Special-value classification (all require an all-ones exponent).
    assign xinf = expAllOnes;
    assign inf  = expAllOnes & ~manAnyOne;
    assign nan  = expAllOnes &  manAnyOne;
    assign qnan = expAllOnes &  man[FMSB];
    assign snan = expAllOnes & ~man[FMSB] & manAnyOne;

    // Leading bit is set unless the exponent field is zero.
    assign fract = {expAnyOne, i[FMSB:0]};

endmodule
 
 
// ----------------------------------------------------------------------------
// fpDecompReg
//   Registered version of the floating point field splitter. On every rising
//   clock edge with ce asserted, the input word and all decoded fields/flags
//   are captured into output registers.
//
//   clk    clock
//   ce     clock enable; outputs hold their value while ce is low
//   i      packed input: sign at bit MSB, exponent in [MSB-1:FMSB+1],
//          mantissa in [FMSB:0]
//   o      registered copy of i
//   sgn    sign bit
//   exp    exponent field
//   man    mantissa field as stored
//   fract  mantissa with a leading bit prepended (1 when exponent non-zero)
//   xz     exponent field is all zeros
//   mz     mantissa field is all zeros
//   vz     exponent and mantissa are both all zeros
//   inf    exponent all ones and mantissa zero
//   xinf   exponent field is all ones
//   qnan   exponent all ones and top mantissa bit set
//   snan   exponent all ones, top mantissa bit clear, mantissa non-zero
//   nan    exponent all ones and mantissa non-zero
//
//   All registers are written with non-blocking assignments so that other
//   clocked processes sampling these outputs on the same edge see the old
//   values deterministically. The next-state values are therefore computed
//   combinationally from i rather than read back from the registers.
// ----------------------------------------------------------------------------
module fpDecompReg(clk, ce, i, o, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

parameter WID=32;

localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

input clk;
input ce;
input [MSB:0] i;

output reg [MSB:0] o;
output reg sgn;
output reg [EMSB:0] exp;
output reg [FMSB:0] man;
output reg [FMSB+1:0] fract;	// mantissa with hidden bit recovered
output reg xz;		// denormalized - exponent is zero
output reg mz;		// mantissa is zero
output reg vz;		// value is zero (both exponent and mantissa are zero)
output reg inf;		// all ones exponent, zero mantissa
output reg xinf;	// all ones exponent
output reg qnan;	// quiet nan
output reg snan;	// signalling nan
output reg nan;

// Next-state values, decoded directly from the input word.
wire [EMSB:0] expNext = i[MSB-1:FMSB+1];
wire [FMSB:0] manNext = i[FMSB:0];
wire xzNext   = !(|expNext);
wire mzNext   = !(|manNext);
wire xinfNext = &expNext;

// Capture the decomposed input
always @(posedge clk)
	if (ce) begin
		o     <= i;
		sgn   <= i[MSB];
		exp   <= expNext;
		man   <= manNext;
		xz    <= xzNext;
		mz    <= mzNext;
		vz    <= xzNext & mzNext;
		inf   <= xinfNext & mzNext;
		xinf  <= xinfNext;
		qnan  <= xinfNext & manNext[FMSB];
		snan  <= xinfNext & !manNext[FMSB] & !mzNext;
		nan   <= xinfNext & !mzNext;
		fract <= {|expNext,i[FMSB:0]};
	end

endmodule
/rtl/verilog/fpUnit/fpZLUnit.v
0,0 → 1,113
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2007,2014,2015 Robert Finch, Stratford
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpZLUnit.v
// - zero latency floating point unit
// - instructions can execute in a single cycle without
// a clock
// - parameterized width
// - IEEE 754 representation
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// fabs - get absolute value of number
// fnabs - get negative absolute value of number
// fneg - negate number
// fmov - copy input to output
// fsign - get sign of number (set number to +1,0, or -1)
// fman - get mantissa (set exponent to zero)
// fcmp
//
// ============================================================================
 
`include "..\Thor_defines.v"
 
// ----------------------------------------------------------------------------
// fpZLUnit
//   Zero latency (purely combinational) floating point unit.
//
//   op    major opcode; selects between the `DOUBLE_R, `SINGLE_R and `FLOAT
//         groups (values come from Thor_defines.v)
//   fn    function code within the selected group
//   a     operand, bits numbered [WID:1]
//   b     second operand, used only by the compare operations
//   o     result; 0 for any op/fn combination not decoded below
//   nanx  nan flag taken from the single precision comparator for
//         `FLOAT/`FCMPS and from the double precision comparator otherwise
//
//   Operations: abs, negative abs, negate, move, sign (+1/0/-1), mantissa
//   extraction (exponent forced to the bias value) and compare.
//
//   NOTE(review): the double precision paths index a[64:1] and instantiate a
//   64-bit comparator on a/b, so they presume WID==64 even though the
//   parameter defaults to 32 - confirm how this module is instantiated.
// ----------------------------------------------------------------------------
module fpZLUnit
#(parameter WID=32)
(
	input [7:0] op,
	input [5:0] fn,
	input [WID:1] a,
	input [WID:1] b,	// for fcmp
	output reg [WID:1] o,
	output nanx
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

wire nanxd,nanxs;
wire single = op==`SINGLE_R;
// Operand is zero when every bit below the sign bit is clear.
wire az = single ? a[31:1]==0 : WID==64 ? a[63:1]==0 : 0;
wire [3:0] cmp_o,cmps_o;
assign nanx = op==`FLOAT && fn==`FCMPS ? nanxs : nanxd;

fp_cmp_unit #(64) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanxd) );
fp_cmp_unit #(32) u2 (.a(a[32:1]), .b(b[32:1]), .o(cmps_o), .nanx(nanxs) );

// Result multiplexer.
// Uses @* so that every input (including fn) re-triggers evaluation, and
// blocking assignments because this is combinational logic. Every path
// assigns o so that no latch is inferred.
always @*
	case (op)
	`DOUBLE_R:
		if (WID==64)
			case(fn)
			`FABS:	o = {1'b0,a[63:1]};		// fabs
			`FNABS:	o = {1'b1,a[63:1]};		// fnabs
			`FNEG:	o = {~a[64],a[63:1]};	// fneg
			`FMOV:	o = a;					// fmov
			`FSIGN:	o = az ? 0 : {a[64],1'b0,{10{1'b1}},{52{1'b0}}};	// fsign
			// sign + 11-bit bias exponent + full 52-bit mantissa = 64 bits
			`FMAN:	o = {a[64],1'b0,{10{1'b1}},a[52:1]};	// fman
			default:	o = 0;
			endcase
		else
			o = 0;	// double precision ops are unavailable unless WID==64
	`SINGLE_R:
		case(fn)
		`FABSS:	o = {1'b0,a[31:1]};		// fabs
		`FNABSS:	o = {1'b1,a[31:1]};		// fnabs
		`FNEGS:	o = {~a[32],a[31:1]};	// fneg
		`FMOVS:	o = a;					// fmov
		`FSIGNS:	o = az ? 0 : {a[32],1'b0,{7{1'b1}},{23{1'b0}}};	// fsign
		// sign + 8-bit bias exponent + 23-bit mantissa = 32 bits
		`FMANS:	o = {a[32],1'b0,{7{1'b1}},a[23:1]};	// fman
		default:	o = 0;
		endcase
	`FLOAT:
		case(fn)
		`FCMP:	o = cmp_o;
		`FCMPS:	o = cmps_o;
		default:	o = 0;
		endcase
	default:	o = 0;
	endcase

endmodule
/rtl/verilog/fpUnit/fp_decomp.v
0,0 → 1,97
/* ============================================================================
(C) 2006, 2007 Robert T Finch
All rights reserved.
rob@birdcomputer.ca
 
fp_decomp.v
- decompose floating point value
- parameterized width
 
 
Verilog 1995
 
This source code is free for use and modification for non-commercial or
evaluation purposes, provided this copyright statement and disclaimer
remains present in the file.
 
If the code is modified, please state the origin and note that the code
has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF ANY KIND, WHETHER
EXPRESS OR IMPLIED. The user must assume the entire risk of using the
Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES WHATSOEVER RELATING TO
THE USE OF THIS WORK, OR YOUR RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU TO USE THE WORK
IN APPLICATIONS OR SYSTEMS WHERE THE WORK'S FAILURE TO PERFORM CAN
REASONABLY BE EXPECTED TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN
LOSS OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, AND YOU
AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS FROM ANY CLAIMS OR
LOSSES RELATING TO SUCH UNAUTHORIZED USE.
 
 
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256
10 slices / 20 LUTs / 12 ns (32 bits)
 
============================================================================ */
 
// ----------------------------------------------------------------------------
// fp_decomp
//   Combinational decomposition of a WID-bit floating point word into its
//   sign, exponent and mantissa fields, plus a set of classification flags.
//
//   i      packed input (sign at MSB, exponent [MSB-1:FMSB+1], mantissa [FMSB:0])
//   sgn    sign bit
//   exp    exponent field
//   man    stored mantissa field
//   fract  {exponent-non-zero, mantissa}
//   xz     exponent == 0
//   mz     mantissa == 0
//   vz     exponent == 0 and mantissa == 0
//   inf    exponent all ones, mantissa == 0
//   xinf   exponent all ones
//   qnan   exponent all ones, mantissa MSB set
//   snan   exponent all ones, mantissa MSB clear, mantissa != 0
//   nan    exponent all ones, mantissa != 0
// ----------------------------------------------------------------------------
module fp_decomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);

    parameter WID=32;

    localparam MSB  = WID-1;
    // Exponent MSB index for each supported width.
    localparam EMSB = (WID==80) ? 14 :
                      (WID==64) ? 10 :
                      (WID==52) ? 10 :
                      (WID==48) ? 10 :
                      (WID==44) ? 10 :
                      (WID==42) ? 10 :
                      (WID==40) ?  9 :
                      (WID==32) ?  7 :
                      (WID==24) ?  6 : 4;
    // Mantissa MSB index for each supported width.
    localparam FMSB = (WID==80) ? 63 :
                      (WID==64) ? 51 :
                      (WID==52) ? 39 :
                      (WID==48) ? 35 :
                      (WID==44) ? 31 :
                      (WID==42) ? 29 :
                      (WID==40) ? 28 :
                      (WID==32) ? 22 :
                      (WID==24) ? 15 : 9;

    input [MSB:0] i;

    output sgn;
    output [EMSB:0] exp;
    output [FMSB:0] man;
    output [FMSB+1:0] fract;
    output xz, mz, vz;
    output inf, xinf;
    output qnan, snan, nan;

    // Field extraction.
    assign {sgn, exp, man} = {i[MSB], i[MSB-1:FMSB+1], i[FMSB:0]};

    // Zero tests expressed as NOR reductions.
    assign xz = ~|exp;
    assign mz = ~|man;
    assign vz = ~((|exp) | (|man));

    // Everything below keys off an all-ones exponent.
    assign xinf = &exp;
    assign inf  = (&exp) & (~|man);
    assign nan  = (&exp) & (|man);
    assign qnan = (&exp) & man[FMSB];
    assign snan = (&exp) & (|man) & ~man[FMSB];

    // Prepend the implied leading bit: present unless the exponent is zero.
    assign fract = {|exp, i[FMSB:0]};

endmodule
 
 
/rtl/verilog/fpUnit/fpRound.v
0,0 → 1,168
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpRound.v
- floating point rounding unit
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If the code is modified, please state the origin and
note that the code has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
 
This unit takes a normalized floating point number in an
expanded format and rounds it according to the IEEE-754
standard. NaN's and infinities are not rounded.
This module has a single cycle latency.
 
Mode
0: round to nearest even
1: round to zero (truncate)
2: round towards +infinity
3: round towards -infinity
 
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256
69 slices / 129 LUTS / 21.3 ns (32 bit)
=============================================================== */
 
// ----------------------------------------------------------------------------
// fpRound
//   Combinational rounding stage. Takes a value in an expanded intermediate
//   format and produces a WID-bit result {sign, exponent, mantissa}.
//
//   rm   rounding mode: 0 nearest even, 1 toward zero, 2 toward +infinity,
//        3 toward -infinity
//   i    expanded input: sign at bit MSB+2, exponent taken from
//        [MSB+1:FMSB+4], mantissa taken from [FMSB+3:0]; bits 2, 1 and 0 are
//        used as the guard, round and sticky bits
//   o    rounded result
//
//   No rounding increment is applied when the input exponent is all ones.
// ----------------------------------------------------------------------------
module fpRound(rm, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
 
input [1:0] rm; // rounding mode
input [MSB+2:0] i; // intermediate format input
output [WID-1:0] o; // rounded output
 
//------------------------------------------------------------
// variables
wire so; // output sign
wire [EMSB:0] xo; // output exponent
reg [FMSB:0] mo; // output mantissa
// NOTE(review): for the widths in the table above the slice i[MSB+1:FMSB+4]
// is one bit narrower than xo1 (e.g. 7 bits vs 8 bits at WID=32), so xo1's
// top bit is zero-filled - confirm the intended width of the input format.
wire [EMSB:0] xo1 = i[MSB+1:FMSB+4];
wire [FMSB+3:0] mo1 = i[FMSB+3:0];
wire xInf = &xo1; // input exponent is all ones
wire dn = !(|xo1); // denormalized input
assign o = {so,xo,mo};
 
wire g = i[2]; // guard bit: always the same bit for all operations
wire r = i[1]; // rounding bit
wire s = i[0]; // sticky bit
reg rnd; // 1 when the value must be incremented
 
// Compute the round bit
// Infinities and NaNs are not rounded!
// The case selector is {xInf,rm}: values 0-3 mean xInf is clear and select
// on rm; values 4-7 (xInf set) fall through to the default.
always @(xInf,rm,g,r,s,so)
case ({xInf,rm})
3'd0: rnd = (g & r) | (r & s); // round to nearest even
3'd1: rnd = 0; // round to zero (truncate)
3'd2: rnd = (r | s) & !so; // round towards +infinity
3'd3: rnd = (r | s) & so; // round towards -infinity
default: rnd = 0; // no rounding if exponent indicates infinite or NaN
endcase
 
// round the number, check for carry
// note: inf. exponent checked above (if the exponent was infinite already, then no rounding occurs as rnd = 0)
// note: exponent increments if there is a carry (can only increment to infinity)
// performance note: use the carry chain to increment the exponent
// The exponent and the mantissa (without the round/sticky bits) are
// concatenated so that a mantissa carry ripples into the exponent.
wire [MSB:0] rounded = {xo1,mo1[FMSB+3:2]} + rnd;
// carry: the input mantissa MSB was set and the corresponding bit of the
// rounded value is now clear
wire carry = mo1[FMSB+3] & !rounded[FMSB+1];
 
assign so = i[MSB+2];
assign xo = rounded[MSB:FMSB+2];
 
// Select the output mantissa. Selector is {rnd, exponent-all-ones after
// rounding, carry, denormalized input}.
// NOTE(review): in the two "not rounding" items the right-hand slices are
// FMSB+2 bits wide while mo is FMSB+1 bits wide, so the top bit is dropped
// on assignment - confirm this matches the intended bit alignment.
always @(rnd or xo or carry or dn or rounded or mo1)
casex({rnd,&xo,carry,dn})
4'b0xx0: mo = mo1[FMSB+2:1]; // not rounding, not denormalized, => hide MSB
4'b0xx1: mo = mo1[FMSB+3:2]; // not rounding, denormalized
4'b1000: mo = rounded[FMSB :0]; // exponent didn't change, number was normalized, => hide MSB
4'b1001: mo = rounded[FMSB+1:1]; // exponent didn't change, but number was denormalized, => retain MSB
4'b1010: mo = rounded[FMSB+1:1]; // exponent incremented (new MSB generated), number was normalized, => hide 'extra (FMSB+2)' MSB
4'b1011: mo = rounded[FMSB+1:1]; // exponent incremented (new MSB generated), number was denormalized, number became normalized, => hide 'extra (FMSB+2)' MSB
4'b11xx: mo = 0; // number became infinite, no need to check carry etc., rnd would be zero if input was NaN or infinite
endcase
 
endmodule
 
 
// Round and register the output
 
// ----------------------------------------------------------------------------
// fpRoundReg
//   Wraps the combinational fpRound unit with an output register.
//
//   clk  clock
//   ce   clock enable; o holds its value while ce is low
//   rm   rounding mode, passed straight to fpRound
//   i    expanded format input, passed straight to fpRound
//   o    rounded result, updated on the rising clock edge when ce is set
// ----------------------------------------------------------------------------
module fpRoundReg(clk, ce, rm, i, o);
    parameter WID = 32;

    localparam MSB  = WID-1;
    localparam EMSB = (WID==80) ? 14 :
                      (WID==64) ? 10 :
                      (WID==52) ? 10 :
                      (WID==48) ? 10 :
                      (WID==44) ? 10 :
                      (WID==42) ? 10 :
                      (WID==40) ?  9 :
                      (WID==32) ?  7 :
                      (WID==24) ?  6 : 4;
    localparam FMSB = (WID==80) ? 63 :
                      (WID==64) ? 51 :
                      (WID==52) ? 39 :
                      (WID==48) ? 35 :
                      (WID==44) ? 31 :
                      (WID==42) ? 29 :
                      (WID==40) ? 28 :
                      (WID==32) ? 22 :
                      (WID==24) ? 15 : 9;

    input               clk;
    input               ce;
    input  [1:0]        rm;
    input  [MSB+2:0]    i;
    output [WID-1:0]    o;
    reg    [WID-1:0]    o;

    // Combinational rounding result, registered below.
    wire [WID-1:0] roundedComb;

    fpRound #(WID) u1 (
        .rm (rm),
        .i  (i),
        .o  (roundedComb)
    );

    always @(posedge clk) begin
        if (ce) begin
            o <= roundedComb;
        end
    end

endmodule
/rtl/verilog/fpUnit/fpLOOUnit.v
0,0 → 1,110
/* ===============================================================
(C) 2006,2015 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpLOOUnit.v
- 'latency of one' floating point unit
- instructions can execute using a single cycle
- issue rate is one per clock cycle
- latency is one clock cycle
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If the code is modified, please state the origin and
note that the code has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
 
i2f - convert integer to floating point
f2i - convert floating point to integer
 
Ref: Webpack 8.1i Spartan3-4 xc3s1000 4ft256
61 LUTS / 34 slices / 16 ns
=============================================================== */
`include "..\Thor_defines.v"
 
// ----------------------------------------------------------------------------
// fpLOOUnit
//   'Latency of one' floating point unit: integer <-> float conversions.
//
//   clk   clock
//   ce    clock enable, passed to every sub-unit
//   rm    rounding mode, used by the integer-to-float converters
//   op    major opcode (`DOUBLE_R / `SINGLE_R, from Thor_defines.v)
//   fn    function code within the selected group
//   a     operand, bits numbered [WID:1]
//   o     conversion result selected by op/fn; 0 when nothing matches
//   done  output of a one-stage delay element fed by the opcode decode
//
//   The 64-bit and 32-bit converters all run in parallel; op/fn only steer
//   the output multiplexer.
// ----------------------------------------------------------------------------
module fpLOOUnit
#(parameter WID=32)
(
	input clk,
	input ce,
	input [1:0] rm,
	input [7:0] op,
	input [5:0] fn,
	input [WID:1] a,
	output reg [WID:1] o,
	output done
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

wire [64:1] i2f_o;
wire [64:1] f2i_o;
wire [32:1] i2fs_o;
wire [32:1] f2is_o;

// NOTE(review): `ITOF/`FTOI are compared against op here but against fn in
// the multiplexer below - confirm which field the done decode should use.
delay1 u1 (.clk(clk), .ce(ce), .i(op==`ITOF||op==`FTOI), .o(done) );
i2f #(64) i2f0 (.clk(clk), .ce(ce), .rm(rm), .i(a), .o(i2f_o) );
f2i #(64) f2i0 (.clk(clk), .ce(ce), .i(a), .o(f2i_o) );
i2f #(32) i2fs (.clk(clk), .ce(ce), .rm(rm), .i(a[32:1]), .o(i2fs_o) );
f2i #(32) f2is (.clk(clk), .ce(ce), .i(a[32:1]), .o(f2is_o) );

// Output multiplexer.
// @* makes the block sensitive to every signal it reads (fn and the single
// precision results were previously missing), and blocking assignments are
// used because this is combinational logic.
always @*
	case (op)
	`DOUBLE_R:
		case(fn)
		`ITOF:   o = i2f_o;
		`FTOI:   o = f2i_o;
		default: o = 0;
		endcase
	`SINGLE_R:
		case(fn)
		`ITOFS:  o = i2fs_o;
		`FTOIS:  o = f2is_o;
		default: o = 0;
		endcase
	default:   o = 0;
	endcase

endmodule
/rtl/verilog/fpUnit/f2i.v
0,0 → 1,129
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
f2i.v
- convert floating point to integer
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If the code is modified, please state the origin and
note that the code has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
- pipelinable
- one cycle latency
 
Ref: Spartan3-4
212 LUTs / 135 slices / (28.2 ns no clock)
=============================================================== */
 
// ----------------------------------------------------------------------------
// f2i
//   Convert a WID-bit floating point value to a WID-bit signed integer.
//
//   clk       clock
//   ce        clock enable for the result and sign registers
//   i         floating point input
//   o         integer result; the magnitude and sign are registered, the
//             final negation is combinational
//   overflow  combinational flag: the unbiased exponent exceeds MSB
//
//   Result selection, in priority order:
//     - magnitude below 1/2, or zero          -> 0
//     - overflow                              -> maximum positive integer
//     - magnitude in [1/2, 1)                 -> 1
//     - otherwise                             -> shifted mantissa, rounded up
//                                                when the first dropped bit
//                                                is set
//   NOTE(review): overflow is derived from the current input while o reflects
//   the input captured on the previous enabled clock edge - confirm callers
//   expect that timing.
// ----------------------------------------------------------------------------
module f2i
#(	parameter WID = 32)
(
	input clk,
	input ce,
	input [WID-1:0] i,
	output [WID-1:0] o,
	output overflow
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;


wire [MSB:0] maxInt  = {MSB{1'b1}};		// maximum positive signed integer value (MSB clear, all other bits set)
wire [EMSB:0] zeroXp = {EMSB{1'b1}};	// simple constant - value of exp for zero

// Decompose fp value
reg sgn;									// sign
always @(posedge clk)
	if (ce) sgn <= i[MSB];					// non-blocking: sequential logic
wire [EMSB:0] exp = i[MSB-1:FMSB+1];		// exponent
wire [FMSB+1:0] man = {exp!=0,i[FMSB:0]};	// mantissa including recreate hidden bit

wire iz = i[MSB-1:0]==0;					// zero value (special)

// lots of numbers are too big - don't forget one less bit is available due to signed values
// The exp >= zeroXp guard is required: the subtraction is unsigned, so
// without it every exponent below the bias wraps to a huge value and would
// flag overflow for all magnitudes below 1, including zero.
assign overflow = (exp >= zeroXp) && ((exp - zeroXp) > MSB);
wire underflow = exp < zeroXp - 1;			// value less than 1/2

wire [6:0] shamt = MSB - (exp - zeroXp);	// exp - zeroXp will be <= MSB

wire [MSB+1:0] o1 = {man,{EMSB+1{1'b0}},1'b0} >> shamt;	// keep an extra bit for rounding
wire [MSB:0] o2 = o1[MSB+1:1] + o1[0];		// round up
reg [MSB:0] o3;

always @(posedge clk)
	if (ce) begin
		if (underflow|iz)
			o3 <= 0;
		else if (overflow)
			o3 <= maxInt;
		// value between 1/2 and 1 - round up
		else if (exp==zeroXp-1)
			o3 <= 1;
		// value > 1
		else
			o3 <= o2;
	end
assign o = sgn ? -o3 : o3;					// adjust output for correct signed value

endmodule
 
// Minimal bench for f2i: a free-running clock drives two 32-bit converters
// fed with constant inputs (1.0 and 0.0). Results are left for waveform
// inspection; the bench performs no checks and never ends the simulation.
module f2i_tb();

    reg         clk;
    wire [31:0] io0, io1;
    wire        ov0, ov1;

    // Clock starts low and toggles every 10 time units.
    initial clk = 0;
    always #10 clk = ~clk;

    // 32'h3F800000 is 1.0 in IEEE single precision.
    f2i #(32) u1 (
        .clk      (clk),
        .ce       (1'b1),
        .i        (32'h3F800000),
        .o        (io1),
        .overflow (ov1)
    );

    // All-zero input.
    f2i #(32) u2 (
        .clk      (clk),
        .ce       (1'b1),
        .i        (32'h00000000),
        .o        (io0),
        .overflow (ov0)
    );

endmodule
/rtl/verilog/fpUnit/i2f.v
0,0 → 1,148
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
i2f.v
- convert integer to floating point
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If the code is modified, please state the origin and
note that the code has been modified.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
- pipelinable
- single stage latency
 
Ref: Spartan3-4
267 LUTs / 167 slices / 20? ns (32 bits)
=============================================================== */
 
// ----------------------------------------------------------------------------
// i2f
//   Convert a WID-bit signed integer to a WID-bit floating point value.
//
//   clk  clock
//   ce   clock enable for the input registers
//   rm   rounding mode: 0 nearest even, 1 toward zero, 2 toward +infinity,
//        3 toward -infinity
//   i    signed integer input
//   o    floating point result {sign, exponent, fraction}
//
//   The rounding mode, zero flag, magnitude, sign and leading-zero count are
//   each passed through a one-stage register element; everything after that
//   is combinational.
//
//   Rounding works on the left-aligned magnitude simag:
//     simag[MSB]            leading one (hidden bit, not stored)
//     simag[MSB-1:EMSB+1]   retained fraction
//     simag[EMSB]           first discarded bit (round bit)
//     simag[EMSB-1:0]       remaining discarded bits (sticky)
//   The increment is added to {exponent, fraction} as one value so that a
//   carry out of an all-ones fraction bumps the exponent.
// ----------------------------------------------------------------------------
module i2f
#(	parameter WID = 32)
(
	input clk,
	input ce,
	input [1:0] rm,			// rounding mode
	input [WID-1:0] i,		// integer input
	output [WID-1:0] o		// float output
);
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

wire [EMSB:0] zeroXp = {EMSB{1'b1}};	// exponent value representing 2^0

wire iz;			// zero input ?
wire [MSB:0] imag;	// get magnitude of i
wire [MSB:0] imag1 = i[MSB] ? -i : i;
wire [6:0] lz;		// count the leading zeros in the number
wire [EMSB:0] wd;	// compute number of whole digits
wire so;			// copy the sign of the input (easy)
wire [1:0] rmd;

delay1 #(2)   u0 (.clk(clk), .ce(ce), .i(rm),     .o(rmd) );
delay1 #(1)   u1 (.clk(clk), .ce(ce), .i(i==0),   .o(iz) );
delay1 #(WID) u2 (.clk(clk), .ce(ce), .i(imag1),  .o(imag) );
delay1 #(1)   u3 (.clk(clk), .ce(ce), .i(i[MSB]), .o(so) );
generate
if (WID==64) begin
cntlz64Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
end else begin
cntlz32Reg    u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
assign lz[6]=1'b0;
end
endgenerate

assign wd = zeroXp - 1 + WID - lz;	// constant except for lz

wire [EMSB:0] xo = iz ? 0 : wd;
wire [MSB:0] simag = imag << lz;		// left align number

// Retained fraction before rounding (the leading one at simag[MSB] is hidden).
wire [FMSB:0] mt = simag[MSB-1:EMSB+1];

wire g =  simag[EMSB+1];	// guard bit (lsb of the retained fraction)
wire r =  simag[EMSB];		// rounding bit (first discarded bit)
wire s = |simag[EMSB-1:0];	// "sticky" bit (any lower discarded bit set)
reg rnd;

// Compute the round bit
always @(rmd,g,r,s,so)
	case (rmd)
	2'd0:	rnd = (g & r) | (r & s);	// round to nearest even
	2'd1:	rnd = 0;					// round to zero (truncate)
	2'd2:	rnd = (r | s) & !so;		// round towards +infinity
	2'd3:	rnd = (r | s) & so;			// round towards -infinity
	endcase

// round the result
// exponent and fraction are incremented together so a fraction overflow
// carries into the exponent instead of being lost
wire [EMSB+FMSB+1:0] xm = {xo,mt} + rnd;

assign o = {so,xm};

endmodule
 
 
// Minimal bench for i2f: a free-running clock advances an 8-bit counter, and
// the counter value selects the integer presented to a 32-bit converter in
// round-to-nearest-even mode. Results are left for waveform inspection.
module i2f_tb();

    reg         clk;
    reg  [7:0]  cnt;
    wire [31:0] fo;
    reg  [31:0] i;

    initial begin
        clk = 1'b0;
        cnt = 0;
    end

    // Clock toggles every 10 time units.
    always #10 clk = !clk;

    // Step counter.
    always @(posedge clk) begin
        cnt = cnt + 1;
    end

    // Stimulus table: only counts 0 and 1 change the input; any other count
    // leaves it untouched.
    always @(cnt) begin
        if (cnt == 8'd0)
            i <= 32'd0;
        else if (cnt == 8'd1)
            i <= 32'd16777226;
    end

    i2f #(32) u1 (
        .clk (clk),
        .ce  (1),
        .rm  (2'd0),
        .i   (i),
        .o   (fo)
    );

endmodule
/rtl/verilog/fpUnit/fpdivr8.v
0,0 → 1,147
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpdivr8.v
Radix 8 floating point divider primitive
 
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If you do modify the code, please state the origin and
note that you have modified the code.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
 
Performance
Webpack 7.1i xc3s1000-4ft256
222 slices / 410 LUTs / 51.5 MHz
=============================================================== */
 
// ----------------------------------------------------------------------------
// fpdivr8
//   Radix 8 divider primitive: three chained add/subtract stages produce
//   three quotient bits per clock.
//
//   clk   clock
//   ld    load: captures the operands and starts a new division
//   a     dividend
//   b     divisor
//   q     quotient register, 2*WID bits; loaded with {a, zeros} (or the
//         special-case quotient) and shifted left three bits per iteration
//   r     remainder, corrected by adding b back when the last partial
//         remainder is negative
//   done  set when the iteration counter's top bit is set
//
//   The iteration counter is loaded with WID*2/3 and counts down; it wraps
//   below zero, which sets its top bit and raises done. When the special
//   case unit reports a valid result the counter is loaded with its top bit
//   already set so done is raised immediately. The counter is 8 bits wide so
//   that WID*2/3 values up to 127 do not collide with the done bit.
//
//   NOTE(review): sdr is selected onto r when sdval is set but nothing in
//   this module drives it - confirm whether specialCaseDivider should supply
//   a remainder.
// ----------------------------------------------------------------------------
module fpdivr8
#(	parameter WID = 24 )
(
	input clk,
	input ld,
	input [WID-1:0] a,
	input [WID-1:0] b,
	output reg [WID*2-1:0] q,
	output [WID-1:0] r,
	output done
);
localparam DMSB = WID-1;

wire [DMSB:0] rx [2:0];		// remainder holds
reg [DMSB:0] rxx;
reg [7:0] cnt;				// iteration count; bit 7 doubles as the done flag
wire [DMSB:0] sdq;
wire [DMSB:0] sdr;
wire sdval;
wire sddbz;
specialCaseDivider #(WID) u1 (.a(a), .b(b), .q(sdq), .val(sdval), .dbz(sddbz) );


// Three chained stages: each adds b when the previous partial remainder is
// negative (top bit set) and subtracts it otherwise, pulling in the next
// bit from the top of q.
assign rx[0] = rxx  [DMSB] ? {rxx  ,q[WID*2-1  ]} + b : {rxx  ,q[WID*2-1  ]} - b;
assign rx[1] = rx[0][DMSB] ? {rx[0],q[WID*2-1-1]} + b : {rx[0],q[WID*2-1-1]} - b;
assign rx[2] = rx[1][DMSB] ? {rx[1],q[WID*2-1-2]} + b : {rx[1],q[WID*2-1-2]} - b;


// Iteration counter
always @(posedge clk)
	if (ld)
		cnt <= sdval ? 8'b10000000 : WID*2/3;
	else if (!done)
		cnt <= cnt - 1;


// Partial remainder register
always @(posedge clk)
	if (ld)
		rxx <= 0;
	else if (!done)
		rxx <= rx[2];


// Quotient / dividend shift register
always @(posedge clk)
	if (ld) begin
		if (sdval)
			q <= {sdq,{WID{1'b0}}};
		else
			q <= {a,{WID{1'b0}}};
	end
	else if (!done) begin
		q[WID*2-1:3] <= q[WID*2-1-3:0];
		// a quotient bit is 1 when the matching partial remainder is non-negative
		q[0] <= ~rx[2][DMSB];
		q[1] <= ~rx[1][DMSB];
		q[2] <= ~rx[0][DMSB];
	end

// correct remainder
assign r = sdval ? sdr : rx[2][DMSB] ? rx[2] + b : rx[2];
assign done = cnt[7];

endmodule
 
/*
module fpdiv_tb();
 
reg rst;
reg clk;
reg ld;
reg [6:0] cnt;
 
wire ce = 1'b1;
wire [49:0] a = 50'h0_0000_0400_0000;
wire [23:0] b = 24'd101;
wire [49:0] q;
wire [49:0] r;
wire done;
 
initial begin
clk = 1;
rst = 0;
#100 rst = 1;
#100 rst = 0;
end
 
always #20 clk = ~clk; // 25 MHz
always @(posedge clk)
if (rst)
cnt <= 0;
else begin
ld <= 0;
cnt <= cnt + 1;
if (cnt == 3)
ld <= 1;
$display("ld=%b q=%h r=%h done=%b", ld, q, r, done);
end
 
fpdivr8 divu0(.clk(clk), .ce(ce), .ld(ld), .a(a), .b(b), .q(q), .r(r), .done(done) );
 
endmodule
 
*/
 
/rtl/verilog/fpUnit/fpMul.v
0,0 → 1,241
// ===============================================================
// (C) 2006 Robert Finch
// All rights reserved.
// rob@birdcomputer.ca
//
// fpMul.v
// - floating point multiplier
// - two cycle latency
// - can issue every clock cycle
// - parameterized width
// - IEEE 754 representation
//
// This source code is free for use and modification for
// non-commercial or evaluation purposes, provided this
// copyright statement and disclaimer remains present in
// the file.
//
// If the code is modified, please state the origin and
// note that the code has been modified.
//
// NO WARRANTY.
// THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
// ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
// the entire risk of using the Work.
//
// IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
// ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
// WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
// RELATIONSHIP WITH THE AUTHOR.
//
// IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
// TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
// WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
// TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
// OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
// AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
// FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
// USE.
//
// This multiplier/divider handles denormalized numbers.
// The output format is of an internal expanded representation
// in preparation to be fed into a normalization unit, then
// rounding. Basically, it's the same as the regular format
// except the mantissa is doubled in size, the leading two
// bits of which are assumed to be whole bits.
//
//
// Floating Point Multiplier
//
// Properties:
// +-inf * +-inf = -+inf (this is handled by exOver)
// +-inf * 0 = QNaN
//
// 1 sign number
// 8 exponent
// 48 mantissa
//
// Ref: Webpack8.1i Spartan3-4 xc3s1000-4ft256
// 174 LUTS / 113 slices / 24.7 ns
// 4 Mults
//===============================================================
 
// ---------------------------------------------------------------
// fpMul - floating point multiplier
//
// Multiplies operands a and b and presents the result in the
// expanded intermediate format {sign, exponent[EMSB:0], fraction[FX:0]}
// intended for a following normalize / round stage.
//
// Parameters:
//   WID       - operand width in bits; selects EMSB / FMSB below
// Ports:
//   clk, ce   - clock and clock enable for every register stage
//   a, b      - operands, indexed [WID:1]
//   o         - expanded result, EX+1 bits wide
//   sign_exe  - delayed (sa & sb)
//   inf       - delayed copy of the exponent-overflow flag
//   overflow  - delayed copy of the exponent-overflow flag
//   underflow - delayed copy of the exponent-underflow flag
//
// NOTE(review): fract1 is only driven for WID==64 and WID==32 (see the
// generate block); for any other width the fraction path is undriven.
// ---------------------------------------------------------------
module fpMul (clk, ce, a, b, o, sign_exe, inf, overflow, underflow);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;

input clk;
input ce;
input [WID:1] a, b;
output [EX:0] o;
output sign_exe;
output inf;
output overflow;
output underflow;

reg [EMSB:0] xo1;       // registered output exponent
reg [FX:0] mo1;         // registered output fraction

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};   // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};   //2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}};

// variables
reg [FX:0] fract1,fract1a;
wire [FX:0] fracto;
wire [EMSB+2:0] ex1;    // sum of exponents (two extra bits: overflow, sign)
wire [EMSB :0] ex2;

// Decompose the operands
wire sa, sb;            // sign bit
wire [EMSB:0] xa, xb;   // exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;        // a/b is denormalized
wire az, bz;
wire aInf, bInf, aInf1, bInf1;


// -----------------------------------------------------------
// First clock
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) );

// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract the bias)
// a zero operand forces the exponent sum to zero
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;

// Fraction product built from 18x18 partial products.
// Partial products are registered, then their sum is registered in fract1.
// NOTE(review): fract1 therefore lags the inputs by two clocks before it
// enters delay line u4, while the exponent and flags only pass through
// their delay lines - confirm the fraction lines up with the exponent.
generate
if (WID==64) begin
    reg [35:0] p00,p01,p02;
    reg [35:0] p10,p11,p12;
    reg [35:0] p20,p21,p22;
    always @(posedge clk)
    if (ce) begin
        p00 <= fracta[17: 0] * fractb[17: 0];
        p01 <= fracta[35:18] * fractb[17: 0];
        p02 <= fracta[52:36] * fractb[17: 0];
        p10 <= fracta[17: 0] * fractb[35:18];
        p11 <= fracta[35:18] * fractb[35:18];
        p12 <= fracta[52:36] * fractb[35:18];
        p20 <= fracta[17: 0] * fractb[52:36];
        p21 <= fracta[35:18] * fractb[52:36];
        p22 <= fracta[52:36] * fractb[52:36];
        fract1 <= {p02,36'b0} + {p01,18'b0} + p00 +
                  {p12,54'b0} + {p11,36'b0} + {p10,18'b0} +
                  {p22,72'b0} + {p21,54'b0} + {p20,36'b0}
                  ;
    end
end
else if (WID==32) begin
    reg [35:0] p00,p01;
    reg [35:0] p10,p11;
    always @(posedge clk)
    if (ce) begin
        p00 <= fracta[17: 0] * fractb[17: 0];
        p01 <= fracta[23:18] * fractb[17: 0];
        p10 <= fracta[17: 0] * fractb[23:18];
        p11 <= fracta[23:18] * fractb[23:18];
        fract1 <= {p11,p00} + {p01,18'b0} + {p10,18'b0};
    end
end
endgenerate

// Status
wire under1, over1;
wire under = ex1[EMSB+2];   // exponent underflow (sum went negative)
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// The delay parameter is the bit count of the delayed signal (compare u4,
// which passes FX+1 for a [FX:0] signal). The exponent is EMSB+1 bits wide;
// passing EMSB here dropped its most significant bit.
delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );
delay2 #(FX+1) u4 (.clk(clk), .ce(ce), .i(fract1), .o(fracto) );
delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay2 u6 (.clk(clk), .ce(ce), .i(under), .o(under1) );
delay2 u7 (.clk(clk), .ce(ce), .i(over), .o(over1) );

// determine when a NaN is output (infinity times zero)
wire qNaNOut;
delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) );


// -----------------------------------------------------------
// Second clock
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------

wire so1;
delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );  // result sign

// Output exponent. Non-blocking assignment is used because this is a
// clocked register read by logic outside the block.
always @(posedge clk)
if (ce)
    casex({qNaNOut,aInf1,bInf1,over1,under1})
    5'b1xxxx:   xo1 <= infXp;       // qNaN - infinity * zero
    5'b01xxx:   xo1 <= infXp;       // 'a' infinite
    5'b001xx:   xo1 <= infXp;       // 'b' infinite
    5'b0001x:   xo1 <= infXp;       // result overflow
    5'b00001:   xo1 <= 0;           // underflow
    default:    xo1 <= ex2[EMSB:0]; // situation normal
    endcase

// Output fraction.
always @(posedge clk)
if (ce)
    casex({qNaNOut,aInf1,bInf1,over1})
    4'b1xxx:    mo1 <= {1'b0,qNaN|3'd4,{FMSB+1{1'b0}}};    // multiply inf * zero
    4'b01xx:    mo1 <= 0;       // mul inf's
    4'b001x:    mo1 <= 0;       // mul inf's
    4'b0001:    mo1 <= 0;       // mul overflow
    default:    mo1 <= fracto;
    endcase

delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );
delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) );
delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) );
delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );

assign o = {so1,xo1,mo1};

endmodule
 
// Minimal test bench for fpMul: instantiates two default-width (WID=32)
// multipliers with both operands tied to zero and a free-running clock.
// Each instance has its own, correctly sized, output nets; previously both
// instances drove the same undeclared (implicit one-bit) nets.
module fpMul_tb();
// Expanded result width of a WID=32 fpMul (mirrors the localparams in fpMul)
localparam FMSB = 22;
localparam EMSB = 7;
localparam FX = (FMSB+2)*2-1;
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;

reg clk;

wire [EX:0] o1, o2;
wire sgnx1, inf1, of1, uf1;
wire sgnx2, inf2, of2, uf2;

initial begin
    clk = 0;
end
always #10 clk <= ~clk;     // 20 time unit clock period

fpMul u1 (.clk(clk), .ce(1'b1), .a(32'd0), .b(32'd0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1));
fpMul u2 (.clk(clk), .ce(1'b1), .a(32'd0), .b(32'd0), .o(o2), .sign_exe(sgnx2), .inf(inf2), .overflow(of2), .underflow(uf2));

endmodule
/rtl/verilog/fpUnit/fp_cmp_unit.v
0,0 → 1,84
/* ============================================================================
(C) 2007,2015 Robert T Finch
All rights reserved.
rob@birdcomputer.ca
 
fp_cmp_unit.v
- floating point comparison unit
- parameterized width
- IEEE 754 representation
 
Verilog 2001
 
Notice of Confidentiality
 
http://en.wikipedia.org/wiki/IEEE_754
 
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256
111 LUTS / 58 slices / 16 ns
Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256
109 LUTS / 58 slices / 16.4 ns
 
============================================================================ */
 
// ---------------------------------------------------------------
// fp_cmp_unit - combinational floating point comparator
//
// Parameters:
//   WID  - operand width in bits; selects EMSB / FMSB below
// Ports:
//   a, b - operands to compare
//   o    - comparison result:
//            o[0] = equal (both zero, or bit-identical)
//            o[1] = a less than b (sign aware)
//            o[2] = magnitude of a less than magnitude of b
//            o[3] = unordered (either operand is a NaN)
//   nanx - NaN exception; currently tied low
// ---------------------------------------------------------------
module fp_cmp_unit(a, b, o, nanx);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

input [WID-1:0] a, b;
output [3:0] o;
reg [3:0] o;
output nanx;

// Decompose the operands
wire sa;
wire sb;
wire [EMSB:0] xa;
wire [EMSB:0] xb;
wire [FMSB:0] ma;
wire [FMSB:0] mb;
wire az, bz;
wire nan_a, nan_b;

fp_decomp #(WID) u1(.i(a), .sgn(sa), .exp(xa), .man(ma), .vz(az), .qnan(), .snan(), .nan(nan_a) );
fp_decomp #(WID) u2(.i(b), .sgn(sb), .exp(xb), .man(mb), .vz(bz), .qnan(), .snan(), .nan(nan_b) );

wire unordered = nan_a | nan_b;

wire eq = (az & bz) || (a==b);      // special test for zero (+0 == -0)
wire gt1 = {xa,ma} > {xb,mb};       // magnitude compare
wire lt1 = {xa,ma} < {xb,mb};

// Signs differ: a is less when it is the negative one (unless both are zero).
// Signs equal : for negatives the larger magnitude is the lesser value.
wire lt = sa ^ sb ? sa & !(az & bz): sa ? gt1 : lt1;

// @* keeps the sensitivity list complete; the explicit list used before
// omitted lt1 although the block reads it.
always @*
begin
    o[0] = eq;
    o[1] = lt;
    o[2] = lt1;
    o[3] = unordered;
end

// an unordered comparison will signal a nan exception
//assign nanx = op!=`FCOR && op!=`FCUN && unordered;
assign nanx = 1'b0;

endmodule
/rtl/verilog/fpUnit/fpUnit.v
0,0 → 1,455
// ============================================================================
// __
// \\__/ o\ (C) 2006,2015 Robert Finch, Stratford
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
// Thor SuperScalar
// fpUnit.v
// - floating point unit
// - parameterized width
// - IEEE 754 representation
//
// NaN Value Origin
// 31'h7FC00001 - infinity - infinity
// 31'h7FC00002 - infinity / infinity
// 31'h7FC00003 - zero / zero
// 31'h7FC00004 - infinity X zero
//
// Whenever the fpu encounters a NaN input, the NaN is
// passed through to the output.
//
// Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256
// 2335 LUTS / 1260 slices / 43.4 MHz
// Ref: Webpack 13.1 Spartan3e xc3s1200e-4fg320
// 2433 LUTs / 1301 slices / 51.6 MHz
//
// Instr. Cyc Lat
// fc__ ; 1 0 compare, lt le gt ge eq ne or un
// fabs ; 1 0 absolute value
// fnabs ; 1 0 negative absolute value
// fneg ; 1 0 negate
// fmov ; 1 0 move
// fman ; 1 0 get mantissa
// fsign ; 1 0 get sign
//
// f2i ; 1 1 convert float to integer
// i2f ; 1 1 convert integer to float
//
// fadd ; 1 4 addition
// fsub ; 1 4 subtraction
// fmul ; 1 4 multiplication
//
// fdiv ; 16 4 division
//
// ftx ; 1 0 trigger fp exception
// fcx ; 1 0 clear fp exception
// fex ; 1 0 enable fp exception
// fdx ; 1 0 disable fp exception
// frm ; 1 0 set rounding mode
// fstat ; 1 0 get status register
//
// related integer:
// graf ; 1 0 get random float (0,1]
//
// ============================================================================
//
`include "..\Thor_defines.v"
 
// NaN codes identifying the origin of an invalid operation.
// Single precision codes are 31 bits wide (value without the sign bit).
`define QINFOS 23'h7FC000 // info
`define QSUBINFS 31'h7FC00001 // - infinity - infinity
`define QINFDIVS 31'h7FC00002 // - infinity / infinity
`define QZEROZEROS 31'h7FC00003 // - zero / zero
`define QINFZEROS 31'h7FC00004 // - infinity X zero

// Double precision codes. The values need 63 bits; declared as 62 bits the
// top bit was truncated (giving 3FF0...), so they could never match.
`define QINFO 52'h7FC000 // info
`define QSUBINF 63'h7FF0000000000001 // - infinity - infinity
`define QINFDIV 63'h7FF0000000000002 // - infinity / infinity
`define QZEROZERO 63'h7FF0000000000003 // - zero / zero
`define QINFZERO 63'h7FF0000000000004 // - infinity X zero
 
// ---------------------------------------------------------------
// fpUnit - floating point unit top level
//
// Ties together the zero-latency unit (fpZLUnit), the conversion unit
// (fpLOOUnit), adder/subtracter, multiplier, divider, normalizer and
// rounder for both double (64 bit) and single (32 bit) operands, and
// maintains the floating point control / status register.
//
// Ports:
//   rst       - synchronous reset of control and status state
//   clk, ce   - clock and clock enable
//   op, fn    - opcode and function code selecting the operation
//   ld        - passed to the dividers' ld input
//   a, b      - operands
//   o         - result, or the status word when the operation is FSTAT
//   exception - global exception indicator (gx)
//
// NOTE(review): the sub-units are instantiated with fixed widths of 64
// and 32 and fpu_o[63] is referenced directly, so this module looks
// usable only with WID=64 although the default is 32 - confirm.
// ---------------------------------------------------------------
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);

parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;
localparam EMSBS = 7;
localparam FMSBS = 22;
localparam FX = (FMSB+2)*2-1;       // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1;     // the MSB of the expanded fraction (single)
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;

input rst;
input clk;
input ce;
input [7:0] op;
input [5:0] fn;
input ld;
input [MSB:0] a;
input [MSB:0] b;
output tri [MSB:0] o;
output exception;


//------------------------------------------------------------
// constants (declared with their full width; they were one bit wide)
wire [10:0] infXp = {11{1'b1}};     // value for infinite exponent / nan
wire [7:0] infXps = {8{1'b1}};

// Variables
wire divByZero;     // attempt to divide by zero
wire inf;           // result is infinite (+ or -)
wire zero;          // result is zero (+ or -)
wire ns;            // nan sign
wire nss;
wire nso;
wire nsos;
wire isNan,isNans;
wire nanx,nanxs;

// Decode fp operation
wire fstat = op==`FLOAT && fn==`FSTAT;  // get status
wire fdiv = op==`FLOAT && fn==`FDIV;
wire fdivs = op==`FLOAT && fn==`FDIVS;
wire ftx = op==`FLOAT && fn==`FTX;      // trigger exception
wire fcx = op==`FLOAT && fn==`FCX;      // clear exception
wire fex = op==`FLOAT && fn==`FEX;      // enable exception
wire fdx = op==`FLOAT && fn==`FDX;      // disable exception
wire fcmp = op==`FLOAT && (fn==`FCMP || fn==`FCMPS);
wire frm = op==`FLOAT && fn==`FRM;      // set rounding mode
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R;
wire zl_op = (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) ||
             (op==`FLOAT && fn==`FCMP) ||
             (op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) ||
             (op==`FLOAT && (fn==`FCMPS))
             ;
// ITOFS is a function code like FTOIS; it was compared against op.
wire loo_op = (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) ||
              (op==`SINGLE_R && (fn==`FTOIS || fn==`ITOFS));
wire loo_done;

wire subinf;
wire zerozero;
wire infzero;
wire infdiv;

// floating point control and status
reg [1:0] rm;       // rounding mode
reg inexe;          // inexact exception enable
reg dbzxe;          // divide by zero exception enable
reg underxe;        // underflow exception enable
reg overxe;         // overflow exception enable
reg invopxe;        // invalid operation exception enable

reg nsfp;           // non-standard floating point indicator

reg fractie;        // fraction inexact
reg raz;            // rounded away from zero

reg inex;           // inexact exception
reg dbzx;           // divide by zero exception
reg underx;         // underflow exception
reg overx;          // overflow exception
reg giopx;          // global invalid operation exception
reg sx;             // summary exception

reg swtx;           // software triggered exception indicator

wire gx = swtx|inex|dbzx|underx|overx|giopx;    // global exception indicator

// breakdown of invalid operation exceptions
reg cvtx;           // conversion exception
reg sqrtx;          // squareroot exception
reg NaNCmpx;        // NaN comparison exception
reg infzerox;       // multiply infinity by zero
reg zerozerox;      // division of zero by zero
reg infdivx;        // division of infinities
reg subinfx;        // subtraction of infinities
reg snanx;          // signalling nan

wire divDone;
wire pipe_ce = ce & divDone;    // divide must be done in order for pipe to clock

always @(posedge clk)
// reset: disable and clear all exceptions and status
if (rst) begin
    rm <= 2'b0;     // round nearest even - default rounding mode
    inex <= 1'b0;
    dbzx <= 1'b0;
    underx <= 1'b0;
    overx <= 1'b0;
    giopx <= 1'b0;
    swtx <= 1'b0;
    sx <= 1'b0;
    NaNCmpx <= 1'b0;
    // These sticky flags feed back on themselves; without a reset value
    // they would stay unknown forever.
    infzerox <= 1'b0;
    zerozerox <= 1'b0;
    infdivx <= 1'b0;
    subinfx <= 1'b0;
    // Status bits with no other driver in this module; give them a
    // defined value for the status word.
    fractie <= 1'b0;
    raz <= 1'b0;
    cvtx <= 1'b0;
    sqrtx <= 1'b0;
    snanx <= 1'b0;

    inexe <= 1'b0;
    dbzxe <= 1'b0;
    underxe <= 1'b0;
    overxe <= 1'b0;
    invopxe <= 1'b0;
    nsfp <= 1'b0;

end
else if (pipe_ce) begin
    // Sticky accumulation of exceptions raised by the pipeline.
    // These come FIRST so that an explicit FTX / FCX below takes
    // precedence: with non-blocking assignments the last one in the block
    // wins, and when these lines followed the decode they silently
    // overrode every set / clear done by FTX and FCX.
    infzerox <= infzerox | (invopxe & infzero);
    zerozerox <= zerozerox | (invopxe & zerozero);
    subinfx <= subinfx | (invopxe & subinf);
    infdivx <= infdivx | (invopxe & infdiv);
    dbzx <= dbzx | (dbzxe & divByZero);
    NaNCmpx <= NaNCmpx | (invopxe & nanx & fcmp);   // must be a compare
    sx <= sx |
          (invopxe & nanx & fcmp) |
          (invopxe & (infzero|zerozero|subinf|infdiv)) |
          (dbzxe & divByZero);

    if (ftx) begin
        inex <= inex | (a[4]|b[4]);
        // keep a divide-by-zero event arriving in the same cycle
        dbzx <= dbzx | (a[3]|b[3]) | (dbzxe & divByZero);
        underx <= underx | (a[2]|b[2]);
        overx <= overx | (a[1]|b[1]);
        giopx <= giopx | (a[0]|b[0]);
        swtx <= 1'b1;
        sx <= 1'b1;
    end
    else if (fcx) begin
        sx <= sx & !(a[5]|b[5]);
        inex <= inex & !(a[4]|b[4]);
        // dbzx was assigned twice here (bit 3, then bit 0); merged into a
        // single statement so that either bit clears it.
        dbzx <= dbzx & !(a[3]|b[3]) & !(a[0]|b[0]);
        underx <= underx & !(a[2]|b[2]);
        overx <= overx & !(a[1]|b[1]);
        giopx <= giopx & !(a[0]|b[0]);
        // clear exception type when global invalid operation is cleared
        infdivx <= infdivx & !(a[0]|b[0]);
        zerozerox <= zerozerox & !(a[0]|b[0]);
        subinfx <= subinfx & !(a[0]|b[0]);
        infzerox <= infzerox & !(a[0]|b[0]);
        NaNCmpx <= NaNCmpx & !(a[0]|b[0]);
        // NOTE(review): clear-exception sets the software triggered
        // indicator; this looks copied from FTX - confirm intent.
        swtx <= 1'b1;
    end
    else if (fex) begin
        inexe <= inexe | (a[4]|b[4]);
        dbzxe <= dbzxe | (a[3]|b[3]);
        underxe <= underxe | (a[2]|b[2]);
        overxe <= overxe | (a[1]|b[1]);
        invopxe <= invopxe | (a[0]|b[0]);
    end
    else if (fdx) begin
        inexe <= inexe & !(a[4]|b[4]);
        dbzxe <= dbzxe & !(a[3]|b[3]);
        underxe <= underxe & !(a[2]|b[2]);
        overxe <= overxe & !(a[1]|b[1]);
        invopxe <= invopxe & !(a[0]|b[0]);
    end
    else if (frm)
        rm <= a[1:0]|b[1:0];
end

// Decompose operands into sign,exponent,mantissa
wire sa, sb, sas, sbs;
wire [FMSB:0] ma, mb;
wire [22:0] mas, mbs;

wire aInf, bInf, aInfs, bInfs;
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [1:0] rmd4;        // rounding mode delayed to the rounding stage
wire [7:0] op1, op2;
wire [5:0] fn1,fn2;

wire [MSB:0] zld_o,lood_o;
wire [31:0] zls_o,loos_o;
wire [MSB:0] loo_o, zl_o;   // width-selected results (were implicit 1-bit nets)
fpZLUnit #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );
// u7 drives lood_o / loo_done; it used to drive loo_o (also the target of
// the assign below) and an undeclared net, leaving lood_o undriven.
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(lood_o), .done(loo_done) );
fpZLUnit #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
assign loo_o = single ? loos_o : lood_o;
assign zl_o = single ? zls_o : zld_o;
fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );

// The rounding mode register is delayed to the rounder; the input used to
// be the undeclared net "rmd".
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rm), .o(rmd4) );
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );

delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );

// Compute NaN output sign
wire aob_nan = aNan|bNan;       // one of the operands is a nan
wire bothNan = aNan&bNan;       // both of the operands are nans
wire aob_nans = aNans|bNans;    // one of the operands is a nan
wire bothNans = aNans&bNans;    // both of the operands are nans

assign ns = bothNan ?
            (ma==mb ? sa & sb : ma < mb ? sb : sa) :
            aNan ? sa : sb;
assign nss = bothNans ?
             (mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :
             aNans ? sas : sbs;

// Clocked with pipe_ce like every other pipeline stage so the NaN
// information stays aligned with the data while the divider stalls the pipe.
delay5 u8(.clk(clk), .ce(pipe_ce), .i(ns), .o(nso) );
delay5 u9(.clk(clk), .ce(pipe_ce), .i(aob_nan), .o(isNan) );
delay5 u8s(.clk(clk), .ce(pipe_ce), .i(nss), .o(nsos) );
delay5 u9s(.clk(clk), .ce(pipe_ce), .i(aob_nans), .o(isNans) );

wire [MSB:0] fpu_o;
wire [MSB+3:0] fpn_o;
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
reg [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
reg [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;

// Arithmetic units
// NOTE(review): the add/subtract select is taken from op[0]; confirm
// that the opcode, not the function code, distinguishes add from subtract.
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );
fpDiv #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul #(64) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
fpMul #(32) u12s(.clk(clk), .ce(pipe_ce), .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );

// Select the underflow flag of the unit whose result is in flight.
// Defaults are assigned first so that neither output infers a latch.
always @*
begin
    under = 1'b0;
    unders = 1'b0;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  under = mulUnder;
        `FDIV:  under = divUnder;
        `FMULS: unders = mulUnders;
        `FDIVS: unders = divUnders;
        default: ;
        endcase
end

// Select the result to normalize. The adder result is the default, which
// also covers FADD / FSUB / FADDS / FSUBS; defaults avoid inferred latches.
always @*
begin
    fres = fas_o;
    fress = fass_o;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  fres = fmul_o;
        `FDIV:  fres = fdiv_o;
        `FMULS: fress = fmuls_o;
        `FDIVS: fress = fdivs_o;
        default: ;
        endcase
end

// pipeline stage
// one cycle latency
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );

// pipeline stage
// one cycle latency
// rm is the delayed rounding mode rmd4 (was the undeclared net "rm4")
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpn_o), .o(fpu_o) );
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpns_o), .o(fpus_o) );

// result sign: a NaN operand's sign overrides the computed sign
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[63]);

//fix: status should be registered
assign o = fstat ? {
    rm,
    inexe,
    dbzxe,
    underxe,
    overxe,
    invopxe,
    nsfp,

    fractie,
    raz,
    1'b0,
    so & !zero,
    !so & !zero,
    zero,
    inf,

    swtx,
    inex,
    dbzx,
    underx,
    overx,
    giopx,
    gx,
    sx,
    cvtx,
    sqrtx,
    NaNCmpx,
    infzerox,
    zerozerox,
    infdivx,
    subinfx,
    snanx
    } : 'bz;

assign o = (!fstat & !single) ?
    zl_op ? zld_o :
    loo_op ? lood_o :
    {so,fpu_o[MSB-1:0]} : 'bz;
// The single result is 32 bits: sign plus fpus_o[30:0]. The previous
// select fpus_o[MSB-1:0] reached outside the 32-bit vector when WID=64.
assign o = (!fstat & single)?
    zl_op ? zls_o :
    loo_op ? loos_o :
    {so,fpus_o[30:0]} : 'bz;
assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0;
assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 : WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0;

// Detect the NaN codes that identify each invalid operation.
assign subinf = single ? fpus_o[31:0]==`QSUBINFS : WID==64 ? fpu_o[63:0]==`QSUBINF : 0;
assign infdiv = single ? fpus_o[31:0]==`QINFDIVS : WID==64 ? fpu_o[63:0]==`QINFDIV : 0;
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
// compare the whole double result, as the three tests above do (was [31:0])
assign infzero = single ? fpus_o[31:0]==`QINFZEROS : WID==64 ? fpu_o[63:0]==`QINFZERO : 0;

assign exception = gx;

endmodule
 
/rtl/verilog/fpUnit/fpNormalize.v
0,0 → 1,168
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpNormalize.v
- floating point normalization unit
- two cycle latency
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If you do modify the code, please state the origin and
note that you have modified the code.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
 
This unit takes a floating point number in an intermediate
format and normalizes it. No normalization occurs
for NaN's or infinities. The unit has a two cycle latency.
 
The mantissa is assumed to start with two whole bits on
the left. The remaining bits are fractional.
The width of the incoming format is reduced via a generation
of sticky bit in place of the low order fractional bits.
 
On an underflowed input, the incoming exponent is assumed
to be negative. A right shift is needed.
 
Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256
302 LUTs / 166 slices /
550 LUTs / 291 slices / 89 MHz
163 LUTs / 93 slices / 113.6 MHz?
=============================================================== */
 
// ---------------------------------------------------------------
// fpNormalize - normalize a number held in the expanded format
//
// Parameters:
//   WID   - width of the floating point format; selects EMSB / FMSB
// Ports:
//   clk, ce - clock and clock enable for the register stage
//   under   - the incoming exponent underflowed (is negative); selects a
//             right shift instead of a left shift
//   i       - expanded format input {sign, exponent, fraction[FX:0]}
//   o       - {sign, exponent, mantissa[FMSB+3:0]}
// ---------------------------------------------------------------
module fpNormalize(clk, ce, under, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;

input clk;
input ce;
input under;
input [EX:0] i;         // expanded format input
output [WID+2:0] o;     // normalized output + guard, sticky and round bits, + 1 whole digit

// variables
wire so;

wire so1 = i[EX];       // sign doesn't change

// Since there are *two* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX-1:FX+1];
wire xInf = &xo1a & !under;
wire incExp1 = !xInf & i[FX];
wire [EMSB:0] xo1 = xo1a + incExp1;
wire [EMSB:0] xo2;
wire xInf1 = &xo1;

// If infinity is reached then set the mantissa to zero
wire gbit = i[FMSB];
wire rbit = i[FMSB-1];
wire sbit = |i[FMSB-2:0];
// shift mantissa left by one to reduce to a single whole digit
// if there is no exponent increment
// NOTE(review): both concatenations below are FMSB+6 bits wide while mo1
// holds FMSB+4 bits, so their two most significant bits are truncated on
// assignment - confirm the intended bit selection.
wire [FMSB+3:0] mo;
wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 :
                      incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} :     // reduce mantissa size
                                {i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0};   // reduce mantissa size
wire [FMSB+3:0] mo2;
wire [6:0] leadingZeros2;

// NOTE(review): mo1 is FMSB+4 bits; presumably cntlz64Reg expects a 64-bit
// input, in which case the port padding adds leading zeros - confirm.
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) );

// compensate for leadingZeros delay
wire xInf2;
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );
delay1 #(1) d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) );

// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
wire rightOrLeft2;      // 0=left,1=right
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) );

// Compute how much we want to decrement by
wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;

// compute amount to shift right
// at infinity the exponent can't be incremented, so we can't shift right
// otherwise it was an underflow situation so the exponent was negative
// shift amount needs to be negated for shift register
//
// The magnitude of the negative exponent is formed at the exponent's own
// width. Written inline as "-xo2 > FMSB+3" the negation was evaluated at
// the 32-bit width of the integer constants, so the test was true for
// every non-zero xo2.
wire [EMSB:0] xo2Mag = -xo2;    // xo2 is negative !
wire [6:0] rshiftAmt2 = xInf2 ? 0 : xo2Mag > FMSB+3 ? FMSB+4 : FMSB+4-xo2Mag;


// sign
// the output sign is the same as the input sign
delay1 #(1) d7(.clk(clk), .ce(ce), .i(so1), .o(so) );

// exponent
// always @(posedge clk)
// if (ce)
assign xo =
    xInf2 ? xo2 :           // an infinite exponent is either a NaN or infinity; no need to change
    rightOrLeft2 ? 0 :      // on a right shift, the exponent was negative, it's being made to zero
    xo2 - lshiftAmt2;       // on a left shift, the exponent can't be decremented below zero

// mantissa
// mo1 / mo2 are FMSB+4 bits wide; the delay width was FMSB+3, which
// dropped the most significant mantissa bit.
delay1 #(FMSB+4) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );

// Shift by the amount that matches the direction: the right shift amount
// when shifting right, the left shift amount otherwise (the two were
// swapped; see the reference expression on the assign below).
wire [FMSB+3:0] mo2a;
shiftAndMask #(FMSB+4) u1 (.op({rightOrLeft2,1'b0}), .a(mo2), .b(rightOrLeft2 ? rshiftAmt2 : lshiftAmt2), .mb(6'd0), .me(FMSB+3), .o(mo2a) );

// always @(posedge clk)
// if (ce)
assign mo = mo2a;//rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2;

assign o = {so,xo,mo};

endmodule
/rtl/verilog/fpUnit/fpAddsub.v
0,0 → 1,226
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpAddsub.v
- floating point adder/subtracter
- two cycle latency
- can issue every clock cycle
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If you do modify the code, please state the origin and
note that you have modified the code.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
This adder/subtractor handles denormalized numbers.
It has a two cycle latency.
The output format is of an internal expanded representation
in preparation to be fed into a normalization unit, then
rounding. Basically, it's the same as the regular format
except the mantissa is doubled in size, the leading two
bits of which are assumed to be whole bits.
 
Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256
580 LUTS / 315 slices / 74 MHz
=============================================================== */
 
// ---------------------------------------------------------------
// fpAddsub - floating point adder / subtracter
//
// Parameters:
//   WID     - operand width in bits; selects EMSB / FMSB below
// Ports:
//   clk, ce - clock and clock enable for the register stages
//   rm      - rounding mode; only used for the sign of a zero result
//   op      - 0 = add, 1 = subtract
//   a, b    - operands
//   o       - result in the expanded format {sign, exponent, fraction[FX:0]}
// ---------------------------------------------------------------
module fpAddsub(clk, ce, rm, op, a, b, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;

input clk;              // system clock
input ce;               // core clock enable
input [1:0] rm;         // rounding mode
input op;               // operation 0 = add, 1 = subtract
input [WID-1:0] a;      // operand a
input [WID-1:0] b;      // operand b
output [EX:0] o;        // output


// variables
wire so;                // sign output
wire [EMSB:0] xo;       // de normalized exponent output
reg [EMSB:0] xo1;       // de normalized exponent output
wire [FX:0] mo;         // mantissa output
reg [FX:0] mo1;         // mantissa output

assign o = {so,xo,mo};

// operands sign,exponent,mantissa
wire sa, sb;
wire [EMSB:0] xa, xb;
wire [FMSB:0] ma, mb;
wire [FMSB+1:0] fracta, fractb;
wire [FMSB+1:0] fracta1, fractb1;

// which has greater magnitude ? Used for sign calc
wire xa_gt_xb = xa > xb;
wire xa_gt_xb1;
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb);
wire a_gt_b1;
wire az, bz;            // operand a,b is zero

wire adn, bdn;          // a,b denormalized ?
wire xaInf, xbInf;
wire aInf, bInf, aInf1, bInf1;
wire aNan, bNan, aNan1, bNan1;

wire [EMSB:0] xad = xa|adn;     // operand a exponent, compensated for denormalized numbers
wire [EMSB:0] xbd = xb|bdn;     // operand b exponent, compensated for denormalized numbers

fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );

// Figure out which operation is really needed an add or
// subtract ?
// If the signs are the same, use the original op,
// otherwise flip the operation
// a + b = add,+
// a + -b = sub, so of larger
// -a + b = sub, so of larger
// -a + -b = add,-
// a - b = sub, so of larger
// a - -b = add,+
// -a - b = add,-
// -a - -b = sub, so of larger
wire realOp = op ^ sa ^ sb;
wire realOp1;

// Find out if the result will be zero.
wire resZero = (realOp && xa==xb && ma==mb) ||  // subtract, same magnitude
               (az & bz);                       // both a,b zero

// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.

always @(xaInf,xbInf,resZero,xa,xb,xa_gt_xb)
    xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb;

// Compute output sign
// (combinational block, so blocking assignments are used)
reg so1;
always @*
    case ({resZero,sa,op,sb})   // synopsys full_case parallel_case
    4'b0000: so1 = 0;           // + + + = +
    4'b0001: so1 = !a_gt_b;     // + + - = sign of larger
    4'b0010: so1 = !a_gt_b;     // + - + = sign of larger
    4'b0011: so1 = 0;           // + - - = +
    4'b0100: so1 = a_gt_b;      // - + + = sign of larger
    4'b0101: so1 = 1;           // - + - = -
    4'b0110: so1 = 1;           // - - + = -
    4'b0111: so1 = a_gt_b;      // - - - = sign of larger
    4'b1000: so1 = 0;           // A + B, sign = +
    4'b1001: so1 = rm==3;       // A + -B, sign = + unless rounding down
    4'b1010: so1 = rm==3;       // A - B, sign = + unless rounding down
    4'b1011: so1 = 0;           // +A - -B, sign = +
    4'b1100: so1 = rm==3;       // -A + B, sign = + unless rounding down
    4'b1101: so1 = 1;           // -A + -B, sign = -
    4'b1110: so1 = 1;           // -A - +B, sign = -
    4'b1111: so1 = rm==3;       // -A - -B, sign = + unless rounding down
    endcase

delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );
delay2 #(1) d2(.clk(clk), .ce(ce), .i(so1), .o(so) );

// Compute the difference in exponents, provides shift amount
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;     // saturate the shift
wire [6:0] xdif1;

// determine which fraction to denormalize
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;
wire [FMSB+1:0] mfs1;

// Determine the sticky bit
wire sticky, sticky1;
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );

// register inputs to shifter and shift
delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );
delay1 #(7) d15(.clk(clk), .ce(ce), .i(xdif), .o(xdif1) );
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs), .o(mfs1) );

wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1;

// sync control signals
delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );
delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) );
delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) );
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) );
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) );
delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );
delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );

// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1;
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0};
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;

// Select the output mantissa.
// NOTE(review): the first three alternatives are narrower than FX+1 bits
// and are zero extended on the left - confirm the intended alignment.
always @*
    casex({aInf1&bInf1,aNan1,bNan1})
    // inf +/- inf - generate QNaN when the effective operation is a
    // subtraction of infinities, inf otherwise. The effective operation
    // (realOp1) is used rather than the raw opcode so that, for example,
    // (+inf) + (-inf) is also flagged.
    3'b1xx: mo1 = {1'b0,realOp1,{FMSB-1{1'b0}},realOp1,{FMSB{1'b0}}};
    3'bx1x: mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};
    3'bxx1: mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};
    // mab has an extra lead bit and two trailing bits. mab is FMSB+5 bits,
    // so FMSB-1 padding bits fill the FX+1 bit mantissa exactly and put
    // the two whole bits at the top; with FMSB-2 the value was one bit
    // short and ended up shifted right by one place.
    default: mo1 = {mab,{FMSB-1{1'b0}}};
    endcase

delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );

endmodule
 
/rtl/verilog/fpUnit/fpDiv.v
0,0 → 1,176
/* ===============================================================
(C) 2006 Robert Finch
All rights reserved.
rob@birdcomputer.ca
 
fpDiv.v
- floating point divider
- parameterized width
- IEEE 754 representation
 
This source code is free for use and modification for
non-commercial or evaluation purposes, provided this
copyright statement and disclaimer remains present in
the file.
 
If you do modify the code, please state the origin and
note that you have modified the code.
 
NO WARRANTY.
THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
the entire risk of using the Work.
 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
RELATIONSHIP WITH THE AUTHOR.
 
IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
USE.
 
This divider handles denormalized numbers.
The output format is of an internal expanded representation
in preparation to be fed into a normalization unit, then
rounding. Basically, it's the same as the regular format
except the mantissa is doubled in size, the leading two
bits of which are assumed to be whole bits.
 
 
Floating Point Multiplier / Divider
 
Properties:
+-inf * +-inf = -+inf (this is handled by exOver)
+-inf * 0 = QNaN
+-0 / +-0 = QNaN
 
Ref: Webpack8.2i Spartan3-4 xc3s1000-4ft256
316 LUTS / 174 slices / 49.7 MHz
=============================================================== */
 
// ---------------------------------------------------------------------------
// fpDiv - floating point divider, computes a / b.
//
// Parameters:
//   WID        total width of the floating point operands (default 32)
//
// Ports:
//   clk        clock for the result registers and the fraction divider
//   ce         clock enable for the result registers in this module
//   ld         load/start strobe, passed straight to the fraction divider
//   a, b       operands (dividend, divisor), WID bits each
//   o          expanded result {sign, exponent, double-width mantissa};
//              intended to feed a normalizer / rounder
//   done       completion flag driven by the fraction divider
//   sign_exe   registered (sign of a) AND (sign of b)
//   overflow   registered exponent overflow indicator
//   underflow  registered exponent underflow indicator (negative exponent)
//
// Special cases produced by the result mux (highest priority first):
//   0/0 or inf/inf  -> exponent all ones, quiet NaN mantissa
//   x / inf         -> exponent 0, mantissa 0
//   x / 0           -> exponent all ones, mantissa 0
//   otherwise       -> computed exponent and quotient from the divider
// ---------------------------------------------------------------------------
module fpDiv(clk, ce, ld, a, b, o, done, sign_exe, overflow, underflow);

parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;

input clk;
input ce;
input ld;
input [MSB:0] a, b;
output [EX:0] o;
output done;
output sign_exe;
output overflow;
output underflow;

// registered outputs
reg sign_exe;
reg overflow;
reg underflow;

reg so;
reg [EMSB:0] xo;
reg [FX:0] mo;
assign o = {so,xo,mo};

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};   // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h3ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};   // 2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN = {1'b1,{FMSB{1'b0}}};

// variables
wire [EMSB+2:0] ex1;    // biased exponent difference, two guard bits on top
wire [FX:0] divo;       // quotient from the fraction divider

// Operands
wire sa, sb;            // sign bit
wire [EMSB:0] xa, xb;   // exponent bits
wire [FMSB+1:0] fracta, fractb;
wire a_dn, b_dn;        // a/b is denormalized
wire az, bz;            // a/b is zero
wire aInf, bInf;        // a/b is infinite


// -----------------------------------------------------------
// - decode the input operands
// - derive basic information
// - calculate exponent
// - calculate fraction
// -----------------------------------------------------------

fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf) );

// Compute the exponent.
// - correct the exponent for denormalized operands
// - adjust the difference by the bias (e.g. add 127 when WID=32)
// - also factor in the different decimal position for division
assign ex1 = (xa|a_dn) - (xb|b_dn) + bias + FMSB-1;

// check for exponent underflow/overflow
wire under = ex1[EMSB+2];   // MSB set = negative exponent
wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

// Perform divide
// could take either 1 or 16 clock cycles
// NOTE(review): ce is not connected to the divider in this instantiation;
// it only gates the result registers below - confirm that is intended.
fpdivr8 #(WID) u2 (.clk(clk), .ld(ld), .a(fracta), .b(fractb), .q(divo), .r(), .done(done));

// determine when a NaN is output
wire qNaNOut = (az&bz)|(aInf&bInf);

// Result registers. Non-blocking assignments are used so that other
// clocked logic sampling these outputs on the same edge sees the old
// values regardless of simulator process ordering.
always @(posedge clk)
    if (ce) begin
        if (done) begin
            // First matching item wins, so NaN generation takes priority
            // over divide-by-infinity, which takes priority over
            // divide-by-zero.
            casex({qNaNOut,bInf,bz})
            3'b1xx: begin   // 0/0 or inf/inf : quiet NaN
                xo <= infXp;
                // The two leading zeros are the whole-number bits of the
                // expanded mantissa; the low NaN payload bits record which
                // condition produced the NaN.
                mo <= {2'b00,qNaN[FMSB:0]|{aInf,1'b0}|{az,bz},{FMSB+1{1'b0}}};
            end
            3'bx1x: begin   // divide by inf
                xo <= {EMSB+1{1'b0}};
                mo <= {FX+1{1'b0}};
            end
            3'bxx1: begin   // divide by zero
                xo <= infXp;
                mo <= {FX+1{1'b0}};
            end
            default: begin  // plain div
                // normal or underflow: pass the low exponent bits through
                // for normalization; the guard bits are reported through
                // overflow/underflow
                xo <= ex1[EMSB:0];
                mo <= divo;
            end
            endcase

            so        <= sa ^ sb;
            sign_exe  <= sa & sb;
            overflow  <= over;
            underflow <= under;
        end
    end

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.