OpenCores
URL https://opencores.org/ocsvn/ft816float/ft816float/trunk

Subversion Repositories ft816float

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /ft816float/trunk/rtl/verilog
    from Rev 25 to Rev 26
    Reverse comparison

Rev 25 → Rev 26

/DivGoldschmidt.v
25,7 → 25,7
//
// ============================================================================
//
module DivGoldschmidt(rst, clk, ld, a, b, q, f0, done, lzcnt);
module DivGoldschmidt(rst, clk, ld, a, b, q, done, lzcnt);
parameter WID=32;
parameter WHOLE=16;
parameter POINTS=16;
38,7 → 38,6
input [WID-1:0] a;
input [WID-1:0] b;
output reg [WID*2-1:0] q;
output reg [SIZE-1:0] f0;
output reg done;
output reg [7:0] lzcnt;
parameter IDLE = 2'd0;
110,13 → 109,11
N <= {16'd0,a,{WHOLE{1'b0}}} << shft;
D <= {16'd0,b,{WHOLE{1'd0}}} << shft;
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} << shft);
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} << shft);
end
else begin
N <= {16'd0,a,{WHOLE{1'b0}}};
D <= {16'd0,b,{WHOLE{1'd0}}};
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}});
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}});
end
end
else begin
123,7 → 120,6
N <= {16'd0,a,{WHOLE{1'b0}}} >> shft;
D <= {16'd0,b,{WHOLE{1'd0}}} >> shft;
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} >> shft);
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} >> shft);
end
count <= 0;
state <= DIV;
158,7 → 154,6
reg ld;
wire done;
wire [WID*2-1:0] qo;
wire [7:0] f0;
reg [3:0] state;
reg [3:0] a, b;
reg [7:0] count;
211,12 → 206,10
.b(b),
// .imm(64'd123),
.q(qo),
.f0(f0),
// .ro(ro),
// .dvByZr(),
.left_right(),
.shift(),
.done(done)
.done(done),
.lzcnt()
);
 
endmodule
/F32ToF80.v
47,12 → 47,12
always @*
begin
// sign out always just = sign in
signo = signi;
signo <= signi;
 
// special check for zero
if (vz) begin
expo <= 0;
mano <= 0;
expo <= 1'd0;
mano <= 1'd0;
end
// convert infinity / nan
// infinity in = infinity out
/f2i.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
39,31 → 39,8
output [WID-1:0] o,
output overflow
);
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
 
wire [MSB:0] maxInt = {MSB{1'b1}}; // maximum unsigned integer value
wire [EMSB:0] zeroXp = {EMSB{1'b1}}; // simple constant - value of exp for zero
 
/fpAddsub.v
31,33 → 31,8
 
module fpAddsub(clk, ce, rm, op, a, b, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk; // system clock
input ce; // core clock enable
input [2:0] rm; // rounding mode
173,6 → 148,8
redor128 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
else if (WID==96)
redor96 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
else if (WID==84)
redor84 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
else if (WID==80)
redor80 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
else if (WID==64)
225,33 → 202,8
 
module fpAddsubnr(clk, ce, rm, op, a, b, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk; // system clock
input ce; // core clock enable
input [2:0] rm; // rounding mode
/fpAddsub_L10.v
31,33 → 31,8
 
module fpAddsub_L10(clk, ce, rm, op, a, b, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk; // system clock
input ce; // core clock enable
input [2:0] rm; // rounding mode
254,6 → 229,8
redor128 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) );
else if (WID==96)
redor96 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) );
else if (WID==84)
redor84 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) );
else if (WID==80)
redor80 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) );
else if (WID==64)
355,33 → 332,8
 
module fpAddsubnr_L10(clk, ce, rm, op, a, b, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk; // system clock
input ce; // core clock enable
input [2:0] rm; // rounding mode
/fpDecompReg.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
27,33 → 27,9
// ============================================================================
 
module fpDecomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
 
parameter WID=32;
`include "fpSize.sv"
 
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
 
input [MSB:0] i;
 
output sgn;
87,33 → 63,9
 
 
module fpDecompReg(clk, ce, i, o, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
 
parameter WID=32;
`include "fpSize.sv"
 
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
 
input clk;
input ce;
input [MSB:0] i;
/fpDiv.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
34,37 → 34,16
// ============================================================================
 
`include "fp_defines.v"
`define GOLDSCHMIDT 1'b1
//`define GOLDSCHMIDT 1'b1
 
module fpDiv(rst, clk, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow);
module fpDiv(rst, clk, clk4x, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow);
 
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
// FADD is a constant that makes the divider width a multiple of four and includes eight extra bits.
localparam FADD = WID==128 ? 9 :
WID==96 ? 9 :
WID==84 ? 9 :
WID==80 ? 9 :
WID==64 ? 13 :
WID==52 ? 9 :
75,10 → 54,9
WID==32 ? 10 :
WID==24 ? 9 : 11;
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
input rst;
input clk;
input clk4x;
input ce;
input ld;
input op;
155,6 → 133,7
// Divider width must be a multiple of four
`ifndef GOLDSCHMIDT
fpdivr16 #(FMSB+FADD) u2 (.clk(clk), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
//fpdivr2 #(FMSB+FADD) u2 (.clk4x(clk4x), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt));
wire [(FMSB+FADD)*2-1:0] divo1 = divo[(FMSB+FADD)*2-1:0] << (lzcnt-2);
`else
DivGoldschmidt #(.WID(FMSB+6),.WHOLE(1),.POINTS(FMSB+5))
217,36 → 196,13
 
endmodule
 
module fpDivnr(rst, clk, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow);
module fpDivnr(rst, clk, clk4x, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow);
parameter WID=32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
input rst;
input clk;
input clk4x;
input ce;
input ld;
input op;
264,7 → 220,7
wire [MSB+3:0] fpn0;
wire done1;
 
fpDiv #(WID) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1);
fpDiv #(WID) u1 (rst, clk, clk4x, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1);
fpNormalize #(WID) u2(.clk(clk), .ce(ce), .under(underflow1), .i(o1), .o(fpn0) );
fpRoundReg #(WID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));
/fpFMA.v
46,33 → 46,8
 
module fpFMA (clk, ce, op, rm, a, b, c, o, inf);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk;
input ce;
input op; // operation 0 = add, 1 = subtract
154,7 → 129,58
 
reg [FX:0] fract5;
generate
if (WID==80) begin
if (WID==84) begin
reg [33:0] p00,p01,p02,p03;
reg [33:0] p10,p11,p12,p13;
reg [33:0] p20,p21,p22,p23;
reg [33:0] p30,p31,p32,p33;
reg [135:0] fract3a;
reg [135:0] fract3b;
reg [135:0] fract3c;
reg [135:0] fract3d;
reg [135:0] fract4a;
reg [135:0] fract4b;
 
always @(posedge clk)
if (ce) begin
p00 <= fracta1[16: 0] * fractb1[16: 0];
p01 <= fracta1[33:17] * fractb1[16: 0];
p02 <= fracta1[50:34] * fractb1[16: 0];
p03 <= fracta1[67:51] * fractb1[16: 0];
p10 <= fracta1[16: 0] * fractb1[33:17];
p11 <= fracta1[33:17] * fractb1[33:17];
p12 <= fracta1[50:34] * fractb1[33:17];
p13 <= fracta1[67:51] * fractb1[33:17];
 
p20 <= fracta1[16: 0] * fractb1[50:34];
p21 <= fracta1[33:17] * fractb1[50:34];
p22 <= fracta1[50:34] * fractb1[50:34];
p23 <= fracta1[67:51] * fractb1[50:34];
 
p30 <= fracta1[15: 0] * fractb1[67:51];
p31 <= fracta1[31:16] * fractb1[67:51];
p32 <= fracta1[47:32] * fractb1[67:51];
p33 <= fracta1[63:48] * fractb1[67:51];
end
always @(posedge clk)
if (ce) begin
fract3a <= {p33,p31,p20,p00};
fract3b <= {p32,p12,p10,17'b0} + {p23,p03,p01,17'b0};
fract3c <= {p22,p11,34'b0} + {p13,p02,34'b0};
fract3d <= {p12,51'b0} + {p03,51'b0};
end
always @(posedge clk)
if (ce) begin
fract4a <= fract3a + fract3b;
fract4b <= fract3c + fract3d;
end
always @(posedge clk)
if (ce) begin
fract5 <= fract4a + fract4b;
end
end
else if (WID==80) begin
reg [31:0] p00,p01,p02,p03;
reg [31:0] p10,p11,p12,p13;
reg [31:0] p20,p21,p22,p23;
506,13 → 532,17
reg [EMSB:0] ex9a;
reg ex_gt_xc9;
reg [EMSB:0] xc9;
reg a_gt_c9;
wire [FX:0] mo9;
wire [FMSB+1:0] fractc9;
wire under9;
wire xeq9;
 
always @(posedge clk)
if (ce) ex_gt_xc9 <= ex_gt_xc8;
always @(posedge clk)
if (ce) a_gt_c9 <= a_gt_b8;
always @(posedge clk)
if (ce) xc9 <= xc8;
always @(posedge clk)
if (ce) ex9a <= ex8;
520,6 → 550,7
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9));
 
always @(posedge clk)
if (ce) ex9 <= resZero8 ? 0 : ex_gt_xc8 ? ex8 : xc8;
556,6 → 587,7
// -----------------------------------------------------------
reg [EMSB:0] xdiff10;
reg [FX:0] mfs;
reg ops10;
 
always @(posedge clk)
if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
562,10 → 594,17
: (under9 ? xc9 + ex9a : xc9 - ex9a);
 
// Determine which fraction to denormalize (the one with the
// smaller exponent is denormalized).
// smaller exponent is denormalized). If the exponents are equal
// denormalize the smaller fraction.
always @(posedge clk)
if (ce) mfs <= ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
if (ce) mfs <=
xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9)
: ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
 
always @(posedge clk)
if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
: (ex_gt_xc9 ? 1'b1 : 1'b0);
 
// -----------------------------------------------------------
// Clock #11
// Limit the size of the shifter to only bits needed.
590,6 → 629,8
redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (WID==96)
redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (WID==84)
redor84 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (WID==80)
redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (WID==64)
612,10 → 653,12
wire [FX:0] mo13;
wire ex_gt_xc13;
wire [FMSB+1:0] fractc13;
wire ops13;
 
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13));
 
always @(posedge clk)
if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;
630,9 → 673,9
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));
 
always @(posedge clk)
if (ce) oa <= ex_gt_xc13 ? {mo13,2'b00} : mfs13;
if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13;
always @(posedge clk)
if (ce) ob <= ex_gt_xc13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
 
// -----------------------------------------------------------
// Clock #15
693,7 → 736,7
4'b01??: mo17 <= {1'b0,mo16};
4'b001?: mo17 <= {1'b0,fractc16[FMSB+1:0],{FMSB{1'b0}}};
4'b0001: mo17 <= 1'd0;
default: mo17 <= mab[FX+3:2]; // mab has an extra lead bit and two trailing bits
default: mo17 <= mab[FX+3:2]; // mab has two extra lead bits and two trailing bits
endcase
 
assign o = {so17,ex17,mo17};
710,32 → 753,8
 
module fpFMAnr(clk, ce, op, rm, a, b, c, o, sign_exe, inf, overflow, underflow);
parameter WID=32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
input clk;
input ce;
input op;
/fpLOOUnit.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
30,9 → 30,12
//
// ============================================================================
 
`define FLOAT 6'h36
`define FTOI 6'h02
`define ITOF 6'h03
`define FLT1 4'h1
`define FLT2 4'h2
`define FTOI 5'h02
`define ITOF 5'h03
`define TRUNC 5'h15
`define NXTAFT 5'h0B
 
module fpLOOUnit
#(parameter WID=32)
39,55 → 42,46
(
input clk,
input ce,
input [31:0] ir,
input [3:0] op4,
input [4:0] func5,
input [2:0] rm,
input [WID-1:0] a,
input [WID-1:0] b,
output reg [WID-1:0] o,
output done
);
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
wire [WID-1:0] i2f_o;
wire [WID-1:0] f2i_o;
wire [5:0] op = ir[5:0];
wire [5:0] fn = ir[17:12];
wire [2:0] rm = ir[26:24];
wire [1:0] prec = ir[28:27];
wire [WID-1:0] trunc_o;
wire [WID-1:0] nxtaft_o;
 
delay1 u1 (.clk(clk), .ce(ce), .i(op==`FLOAT && (fn==`ITOF||fn==`FTOI)), .o(done) );
i2f #(WID) ui2fs (.clk(clk), .ce(ce), .rm(rm), .i(a), .o(i2f_o) );
f2i #(WID) uf2is (.clk(clk), .ce(ce), .i(a), .o(f2i_o) );
delay1 u1 (
.clk(clk),
.ce(ce),
.i((op4==`FLT1 && (func5==`ITOF||func5==`FTOI||func5==`TRUNC))||(op4==`FLT2 && (func5==`NXTAFT))),
.o(done) );
i2f #(WID-4) ui2fs (.clk(clk), .ce(ce), .rm(rm), .i(a[WID-1:4]), .o(i2f_o) );
f2i #(WID-4) uf2is (.clk(clk), .ce(ce), .i(a[WID-1:4]), .o(f2i_o) );
fpTrunc #(WID) urho1 (.clk(clk), .ce(ce), .i(a), .o(trunc_o), .overflow());
fpNextAfter #(WID-4) una1 (.clk(clk), .ce(ce), .a(a[WID-1:4]), .b(b[WID-1:4]), .o(nxtaft_o));
 
always @*
case (op)
`FLOAT:
case(fn)
`ITOF: o <= i2f_o;
`FTOI: o <= f2i_o;
default: o <= 0;
endcase
default: o <= 0;
endcase
case (op4)
`FLT1:
case(func5)
`ITOF: o <= {i2f_o,4'h0};
`FTOI: o <= {f2i_o,4'h0};
`TRUNC: o <= trunc_o;
default: o <= 0;
endcase
`FLT2:
case(func5)
`NXTAFT: o <= {nxtaft_o,4'h0};
default: o <= 0;
endcase
default: o <= 0;
endcase
 
endmodule
/fpMul.v
50,34 → 50,9
// ============================================================================
 
module fpMul (clk, ce, a, b, o, sign_exe, inf, overflow, underflow);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
parameter WID = 32;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk;
input ce;
input [WID:1] a, b;
132,7 → 107,41
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;
 
generate
if (WID==64) begin
if (WID==80) begin
reg [31:0] p00,p01,p02,p03;
reg [31:0] p10,p11,p12,p13;
reg [31:0] p20,p21,p22,p23;
reg [31:0] p30,p31,p32,p33;
always @(posedge clk)
if (ce) begin
p00 <= fracta[15: 0] * fractb[15: 0];
p01 <= fracta[31:16] * fractb[15: 0];
p02 <= fracta[47:32] * fractb[15: 0];
p03 <= fracta[63:48] * fractb[15: 0];
p10 <= fracta[15: 0] * fractb[31:16];
p11 <= fracta[31:16] * fractb[31:16];
p12 <= fracta[47:32] * fractb[31:16];
p13 <= fracta[63:48] * fractb[31:16];
 
p20 <= fracta[15: 0] * fractb[47:32];
p21 <= fracta[31:16] * fractb[47:32];
p22 <= fracta[47:32] * fractb[47:32];
p23 <= fracta[63:48] * fractb[47:32];
 
p30 <= fracta[15: 0] * fractb[63:48];
p31 <= fracta[31:16] * fractb[63:48];
p32 <= fracta[47:32] * fractb[63:48];
p33 <= fracta[63:48] * fractb[63:48];
 
fract1 <= {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 +
{p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} +
{p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} +
{p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0}
;
end
end
else if (WID==64) begin
reg [35:0] p00,p01,p02;
reg [35:0] p10,p11,p12;
reg [35:0] p20,p21,p22;
167,9 → 176,6
end
end
else begin
reg [35:0] p00,p01,p02;
reg [35:0] p10,p11,p12;
reg [35:0] p20,p21,p22;
always @(posedge clk)
if (ce) begin
fract1a <= fracta * fractb;
238,34 → 244,13
 
endmodule
 
 
// Multiplier with normalization and rounding.
 
module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);
parameter WID=32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
input clk;
input ce;
input [MSB:0] a, b;
289,15 → 274,3
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));
endmodule
 
module fpMul_tb();
reg clk;
 
initial begin
clk = 0;
end
always #10 clk <= ~clk;
 
fpMul u1 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1));
fpMul u2 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1));
 
endmodule
/fpNormalize.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
42,33 → 42,8
 
module fpNormalize(clk, ce, under, i, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk;
input ce;
input under;
80,14 → 55,16
 
wire so1 = i[EX]; // sign doesn't change
 
// Since there are *two* whole digits in the incoming format
// Since there are *three* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX-1:FX+1];
wire xInf = &xo1a & !under;
wire incExp1 = !xInf & i[FX];
wire [EMSB:0] xo1 = xo1a + incExp1;
wire xInf3 = &xo1a[EMSB:1] & !under;
wire incExp2 = !xInf3 & i[FX];
wire incExp1 = !xInf & i[FX-1];
wire [EMSB:0] xo1 = xo1a + (incExp2 ? 2'd2 : incExp1 ? 2'd1 : 2'd0);
wire [EMSB:0] xo2;
wire xInf1 = &xo1;
 
95,9 → 72,10
// shift mantissa left by one to reduce to a single whole digit
// if there is no exponent increment
wire [FMSB+4:0] mo;
wire [FMSB+4:0] mo1 = (xInf1 & incExp1) ? 0 :
incExp1 ? {i[FX:FMSB+1],|i[FMSB:0],1'b0} : // reduce mantissa size
{i[FX-1:FMSB],|i[FMSB-1:0],1'b0}; // reduce mantissa size
wire [FMSB+4:0] mo1 = ((xInf1 & (incExp1|incExp2))|(xInf3 & incExp2)) ? 0 :
incExp2 ? {i[FX:FMSB+1],|i[FMSB:0]} :
incExp1 ? {i[FX-1:FMSB],|i[FMSB-1:0]} : // reduce mantissa size
{i[FX-2:FMSB-1],|i[FMSB-2:0]}; // reduce mantissa size
wire [FMSB+4:0] mo2;
wire [7:0] leadingZeros2;
 
115,6 → 93,10
assign leadingZeros2[7] = 1'b0;
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo1,12'b0}), .o(leadingZeros2) );
end
else if (WID<=84) begin
assign leadingZeros2[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo1,24'b0}), .o(leadingZeros2) );
end
else if (WID<=96) begin
assign leadingZeros2[7] = 1'b0;
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo1,12'b0}), .o(leadingZeros2) );
/fpRes.sv
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2019 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
21,32 → 21,11
//
// ============================================================================
//
module fpRes(clk, a, o);
module fpRes(clk, ce, a, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
input clk;
input ce;
input [WID-1:0] a;
output [WID-1:0] o;
 
1095,11 → 1074,11
reg [9:0] indexr;
reg [15:0] k0, k1;
always @(posedge clk)
indexr <= index;
if(ce) indexr <= index;
always @(posedge clk)
k0 <= k01[indexr][31:16];
if(ce) k0 <= k01[indexr][31:16];
always @(posedge clk)
k1 <= k01[indexr][15: 0];
if(ce) k1 <= k01[indexr][15: 0];
delay3 #(1) u2 (.clk(clk), .ce(1'b1), .i(sa), .o(sa3));
delay3 #(EMSB+1) u3 (.clk(clk), .ce(1'b1), .i(exp), .o(exp3));
wire [15:0] eps = ma[FMSB-10:FMSB-10-15];
1106,7 → 1085,7
wire [31:0] p = k1 * eps;
reg [15:0] r0;
always @(posedge clk)
r0 <= k0 - (p >> 26);
if(ce) r0 <= k0 - (p >> 26);
assign o = {sa3,exp3,r0[14:0],{FMSB+2-16{1'b0}}};
 
always @*
/fpRound.v
29,29 → 29,7
 
module fpRound(rm, i, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
input [2:0] rm; // rounding mode
input [MSB+3:0] i; // intermediate format input
113,29 → 91,7
 
module fpRoundReg(clk, ce, rm, i, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
input clk;
input ce;
/fpRsqrte.sv
36,29 → 36,7
 
module fpRsqrte(clk, ce, ld, a, o);
parameter WID = 80;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
input clk;
input ce;
input ld;
/fpSigmoid.v
43,29 → 43,7
 
module fpSigmoid(clk, ce, a, o);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
input clk;
input ce;
input [WID-1:0] a;
102,7 → 80,7
endgenerate
 
initial begin
`include "D:\Cores6\rtfItanium\v1\rtl\fpUnit\SigTbl.ver"
`include "D:\Cores6\nvio\v1\rtl\fpUnit\SigTbl.ver"
end
 
// Quickly multiply number by 64 (it is in range -8 to 8) then convert to integer to get
/fpSize.sv
0,0 → 1,30
// This file contains definitions for fields to ease dealing with different fp
// widths. Some of the code still needs to be modified to support widths
// other than standard 32,64 or 80 bit.
//
// This is an include fragment, not a module: it references WID without
// declaring it, so a parameter named WID must already be in scope at the
// point of inclusion.
//
// Index of the most significant bit of a WID-bit value.
localparam MSB = WID-1;
// EMSB is chosen per listed width; any width not listed falls through to the
// final value (4).
// NOTE(review): used elsewhere as the upper index of exponent-sized vectors
// ([EMSB:0]), i.e. exponent width minus one - confirm for new widths.
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==84 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
// FMSB is chosen per listed width; any width not listed falls through to the
// final value (9).
// NOTE(review): presumably the upper index of the stored fraction field, so
// that 1 + (EMSB+1) + (FMSB+1) == WID for each listed width - confirm.
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==84 ? 67 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
// NOTE(review): modules that declare FX locally use (FMSB+2)*2-1; the value
// here is one larger - confirm every consumer expects the wider fraction.
localparam FX = (FMSB+2)*2; // the MSB of the expanded fraction
// EX = FX + (EMSB+1), written out term by term; it evaluates to FX+EMSB+1.
// NOTE(review): appears to be the top (sign) bit index of the expanded
// format, fraction [FX:0] with the exponent above it - confirm.
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
/fpSqrt.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2018-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
29,42 → 29,21
//
// ============================================================================
 
module fpSqrt(rst, clk, ce, ld, a, o, done);
`include "fp_defines.v"
 
module fpSqrt(rst, clk, ce, ld, a, o, done, sqrinf, sqrneg);
parameter WID = 128;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input rst;
input clk;
input ce;
input ld;
input [MSB:0] a;
output [EX:0] o;
output reg [EX:0] o;
output done;
output sqrinf;
output sqrneg;
 
// registered outputs
reg sign_exe;
122,6 → 101,8
assign so = 1'b0; // square root of positive numbers only
assign xo = (ex1 >> 1) + (bias >> 1); // divide by 2 cuts the bias in half, so 1/2 of it is added back in.
assign mo = aNan ? {1'b1,a[FMSB:0],{FMSB+1{1'b0}}} : (sqrto << 36);
assign sqrinf = aInf;
assign sqrneg = !az & so;
 
wire [FMSB+2:0] fracta1 = ex1[0] ? {1'b0,fracta} << 1 : {2'b0,fracta};
 
136,38 → 117,21
.done(done)
);
 
assign o = aNan ? {sa,xa,mo} : {so,xo,mo};
always @*
casez({aNan,sqrinf,sqrneg})
3'b1??: o <= {sa,xa,mo};
3'b01?: o <= {sa,1'b1,qNaN|`QSQRTINF,{FMSB+1{1'b0}}};
3'b001: o <= {sa,1'b1,qNaN|`QSQRTNEG,{FMSB+1{1'b0}}};
default: o <= {so,xo,mo};
endcase
 
endmodule
 
module fpSqrtnr(rst, clk, ce, ld, a, o, rm, done, inf);
module fpSqrtnr(rst, clk, ce, ld, a, o, rm, done, inf, sqrinf, sqrneg);
parameter WID=32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
input rst;
input clk;
input ce;
177,6 → 141,8
input [2:0] rm;
output done;
output inf;
output sqrinf;
output sqrneg;
 
wire [EX:0] o1;
wire inf1;
183,7 → 149,7
wire [MSB+3:0] fpn0;
wire done1;
 
fpSqrt #(WID) u1 (rst, clk, ce, ld, a, o1, done1);
fpSqrt #(WID) u1 (rst, clk, ce, ld, a, o1, done1, sqrinf, sqrneg);
fpNormalize #(WID) u2(.clk(clk), .ce(ce), .under(1'b0), .i(o1), .o(fpn0) );
fpRoundReg #(WID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));
/fpTrunc.sv
36,29 → 36,7
output reg [WID-1:0] o,
output overflow
);
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
integer n;
wire [MSB:0] maxInt = {MSB{1'b1}}; // maximum unsigned integer value
/fpUnit.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
19,7 → 19,6
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
// DSD
// fpUnit.v
// - floating point unit
// - parameterized width
30,6 → 29,8
// 31'h7FC00002 - infinity / infinity
// 31'h7FC00003 - zero / zero
// 31'h7FC00004 - infinity X zero
// 31'h7FC00005 - square root of infinity
// 31'h7FC00006 - square root of negative number
//
// Whenever the fpu encounters a NaN input, the NaN is
// passed through to the output.
82,96 → 83,67
`define VITOF 6'h25
`define VFMUL 6'h3A
`define VFDIV 6'h3E
`define FLOAT 6'h0B
`define FMOV 6'h10
`define FTOI 6'h12
`define ITOF 6'h13
`define FNEG 6'h14
`define FABS 6'h15
`define FSIGN 6'h16
`define FMAN 6'h17
`define FNABS 6'h18
`define FCVTSD 6'h19
`define FCVTSQ 6'h1B
`define FSTAT 6'h1C
`define FTX 6'h20
`define FCX 6'h21
`define FEX 6'h22
`define FDX 6'h23
`define FRM 6'h24
`define FCVTDS 6'h29
`define FLOAT 6'h0F
`define FLT1 4'h1
`define FLT2 4'h2
`define FLT3 4'h3
`define FLT1A 4'h5
`define FLT2LI 4'hA
`define FMA 5'h00
`define FMS 5'h01
`define FNMA 5'h02
`define FNMS 5'h03
`define FMOV 5'h00
`define FTOI 5'h02
`define ITOF 5'h03
`define FNEG 5'h04
`define FABS 5'h05
`define FSIGN 5'h06
`define FMAN 5'h07
`define FNABS 5'h08
`define FCVTSD 5'h09
//`define FCVTSQ 6'h1B
`define FSTAT 5'h0C
`define FSQRT 5'h0D
`define FTX 5'h10
`define FCX 5'h11
`define FEX 5'h12
`define FDX 5'h13
`define FRM 5'h14
`define TRUNC 5'h15
`define FCVTDS 5'h19
 
`define FADD 6'h04
`define FSUB 6'h05
`define FCMP 6'h06
`define FMUL 6'h08
`define FDIV 6'h09
`define FSCALEB 5'h00
`define FADD 5'h04
`define FSUB 5'h05
`define FCMP 5'h06
`define FMUL 5'h08
`define FDIV 5'h09
`define FREM 5'h0A
`define NXTAFT 5'h0B
// FLT1A
`define FRES 5'h00
 
`define QINFOS 23'h7FC000 // info
`define QSUBINFS 31'h7FC00001 // - infinity - infinity
`define QINFDIVS 31'h7FC00002 // - infinity / infinity
`define QZEROZEROS 31'h7FC00003 // - zero / zero
`define QINFZEROS 31'h7FC00004 // - infinity X zero
`include "fp_defines.v"
 
`define QINFOD 52'hFF80000000000 // info
`define QSUBINFD 63'h7FF0000000000001 // - infinity - infinity
`define QINFDIVD 63'h7FF0000000000002 // - infinity / infinity
`define QZEROZEROD 63'h7FF0000000000003 // - zero / zero
`define QINFZEROD 63'h7FF0000000000004 // - infinity X zero
 
`define QINFODX 64'hFF800000_00000000 // info
`define QSUBINFDX 79'h7FFF000000_0000000001 // - infinity - infinity
`define QINFDIVDX 79'h7FFF000000_0000000002 // - infinity / infinity
`define QZEROZERODX 79'h7FFF000000_0000000003 // - zero / zero
`define QINFZERODX 79'h7FFF000000_0000000004 // - infinity X zero
 
`define QINFOQ 112'hFF800000_0000000000_0000000000 // info
`define QSUBINFQ 127'h7F_FF00000000_0000000000_0000000001 // - infinity - infinity
`define QINFDIVQ 127'h7F_FF00000000_0000000000_0000000002 // - infinity / infinity
`define QZEROZEROQ 127'h7F_FF00000000_0000000000_0000000003 // - zero / zero
`define QINFZEROQ 127'h7F_FF00000000_0000000000_0000000004 // - infinity X zero
 
module fpUnit(rst, clk, ce, ir, ld, a, b, imm, o, csr_i, status, exception, done, rm
module fpUnit(rst, clk, clk4x, ce, ir, ld, a, b, c, imm, o, csr_i, status, exception, done, rm
);
 
parameter WID = 64;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
localparam EMSBS = 7;
localparam FMSBS = 22;
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;
 
input rst;
input clk;
input clk4x;
input ce;
input [31:0] ir;
input [39:0] ir;
input ld;
input [MSB:0] a;
input [MSB:0] b;
input [MSB:0] c;
input [5:0] imm;
output tri [MSB:0] o;
input [31:0] csr_i;
181,7 → 153,10
input [2:0] rm;
 
reg [7:0] fpcnt;
assign done = fpcnt==8'h00;
wire rem_done;
wire rem_ld;
wire op_done = fpcnt==8'h00;
assign done = op_done & rem_done;
 
//------------------------------------------------------------
// constants
200,24 → 175,53
wire isNan,isNans;
wire nanx,nanxs;
 
// Decode fp operation
wire latch_res;
wire [3:0] op4_r;
wire [5:0] func6b_r;
wire [2:0] srca;
wire [2:0] srcb;
wire [3:0] op4_i = ir[9:6];
wire [5:0] op = ir[5:0];
wire [5:0] func6b = ir[31:26];
wire [1:0] prec = ir[25:24];
wire [4:0] func6b_i = ir[39:35];
wire fprem = {op4_i,func6b_i} == {`FLT2,`FREM};
wire [3:0] op4 = fprem ? op4_r : op4_i;
wire [5:0] func6b = fprem ? func6b_r : func6b_i;
wire [2:0] insn_rm = ir[30:28];
reg [WID-1:0] res;
reg [WID-1:0] aop, bop;
always @*
case(srca)
`RES: aop <= res;
default: aop <= a;
endcase
always @*
case(srcb)
`RES: bop <= res;
`POINT5:
case(WID)
32: bop <= `POINT5S;
40: bop <= `POINT5SX;
64: bop <= `POINT5D;
80: bop <= `POINT5DX;
endcase
default: bop <= b;
endcase
 
wire fstat = {op,func6b} == {`FLOAT,`FSTAT}; // get status
wire fdiv = {op,func6b} == {`FLOAT,`FDIV};
wire ftx = {op,func6b} == {`FLOAT,`FTX}; // trigger exception
wire fcx = {op,func6b} == {`FLOAT,`FCX}; // clear exception
wire fex = {op,func6b} == {`FLOAT,`FEX}; // enable exception
wire fdx = {op,func6b} == {`FLOAT,`FDX}; // disable exception
wire fcmp = {op,func6b} == {`FLOAT,`FCMP};
wire frm = {op,func6b} == {`FLOAT,`FRM}; // set rounding mode
wire [2:0] prec = 3'd4;//ir[25:24];
 
wire zl_op = (op==`FLOAT && (
(func6b==`FABS || func6b==`FNABS || func6b==`FMOV || func6b==`FNEG || func6b==`FSIGN || func6b==`FMAN || func6b==`FCVTSQ)) ||
wire fstat = {op4,func6b} == {`FLT1,`FSTAT}; // get status
wire fdiv = {op4,func6b} == {`FLT2,`FDIV};
wire ftx = {op4,func6b} == {`FLT1,`FTX}; // trigger exception
wire fcx = {op4,func6b} == {`FLT1,`FCX}; // clear exception
wire fex = {op4,func6b} == {`FLT1,`FEX}; // enable exception
wire fdx = {op4,func6b} == {`FLT1,`FDX}; // disable exception
wire fcmp = {op4,func6b} == {`FLT2,`FCMP};
wire frm = {op4,func6b} == {`FLT1,`FRM}; // set rounding mode
 
wire zl_op = (op4==`FLT1 && (
(func6b==`FABS || func6b==`FNABS || func6b==`FMOV || func6b==`FNEG || func6b==`FSIGN || func6b==`FMAN)) ||
func6b==`FCMP);
wire loo_op = (op==`FLOAT && (func6b==`ITOF || func6b==`FTOI));
wire loo_op = (op4==`FLT1 && (func6b==`ITOF || func6b==`FTOI));
wire loo_done;
 
wire subinf;
287,10 → 291,11
reg subinfx; // subtraction of infinities
reg snanx; // signalling nan
 
wire fdivs = 1'b0;
wire divDone;
wire pipe_ce = ce;// & divDone; // divide must be done in order for pipe to clock
wire precmatch = WID==32 ? ir[28:27]==2'b00 :
WID==64 ? ir[28:27]==2'b01 : 1;
wire precmatch = 1'b0;//WID==32 ? ir[28:27]==2'b00 :
//WID==64 ? ir[28:27]==2'b01 : 1;
/*
WID==80 ? ir[28:27]==2'b10 :
ir[28:27]==2'b11;
359,25 → 364,29
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [2:0] rmd4; // 1st stage delayed
wire [5:0] op1, op2;
wire [3:0] op2;
wire [5:0] op1;
wire [5:0] fn2;
 
wire [MSB:0] zld_o,lood_o;
wire [31:0] zls_o,loos_o;
wire [WID-1:0] zlq_o, looq_o;
fpZLUnit #(WID) u6 (.ir(ir), .a(a), .b(b), .o(zlq_o), .nanx(nanx) );
fpLOOUnit #(WID) u7 (.clk(clk), .ce(pipe_ce), .ir(ir), .a(a), .o(looq_o), .done() );
wire [WID-1:0] scaleb_o;
fpZLUnit #(WID) u6 (.ir(ir), .op4(op4), .func5(func6b), .a(aop), .b(bop), .c(c), .o(zlq_o), .nanx(nanx) );
fpLOOUnit #(WID) u7 (.clk(clk), .ce(pipe_ce), .op4(op4), .func5(func6b), .rm(insn_rm==3'b111 ? rm : insn_rm), .a(aop), .b(bop), .o(looq_o), .done() );
fpScaleb u16 (.clk(clk), .ce(pipe_ce), .a(aop), .b(bop), .o(scaleb_o));
 
//fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
 
fp_decomp #(WID) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(WID) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(WID) u1 (.i(aop), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(WID) u2 (.i(bop), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
//fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
//fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );
 
wire [2:0] rmd = ir[26:24]==3'b111 ? rm : ir[26:24];
wire [2:0] rmd = ir[30:28]==3'b111 ? rm : ir[30:28];
delay4 #(3) u3 (.clk(clk), .ce(pipe_ce), .i(rmd), .o(rmd4) );
delay1 #(6) u4 (.clk(clk), .ce(pipe_ce), .i(func6b), .o(op1) );
delay2 #(6) u5 (.clk(clk), .ce(pipe_ce), .i(func6b), .o(op2) );
delay2 #(4) u5 (.clk(clk), .ce(pipe_ce), .i(op4), .o(op2) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(func6b), .o(fn2) );
 
delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );
405,6 → 414,7
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
wire [EX:0] fsqrt_o;
reg [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
411,14 → 421,40
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
wire [EXS:0] fres_o;
reg [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;
// Driven by the fpSqrt instance; reported in the status word as sqrtx.
wire sqrneg;
// Fused multiply-add result. Declared explicitly: without a declaration the
// name becomes an implicit 1-bit net at the fpFMA port connection, and the
// value later selected into fres is truncated to a single bit.
// NOTE(review): width assumed to match the other expanded results [EX:0] —
// confirm against the fpFMA output port.
wire [EX:0] fma_o;
// op input of fpFMA: set for the two subtract forms (FMS, FNMS).
wire fms = func6b==`FMS || func6b==`FNMS;
// Set for the negated-product forms (FNMA, FNMS).
wire nma = func6b==`FNMA || func6b==`FNMS;
// Operand a with its top (sign) bit flipped when nma is set.
wire [WID-1:0] ma_aop = aop ^ (nma << WID-1);
 
fpAddsub #(WID) u10(.clk(clk), .ce(pipe_ce), .rm(rmd), .op(func6b[0]), .a(a), .b(b), .o(fas_o) );
fpDiv #(WID) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul #(WID) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(WID) u10(.clk(clk), .ce(pipe_ce), .rm(rmd), .op(func6b[0]), .a(aop), .b(bop), .o(fas_o) );
fpDiv #(WID) u11(.clk(clk), .clk4x(clk4x), .ce(pipe_ce), .ld(ld|rem_ld), .a(aop), .b(bop), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul #(WID) u12(.clk(clk), .ce(pipe_ce), .a(aop), .b(bop), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpSqrt #(WID) u13(.rst(rst), .clk(clk4x), .ce(pipe_ce), .ld(ld), .a(aop), .o(fsqrt_o), .done(), .sqrinf(), .sqrneg(sqrneg) );
fpRes #(WID) u14(.clk(clk), .ce(pipe_ce), .a(aop), .o(fres_o));
fpFMA #(WID) u15(.clk(clk), .ce(pipe_ce), .op(fms), .rm(rmd), .a(ma_aop), .b(bop), .c(c), .o(fma_o), .inf());
 
fpRemainder ufpr1
(
.rst(rst),
.clk(clk),
.ce(ce),
.ld_i(ld),
.ld_o(rem_ld),
.op4_i(op4_i),
.funct6b_i(func6b_i),
.op4_o(op4_r),
.funct6b_o(func6b_r),
.op_done(op_done),
.rem_done(rem_done),
.srca(srca),
.srcb(srcb),
.latch_res(latch_res)
);
/*
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
426,39 → 462,44
*/
always @*
case(op2)
`FLOAT:
case (fn2)
`FMUL: under = mulUnder;
`FDIV: under = divUnder;
default: begin under = 0; unders = 0; end
`FLT2,`FLT2LI:
case (fn2)
`FMUL: under = mulUnder;
`FDIV: under = divUnder;
default: begin under = 0; unders = 0; end
endcase
`VECTOR:
case (fn2)
`VFMUL: under = mulUnder;
`VFDIV: under = divUnder;
default: begin under = 0; unders = 0; end
endcase
default: begin under = 0; unders = 0; end
endcase
 
always @*
case(op2)
`FLOAT:
case(fn2)
`FADD: fres <= fas_o;
`FSUB: fres <= fas_o;
`FMUL: fres <= fmul_o;
`FDIV: fres <= fdiv_o;
default: begin fres <= fas_o; fress <= fass_o; end
endcase
`VECTOR:
case(fn2)
`VFADD: fres <= fas_o;
`VFSUB: fres <= fas_o;
`VFMUL: fres <= fmul_o;
`VFDIV: fres <= fdiv_o;
default: begin fres <= fas_o; fress <= fass_o; end
endcase
`FLT3:
case(fn2)
`FMA: fres <= fma_o;
`FMS: fres <= fma_o;
`FNMA: fres <= fma_o;
`FNMS: fres <= fma_o;
default: fres <= fma_o;
endcase
`FLT2,`FLT2LI:
case(fn2)
`FADD: fres <= fas_o;
`FSUB: fres <= fas_o;
`FMUL: fres <= fmul_o;
`FDIV: fres <= fdiv_o;
`FSCALEB: fres <= scaleb_o;
default: begin fres <= fas_o; fress <= fass_o; end
endcase
`FLT1:
case(fn2)
`FSQRT: fres <= fsqrt_o;
default: begin fres <= 1'd0; fress <= 1'd0; end
endcase
`FLT1A:
case(fn2)
`FRES: fres <= fres_o;
default: begin fres <= 1'd0; fress <= 1'd0; end
endcase
default: begin fres <= fas_o; fress <= fass_o; end
endcase
 
502,8 → 543,8
gx,
sx,
1'b0, // cvtx
1'b0, // sqrtx
1'b0, // cvtx
sqrneg, // sqrtx
fcmp & nanx,
infzero,
zerozero,
512,12 → 553,15
isNan
};
 
assign o = (!fstat) ?
wire [WID-1:0] o1 =
(frm|fcx|fdx|fex) ? (a|imm) :
zl_op ? zlq_o :
loo_op ? looq_o :
{so,fpu_o[MSB-1:0]} : 'bz;
{so,fpu_o[MSB-1:0]};
assign zero = fpu_o[MSB-1:0]==0;
assign o = fprem ? res : o1;
always @(posedge clk)
if (ce & latch_res) res <= o1;
 
wire [7:0] maxdivcnt;
generate begin
527,7 → 571,7
assign infdiv = fpu_o[126:0]==`QINFDIVQ;
assign zerozero = fpu_o[126:0]==`QZEROZEROQ;
assign infzero = fpu_o[126:0]==`QINFZEROQ;
assign maxdivcnt = 8'd250;
assign maxdivcnt = 8'd128;
end
else if (WID==80) begin
assign inf = &fpu_o[78:64] && fpu_o[63:0]==0;
535,7 → 579,7
assign infdiv = fpu_o[78:0]==`QINFDIVDX;
assign zerozero = fpu_o[78:0]==`QZEROZERODX;
assign infzero = fpu_o[78:0]==`QINFZERODX;
assign maxdivcnt = 8'd136;
assign maxdivcnt = 8'd80;
end
else if (WID==64) begin
assign inf = &fpu_o[62:52] && fpu_o[51:0]==0;
543,7 → 587,7
assign infdiv = fpu_o[62:0]==`QINFDIVD;
assign zerozero = fpu_o[62:0]==`QZEROZEROD;
assign infzero = fpu_o[62:0]==`QINFZEROD;
assign maxdivcnt = 8'd112;
assign maxdivcnt = 8'd64;
end
else if (WID==32) begin
assign inf = &fpu_o[30:23] && fpu_o[22:0]==0;
551,7 → 595,7
assign infdiv = fpu_o[30:0]==`QINFDIVS;
assign zerozero = fpu_o[30:0]==`QZEROZEROS;
assign infzero = fpu_o[30:0]==`QINFZEROS;
assign maxdivcnt = 8'd54;
assign maxdivcnt = 8'd32;
end
end
endgenerate
561,43 → 605,55
// Generate a done signal. Latency varies depending on the instruction.
always @(posedge clk)
begin
if (rst)
fpcnt <= 8'h00;
else begin
if (ld)
case(ir[5:0])
`FLOAT:
begin
case(func6b)
`FABS,`FNABS,`FNEG,`FMAN,`FMOV,`FSIGN,
`FCVTSD,`FCVTSQ,`FCVTDS: begin fpcnt <= 8'd0; end
`FTOI: begin fpcnt <= 8'd1; end
`ITOF: begin fpcnt <= 8'd1; end
`FCMP: begin fpcnt <= 8'd0; end
`FADD: begin fpcnt <= 8'd8; end
`FSUB: begin fpcnt <= 8'd8; end
`FMUL: begin fpcnt <= 8'd10; end
`FDIV: begin fpcnt <= maxdivcnt; end
default: fpcnt <= 8'h00;
endcase
end
`VECTOR:
case(func6b)
`VFNEG: begin fpcnt <= 8'd0; end
`VFADD: begin fpcnt <= 8'd8; end
`VFSUB: begin fpcnt <= 8'd8; end
`VFSxx: begin fpcnt <= 8'd0; end
`VFMUL: begin fpcnt <= 8'd10; end
`VFDIV: begin fpcnt <= maxdivcnt; end
`VFTOI: begin fpcnt <= 8'd1; end
`VITOF: begin fpcnt <= 8'd1; end
default: fpcnt <= 8'h00;
endcase
default: fpcnt <= 8'h00;
endcase
else if (!done)
fpcnt <= fpcnt - 1;
end
// Latency down-counter. fpcnt is loaded with a per-operation cycle count when
// an operation is loaded and decremented once per clk until it reaches zero;
// op_done is the fpcnt==0 condition.
if (rst)
fpcnt <= 8'h00;
else begin
// Load on an external ld or on rem_ld from the fpRemainder instance.
if (ld|rem_ld)
case(op4)
// Three-operand group: every multiply-add form uses the same count.
`FLT3:
case(func6b)
`FMA: fpcnt <= 8'd22;
`FMS: fpcnt <= 8'd22;
`FNMA: fpcnt <= 8'd22;
`FNMS: fpcnt <= 8'd22;
default: fpcnt <= 8'd00;
endcase
// Two-operand group.
`FLT2,`FLT2LI:
case(func6b)
`FCMP: begin fpcnt <= 8'd0; end
`FADD: begin fpcnt <= 8'd6; end
`FSUB: begin fpcnt <= 8'd6; end
`FMUL: begin fpcnt <= 8'd6; end
// maxdivcnt is the WID-dependent constant assigned in the generate block.
`FDIV: begin fpcnt <= maxdivcnt; end
`FREM: fpcnt <= maxdivcnt+8'd23;
`NXTAFT: fpcnt <= 8'd1;
`FSCALEB: fpcnt <= 8'd2;
default: fpcnt <= 8'h00;
endcase
// One-operand group.
`FLT1:
case(func6b)
`FABS,`FNABS,`FNEG,`FMAN,`FMOV,`FSIGN,
`FCVTSD,`FCVTDS: begin fpcnt <= 8'd0; end
`FTOI: begin fpcnt <= 8'd1; end
`ITOF: begin fpcnt <= 8'd1; end
`TRUNC: begin fpcnt <= 8'd1; end
// Square root is given the same count as divide.
`FSQRT: begin fpcnt <= maxdivcnt; end
default: fpcnt <= 8'h00;
endcase
`FLT1A:
case(func6b)
`FRES: fpcnt <= 8'h03;
default: fpcnt <= 8'h00;
endcase
default: fpcnt <= 8'h00;
endcase
else if (!op_done) begin
// A divide ends the count early as soon as divDone is asserted.
if ((op4==`FLT2||op4==`FLT2LI) && func6b==`FDIV && divDone)
fpcnt <= 8'h00;
else
fpcnt <= fpcnt - 1;
end
end
end
endmodule
 
/fpZLUnit.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2007-2016 Robert Finch, Waterloo
// \\__/ o\ (C) 2007-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
37,77 → 37,146
//
// ============================================================================
 
`define FLOAT 6'h36
`define FMOV 6'h00
`define FNEG 6'h04
`define FABS 6'h05
`define FSIGN 6'h06
`define FMAN 6'h07
`define FNABS 6'h08
`define FCVTSQ 6'h0B
`define FLOAT 4'h1
`define FLT1 4'h1
`define FLT2 4'h2
`define FLT3 4'h3
`define FANDI 4'hE
`define FORI 4'hF
 
`define FMAX 5'h10
`define FMIN 5'h11
`define FCMP 5'h06
`define FMOV 5'h00
`define FNEG 5'h04
`define FABS 5'h05
`define FSIGN 5'h06
`define FMAN 5'h07
`define FNABS 5'h08
`define FCVTSD 5'h09
`define F32TO80 5'h0A
`define ISNAN 5'h0E
`define CPYSGN 5'h0F // FLT2
`define FINITE 5'h0F // FLT1
//`define FCVTSQ 6'h1B
`define FCVTDS 5'h19
`define FSLT 5'h10
`define FSGE 5'h11
`define FSLE 5'h12
`define FSGT 5'h13
`define FSEQ 5'h14
`define FSNE 5'h15
`define FSUN 5'h16
`define F80TO32 5'h1A
`define UNORD 5'h1F
 
module fpZLUnit
#(parameter WID=32)
#(parameter WID=80)
(
input [31:0] ir,
input [WID-1:0] a,
input [WID-1:0] b, // for fcmp
output reg [WID-1:0] o,
output nanx
input [3:0] op4,
input [4:0] func5,
input [39:0] ir,
input [WID+3:0] a,
input [WID+3:0] b, // for fcmp
input [WID+3:0] c, // for fcmp
output reg [WID+3:0] o,
output reg nanx
);
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
wire [5:0] op = ir[5:0];
wire [1:0] prec = ir[28:27];
wire [5:0] fn = ir[17:12];
wire [2:0] fn3 = ir[31:29];
//wire [1:0] prec = ir[25:24];
 
wire [3:0] cmp_o;
wire nanxab,nanxac,nanxbc;
wire nana;
wire [EMSB:0] expa;
wire [FMSB:0] ma;
wire xinfa;
wire [4:0] cmp_o, cmpac_o, cmpbc_o;
 
fp_cmp_unit #(WID) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanx) );
// Zero is being passed for b in some cases so the NaN must come from a if
// present.
fp_cmp_unit #(WID+4) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanxab) );
fp_cmp_unit #(WID+4) u2 (.a(a), .b(c), .o(cmpac_o), .nanx(nanxac) );
fp_cmp_unit #(WID+4) u3 (.a(b), .b(c), .o(cmpbc_o), .nanx(nanxbc) );
fpDecomp #(WID+4) u4 (.i(a), .sgn(), .exp(expa), .man(ma), .fract(), .xz(), .mz(), .vz(), .inf(), .xinf(xinfa), .qnan(), .snan(), .nan(nana));
wire [127:0] sq_o;
fcvtsq u2 (a, sq_o);
//fcvtsq u2 (a[31:0], sq_o);
wire [79:0] sdo;
fs2d u5 (a[43:4], sdo);
wire [39:0] dso;
fd2s u6 (a, dso);
// Results of the 32 <-> 80 bit format converters; both operands skip the
// four low-order bits of a.
wire [79:0] f32to80o;
wire [31:0] f80to32o;
F32ToF80 u7 (a[35:4], f32to80o);
// The narrowing converter must drive f80to32o, which the F80TO32 case reads.
// It was wired to f32to80o, giving that net two drivers and leaving
// f80to32o undriven.
F80ToF32 u8 (a[WID+3:4], f80to32o);
 
always @*
case(op)
`FLOAT:
case(fn3)
3'b000:
case(fn)
`FABS: o <= {1'b0,a[WID-2:0]}; // fabs
`FNABS: o <= {1'b1,a[WID-2:0]}; // fnabs
`FNEG: o <= {~a[WID-1],a[WID-2:0]}; // fneg
`FMOV: o <= a; // fmov
`FSIGN: o <= (a[WID-2:0]==0) ? 0 : {a[WID-1],1'b0,{EMSB{1'b1}},{FMSB+1{1'b0}}}; // fsign
`FMAN: o <= {a[WID-1],1'b0,{EMSB{1'b1}},a[FMSB:0]}; // fman
`FCVTSQ: o <= sq_o;
default: o <= 0;
endcase
// FCMP
3'b001: o <= cmp_o;
endcase
case(op4)
`FLT1:
case(func5)
`FABS: begin o <= {1'b0,a[WID-2:0]}; nanx <= nanxab; end
`FNABS: begin o <= {1'b1,a[WID-2:0]}; nanx <= nanxab; end
`FNEG: begin o <= {~a[WID-1],a[WID-2:0]}; nanx <= nanxab; end
`FMOV: begin o <= a; nanx <= nanxab; end
`FSIGN: begin o <= (a[WID-2:0]==0) ? 0 : {a[WID-1],1'b0,{EMSB{1'b1}},{FMSB+1{1'b0}}}; nanx <= 1'b0; end
`FMAN: begin o <= {a[WID-1],1'b0,{EMSB{1'b1}},a[FMSB:0]}; nanx <= 1'b0; end
//`FCVTSQ: o <= sq_o;
`FCVTSD: begin o <= {sdo,4'h0}; nanx <= nanxab; end
`FCVTDS: begin o <= {{40{dso[39]}},dso,4'h0}; nanx <= nanxab; end
`F32TO80: begin o <= {f32to80o,4'h0}; nanx <= nanxab; end
`F80TO32: begin o <= {f80to32o,4'h0}; nanx <= nanxab; end
`ISNAN: begin o <= nana; end
`FINITE: begin o <= !xinfa; end
`UNORD: begin o <= nanxab; end
default: o <= 0;
endcase
`FLT2:
	// Two-operand compare / set operations. Each result is a flag (or the
	// 5-bit compare vector) placed above four zero low-order bits.
	case(func5)
	`FCMP:	begin o <= {cmp_o,4'h0}; nanx <= nanxab; end
	`FSLT:	begin o <= {cmp_o[1],4'h0}; nanx <= nanxab; end
	`FSGE:	begin o <= {~cmp_o[1],4'h0}; nanx <= nanxab; end
	`FSLE:	begin o <= {cmp_o[2],4'h0}; nanx <= nanxab; end
	// Invert only the flag bit. Applying ~ to the whole concatenation is
	// evaluated at the width of o, which sets the four low bits and every
	// zero-extended upper bit as well.
	`FSGT:	begin o <= {~cmp_o[2],4'h0}; nanx <= nanxab; end
	`FSEQ:	begin o <= {cmp_o[0],4'h0}; nanx <= nanxab; end
	`FSNE:	begin o <= {~cmp_o[0],4'h0}; nanx <= nanxab; end
	`FSUN:	begin o <= {cmp_o[4],4'h0}; nanx <= nanxab; end
	// Magnitude of a with the top bit taken from b.
	// NOTE(review): nanx is not assigned on this path or in the default.
	`CPYSGN:	begin o <= {b[WID+3],a[WID+2:0]}; end
	default:	o <= 0;
	endcase
`FLT3:
case(func5)
`FMAX:
begin
o <= ~cmp_o[2] & ~cmpac_o[2] ? a : ~cmpbc_o[2] ? b : c;
nanx <= nanxab|nanxac|nanxbc;
end
`FMIN:
begin
o <= cmp_o[1] & cmpac_o[1] ? a : cmpbc_o[2] ? b : c;
nanx <= nanxab|nanxac|nanxbc;
end
default: o <= 0;
endcase
`FANDI:
begin
case(ir[32:31])
2'd0: o <= {a[23: 4] & {{58{1'b1}},ir[39:33],ir[30:16],4'h0}};
2'd1: o <= a[43:24] & {{36{1'b1}},ir[39:33],ir[30:16],{20{1'b1}}};
2'd2: o <= a[63:44] & {{14{1'b1}},ir[39:33],ir[30:16],{40{1'b1}}};
2'd3: o <= a[83:64] & {ir[39:33],ir[30:16],{60{1'b1}}};
endcase
nanx <= 1'b0;
end
`FORI:
begin
case(ir[32:31])
2'd0: o <= {a[23: 4] & {{58{1'b0}},ir[39:33],ir[30:16],4'h0}};
2'd1: o <= a[43:24] & {{36{1'b0}},ir[39:33],ir[30:16],{20{1'b0}}};
2'd2: o <= a[63:44] & {{14{1'b0}},ir[39:33],ir[30:16],{40{1'b0}}};
2'd3: o <= a[83:64] & {ir[39:33],ir[30:16],{60{1'b0}}};
endcase
nanx <= 1'b0;
end
default: o <= 0;
endcase
 
/fp_cmp_unit.v
29,29 → 29,7
 
module fp_cmp_unit(a, b, o, nanx);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 11 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 34 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
input [WID-1:0] a, b;
output [4:0] o;
/fp_decomp.v
27,33 → 27,9
// ============================================================================
 
module fp_decomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan);
 
parameter WID=32;
`include "fpSize.sv"
 
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
 
input [MSB:0] i;
 
output sgn;
/fp_defines.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
61,3 → 61,15
`define QINFZEROQ 127'h7F_FF00000000_0000000000_0000000004 // - infinity X zero
`define QSQRTINFQ 127'h7F_FF00000000_0000000000_0000000005 // - square root of infinity
`define QSQRTNEGQ 127'h7F_FF00000000_0000000000_0000000006 // - square root of negative number
 
`define POINT5S 32'h3F000000
`define POINT5SX 40'h3F80000000
`define POINT5D 64'h3FE0000000000000
`define POINT5DX 80'h3FFE0000000000000000
 
`define AIN 3'd0
`define BIN 3'd1
`define RES 3'd2
`define POINT5 3'd3
 
 
/fpdivr2.v
1,10 → 1,14
// ============================================================================
// __
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpdivr2.v
// Radix 2 floating point divider primitive
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
17,108 → 21,100
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// fpdivr2.v
// Radix 2 floating point divider primitive
//
//
// ============================================================================
//
module fpdivr2
#( parameter WID = 24 )
(
input clk,
input ld,
input [WID-1:0] a,
input [WID-1:0] b,
output reg [WID*2-1:0] q,
output [WID-1:0] r,
output done
);
localparam DMSB = WID-1;
 
reg [DMSB:0] rx [2:0]; // remainder holds
reg [DMSB:0] rxx;
reg [7:0] cnt; // iteration count
wire [DMSB:0] sdq;
wire [DMSB:0] sdr;
wire sdval = 1'b0;
wire sdbz;
reg willGo0;
//specialCaseDivider #(WID) u1 (.a(a), .b(b), .q(sdq), .r(sdr), .val(sdval), .dbz(sdbz) );
module fpdivr2(clk4x, ld, a, b, q, r, done, lzcnt);
parameter WID = 112;
parameter RADIX = 2;
localparam WID1 = WID;//((WID+2)/3)*3; // make width a multiple of three
localparam DMSB = WID1-1;
input clk4x;
input ld;
input [WID1-1:0] a;
input [WID1-1:0] b;
output reg [WID1*2-1:0] q = 0;
output reg [WID1-1:0] r = 0;
output reg done = 1'b0;
output reg [7:0] lzcnt;
 
initial begin
rx[0] = 0;
end
 
always @(posedge clk)
if (ld)
cnt <= sdval ? 8'b10000000 : WID*2-2;
else if (!done)
cnt <= cnt - 1;
reg [8:0] cnt; // iteration count
reg [WID1*2-1:0] qi = 0;
reg [DMSB+1:0] ri = 0;
wire b0;
reg gotnz; // got a non-zero bit
 
reg done1;
wire [7:0] maxcnt;
assign b0 = b <= ri;
wire [DMSB+1:0] r1 = b0 ? ri - b : ri;
assign maxcnt = WID1*2;
 
always @(posedge clk)
if (ld) begin
rxx <= 0;
if (sdval)
q <= {sdq,{WID{1'b0}}};
else
q <= {a,{WID{1'b0}}};
end
else if (!done) begin
willGo0 = {rxx ,q[WID*2-1 ]} > b;
rx[0] = willGo0 ? {rxx ,q[WID*2-1 ]} - b : {rxx ,q[WID*2-1 ]};
q[WID*2-1:1] <= q[WID*2-1-1:0];
q[0] <= willGo0;
rxx <= rx[0];
end
// Done pulse for external circuit. Must span over 1 1x clock so that it's
// recognized.
always @(posedge clk4x)
if (ld)
done <= 1'b0;
else if (cnt==9'h1FE)
done <= 1'b1;
else if (cnt==9'h1F7)
done <= 1'b0;
 
// correct remainder
assign r = sdval ? sdr : rx[2][DMSB] ? rx[2] + b : rx[2];
assign done = cnt[7];
// Internal done pulse
always @(posedge clk4x)
begin
done1 <= 1'b0;
if (ld)
done1 <= 1'b0;
else if (cnt==9'h1FF)
done1 <= 1'b1;
end
 
endmodule
always @(posedge clk4x)
if (ld)
cnt <= maxcnt;
else if (cnt != 9'h1F7)
cnt <= cnt - 8'd1;
 
/*
module fpdivr2_tb();
always @(posedge clk4x)
if (ld)
gotnz <= 1'b0;
else if (!cnt[8]) begin
if (b0)
gotnz <= 1'b1;
end
 
reg rst;
reg clk;
reg ld;
reg [6:0] cnt;
wire cnt81;
delay1 #(1) u1 (clk4x, 1'b1, cnt[8], cnt81);
 
wire ce = 1'b1;
wire [23:0] a = 24'h0_4000;
wire [23:0] b = 24'd101;
wire [45:0] q;
wire [23:0] r;
wire done;
always @(posedge clk4x)
if (ld)
lzcnt <= 8'h00;
else if (!cnt81) begin
if (b0==1'b0 && !gotnz)
lzcnt <= lzcnt + 8'd1;
end
 
initial begin
clk = 1;
rst = 0;
#100 rst = 1;
#100 rst = 0;
end
always @(posedge clk4x)
if (ld)
qi <= {3'b0,a,{WID1{1'b0}}};
else if (!cnt81)
qi[WID1*2-1:0] <= {qi[WID1*2-1-1:0],b0};
 
always #20 clk = ~clk; // 25 MHz
always @(posedge clk)
if (rst)
cnt <= 0;
else begin
ld <= 0;
cnt <= cnt + 1;
if (cnt == 3)
ld <= 1;
$display("ld=%b q=%h r=%h done=%b", ld, q, r, done);
end
always @(posedge clk4x)
if (ld)
ri <= 0;
else if (!cnt81)
ri <= {r1[DMSB:0],qi[WID1*2-1]};
 
fpdivr2 #(24) divu0(.clk(clk), .ld(ld), .a(a), .b(b), .q(q), .r(r), .done(done) );
always @(posedge clk4x)
if (done1)
q <= qi;
always @(posedge clk4x)
if (done1)
r <= ri;
 
endmodule
*/
 
 
/fpdivr8.v
1,12 → 1,13
// ============================================================================
// __
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// fpdivr8.v
// Radix 8 floating point divider primitive
// Radix8 doesn't work !!!!
// Radix 2 floating point divider primitive
//
//
// This source file is free software: you can redistribute it and/or modify
26,124 → 27,65
 
// fpdivr8 - clocked shift-and-subtract divider primitive.
// NOTE(review): this text is a revision-comparison hunk, so lines from two
// revisions of the module appear together (for example the two DMSB
// localparams and the WID1-wide and WID-wide port lists below). Read such
// pairs as old/new alternatives rather than as one compilable module.
//
// Ports as used by the logic below:
//   clk   - all registers update on the rising edge
//   ld    - loads q from a (or from sdq in the sdval form) and restarts cnt
//   a     - value loaded into q and shifted out of the top of q bit by bit
//   b     - value compared against, and subtracted from, the running remainder
//   q     - shift register; comparison results are shifted into its low bits
//   r     - remainder output
//   done  - set for one clock when cnt is seen at 9'h1FF (or on ld when sdval)
//   lzcnt - count of leading zero result bits (only updated in the RADIX==2 form)
module fpdivr8(clk, ld, a, b, q, r, done, lzcnt);
parameter WID = 112;
parameter RADIX = 8;
localparam WID1 = WID;//((WID+2)/3)*3; // make width a multiple of three
localparam DMSB = WID1-1;
localparam DMSB = WID-1;
input clk;
input ld;
input [WID1-1:0] a;
input [WID1-1:0] b;
output reg [WID1*2-1:0] q;
output [WID1-1:0] r;
input [WID-1:0] a;
input [WID-1:0] b;
output reg [WID-1:0] q;
output [WID-1:0] r;
output reg done;
output reg [7:0] lzcnt;
 
 
wire [DMSB:0] rx [2:0]; // remainder holds
reg [DMSB:0] rxx;
reg [8:0] cnt; // iteration count
wire [DMSB:0] sdq;
// NOTE(review): no driver for sdr is visible in this module text, yet it is
// selected onto r when sdval is set further down.
wire [DMSB:0] sdr;
wire sdval;
wire sddbz;
reg [DMSB+1:0] ri = 0;
wire b0,b1,b2;
wire b0,b1,b2,b3;
wire [DMSB+1:0] r1,r2,r3;
reg gotnz;
 
// NOTE(review): .dbz is connected to "sdbz", but the wire declared above is
// "sddbz"; as written this refers to an undeclared name.
specialCaseDivider #(WID1) u1 (.a(a), .b(b), .q(sdq), .val(sdval), .dbz(sdbz) );
 
wire [7:0] maxcnt;
wire [2:0] n1;
generate
// Three-bits-per-clock form (WID-based).
// maxcnt is the reload value for cnt: WID/3 (integer division) plus one.
// Each stage compares b with the current partial remainder and subtracts b
// when b is smaller; stages two and three first append the next bit taken
// from the top of q (q[WID-1], then q[WID-2]).
// NOTE(review): the compare is a strict '<' here, whereas the RADIX==2 form
// below uses '<=' - confirm which is intended.
// NOTE(review): {r1,q[...]} and {r2,q[...]} are one bit wider than r2/r3,
// so the top bit of the concatenation is dropped on assignment.
assign maxcnt = WID/3+1;
assign b0 = b < rxx;
assign r1 = b0 ? rxx - b : rxx;
assign b1 = b < {r1,q[WID-1]};
assign r2 = b1 ? {r1,q[WID-1]} - b : {r1,q[WID-1]};
assign b2 = b < {r2,q[WID-2]};
assign r3 = b2 ? {r2,q[WID-2]} - b : {r2,q[WID-2]};
 
// Partial remainder register: cleared on ld, otherwise (while done is low)
// reloaded with the third-stage result extended by q[WID-3]. The value is
// truncated to the DMSB+1 bits of rxx.
always @(posedge clk)
if (ld)
rxx <= {WID{1'b0}};
else if (!done)
rxx <= {r3,q[WID-3]};
 
always @(posedge clk)
begin
// WID1*2-based RADIX==8 form of the same three-stage compare/subtract chain;
// here the appended bits come from q[WID*2-1] and q[WID*2-2].
if (RADIX==8) begin
assign maxcnt = WID1*2/3+1;
assign b0 = b < rxx;
assign r1 = b0 ? rxx - b : rxx;
assign b1 = b < {r1,q[WID*2-1]};
assign r2 = b1 ? {r1,q[WID*2-1]} - b : {r1,q[WID*2-1]};
assign b2 = b < {r2,q[WID*2-1-1]};
assign r3 = b2 ? {r2,q[WID*2-1-1]} - b : {r2,q[WID*2-1-1]};
assign n1 = 2;
always @(posedge clk)
if (ld)
rxx <= 0;
else if (!done)
rxx <= {r3,q[WID*2-1]};
// Iteration counter (form without sdval): done defaults low every clock.
// ld reloads cnt with maxcnt; otherwise cnt counts down until it holds
// 9'h1FE, and done is raised on the clock where cnt is seen at 9'h1FF.
done <= 1'b0;
if (ld) begin
cnt <= maxcnt;
end
else if (cnt != 9'h1FE) begin
cnt <= cnt - 1;
if (cnt==9'h1FF)
done <= 1'b1;
end
end
// One-bit-per-clock form: a single compare/subtract stage working on ri.
else if (RADIX==2) begin
assign b0 = b <= ri;
assign r1 = b0 ? ri - b : ri;
assign maxcnt = WID1*2-1;
assign n1 = 0;
// assign rx[0] = rxx [DMSB] ? {rxx ,q[WID*2-1 ]} + b : {rxx ,q[WID*2-1 ]} - b;
end
end
endgenerate
 
// Iteration counter (form with sdval): same countdown as above, except that
// when sdval is set at load time cnt is parked at 9'h1FE straight away and
// done takes the value of sdval on that clock.
always @(posedge clk)
begin
done <= 1'b0;
if (ld) begin
cnt <= sdval ? 9'h1FE : maxcnt;
done <= sdval;
end
else if (cnt != 9'h1FE) begin
cnt <= cnt - 1;
if (cnt==9'h1FF)
done <= 1'b1;
end
 
// q register (WID-based form): loaded with a on ld; while done is low it is
// shifted left by three places each clock and the three compare results are
// written to the low bits, first-stage result (b0) in the highest of them.
always @(posedge clk)
if (ld) begin
q <= a;
end
else if (!done) begin
q[WID-1:3] <= q[WID-4:0];
q[2] <= b0;
q[1] <= b1;
q[0] <= b2;
end
// Remainder output is the combinational third-stage result.
assign r = r3;
 
 
generate
begin
// RADIX==8 form with special-case handling: on ld, q is preloaded with
// either sdq (when sdval) or a, each placed below three zero bits and above
// WID1 zero bits. gotnz and lzcnt are only reset here, never updated.
// NOTE(review): the results enter the low bits in the opposite order to the
// WID-based form above (b0 goes to q[0] here, to q[2] there).
if (RADIX==8) begin
always @(posedge clk)
if (ld) begin
gotnz <= 1'b0;
lzcnt <= 8'h00;
if (sdval)
q <= {3'b0,sdq,{WID1{1'b0}}};
else
q <= {3'b0,a,{WID1{1'b0}}};
end
else if (!done) begin
q[WID1-1:3] <= q[WID1-1-3:0];
q[0] <= b0;
q[1] <= b1;
q[2] <= b2;
end
// correct remainder
assign r = sdval ? sdr : r3;
end
// RADIX==2 form: one result bit per clock while cnt has not reached 9'h1FE.
// gotnz latches the first time b0 is 1; until then every clock with b0==0
// increments lzcnt. q shifts left by one with b0 entering bit 0, and ri is
// reloaded with the low DMSB+1 bits of r1 followed by the bit leaving the
// top of q.
if (RADIX==2) begin
always @(posedge clk)
if (ld) begin
gotnz <= 1'b0;
lzcnt <= 8'h00;
ri <= 0;
if (sdval)
q <= {3'b0,sdq,{WID1{1'b0}}};
else
q <= {3'b0,a,{WID1{1'b0}}};
end
else if (cnt!=9'h1FE) begin
if (b0)
gotnz <= 1'b1;
if (b0==0 && !gotnz)
lzcnt <= lzcnt + 8'd1;
q[WID1*2-1:1] <= q[WID1*2-1-1:0];
q[0] <= b0;
ri <= {r1[DMSB:0],q[WID1*2-1]};
end
// correct remainder
assign r = sdval ? sdr : ri;
end
end
endgenerate
 
endmodule
 
 
/i2f.v
36,29 → 36,7
input [WID-1:0] i, // integer input
output [WID-1:0] o // float output
);
localparam MSB = WID-1;
localparam EMSB = WID==128 ? 14 :
WID==96 ? 14 :
WID==80 ? 14 :
WID==64 ? 10 :
WID==52 ? 10 :
WID==48 ? 10 :
WID==44 ? 10 :
WID==42 ? 10 :
WID==40 ? 9 :
WID==32 ? 7 :
WID==24 ? 6 : 4;
localparam FMSB = WID==128 ? 111 :
WID==96 ? 79 :
WID==80 ? 63 :
WID==64 ? 51 :
WID==52 ? 39 :
WID==48 ? 35 :
WID==44 ? 31 :
WID==42 ? 29 :
WID==40 ? 28 :
WID==32 ? 22 :
WID==24 ? 15 : 9;
`include "fpSize.sv"
 
wire [EMSB:0] zeroXp = {EMSB{1'b1}};
 
78,14 → 56,20
if (WID==128) begin
cntlz128Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
end else if (WID==96) begin
cntlz96Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
cntlz96Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (WID==84) begin
cntlz96Reg u4 (.clk(clk), .ce(ce), .i({imag1,12'hfff}), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (WID==80) begin
cntlz80Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
cntlz80Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else if (WID==64) begin
cntlz64Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
cntlz64Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) );
assign lz[7]=1'b0;
end else begin
cntlz32Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) );
assign lz[6]=1'b0;
cntlz32Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[5:0]) );
assign lz[7:6]=2'b00;
end
endgenerate
 
124,6 → 108,7
reg [7:0] cnt;
wire [31:0] fo;
reg [31:0] i;
wire [79:0] fo80;
initial begin
clk = 1'b0;
cnt = 0;
140,6 → 125,6
endcase
 
i2f #(32) u1 (.clk(clk), .ce(1), .rm(2'd0), .i(i), .o(fo) );
i2f #(80) u2 (.clk(clk), .ce(1), .rm(2'd0), .i(i), .o(fo) );
i2f #(80) u2 (.clk(clk), .ce(1), .rm(2'd0), .i({{48{i[31]}},i}), .o(fo80) );
 
endmodule
/isqrt.v
1,7 → 1,7
`timescale 1ns / 1ps
// ============================================================================
// __
// \\__/ o\ (C) 2010-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2010-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
8,6 → 8,9
//
// isqrt.v
// - integer square root
// - uses the standard long form calc.
// - geared towards use in a floating point unit
// - calculates to WID fractional precision (double width output)
//
//
// This source file is free software: you can redistribute it and/or modify
23,8 → 26,6
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Floating Point Multiplier / Divider
//
// ============================================================================
 
module isqrt(rst, clk, ce, ld, a, o, done);
85,11 → 86,15
// Shift the root
root <= {root+doesGoInto,1'b0}; // root * 2 + 1/0
end
else
else begin
cnt <= 8'h00;
state <= DONE;
end
DONE:
begin
state <= IDLE;
cnt <= cnt + 8'd1;
if (cnt == 8'd6)
state <= IDLE;
end
endcase
end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.