URL
https://opencores.org/ocsvn/ft816float/ft816float/trunk
Subversion Repositories ft816float
Compare Revisions
- This comparison shows the changes necessary to convert path
/ft816float/trunk/rtl/verilog
- from Rev 25 to Rev 26
- ↔ Reverse comparison
Rev 25 → Rev 26
/DivGoldschmidt.v
25,7 → 25,7
// |
// ============================================================================ |
// |
module DivGoldschmidt(rst, clk, ld, a, b, q, f0, done, lzcnt); |
module DivGoldschmidt(rst, clk, ld, a, b, q, done, lzcnt); |
parameter WID=32; |
parameter WHOLE=16; |
parameter POINTS=16; |
38,7 → 38,6
input [WID-1:0] a; |
input [WID-1:0] b; |
output reg [WID*2-1:0] q; |
output reg [SIZE-1:0] f0; |
output reg done; |
output reg [7:0] lzcnt; |
parameter IDLE = 2'd0; |
110,13 → 109,11
N <= {16'd0,a,{WHOLE{1'b0}}} << shft; |
D <= {16'd0,b,{WHOLE{1'd0}}} << shft; |
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} << shft); |
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} << shft); |
end |
else begin |
N <= {16'd0,a,{WHOLE{1'b0}}}; |
D <= {16'd0,b,{WHOLE{1'd0}}}; |
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}}); |
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}}); |
end |
end |
else begin |
123,7 → 120,6
N <= {16'd0,a,{WHOLE{1'b0}}} >> shft; |
D <= {16'd0,b,{WHOLE{1'd0}}} >> shft; |
F <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} >> shft); |
f0 <= {16'd2,{POINTS2{1'b0}}} - ({b,{WHOLE{1'd0}}} >> shft); |
end |
count <= 0; |
state <= DIV; |
158,7 → 154,6
reg ld; |
wire done; |
wire [WID*2-1:0] qo; |
wire [7:0] f0; |
reg [3:0] state; |
reg [3:0] a, b; |
reg [7:0] count; |
211,12 → 206,10
.b(b), |
// .imm(64'd123), |
.q(qo), |
.f0(f0), |
// .ro(ro), |
// .dvByZr(), |
.left_right(), |
.shift(), |
.done(done) |
.done(done), |
.lzcnt() |
); |
|
endmodule |
/F32ToF80.v
47,12 → 47,12
always @* |
begin |
// sign out always just = sign in |
signo = signi; |
signo <= signi; |
|
// special check for zero |
if (vz) begin |
expo <= 0; |
mano <= 0; |
expo <= 1'd0; |
mano <= 1'd0; |
end |
// convert infinity / nan |
// infinity in = infinity out |
/f2i.v
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
39,31 → 39,8
output [WID-1:0] o, |
output overflow |
); |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
|
wire [MSB:0] maxInt = {MSB{1'b1}}; // maximum unsigned integer value |
wire [EMSB:0] zeroXp = {EMSB{1'b1}}; // simple constant - value of exp for zero |
|
/fpAddsub.v
31,33 → 31,8
|
module fpAddsub(clk, ce, rm, op, a, b, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; // system clock |
input ce; // core clock enable |
input [2:0] rm; // rounding mode |
173,6 → 148,8
redor128 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==96) |
redor96 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==84) |
redor84 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==80) |
redor80 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==64) |
225,33 → 202,8
|
module fpAddsubnr(clk, ce, rm, op, a, b, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; // system clock |
input ce; // core clock enable |
input [2:0] rm; // rounding mode |
/fpAddsub_L10.v
31,33 → 31,8
|
module fpAddsub_L10(clk, ce, rm, op, a, b, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; // system clock |
input ce; // core clock enable |
input [2:0] rm; // rounding mode |
254,6 → 229,8
redor128 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) ); |
else if (WID==96) |
redor96 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) ); |
else if (WID==84) |
redor84 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) ); |
else if (WID==80) |
redor80 u1 (.a(xdif4), .b({mfs4,2'b0}), .o(sticky) ); |
else if (WID==64) |
355,33 → 332,8
|
module fpAddsubnr_L10(clk, ce, rm, op, a, b, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; // system clock |
input ce; // core clock enable |
input [2:0] rm; // rounding mode |
/fpDecompReg.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
27,33 → 27,9
// ============================================================================ |
|
module fpDecomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan); |
|
parameter WID=32; |
`include "fpSize.sv" |
|
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
input [MSB:0] i; |
|
output sgn; |
87,33 → 63,9
|
|
module fpDecompReg(clk, ce, i, o, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan); |
|
parameter WID=32; |
`include "fpSize.sv" |
|
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
input clk; |
input ce; |
input [MSB:0] i; |
/fpDiv.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
34,37 → 34,16
// ============================================================================ |
|
`include "fp_defines.v" |
`define GOLDSCHMIDT 1'b1 |
//`define GOLDSCHMIDT 1'b1 |
|
module fpDiv(rst, clk, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow); |
module fpDiv(rst, clk, clk4x, ce, ld, op, a, b, o, done, sign_exe, overflow, underflow); |
|
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
// FADD is a constant that makes the divider width a multiple of four and includes eight extra bits. |
localparam FADD = WID==128 ? 9 : |
WID==96 ? 9 : |
WID==84 ? 9 : |
WID==80 ? 9 : |
WID==64 ? 13 : |
WID==52 ? 9 : |
75,10 → 54,9
WID==32 ? 10 : |
WID==24 ? 9 : 11; |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
input rst; |
input clk; |
input clk4x; |
input ce; |
input ld; |
input op; |
155,6 → 133,7
// Divider width must be a multiple of four |
`ifndef GOLDSCHMIDT |
fpdivr16 #(FMSB+FADD) u2 (.clk(clk), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt)); |
//fpdivr2 #(FMSB+FADD) u2 (.clk4x(clk4x), .ld(ld), .a({3'b0,fracta,8'b0}), .b({3'b0,fractb,8'b0}), .q(divo), .r(), .done(done1), .lzcnt(lzcnt)); |
wire [(FMSB+FADD)*2-1:0] divo1 = divo[(FMSB+FADD)*2-1:0] << (lzcnt-2); |
`else |
DivGoldschmidt #(.WID(FMSB+6),.WHOLE(1),.POINTS(FMSB+5)) |
217,36 → 196,13
|
endmodule |
|
module fpDivnr(rst, clk, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow); |
module fpDivnr(rst, clk, clk4x, ce, ld, op, a, b, o, rm, done, sign_exe, inf, overflow, underflow); |
parameter WID=32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
input rst; |
input clk; |
input clk4x; |
input ce; |
input ld; |
input op; |
264,7 → 220,7
wire [MSB+3:0] fpn0; |
wire done1; |
|
fpDiv #(WID) u1 (rst, clk, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1); |
fpDiv #(WID) u1 (rst, clk, clk4x, ce, ld, op, a, b, o1, done1, sign_exe1, overflow1, underflow1); |
fpNormalize #(WID) u2(.clk(clk), .ce(ce), .under(underflow1), .i(o1), .o(fpn0) ); |
fpRoundReg #(WID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe)); |
/fpFMA.v
46,33 → 46,8
|
module fpFMA (clk, ce, op, rm, a, b, c, o, inf); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; |
input ce; |
input op; // operation 0 = add, 1 = subtract |
154,7 → 129,58
|
reg [FX:0] fract5; |
generate |
if (WID==80) begin |
if (WID==84) begin |
reg [33:0] p00,p01,p02,p03; |
reg [33:0] p10,p11,p12,p13; |
reg [33:0] p20,p21,p22,p23; |
reg [33:0] p30,p31,p32,p33; |
reg [135:0] fract3a; |
reg [135:0] fract3b; |
reg [135:0] fract3c; |
reg [135:0] fract3d; |
reg [135:0] fract4a; |
reg [135:0] fract4b; |
|
always @(posedge clk) |
if (ce) begin |
p00 <= fracta1[16: 0] * fractb1[16: 0]; |
p01 <= fracta1[33:17] * fractb1[16: 0]; |
p02 <= fracta1[50:34] * fractb1[16: 0]; |
p03 <= fracta1[67:51] * fractb1[16: 0]; |
|
p10 <= fracta1[16: 0] * fractb1[33:17]; |
p11 <= fracta1[33:17] * fractb1[33:17]; |
p12 <= fracta1[50:34] * fractb1[33:17]; |
p13 <= fracta1[67:51] * fractb1[33:17]; |
|
p20 <= fracta1[16: 0] * fractb1[50:34]; |
p21 <= fracta1[33:17] * fractb1[50:34]; |
p22 <= fracta1[50:34] * fractb1[50:34]; |
p23 <= fracta1[67:51] * fractb1[50:34]; |
|
p30 <= fracta1[15: 0] * fractb1[67:51]; |
p31 <= fracta1[31:16] * fractb1[67:51]; |
p32 <= fracta1[47:32] * fractb1[67:51]; |
p33 <= fracta1[63:48] * fractb1[67:51]; |
end |
always @(posedge clk) |
if (ce) begin |
fract3a <= {p33,p31,p20,p00}; |
fract3b <= {p32,p12,p10,17'b0} + {p23,p03,p01,17'b0}; |
fract3c <= {p22,p11,34'b0} + {p13,p02,34'b0}; |
fract3d <= {p12,51'b0} + {p03,51'b0}; |
end |
always @(posedge clk) |
if (ce) begin |
fract4a <= fract3a + fract3b; |
fract4b <= fract3c + fract3d; |
end |
always @(posedge clk) |
if (ce) begin |
fract5 <= fract4a + fract4b; |
end |
end |
else if (WID==80) begin |
reg [31:0] p00,p01,p02,p03; |
reg [31:0] p10,p11,p12,p13; |
reg [31:0] p20,p21,p22,p23; |
506,13 → 532,17
reg [EMSB:0] ex9a; |
reg ex_gt_xc9; |
reg [EMSB:0] xc9; |
reg a_gt_c9; |
wire [FX:0] mo9; |
wire [FMSB+1:0] fractc9; |
wire under9; |
wire xeq9; |
|
always @(posedge clk) |
if (ce) ex_gt_xc9 <= ex_gt_xc8; |
always @(posedge clk) |
if (ce) a_gt_c9 <= a_gt_b8; |
always @(posedge clk) |
if (ce) xc9 <= xc8; |
always @(posedge clk) |
if (ce) ex9a <= ex8; |
520,6 → 550,7
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9)); |
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9)); |
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9)); |
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9)); |
|
always @(posedge clk) |
if (ce) ex9 <= resZero8 ? 0 : ex_gt_xc8 ? ex8 : xc8; |
556,6 → 587,7
// ----------------------------------------------------------- |
reg [EMSB:0] xdiff10; |
reg [FX:0] mfs; |
reg ops10; |
|
always @(posedge clk) |
if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9 |
562,10 → 594,17
: (under9 ? xc9 + ex9a : xc9 - ex9a); |
|
// Determine which fraction to denormalize (the one with the |
// smaller exponent is denormalized). |
// smaller exponent is denormalized). If the exponents are equal |
// denormalize the smaller fraction. |
always @(posedge clk) |
if (ce) mfs <= ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9; |
if (ce) mfs <= |
xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9) |
: ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9; |
|
always @(posedge clk) |
if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0) |
: (ex_gt_xc9 ? 1'b1 : 1'b0); |
|
// ----------------------------------------------------------- |
// Clock #11 |
// Limit the size of the shifter to only bits needed. |
590,6 → 629,8
redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==96) |
redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==84) |
redor84 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==80) |
redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) ); |
else if (WID==64) |
612,10 → 653,12
wire [FX:0] mo13; |
wire ex_gt_xc13; |
wire [FMSB+1:0] fractc13; |
wire ops13; |
|
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13)); |
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13)); |
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13)); |
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13)); |
|
always @(posedge clk) |
if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12; |
630,9 → 673,9
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14)); |
|
always @(posedge clk) |
if (ce) oa <= ex_gt_xc13 ? {mo13,2'b00} : mfs13; |
if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13; |
always @(posedge clk) |
if (ce) ob <= ex_gt_xc13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00}; |
if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00}; |
|
// ----------------------------------------------------------- |
// Clock #15 |
693,7 → 736,7
4'b01??: mo17 <= {1'b0,mo16}; |
4'b001?: mo17 <= {1'b0,fractc16[FMSB+1:0],{FMSB{1'b0}}}; |
4'b0001: mo17 <= 1'd0; |
default: mo17 <= mab[FX+3:2]; // mab has an extra lead bit and two trailing bits |
default: mo17 <= mab[FX+3:2]; // mab has two extra lead bits and two trailing bits |
endcase |
|
assign o = {so17,ex17,mo17}; |
710,32 → 753,8
|
module fpFMAnr(clk, ce, op, rm, a, b, c, o, sign_exe, inf, overflow, underflow); |
parameter WID=32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
input clk; |
input ce; |
input op; |
/fpLOOUnit.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
30,9 → 30,12
// |
// ============================================================================ |
|
`define FLOAT 6'h36 |
`define FTOI 6'h02 |
`define ITOF 6'h03 |
`define FLT1 4'h1 |
`define FLT2 4'h2 |
`define FTOI 5'h02 |
`define ITOF 5'h03 |
`define TRUNC 5'h15 |
`define NXTAFT 5'h0B |
|
module fpLOOUnit |
#(parameter WID=32) |
39,55 → 42,46
( |
input clk, |
input ce, |
input [31:0] ir, |
input [3:0] op4, |
input [4:0] func5, |
input [2:0] rm, |
input [WID-1:0] a, |
input [WID-1:0] b, |
output reg [WID-1:0] o, |
output done |
); |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
wire [WID-1:0] i2f_o; |
wire [WID-1:0] f2i_o; |
wire [5:0] op = ir[5:0]; |
wire [5:0] fn = ir[17:12]; |
wire [2:0] rm = ir[26:24]; |
wire [1:0] prec = ir[28:27]; |
wire [WID-1:0] trunc_o; |
wire [WID-1:0] nxtaft_o; |
|
delay1 u1 (.clk(clk), .ce(ce), .i(op==`FLOAT && (fn==`ITOF||fn==`FTOI)), .o(done) ); |
i2f #(WID) ui2fs (.clk(clk), .ce(ce), .rm(rm), .i(a), .o(i2f_o) ); |
f2i #(WID) uf2is (.clk(clk), .ce(ce), .i(a), .o(f2i_o) ); |
delay1 u1 ( |
.clk(clk), |
.ce(ce), |
.i((op4==`FLT1 && (func5==`ITOF||func5==`FTOI||func5==`TRUNC))||(op4==`FLT2 && (func5==`NXTAFT))), |
.o(done) ); |
i2f #(WID-4) ui2fs (.clk(clk), .ce(ce), .rm(rm), .i(a[WID-1:4]), .o(i2f_o) ); |
f2i #(WID-4) uf2is (.clk(clk), .ce(ce), .i(a[WID-1:4]), .o(f2i_o) ); |
fpTrunc #(WID) urho1 (.clk(clk), .ce(ce), .i(a), .o(trunc_o), .overflow()); |
fpNextAfter #(WID-4) una1 (.clk(clk), .ce(ce), .a(a[WID-1:4]), .b(b[WID-1:4]), .o(nxtaft_o)); |
|
always @* |
case (op) |
`FLOAT: |
case(fn) |
`ITOF: o <= i2f_o; |
`FTOI: o <= f2i_o; |
default: o <= 0; |
endcase |
default: o <= 0; |
endcase |
case (op4) |
`FLT1: |
case(func5) |
`ITOF: o <= {i2f_o,4'h0}; |
`FTOI: o <= {f2i_o,4'h0}; |
`TRUNC: o <= trunc_o; |
default: o <= 0; |
endcase |
`FLT2: |
case(func5) |
`NXTAFT: o <= {nxtaft_o,4'h0}; |
default: o <= 0; |
endcase |
default: o <= 0; |
endcase |
|
endmodule |
/fpMul.v
50,34 → 50,9
// ============================================================================ |
|
module fpMul (clk, ce, a, b, o, sign_exe, inf, overflow, underflow); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
parameter WID = 32; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; |
input ce; |
input [WID:1] a, b; |
132,7 → 107,41
assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias; |
|
generate |
if (WID==64) begin |
if (WID==80) begin |
reg [31:0] p00,p01,p02,p03; |
reg [31:0] p10,p11,p12,p13; |
reg [31:0] p20,p21,p22,p23; |
reg [31:0] p30,p31,p32,p33; |
always @(posedge clk) |
if (ce) begin |
p00 <= fracta[15: 0] * fractb[15: 0]; |
p01 <= fracta[31:16] * fractb[15: 0]; |
p02 <= fracta[47:32] * fractb[15: 0]; |
p03 <= fracta[63:48] * fractb[15: 0]; |
|
p10 <= fracta[15: 0] * fractb[31:16]; |
p11 <= fracta[31:16] * fractb[31:16]; |
p12 <= fracta[47:32] * fractb[31:16]; |
p13 <= fracta[63:48] * fractb[31:16]; |
|
p20 <= fracta[15: 0] * fractb[47:32]; |
p21 <= fracta[31:16] * fractb[47:32]; |
p22 <= fracta[47:32] * fractb[47:32]; |
p23 <= fracta[63:48] * fractb[47:32]; |
|
p30 <= fracta[15: 0] * fractb[63:48]; |
p31 <= fracta[31:16] * fractb[63:48]; |
p32 <= fracta[47:32] * fractb[63:48]; |
p33 <= fracta[63:48] * fractb[63:48]; |
|
fract1 <= {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 + |
{p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} + |
{p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} + |
{p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0} |
; |
end |
end |
else if (WID==64) begin |
reg [35:0] p00,p01,p02; |
reg [35:0] p10,p11,p12; |
reg [35:0] p20,p21,p22; |
167,9 → 176,6
end |
end |
else begin |
reg [35:0] p00,p01,p02; |
reg [35:0] p10,p11,p12; |
reg [35:0] p20,p21,p22; |
always @(posedge clk) |
if (ce) begin |
fract1a <= fracta * fractb; |
238,34 → 244,13
|
endmodule |
|
|
// Multiplier with normalization and rounding. |
|
module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow); |
parameter WID=32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
input clk; |
input ce; |
input [MSB:0] a, b; |
289,15 → 274,3
delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow)); |
endmodule |
|
module fpMul_tb(); |
reg clk; |
|
initial begin |
clk = 0; |
end |
always #10 clk <= ~clk; |
|
fpMul u1 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1)); |
fpMul u2 (.clk(clk), .ce(1'b1), .a(0), .b(0), .o(o1), .sign_exe(sgnx1), .inf(inf1), .overflow(of1), .underflow(uf1)); |
|
endmodule |
/fpNormalize.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
42,33 → 42,8
|
module fpNormalize(clk, ce, under, i, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input clk; |
input ce; |
input under; |
80,14 → 55,16
|
wire so1 = i[EX]; // sign doesn't change |
|
// Since the there are *two* whole digits in the incoming format |
// Since there are *three* whole digits in the incoming format |
// the number of whole digits needs to be reduced. If the MSB is |
// set, then increment the exponent and no shift is needed. |
wire [EMSB:0] xo; |
wire [EMSB:0] xo1a = i[EX-1:FX+1]; |
wire xInf = &xo1a & !under; |
wire incExp1 = !xInf & i[FX]; |
wire [EMSB:0] xo1 = xo1a + incExp1; |
wire xInf3 = &xo1a[EMSB:1] & !under; |
wire incExp2 = !xInf3 & i[FX]; |
wire incExp1 = !xInf & i[FX-1]; |
wire [EMSB:0] xo1 = xo1a + (incExp2 ? 2'd2 : incExp1 ? 2'd1 : 2'd0); |
wire [EMSB:0] xo2; |
wire xInf1 = &xo1; |
|
95,9 → 72,10
// shift mantissa left by one to reduce to a single whole digit |
// if there is no exponent increment |
wire [FMSB+4:0] mo; |
wire [FMSB+4:0] mo1 = (xInf1 & incExp1) ? 0 : |
incExp1 ? {i[FX:FMSB+1],|i[FMSB:0],1'b0} : // reduce mantissa size |
{i[FX-1:FMSB],|i[FMSB-1:0],1'b0}; // reduce mantissa size |
wire [FMSB+4:0] mo1 = ((xInf1 & (incExp1|incExp2))|(xInf3 & incExp2)) ? 0 : |
incExp2 ? {i[FX:FMSB+1],|i[FMSB:0]} : |
incExp1 ? {i[FX-1:FMSB],|i[FMSB-1:0]} : // reduce mantissa size |
{i[FX-2:FMSB-1],|i[FMSB-2:0]}; // reduce mantissa size |
wire [FMSB+4:0] mo2; |
wire [7:0] leadingZeros2; |
|
115,6 → 93,10
assign leadingZeros2[7] = 1'b0; |
cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo1,12'b0}), .o(leadingZeros2) ); |
end |
else if (WID<=84) begin |
assign leadingZeros2[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo1,24'b0}), .o(leadingZeros2) ); |
end |
else if (WID<=96) begin |
assign leadingZeros2[7] = 1'b0; |
cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo1,12'b0}), .o(leadingZeros2) ); |
/fpRes.sv
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2019 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
21,32 → 21,11
// |
// ============================================================================ |
// |
module fpRes(clk, a, o); |
module fpRes(clk, ce, a, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
input clk; |
input ce; |
input [WID-1:0] a; |
output [WID-1:0] o; |
|
1095,11 → 1074,11
reg [9:0] indexr; |
reg [15:0] k0, k1; |
always @(posedge clk) |
indexr <= index; |
if(ce) indexr <= index; |
always @(posedge clk) |
k0 <= k01[indexr][31:16]; |
if(ce) k0 <= k01[indexr][31:16]; |
always @(posedge clk) |
k1 <= k01[indexr][15: 0]; |
if(ce) k1 <= k01[indexr][15: 0]; |
delay3 #(1) u2 (.clk(clk), .ce(1'b1), .i(sa), .o(sa3)); |
delay3 #(EMSB+1) u3 (.clk(clk), .ce(1'b1), .i(exp), .o(exp3)); |
wire [15:0] eps = ma[FMSB-10:FMSB-10-15]; |
1106,7 → 1085,7
wire [31:0] p = k1 * eps; |
reg [15:0] r0; |
always @(posedge clk) |
r0 <= k0 - (p >> 26); |
if(ce) r0 <= k0 - (p >> 26); |
assign o = {sa3,exp3,r0[14:0],{FMSB+2-16{1'b0}}}; |
|
always @* |
/fpRound.v
29,29 → 29,7
|
module fpRound(rm, i, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
input [2:0] rm; // rounding mode |
input [MSB+3:0] i; // intermediate format input |
113,29 → 91,7
|
module fpRoundReg(clk, ce, rm, i, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
input clk; |
input ce; |
/fpRsqrte.sv
36,29 → 36,7
|
module fpRsqrte(clk, ce, ld, a, o); |
parameter WID = 80; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
input clk; |
input ce; |
input ld; |
/fpSigmoid.v
43,29 → 43,7
|
module fpSigmoid(clk, ce, a, o); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
input clk; |
input ce; |
input [WID-1:0] a; |
102,7 → 80,7
endgenerate |
|
initial begin |
`include "D:\Cores6\rtfItanium\v1\rtl\fpUnit\SigTbl.ver" |
`include "D:\Cores6\nvio\v1\rtl\fpUnit\SigTbl.ver" |
end |
|
// Quickly multiply number by 64 (it is in range -8 to 8) then convert to integer to get |
/fpSize.sv
0,0 → 1,30
// This file contains definitions for fields to ease dealing with different fp |
// widths. Some of the code still needs to be modified to support widths |
// other than standard 32,64 or 80 bit. |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==84 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==84 ? 67 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
localparam FX = (FMSB+2)*2; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
/fpSqrt.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2018-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
29,42 → 29,21
// |
// ============================================================================ |
|
module fpSqrt(rst, clk, ce, ld, a, o, done); |
`include "fp_defines.v" |
|
module fpSqrt(rst, clk, ce, ld, a, o, done, sqrinf, sqrneg); |
parameter WID = 128; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
|
input rst; |
input clk; |
input ce; |
input ld; |
input [MSB:0] a; |
output [EX:0] o; |
output reg [EX:0] o; |
output done; |
output sqrinf; |
output sqrneg; |
|
// registered outputs |
reg sign_exe; |
122,6 → 101,8
assign so = 1'b0; // square root of positive numbers only |
assign xo = (ex1 >> 1) + (bias >> 1); // divide by 2 cuts the bias in half, so 1/2 of it is added back in. |
assign mo = aNan ? {1'b1,a[FMSB:0],{FMSB+1{1'b0}}} : (sqrto << 36); |
assign sqrinf = aInf; |
assign sqrneg = !az & so; |
|
wire [FMSB+2:0] fracta1 = ex1[0] ? {1'b0,fracta} << 1 : {2'b0,fracta}; |
|
136,38 → 117,21
.done(done) |
); |
|
assign o = aNan ? {sa,xa,mo} : {so,xo,mo}; |
always @* |
casez({aNan,sqrinf,sqrneg}) |
3'b1??: o <= {sa,xa,mo}; |
3'b01?: o <= {sa,1'b1,qNaN|`QSQRTINF,{FMSB+1{1'b0}}}; |
3'b001: o <= {sa,1'b1,qNaN|`QSQRTNEG,{FMSB+1{1'b0}}}; |
default: o <= {so,xo,mo}; |
endcase |
|
|
endmodule |
|
module fpSqrtnr(rst, clk, ce, ld, a, o, rm, done, inf); |
module fpSqrtnr(rst, clk, ce, ld, a, o, rm, done, inf, sqrinf, sqrneg); |
parameter WID=32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
input rst; |
input clk; |
input ce; |
177,6 → 141,8
input [2:0] rm; |
output done; |
output inf; |
output sqrinf; |
output sqrneg; |
|
wire [EX:0] o1; |
wire inf1; |
183,7 → 149,7
wire [MSB+3:0] fpn0; |
wire done1; |
|
fpSqrt #(WID) u1 (rst, clk, ce, ld, a, o1, done1); |
fpSqrt #(WID) u1 (rst, clk, ce, ld, a, o1, done1, sqrinf, sqrneg); |
fpNormalize #(WID) u2(.clk(clk), .ce(ce), .under(1'b0), .i(o1), .o(fpn0) ); |
fpRoundReg #(WID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) ); |
delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf)); |
/fpTrunc.sv
36,29 → 36,7
output reg [WID-1:0] o, |
output overflow |
); |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
integer n; |
wire [MSB:0] maxInt = {MSB{1'b1}}; // maximum unsigned integer value |
/fpUnit.v
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
19,7 → 19,6
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// |
// DSD |
// fpUnit.v |
// - floating point unit |
// - parameterized width |
30,6 → 29,8
// 31'h7FC00002 - infinity / infinity |
// 31'h7FC00003 - zero / zero |
// 31'h7FC00004 - infinity X zero |
// 31'h7FC00005 - square root of infinity |
// 31'h7FC00006 - square root of negative number |
// |
// Whenever the fpu encounters a NaN input, the NaN is |
// passed through to the output. |
82,96 → 83,67
`define VITOF 6'h25 |
`define VFMUL 6'h3A |
`define VFDIV 6'h3E |
`define FLOAT 6'h0B |
`define FMOV 6'h10 |
`define FTOI 6'h12 |
`define ITOF 6'h13 |
`define FNEG 6'h14 |
`define FABS 6'h15 |
`define FSIGN 6'h16 |
`define FMAN 6'h17 |
`define FNABS 6'h18 |
`define FCVTSD 6'h19 |
`define FCVTSQ 6'h1B |
`define FSTAT 6'h1C |
`define FTX 6'h20 |
`define FCX 6'h21 |
`define FEX 6'h22 |
`define FDX 6'h23 |
`define FRM 6'h24 |
`define FCVTDS 6'h29 |
`define FLOAT 6'h0F |
`define FLT1 4'h1 |
`define FLT2 4'h2 |
`define FLT3 4'h3 |
`define FLT1A 4'h5 |
`define FLT2LI 4'hA |
`define FMA 5'h00 |
`define FMS 5'h01 |
`define FNMA 5'h02 |
`define FNMS 5'h03 |
`define FMOV 5'h00 |
`define FTOI 5'h02 |
`define ITOF 5'h03 |
`define FNEG 5'h04 |
`define FABS 5'h05 |
`define FSIGN 5'h06 |
`define FMAN 5'h07 |
`define FNABS 5'h08 |
`define FCVTSD 5'h09 |
//`define FCVTSQ 6'h1B |
`define FSTAT 5'h0C |
`define FSQRT 5'h0D |
`define FTX 5'h10 |
`define FCX 5'h11 |
`define FEX 5'h12 |
`define FDX 5'h13 |
`define FRM 5'h14 |
`define TRUNC 5'h15 |
`define FCVTDS 5'h19 |
|
`define FADD 6'h04 |
`define FSUB 6'h05 |
`define FCMP 6'h06 |
`define FMUL 6'h08 |
`define FDIV 6'h09 |
`define FSCALEB 5'h00 |
`define FADD 5'h04 |
`define FSUB 5'h05 |
`define FCMP 5'h06 |
`define FMUL 5'h08 |
`define FDIV 5'h09 |
`define FREM 5'h0A |
`define NXTAFT 5'h0B |
// FLT1A |
`define FRES 5'h00 |
|
`define QINFOS 23'h7FC000 // info |
`define QSUBINFS 31'h7FC00001 // - infinity - infinity |
`define QINFDIVS 31'h7FC00002 // - infinity / infinity |
`define QZEROZEROS 31'h7FC00003 // - zero / zero |
`define QINFZEROS 31'h7FC00004 // - infinity X zero |
`include "fp_defines.v" |
|
`define QINFOD 52'hFF80000000000 // info |
`define QSUBINFD 63'h7FF0000000000001 // - infinity - infinity |
`define QINFDIVD 63'h7FF0000000000002 // - infinity / infinity |
`define QZEROZEROD 63'h7FF0000000000003 // - zero / zero |
`define QINFZEROD 63'h7FF0000000000004 // - infinity X zero |
|
`define QINFODX 64'hFF800000_00000000 // info |
`define QSUBINFDX 79'h7FFF000000_0000000001 // - infinity - infinity |
`define QINFDIVDX 79'h7FFF000000_0000000002 // - infinity / infinity |
`define QZEROZERODX 79'h7FFF000000_0000000003 // - zero / zero |
`define QINFZERODX 79'h7FFF000000_0000000004 // - infinity X zero |
|
`define QINFOQ 112'hFF800000_0000000000_0000000000 // info |
`define QSUBINFQ 127'h7F_FF00000000_0000000000_0000000001 // - infinity - infinity |
`define QINFDIVQ 127'h7F_FF00000000_0000000000_0000000002 // - infinity / infinity |
`define QZEROZEROQ 127'h7F_FF00000000_0000000000_0000000003 // - zero / zero |
`define QINFZEROQ 127'h7F_FF00000000_0000000000_0000000004 // - infinity X zero |
|
module fpUnit(rst, clk, ce, ir, ld, a, b, imm, o, csr_i, status, exception, done, rm |
module fpUnit(rst, clk, clk4x, ce, ir, ld, a, b, c, imm, o, csr_i, status, exception, done, rm |
); |
|
parameter WID = 64; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
localparam EMSBS = 7; |
localparam FMSBS = 22; |
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction |
localparam EX = FX + 1 + EMSB + 1 + 1 - 1; |
localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction |
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1; |
|
input rst; |
input clk; |
input clk4x; |
input ce; |
input [31:0] ir; |
input [39:0] ir; |
input ld; |
input [MSB:0] a; |
input [MSB:0] b; |
input [MSB:0] c; |
input [5:0] imm; |
output tri [MSB:0] o; |
input [31:0] csr_i; |
181,7 → 153,10
input [2:0] rm; |
|
reg [7:0] fpcnt; |
assign done = fpcnt==8'h00; |
wire rem_done; |
wire rem_ld; |
wire op_done = fpcnt==8'h00; |
assign done = op_done & rem_done; |
|
//------------------------------------------------------------ |
// constants |
200,24 → 175,53
wire isNan,isNans; |
wire nanx,nanxs; |
|
// Decode fp operation |
wire latch_res; |
wire [3:0] op4_r; |
wire [5:0] func6b_r; |
wire [2:0] srca; |
wire [2:0] srcb; |
wire [3:0] op4_i = ir[9:6]; |
wire [5:0] op = ir[5:0]; |
wire [5:0] func6b = ir[31:26]; |
wire [1:0] prec = ir[25:24]; |
wire [4:0] func6b_i = ir[39:35]; |
wire fprem = {op4_i,func6b_i} == {`FLT2,`FREM}; |
wire [3:0] op4 = fprem ? op4_r : op4_i; |
wire [5:0] func6b = fprem ? func6b_r : func6b_i; |
wire [2:0] insn_rm = ir[30:28]; |
reg [WID-1:0] res; |
reg [WID-1:0] aop, bop; |
always @* |
case(srca) |
`RES: aop <= res; |
default: aop <= a; |
endcase |
always @* |
case(srcb) |
`RES: bop <= res; |
`POINT5: |
case(WID) |
32: bop <= `POINT5S; |
40: bop <= `POINT5SX; |
64: bop <= `POINT5D; |
80: bop <= `POINT5DX; |
endcase |
default: bop <= b; |
endcase |
|
wire fstat = {op,func6b} == {`FLOAT,`FSTAT}; // get status |
wire fdiv = {op,func6b} == {`FLOAT,`FDIV}; |
wire ftx = {op,func6b} == {`FLOAT,`FTX}; // trigger exception |
wire fcx = {op,func6b} == {`FLOAT,`FCX}; // clear exception |
wire fex = {op,func6b} == {`FLOAT,`FEX}; // enable exception |
wire fdx = {op,func6b} == {`FLOAT,`FDX}; // disable exception |
wire fcmp = {op,func6b} == {`FLOAT,`FCMP}; |
wire frm = {op,func6b} == {`FLOAT,`FRM}; // set rounding mode |
wire [2:0] prec = 3'd4;//ir[25:24]; |
|
wire zl_op = (op==`FLOAT && ( |
(func6b==`FABS || func6b==`FNABS || func6b==`FMOV || func6b==`FNEG || func6b==`FSIGN || func6b==`FMAN || func6b==`FCVTSQ)) || |
wire fstat = {op4,func6b} == {`FLT1,`FSTAT}; // get status |
wire fdiv = {op4,func6b} == {`FLT2,`FDIV}; |
wire ftx = {op4,func6b} == {`FLT1,`FTX}; // trigger exception |
wire fcx = {op4,func6b} == {`FLT1,`FCX}; // clear exception |
wire fex = {op4,func6b} == {`FLT1,`FEX}; // enable exception |
wire fdx = {op4,func6b} == {`FLT1,`FDX}; // disable exception |
wire fcmp = {op4,func6b} == {`FLT2,`FCMP}; |
wire frm = {op4,func6b} == {`FLT1,`FRM}; // set rounding mode |
|
wire zl_op = (op4==`FLT1 && ( |
(func6b==`FABS || func6b==`FNABS || func6b==`FMOV || func6b==`FNEG || func6b==`FSIGN || func6b==`FMAN)) || |
func6b==`FCMP); |
wire loo_op = (op==`FLOAT && (func6b==`ITOF || func6b==`FTOI)); |
wire loo_op = (op4==`FLT1 && (func6b==`ITOF || func6b==`FTOI)); |
wire loo_done; |
|
wire subinf; |
287,10 → 291,11
reg subinfx; // subtraction of infinities |
reg snanx; // signalling nan |
|
wire fdivs = 1'b0; |
wire divDone; |
wire pipe_ce = ce;// & divDone; // divide must be done in order for pipe to clock |
wire precmatch = WID==32 ? ir[28:27]==2'b00 : |
WID==64 ? ir[28:27]==2'b01 : 1; |
wire precmatch = 1'b0;//WID==32 ? ir[28:27]==2'b00 : |
//WID==64 ? ir[28:27]==2'b01 : 1; |
/* |
WID==80 ? ir[28:27]==2'b10 : |
ir[28:27]==2'b11; |
359,25 → 364,29
wire aNan, bNan, aNans, bNans; |
wire az, bz, azs, bzs; |
wire [2:0] rmd4; // 1st stage delayed |
wire [5:0] op1, op2; |
wire [3:0] op2; |
wire [5:0] op1; |
wire [5:0] fn2; |
|
wire [MSB:0] zld_o,lood_o; |
wire [31:0] zls_o,loos_o; |
wire [WID-1:0] zlq_o, looq_o; |
fpZLUnit #(WID) u6 (.ir(ir), .a(a), .b(b), .o(zlq_o), .nanx(nanx) ); |
fpLOOUnit #(WID) u7 (.clk(clk), .ce(pipe_ce), .ir(ir), .a(a), .o(looq_o), .done() ); |
wire [WID-1:0] scaleb_o; |
fpZLUnit #(WID) u6 (.ir(ir), .op4(op4), .func5(func6b), .a(aop), .b(bop), .c(c), .o(zlq_o), .nanx(nanx) ); |
fpLOOUnit #(WID) u7 (.clk(clk), .ce(pipe_ce), .op4(op4), .func5(func6b), .rm(insn_rm==3'b111 ? rm : insn_rm), .a(aop), .b(bop), .o(looq_o), .done() ); |
fpScaleb u16 (.clk(clk), .ce(pipe_ce), .a(aop), .b(bop), .o(scaleb_o)); |
|
//fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() ); |
|
fp_decomp #(WID) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) ); |
fp_decomp #(WID) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) ); |
fp_decomp #(WID) u1 (.i(aop), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) ); |
fp_decomp #(WID) u2 (.i(bop), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) ); |
//fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) ); |
//fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) ); |
|
wire [2:0] rmd = ir[26:24]==3'b111 ? rm : ir[26:24]; |
wire [2:0] rmd = ir[30:28]==3'b111 ? rm : ir[30:28]; |
delay4 #(3) u3 (.clk(clk), .ce(pipe_ce), .i(rmd), .o(rmd4) ); |
delay1 #(6) u4 (.clk(clk), .ce(pipe_ce), .i(func6b), .o(op1) ); |
delay2 #(6) u5 (.clk(clk), .ce(pipe_ce), .i(func6b), .o(op2) ); |
delay2 #(4) u5 (.clk(clk), .ce(pipe_ce), .i(op4), .o(op2) ); |
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(func6b), .o(fn2) ); |
|
delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) ); |
405,6 → 414,7
wire [EX:0] fdiv_o; |
wire [EX:0] fmul_o; |
wire [EX:0] fas_o; |
wire [EX:0] fsqrt_o; |
reg [EX:0] fres; |
wire [31:0] fpus_o; |
wire [31+3:0] fpns_o; |
411,14 → 421,40
wire [EXS:0] fdivs_o; |
wire [EXS:0] fmuls_o; |
wire [EXS:0] fass_o; |
wire [EXS:0] fres_o; |
reg [EXS:0] fress; |
wire divUnder,divUnders; |
wire mulUnder,mulUnders; |
reg under,unders; |
wire sqrneg; |
wire fms = func6b==`FMS || func6b==`FNMS; |
wire nma = func6b==`FNMA || func6b==`FNMS; |
wire [WID-1:0] ma_aop = aop ^ (nma << WID-1); |
|
fpAddsub #(WID) u10(.clk(clk), .ce(pipe_ce), .rm(rmd), .op(func6b[0]), .a(a), .b(b), .o(fas_o) ); |
fpDiv #(WID) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) ); |
fpMul #(WID) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) ); |
fpAddsub #(WID) u10(.clk(clk), .ce(pipe_ce), .rm(rmd), .op(func6b[0]), .a(aop), .b(bop), .o(fas_o) ); |
fpDiv #(WID) u11(.clk(clk), .clk4x(clk4x), .ce(pipe_ce), .ld(ld|rem_ld), .a(aop), .b(bop), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) ); |
fpMul #(WID) u12(.clk(clk), .ce(pipe_ce), .a(aop), .b(bop), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) ); |
fpSqrt #(WID) u13(.rst(rst), .clk(clk4x), .ce(pipe_ce), .ld(ld), .a(aop), .o(fsqrt_o), .done(), .sqrinf(), .sqrneg(sqrneg) ); |
fpRes #(WID) u14(.clk(clk), .ce(pipe_ce), .a(aop), .o(fres_o)); |
fpFMA #(WID) u15(.clk(clk), .ce(pipe_ce), .op(fms), .rm(rmd), .a(ma_aop), .b(bop), .c(c), .o(fma_o), .inf()); |
|
fpRemainder ufpr1 |
( |
.rst(rst), |
.clk(clk), |
.ce(ce), |
.ld_i(ld), |
.ld_o(rem_ld), |
.op4_i(op4_i), |
.funct6b_i(func6b_i), |
.op4_o(op4_r), |
.funct6b_o(func6b_r), |
.op_done(op_done), |
.rem_done(rem_done), |
.srca(srca), |
.srcb(srcb), |
.latch_res(latch_res) |
); |
/* |
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) ); |
fpDiv #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() ); |
426,39 → 462,44
*/ |
always @* |
case(op2) |
`FLOAT: |
case (fn2) |
`FMUL: under = mulUnder; |
`FDIV: under = divUnder; |
default: begin under = 0; unders = 0; end |
`FLT2,`FLT2LI: |
case (fn2) |
`FMUL: under = mulUnder; |
`FDIV: under = divUnder; |
default: begin under = 0; unders = 0; end |
endcase |
`VECTOR: |
case (fn2) |
`VFMUL: under = mulUnder; |
`VFDIV: under = divUnder; |
default: begin under = 0; unders = 0; end |
endcase |
default: begin under = 0; unders = 0; end |
endcase |
|
always @* |
case(op2) |
`FLOAT: |
case(fn2) |
`FADD: fres <= fas_o; |
`FSUB: fres <= fas_o; |
`FMUL: fres <= fmul_o; |
`FDIV: fres <= fdiv_o; |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
`VECTOR: |
case(fn2) |
`VFADD: fres <= fas_o; |
`VFSUB: fres <= fas_o; |
`VFMUL: fres <= fmul_o; |
`VFDIV: fres <= fdiv_o; |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
`FLT3: |
case(fn2) |
`FMA: fres <= fma_o; |
`FMS: fres <= fma_o; |
`FNMA: fres <= fma_o; |
`FNMS: fres <= fma_o; |
default: fres <= fma_o; |
endcase |
`FLT2,`FLT2LI: |
case(fn2) |
`FADD: fres <= fas_o; |
`FSUB: fres <= fas_o; |
`FMUL: fres <= fmul_o; |
`FDIV: fres <= fdiv_o; |
`FSCALEB: fres <= scaleb_o; |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
`FLT1: |
case(fn2) |
`FSQRT: fres <= fsqrt_o; |
default: begin fres <= 1'd0; fress <= 1'd0; end |
endcase |
`FLT1A: |
case(fn2) |
`FRES: fres <= fres_o; |
default: begin fres <= 1'd0; fress <= 1'd0; end |
endcase |
default: begin fres <= fas_o; fress <= fass_o; end |
endcase |
|
502,8 → 543,8
gx, |
sx, |
|
1'b0, // cvtx |
1'b0, // sqrtx |
1'b0, // cvtx |
sqrneg, // sqrtx |
fcmp & nanx, |
infzero, |
zerozero, |
512,12 → 553,15
isNan |
}; |
|
assign o = (!fstat) ? |
wire [WID-1:0] o1 = |
(frm|fcx|fdx|fex) ? (a|imm) : |
zl_op ? zlq_o : |
loo_op ? looq_o : |
{so,fpu_o[MSB-1:0]} : 'bz; |
{so,fpu_o[MSB-1:0]}; |
assign zero = fpu_o[MSB-1:0]==0; |
assign o = fprem ? res : o1; |
always @(posedge clk) |
if (ce & latch_res) res <= o1; |
|
wire [7:0] maxdivcnt; |
generate begin |
527,7 → 571,7
assign infdiv = fpu_o[126:0]==`QINFDIVQ; |
assign zerozero = fpu_o[126:0]==`QZEROZEROQ; |
assign infzero = fpu_o[126:0]==`QINFZEROQ; |
assign maxdivcnt = 8'd250; |
assign maxdivcnt = 8'd128; |
end |
else if (WID==80) begin |
assign inf = &fpu_o[78:64] && fpu_o[63:0]==0; |
535,7 → 579,7
assign infdiv = fpu_o[78:0]==`QINFDIVDX; |
assign zerozero = fpu_o[78:0]==`QZEROZERODX; |
assign infzero = fpu_o[78:0]==`QINFZERODX; |
assign maxdivcnt = 8'd136; |
assign maxdivcnt = 8'd80; |
end |
else if (WID==64) begin |
assign inf = &fpu_o[62:52] && fpu_o[51:0]==0; |
543,7 → 587,7
assign infdiv = fpu_o[62:0]==`QINFDIVD; |
assign zerozero = fpu_o[62:0]==`QZEROZEROD; |
assign infzero = fpu_o[62:0]==`QINFZEROD; |
assign maxdivcnt = 8'd112; |
assign maxdivcnt = 8'd64; |
end |
else if (WID==32) begin |
assign inf = &fpu_o[30:23] && fpu_o[22:0]==0; |
551,7 → 595,7
assign infdiv = fpu_o[30:0]==`QINFDIVS; |
assign zerozero = fpu_o[30:0]==`QZEROZEROS; |
assign infzero = fpu_o[30:0]==`QINFZEROS; |
assign maxdivcnt = 8'd54; |
assign maxdivcnt = 8'd32; |
end |
end |
endgenerate |
561,43 → 605,55
// Generate a done signal. Latency varies depending on the instruction. |
always @(posedge clk) |
begin |
if (rst) |
fpcnt <= 8'h00; |
else begin |
if (ld) |
case(ir[5:0]) |
`FLOAT: |
begin |
case(func6b) |
`FABS,`FNABS,`FNEG,`FMAN,`FMOV,`FSIGN, |
`FCVTSD,`FCVTSQ,`FCVTDS: begin fpcnt <= 8'd0; end |
`FTOI: begin fpcnt <= 8'd1; end |
`ITOF: begin fpcnt <= 8'd1; end |
`FCMP: begin fpcnt <= 8'd0; end |
`FADD: begin fpcnt <= 8'd8; end |
`FSUB: begin fpcnt <= 8'd8; end |
`FMUL: begin fpcnt <= 8'd10; end |
`FDIV: begin fpcnt <= maxdivcnt; end |
default: fpcnt <= 8'h00; |
endcase |
end |
`VECTOR: |
case(func6b) |
`VFNEG: begin fpcnt <= 8'd0; end |
`VFADD: begin fpcnt <= 8'd8; end |
`VFSUB: begin fpcnt <= 8'd8; end |
`VFSxx: begin fpcnt <= 8'd0; end |
`VFMUL: begin fpcnt <= 8'd10; end |
`VFDIV: begin fpcnt <= maxdivcnt; end |
`VFTOI: begin fpcnt <= 8'd1; end |
`VITOF: begin fpcnt <= 8'd1; end |
default: fpcnt <= 8'h00; |
endcase |
default: fpcnt <= 8'h00; |
endcase |
else if (!done) |
fpcnt <= fpcnt - 1; |
end |
if (rst) |
fpcnt <= 8'h00; |
else begin |
if (ld|rem_ld) |
case(op4) |
`FLT3: |
case(func6b) |
`FMA: fpcnt <= 8'd22; |
`FMS: fpcnt <= 8'd22; |
`FNMA: fpcnt <= 8'd22; |
`FNMS: fpcnt <= 8'd22; |
default: fpcnt <= 8'd00; |
endcase |
`FLT2,`FLT2LI: |
case(func6b) |
`FCMP: begin fpcnt <= 8'd0; end |
`FADD: begin fpcnt <= 8'd6; end |
`FSUB: begin fpcnt <= 8'd6; end |
`FMUL: begin fpcnt <= 8'd6; end |
`FDIV: begin fpcnt <= maxdivcnt; end |
`FREM: fpcnt <= maxdivcnt+8'd23; |
`NXTAFT: fpcnt <= 8'd1; |
`FSCALEB: fpcnt <= 8'd2; |
default: fpcnt <= 8'h00; |
endcase |
`FLT1: |
case(func6b) |
`FABS,`FNABS,`FNEG,`FMAN,`FMOV,`FSIGN, |
`FCVTSD,`FCVTDS: begin fpcnt <= 8'd0; end |
`FTOI: begin fpcnt <= 8'd1; end |
`ITOF: begin fpcnt <= 8'd1; end |
`TRUNC: begin fpcnt <= 8'd1; end |
`FSQRT: begin fpcnt <= maxdivcnt; end |
default: fpcnt <= 8'h00; |
endcase |
`FLT1A: |
case(func6b) |
`FRES: fpcnt <= 8'h03; |
default: fpcnt <= 8'h00; |
endcase |
default: fpcnt <= 8'h00; |
endcase |
else if (!op_done) begin |
if ((op4==`FLT2||op4==`FLT2LI) && func6b==`FDIV && divDone) |
fpcnt <= 8'h00; |
else |
fpcnt <= fpcnt - 1; |
end |
end |
end |
endmodule |
|
/fpZLUnit.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2007-2016 Robert Finch, Waterloo |
// \\__/ o\ (C) 2007-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
37,77 → 37,146
// |
// ============================================================================ |
|
`define FLOAT 6'h36 |
`define FMOV 6'h00 |
`define FNEG 6'h04 |
`define FABS 6'h05 |
`define FSIGN 6'h06 |
`define FMAN 6'h07 |
`define FNABS 6'h08 |
`define FCVTSQ 6'h0B |
`define FLOAT 4'h1 |
`define FLT1 4'h1 |
`define FLT2 4'h2 |
`define FLT3 4'h3 |
`define FANDI 4'hE |
`define FORI 4'hF |
|
`define FMAX 5'h10 |
`define FMIN 5'h11 |
`define FCMP 5'h06 |
`define FMOV 5'h00 |
`define FNEG 5'h04 |
`define FABS 5'h05 |
`define FSIGN 5'h06 |
`define FMAN 5'h07 |
`define FNABS 5'h08 |
`define FCVTSD 5'h09 |
`define F32TO80 5'h0A |
`define ISNAN 5'h0E |
`define CPYSGN 5'h0F // FLT2 |
`define FINITE 5'h0F // FLT1 |
//`define FCVTSQ 6'h1B |
`define FCVTDS 5'h19 |
`define FSLT 5'h10 |
`define FSGE 5'h11 |
`define FSLE 5'h12 |
`define FSGT 5'h13 |
`define FSEQ 5'h14 |
`define FSNE 5'h15 |
`define FSUN 5'h16 |
`define F80TO32 5'h1A |
`define UNORD 5'h1F |
|
module fpZLUnit |
#(parameter WID=32) |
#(parameter WID=80) |
( |
input [31:0] ir, |
input [WID-1:0] a, |
input [WID-1:0] b, // for fcmp |
output reg [WID-1:0] o, |
output nanx |
input [3:0] op4, |
input [4:0] func5, |
input [39:0] ir, |
input [WID+3:0] a, |
input [WID+3:0] b, // for fcmp |
input [WID+3:0] c, // for fcmp |
output reg [WID+3:0] o, |
output reg nanx |
); |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
wire [5:0] op = ir[5:0]; |
wire [1:0] prec = ir[28:27]; |
wire [5:0] fn = ir[17:12]; |
wire [2:0] fn3 = ir[31:29]; |
//wire [1:0] prec = ir[25:24]; |
|
wire [3:0] cmp_o; |
wire nanxab,nanxac,nanxbc; |
wire nana; |
wire [EMSB:0] expa; |
wire [FMSB:0] ma; |
wire xinfa; |
wire [4:0] cmp_o, cmpac_o, cmpbc_o; |
|
fp_cmp_unit #(WID) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanx) ); |
// Zero is being passed for b in some cases so the NaN must come from a if |
// present. |
fp_cmp_unit #(WID+4) u1 (.a(a), .b(b), .o(cmp_o), .nanx(nanxab) ); |
fp_cmp_unit #(WID+4) u2 (.a(a), .b(c), .o(cmpac_o), .nanx(nanxac) ); |
fp_cmp_unit #(WID+4) u3 (.a(b), .b(c), .o(cmpbc_o), .nanx(nanxbc) ); |
fpDecomp #(WID+4) u4 (.i(a), .sgn(), .exp(expa), .man(ma), .fract(), .xz(), .mz(), .vz(), .inf(), .xinf(xinfa), .qnan(), .snan(), .nan(nana)); |
wire [127:0] sq_o; |
fcvtsq u2 (a, sq_o); |
//fcvtsq u2 (a[31:0], sq_o); |
wire [79:0] sdo; |
fs2d u5 (a[43:4], sdo); |
wire [39:0] dso; |
fd2s u6 (a, dso); |
wire [79:0] f32to80o; |
wire [31:0] f80to32o; |
F32ToF80 u7 (a[35:4], f32to80o); |
F80ToF32 u8 (a[WID+3:4], f32to80o); |
|
always @* |
case(op) |
`FLOAT: |
case(fn3) |
3'b000: |
case(fn) |
`FABS: o <= {1'b0,a[WID-2:0]}; // fabs |
`FNABS: o <= {1'b1,a[WID-2:0]}; // fnabs |
`FNEG: o <= {~a[WID-1],a[WID-2:0]}; // fneg |
`FMOV: o <= a; // fmov |
`FSIGN: o <= (a[WID-2:0]==0) ? 0 : {a[WID-1],1'b0,{EMSB{1'b1}},{FMSB+1{1'b0}}}; // fsign |
`FMAN: o <= {a[WID-1],1'b0,{EMSB{1'b1}},a[FMSB:0]}; // fman |
`FCVTSQ: o <= sq_o; |
default: o <= 0; |
endcase |
// FCMP |
3'b001: o <= cmp_o; |
endcase |
case(op4) |
`FLT1: |
case(func5) |
`FABS: begin o <= {1'b0,a[WID-2:0]}; nanx <= nanxab; end |
`FNABS: begin o <= {1'b1,a[WID-2:0]}; nanx <= nanxab; end |
`FNEG: begin o <= {~a[WID-1],a[WID-2:0]}; nanx <= nanxab; end |
`FMOV: begin o <= a; nanx <= nanxab; end |
`FSIGN: begin o <= (a[WID-2:0]==0) ? 0 : {a[WID-1],1'b0,{EMSB{1'b1}},{FMSB+1{1'b0}}}; nanx <= 1'b0; end |
`FMAN: begin o <= {a[WID-1],1'b0,{EMSB{1'b1}},a[FMSB:0]}; nanx <= 1'b0; end |
//`FCVTSQ: o <= sq_o; |
`FCVTSD: begin o <= {sdo,4'h0}; nanx <= nanxab; end |
`FCVTDS: begin o <= {{40{dso[39]}},dso,4'h0}; nanx <= nanxab; end |
`F32TO80: begin o <= {f32to80o,4'h0}; nanx <= nanxab; end |
`F80TO32: begin o <= {f80to32o,4'h0}; nanx <= nanxab; end |
`ISNAN: begin o <= nana; end |
`FINITE: begin o <= !xinfa; end |
`UNORD: begin o <= nanxab; end |
default: o <= 0; |
endcase |
`FLT2: |
case(func5) |
`FCMP: begin o <= {cmp_o,4'h0}; nanx <= nanxab; end |
`FSLT: begin o <= {cmp_o[1],4'h0}; nanx <= nanxab; end |
`FSGE: begin o <= {~cmp_o[1],4'h0}; nanx <= nanxab; end |
`FSLE: begin o <= {cmp_o[2],4'h0}; nanx <= nanxab; end |
`FSGT: begin o <= ~{cmp_o[2],4'h0}; nanx <= nanxab; end |
`FSEQ: begin o <= {cmp_o[0],4'h0}; nanx <= nanxab; end |
`FSNE: begin o <= ~{cmp_o[0],4'h0}; nanx <= nanxab; end |
`FSUN: begin o <= {cmp_o[4],4'h0}; nanx <= nanxab; end |
`CPYSGN: begin o <= {b[WID+3],a[WID+2:0]}; end |
default: o <= 0; |
endcase |
`FLT3: |
case(func5) |
`FMAX: |
begin |
o <= ~cmp_o[2] & ~cmpac_o[2] ? a : ~cmpbc_o[2] ? b : c; |
nanx <= nanxab|nanxac|nanxbc; |
end |
`FMIN: |
begin |
o <= cmp_o[1] & cmpac_o[1] ? a : cmpbc_o[2] ? b : c; |
nanx <= nanxab|nanxac|nanxbc; |
end |
default: o <= 0; |
endcase |
`FANDI: |
begin |
case(ir[32:31]) |
2'd0: o <= {a[23: 4] & {{58{1'b1}},ir[39:33],ir[30:16],4'h0}}; |
2'd1: o <= a[43:24] & {{36{1'b1}},ir[39:33],ir[30:16],{20{1'b1}}}; |
2'd2: o <= a[63:44] & {{14{1'b1}},ir[39:33],ir[30:16],{40{1'b1}}}; |
2'd3: o <= a[83:64] & {ir[39:33],ir[30:16],{60{1'b1}}}; |
endcase |
nanx <= 1'b0; |
end |
`FORI: |
begin |
case(ir[32:31]) |
2'd0: o <= {a[23: 4] & {{58{1'b0}},ir[39:33],ir[30:16],4'h0}}; |
2'd1: o <= a[43:24] & {{36{1'b0}},ir[39:33],ir[30:16],{20{1'b0}}}; |
2'd2: o <= a[63:44] & {{14{1'b0}},ir[39:33],ir[30:16],{40{1'b0}}}; |
2'd3: o <= a[83:64] & {ir[39:33],ir[30:16],{60{1'b0}}}; |
endcase |
nanx <= 1'b0; |
end |
default: o <= 0; |
endcase |
|
/fp_cmp_unit.v
29,29 → 29,7
|
module fp_cmp_unit(a, b, o, nanx); |
parameter WID = 32; |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 11 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 34 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
input [WID-1:0] a, b; |
output [4:0] o; |
/fp_decomp.v
27,33 → 27,9
// ============================================================================ |
|
module fp_decomp(i, sgn, exp, man, fract, xz, mz, vz, inf, xinf, qnan, snan, nan); |
|
parameter WID=32; |
`include "fpSize.sv" |
|
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
|
input [MSB:0] i; |
|
output sgn; |
/fp_defines.v
1,6 → 1,6
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
61,3 → 61,15
`define QINFZEROQ 127'h7F_FF00000000_0000000000_0000000004 // - infinity X zero |
`define QSQRTINFQ 127'h7F_FF00000000_0000000000_0000000005 // - square root of infinity |
`define QSQRTNEGQ 127'h7F_FF00000000_0000000000_0000000006 // - square root of negative number |
|
`define POINT5S 32'h3F000000 |
`define POINT5SX 40'h3F80000000 |
`define POINT5D 64'h3FE0000000000000 |
`define POINT5DX 80'h3FFE0000000000000000 |
|
`define AIN 3'd0 |
`define BIN 3'd1 |
`define RES 3'd2 |
`define POINT5 3'd3 |
|
|
/fpdivr2.v
1,10 → 1,14
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Stratford |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpdivr2.v |
// Radix 2 floating point divider primitive |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
// it under the terms of the GNU Lesser General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or |
17,108 → 21,100
// |
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// fpdivr2.v |
// Radix 2 floating point divider primitive |
// |
// |
// ============================================================================ |
// |
module fpdivr2 |
#( parameter WID = 24 ) |
( |
input clk, |
input ld, |
input [WID-1:0] a, |
input [WID-1:0] b, |
output reg [WID*2-1:0] q, |
output [WID-1:0] r, |
output done |
); |
localparam DMSB = WID-1; |
|
reg [DMSB:0] rx [2:0]; // remainder holds |
reg [DMSB:0] rxx; |
reg [7:0] cnt; // iteration count |
wire [DMSB:0] sdq; |
wire [DMSB:0] sdr; |
wire sdval = 1'b0; |
wire sdbz; |
reg willGo0; |
|
//specialCaseDivider #(WID) u1 (.a(a), .b(b), .q(sdq), .r(sdr), .val(sdval), .dbz(sdbz) ); |
module fpdivr2(clk4x, ld, a, b, q, r, done, lzcnt); |
parameter WID = 112; |
parameter RADIX = 2; |
localparam WID1 = WID;//((WID+2)/3)*3; // make width a multiple of three |
localparam DMSB = WID1-1; |
input clk4x; |
input ld; |
input [WID1-1:0] a; |
input [WID1-1:0] b; |
output reg [WID1*2-1:0] q = 0; |
output reg [WID1-1:0] r = 0; |
output reg done = 1'b0; |
output reg [7:0] lzcnt; |
|
initial begin |
rx[0] = 0; |
end |
|
always @(posedge clk) |
if (ld) |
cnt <= sdval ? 8'b10000000 : WID*2-2; |
else if (!done) |
cnt <= cnt - 1; |
reg [8:0] cnt; // iteration count |
reg [WID1*2-1:0] qi = 0; |
reg [DMSB+1:0] ri = 0; |
wire b0; |
reg gotnz; // got a non-zero bit |
|
reg done1; |
wire [7:0] maxcnt; |
assign b0 = b <= ri; |
wire [DMSB+1:0] r1 = b0 ? ri - b : ri; |
assign maxcnt = WID1*2; |
|
always @(posedge clk) |
if (ld) begin |
rxx <= 0; |
if (sdval) |
q <= {sdq,{WID{1'b0}}}; |
else |
q <= {a,{WID{1'b0}}}; |
end |
else if (!done) begin |
willGo0 = {rxx ,q[WID*2-1 ]} > b; |
rx[0] = willGo0 ? {rxx ,q[WID*2-1 ]} - b : {rxx ,q[WID*2-1 ]}; |
q[WID*2-1:1] <= q[WID*2-1-1:0]; |
q[0] <= willGo0; |
rxx <= rx[0]; |
end |
// Done pulse for the external circuit. It must last longer than one period of |
// the 1x clock so that it is recognized. |
// The pulse is cleared by ld, set when cnt reads 9'h1FE (the counter has |
// wrapped below zero) and cleared again when cnt reads 9'h1F7, which is the |
// value the counter holds at once it has finished. |
always @(posedge clk4x) |
if (ld) |
done <= 1'b0; |
else if (cnt==9'h1FE) |
done <= 1'b1; |
else if (cnt==9'h1F7) |
done <= 1'b0; |
|
// correct remainder |
assign r = sdval ? sdr : rx[2][DMSB] ? rx[2] + b : rx[2]; |
assign done = cnt[7]; |
// Internal done pulse. |
// done1 defaults to 0 on every clock and is only set when cnt reads 9'h1FF |
// (the first value after the counter wraps below zero) and ld is not asserted. |
// It is the enable that captures qi and ri into the q and r outputs. |
always @(posedge clk4x) |
begin |
done1 <= 1'b0; |
if (ld) |
done1 <= 1'b0; |
else if (cnt==9'h1FF) |
done1 <= 1'b1; |
end |
|
endmodule |
// Iteration counter. |
// Loaded with maxcnt on ld, then decremented by one every clk4x cycle. It |
// keeps counting after wrapping below zero and stops (holds) at 9'h1F7. |
always @(posedge clk4x) |
if (ld) |
cnt <= maxcnt; |
else if (cnt != 9'h1F7) |
cnt <= cnt - 8'd1; |
|
/* |
module fpdivr2_tb(); |
// Sticky "got a non-zero quotient bit" flag. |
// Cleared by ld; set the first time b0 is 1 while cnt[8] is still clear |
// (the counter has not wrapped yet). Once set it stays set until the next ld. |
always @(posedge clk4x) |
if (ld) |
gotnz <= 1'b0; |
else if (!cnt[8]) begin |
if (b0) |
gotnz <= 1'b1; |
end |
|
reg rst; |
reg clk; |
reg ld; |
reg [6:0] cnt; |
wire cnt81; |
delay1 #(1) u1 (clk4x, 1'b1, cnt[8], cnt81); |
|
wire ce = 1'b1; |
wire [23:0] a = 24'h0_4000; |
wire [23:0] b = 24'd101; |
wire [45:0] q; |
wire [23:0] r; |
wire done; |
// Leading-zero count of the quotient. |
// Cleared by ld; incremented for every step in which the quotient bit b0 is 0 |
// and no non-zero quotient bit has been seen yet (gotnz clear). |
// Counting is gated by cnt81, the output of the delay1 instance fed from |
// cnt[8] - presumably a one-clock-delayed copy; confirm against delay1. |
always @(posedge clk4x) |
if (ld) |
lzcnt <= 8'h00; |
else if (!cnt81) begin |
if (b0==1'b0 && !gotnz) |
lzcnt <= lzcnt + 8'd1; |
end |
|
initial begin |
clk = 1; |
rst = 0; |
#100 rst = 1; |
#100 rst = 0; |
end |
// Working dividend / quotient shift register. |
// On ld it is loaded with a in the upper half and zeros in the lower half. |
// The concatenation is three bits wider than qi, so the leading 3'b0 is |
// dropped by the assignment. |
// While cnt81 is clear, qi shifts left one bit per clock and the new quotient |
// bit b0 enters at bit 0. |
always @(posedge clk4x) |
if (ld) |
qi <= {3'b0,a,{WID1{1'b0}}}; |
else if (!cnt81) |
qi[WID1*2-1:0] <= {qi[WID1*2-1-1:0],b0}; |
|
always #20 clk = ~clk; // 25 MHz |
|
always @(posedge clk) |
if (rst) |
cnt <= 0; |
else begin |
ld <= 0; |
cnt <= cnt + 1; |
if (cnt == 3) |
ld <= 1; |
$display("ld=%b q=%h r=%h done=%b", ld, q, r, done); |
end |
|
// Partial remainder register. |
// Cleared by ld. While cnt81 is clear, each clock takes r1 (ri with b |
// subtracted when b0 is 1, otherwise ri unchanged), shifts it left one bit and |
// brings in the current most significant bit of qi as the new low bit. |
always @(posedge clk4x) |
if (ld) |
ri <= 0; |
else if (!cnt81) |
ri <= {r1[DMSB:0],qi[WID1*2-1]}; |
|
fpdivr2 #(24) divu0(.clk(clk), .ld(ld), .a(a), .b(b), .q(q), .r(r), .done(done) ); |
// Output capture: when the internal done pulse fires, copy the working |
// registers to the outputs. q and r then hold their values until the next |
// done1 pulse. |
// NOTE(review): ri is one bit wider than r, so its top bit is dropped by the |
// assignment - confirm this is intended. |
always @(posedge clk4x) |
if (done1) |
q <= qi; |
always @(posedge clk4x) |
if (done1) |
r <= ri; |
|
endmodule |
*/ |
|
|
/fpdivr8.v
1,12 → 1,13
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2006-2016 Robert Finch, Waterloo |
// \\__/ o\ (C) 2006-2018 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
// |
// fpdivr8.v |
// Radix 8 floating point divider primitive |
// Radix8 doesn't work !!!! |
// Radix 2 floating point divider primitive |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
26,124 → 27,65
|
module fpdivr8(clk, ld, a, b, q, r, done, lzcnt); |
parameter WID = 112; |
parameter RADIX = 8; |
localparam WID1 = WID;//((WID+2)/3)*3; // make width a multiple of three |
localparam DMSB = WID1-1; |
localparam DMSB = WID-1; |
input clk; |
input ld; |
input [WID1-1:0] a; |
input [WID1-1:0] b; |
output reg [WID1*2-1:0] q; |
output [WID1-1:0] r; |
input [WID-1:0] a; |
input [WID-1:0] b; |
output reg [WID-1:0] q; |
output [WID-1:0] r; |
output reg done; |
output reg [7:0] lzcnt; |
|
|
wire [DMSB:0] rx [2:0]; // remainder holds |
reg [DMSB:0] rxx; |
reg [8:0] cnt; // iteration count |
wire [DMSB:0] sdq; |
wire [DMSB:0] sdr; |
wire sdval; |
wire sddbz; |
reg [DMSB+1:0] ri = 0; |
wire b0,b1,b2; |
wire b0,b1,b2,b3; |
wire [DMSB+1:0] r1,r2,r3; |
reg gotnz; |
|
specialCaseDivider #(WID1) u1 (.a(a), .b(b), .q(sdq), .val(sdval), .dbz(sdbz) ); |
|
wire [7:0] maxcnt; |
wire [2:0] n1; |
generate |
assign maxcnt = WID/3+1; |
assign b0 = b < rxx; |
assign r1 = b0 ? rxx - b : rxx; |
assign b1 = b < {r1,q[WID-1]}; |
assign r2 = b1 ? {r1,q[WID-1]} - b : {r1,q[WID-1]}; |
assign b2 = b < {r2,q[WID-2]}; |
assign r3 = b2 ? {r2,q[WID-2]} - b : {r2,q[WID-2]}; |
|
always @(posedge clk) |
if (ld) |
rxx <= {WID{1'b0}}; |
else if (!done) |
rxx <= {r3,q[WID-3]}; |
|
always @(posedge clk) |
begin |
if (RADIX==8) begin |
assign maxcnt = WID1*2/3+1; |
assign b0 = b < rxx; |
assign r1 = b0 ? rxx - b : rxx; |
assign b1 = b < {r1,q[WID*2-1]}; |
assign r2 = b1 ? {r1,q[WID*2-1]} - b : {r1,q[WID*2-1]}; |
assign b2 = b < {r2,q[WID*2-1-1]}; |
assign r3 = b2 ? {r2,q[WID*2-1-1]} - b : {r2,q[WID*2-1-1]}; |
assign n1 = 2; |
always @(posedge clk) |
if (ld) |
rxx <= 0; |
else if (!done) |
rxx <= {r3,q[WID*2-1]}; |
done <= 1'b0; |
if (ld) begin |
cnt <= maxcnt; |
end |
else if (cnt != 9'h1FE) begin |
cnt <= cnt - 1; |
if (cnt==9'h1FF) |
done <= 1'b1; |
end |
end |
else if (RADIX==2) begin |
assign b0 = b <= ri; |
assign r1 = b0 ? ri - b : ri; |
assign maxcnt = WID1*2-1; |
assign n1 = 0; |
// assign rx[0] = rxx [DMSB] ? {rxx ,q[WID*2-1 ]} + b : {rxx ,q[WID*2-1 ]} - b; |
end |
end |
endgenerate |
|
always @(posedge clk) |
begin |
done <= 1'b0; |
if (ld) begin |
cnt <= sdval ? 9'h1FE : maxcnt; |
done <= sdval; |
end |
else if (cnt != 9'h1FE) begin |
cnt <= cnt - 1; |
if (cnt==9'h1FF) |
done <= 1'b1; |
end |
|
always @(posedge clk) |
if (ld) begin |
q <= a; |
end |
else if (!done) begin |
q[WID-1:3] <= q[WID-4:0]; |
q[2] <= b0; |
q[1] <= b1; |
q[0] <= b2; |
end |
assign r = r3; |
|
|
generate |
begin |
if (RADIX==8) begin |
always @(posedge clk) |
if (ld) begin |
gotnz <= 1'b0; |
lzcnt <= 8'h00; |
if (sdval) |
q <= {3'b0,sdq,{WID1{1'b0}}}; |
else |
q <= {3'b0,a,{WID1{1'b0}}}; |
end |
else if (!done) begin |
q[WID1-1:3] <= q[WID1-1-3:0]; |
q[0] <= b0; |
q[1] <= b1; |
q[2] <= b2; |
end |
// correct remainder |
assign r = sdval ? sdr : r3; |
end |
if (RADIX==2) begin |
always @(posedge clk) |
if (ld) begin |
gotnz <= 1'b0; |
lzcnt <= 8'h00; |
ri <= 0; |
if (sdval) |
q <= {3'b0,sdq,{WID1{1'b0}}}; |
else |
q <= {3'b0,a,{WID1{1'b0}}}; |
end |
else if (cnt!=9'h1FE) begin |
if (b0) |
gotnz <= 1'b1; |
if (b0==0 && !gotnz) |
lzcnt <= lzcnt + 8'd1; |
q[WID1*2-1:1] <= q[WID1*2-1-1:0]; |
q[0] <= b0; |
ri <= {r1[DMSB:0],q[WID1*2-1]}; |
end |
// correct remainder |
assign r = sdval ? sdr : ri; |
end |
end |
endgenerate |
|
endmodule |
|
|
/i2f.v
36,29 → 36,7
input [WID-1:0] i, // integer input |
output [WID-1:0] o // float output |
); |
localparam MSB = WID-1; |
localparam EMSB = WID==128 ? 14 : |
WID==96 ? 14 : |
WID==80 ? 14 : |
WID==64 ? 10 : |
WID==52 ? 10 : |
WID==48 ? 10 : |
WID==44 ? 10 : |
WID==42 ? 10 : |
WID==40 ? 9 : |
WID==32 ? 7 : |
WID==24 ? 6 : 4; |
localparam FMSB = WID==128 ? 111 : |
WID==96 ? 79 : |
WID==80 ? 63 : |
WID==64 ? 51 : |
WID==52 ? 39 : |
WID==48 ? 35 : |
WID==44 ? 31 : |
WID==42 ? 29 : |
WID==40 ? 28 : |
WID==32 ? 22 : |
WID==24 ? 15 : 9; |
`include "fpSize.sv" |
|
wire [EMSB:0] zeroXp = {EMSB{1'b1}}; |
|
78,14 → 56,20
if (WID==128) begin |
cntlz128Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) ); |
end else if (WID==96) begin |
cntlz96Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) ); |
cntlz96Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) ); |
assign lz[7]=1'b0; |
end else if (WID==84) begin |
cntlz96Reg u4 (.clk(clk), .ce(ce), .i({imag1,12'hfff}), .o(lz[6:0]) ); |
assign lz[7]=1'b0; |
end else if (WID==80) begin |
cntlz80Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) ); |
cntlz80Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) ); |
assign lz[7]=1'b0; |
end else if (WID==64) begin |
cntlz64Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) ); |
cntlz64Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[6:0]) ); |
assign lz[7]=1'b0; |
end else begin |
cntlz32Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz) ); |
assign lz[6]=1'b0; |
cntlz32Reg u4 (.clk(clk), .ce(ce), .i(imag1), .o(lz[5:0]) ); |
assign lz[7:6]=2'b00; |
end |
endgenerate |
|
124,6 → 108,7
reg [7:0] cnt; |
wire [31:0] fo; |
reg [31:0] i; |
wire [79:0] fo80; |
initial begin |
clk = 1'b0; |
cnt = 0; |
140,6 → 125,6
endcase |
|
i2f #(32) u1 (.clk(clk), .ce(1), .rm(2'd0), .i(i), .o(fo) ); |
i2f #(80) u2 (.clk(clk), .ce(1), .rm(2'd0), .i(i), .o(fo) ); |
i2f #(80) u2 (.clk(clk), .ce(1), .rm(2'd0), .i({{48{i[31]}},i}), .o(fo80) ); |
|
endmodule |
/isqrt.v
1,7 → 1,7
`timescale 1ns / 1ps |
// ============================================================================ |
// __ |
// \\__/ o\ (C) 2010-2018 Robert Finch, Waterloo |
// \\__/ o\ (C) 2010-2019 Robert Finch, Waterloo |
// \ __ / All rights reserved. |
// \/_// robfinch<remove>@finitron.ca |
// || |
8,6 → 8,9
// |
// isqrt.v |
// - integer square root |
// - uses the standard long form calc. |
// - geared towards use in a floating point unit |
// - calculates to WID fractional precision (double width output) |
// |
// |
// This source file is free software: you can redistribute it and/or modify |
23,8 → 26,6
// You should have received a copy of the GNU General Public License |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
// |
// Floating Point Multiplier / Divider |
// |
// ============================================================================ |
|
module isqrt(rst, clk, ce, ld, a, o, done); |
85,11 → 86,15
// Shift the root |
root <= {root+doesGoInto,1'b0}; // root * 2 + 1/0 |
end |
else |
else begin |
cnt <= 8'h00; |
state <= DONE; |
end |
DONE: |
begin |
state <= IDLE; |
cnt <= cnt + 8'd1; |
if (cnt == 8'd6) |
state <= IDLE; |
end |
endcase |
end |