Line 44... |
Line 44... |
//
|
//
|
// ============================================================================
|
// ============================================================================
|
|
|
module fpFMA (clk, ce, op, rm, a, b, c, o, inf);
|
module fpFMA (clk, ce, op, rm, a, b, c, o, inf);
|
parameter WID = 32;
|
parameter WID = 32;
|
localparam MSB = WID-1;
|
`include "fpSize.sv"
|
localparam EMSB = WID==128 ? 14 :
|
|
WID==96 ? 14 :
|
|
WID==80 ? 14 :
|
|
WID==64 ? 10 :
|
|
WID==52 ? 10 :
|
|
WID==48 ? 11 :
|
|
WID==44 ? 10 :
|
|
WID==42 ? 10 :
|
|
WID==40 ? 9 :
|
|
WID==32 ? 7 :
|
|
WID==24 ? 6 : 4;
|
|
localparam FMSB = WID==128 ? 111 :
|
|
WID==96 ? 79 :
|
|
WID==80 ? 63 :
|
|
WID==64 ? 51 :
|
|
WID==52 ? 39 :
|
|
WID==48 ? 34 :
|
|
WID==44 ? 31 :
|
|
WID==42 ? 29 :
|
|
WID==40 ? 28 :
|
|
WID==32 ? 22 :
|
|
WID==24 ? 15 : 9;
|
|
|
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
|
|
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
|
|
|
|
input clk;
|
input clk;
|
input ce;
|
input ce;
|
input op; // operation 0 = add, 1 = subtract
|
input op; // operation 0 = add, 1 = subtract
|
input [2:0] rm;
|
input [2:0] rm;
|
Line 152... |
Line 127... |
if (ce) realOp2 <= op1 ^ (sa1 ^ sb1) ^ sc1;
|
if (ce) realOp2 <= op1 ^ (sa1 ^ sb1) ^ sc1;
|
|
|
|
|
reg [FX:0] fract5;
|
reg [FX:0] fract5;
|
generate
|
generate
|
if (WID==80) begin
|
if (WID==84) begin
|
|
reg [33:0] p00,p01,p02,p03;
|
|
reg [33:0] p10,p11,p12,p13;
|
|
reg [33:0] p20,p21,p22,p23;
|
|
reg [33:0] p30,p31,p32,p33;
|
|
reg [135:0] fract3a;
|
|
reg [135:0] fract3b;
|
|
reg [135:0] fract3c;
|
|
reg [135:0] fract3d;
|
|
reg [135:0] fract4a;
|
|
reg [135:0] fract4b;
|
|
|
|
always @(posedge clk)
|
|
if (ce) begin
|
|
p00 <= fracta1[16: 0] * fractb1[16: 0];
|
|
p01 <= fracta1[33:17] * fractb1[16: 0];
|
|
p02 <= fracta1[50:34] * fractb1[16: 0];
|
|
p03 <= fracta1[67:51] * fractb1[16: 0];
|
|
|
|
p10 <= fracta1[16: 0] * fractb1[33:17];
|
|
p11 <= fracta1[33:17] * fractb1[33:17];
|
|
p12 <= fracta1[50:34] * fractb1[33:17];
|
|
p13 <= fracta1[67:51] * fractb1[33:17];
|
|
|
|
p20 <= fracta1[16: 0] * fractb1[50:34];
|
|
p21 <= fracta1[33:17] * fractb1[50:34];
|
|
p22 <= fracta1[50:34] * fractb1[50:34];
|
|
p23 <= fracta1[67:51] * fractb1[50:34];
|
|
|
|
p30 <= fracta1[15: 0] * fractb1[67:51];
|
|
p31 <= fracta1[31:16] * fractb1[67:51];
|
|
p32 <= fracta1[47:32] * fractb1[67:51];
|
|
p33 <= fracta1[63:48] * fractb1[67:51];
|
|
end
|
|
always @(posedge clk)
|
|
if (ce) begin
|
|
fract3a <= {p33,p31,p20,p00};
|
|
fract3b <= {p32,p12,p10,17'b0} + {p23,p03,p01,17'b0};
|
|
fract3c <= {p22,p11,34'b0} + {p13,p02,34'b0};
|
|
fract3d <= {p12,51'b0} + {p03,51'b0};
|
|
end
|
|
always @(posedge clk)
|
|
if (ce) begin
|
|
fract4a <= fract3a + fract3b;
|
|
fract4b <= fract3c + fract3d;
|
|
end
|
|
always @(posedge clk)
|
|
if (ce) begin
|
|
fract5 <= fract4a + fract4b;
|
|
end
|
|
end
|
|
else if (WID==80) begin
|
reg [31:0] p00,p01,p02,p03;
|
reg [31:0] p00,p01,p02,p03;
|
reg [31:0] p10,p11,p12,p13;
|
reg [31:0] p10,p11,p12,p13;
|
reg [31:0] p20,p21,p22,p23;
|
reg [31:0] p20,p21,p22,p23;
|
reg [31:0] p30,p31,p32,p33;
|
reg [31:0] p30,p31,p32,p33;
|
reg [127:0] fract3a;
|
reg [127:0] fract3a;
|
Line 504... |
Line 530... |
reg so9;
|
reg so9;
|
reg [EMSB:0] ex9;
|
reg [EMSB:0] ex9;
|
reg [EMSB:0] ex9a;
|
reg [EMSB:0] ex9a;
|
reg ex_gt_xc9;
|
reg ex_gt_xc9;
|
reg [EMSB:0] xc9;
|
reg [EMSB:0] xc9;
|
|
reg a_gt_c9;
|
wire [FX:0] mo9;
|
wire [FX:0] mo9;
|
wire [FMSB+1:0] fractc9;
|
wire [FMSB+1:0] fractc9;
|
wire under9;
|
wire under9;
|
|
wire xeq9;
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) ex_gt_xc9 <= ex_gt_xc8;
|
if (ce) ex_gt_xc9 <= ex_gt_xc8;
|
always @(posedge clk)
|
always @(posedge clk)
|
|
if (ce) a_gt_c9 <= a_gt_b8;
|
|
always @(posedge clk)
|
if (ce) xc9 <= xc8;
|
if (ce) xc9 <= xc8;
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) ex9a <= ex8;
|
if (ce) ex9a <= ex8;
|
|
|
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
|
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
|
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
|
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
|
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
|
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
|
|
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9));
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) ex9 <= resZero8 ? 0 : ex_gt_xc8 ? ex8 : xc8;
|
if (ce) ex9 <= resZero8 ? 0 : ex_gt_xc8 ? ex8 : xc8;
|
|
|
// Compute output sign
|
// Compute output sign
|
Line 554... |
Line 585... |
// is the same as an add. The underflow is tracked rather than
|
// is the same as an add. The underflow is tracked rather than
|
// using extra bits in the exponent.
|
// using extra bits in the exponent.
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
reg [EMSB:0] xdiff10;
|
reg [EMSB:0] xdiff10;
|
reg [FX:0] mfs;
|
reg [FX:0] mfs;
|
|
reg ops10;
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
|
if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
|
: (under9 ? xc9 + ex9a : xc9 - ex9a);
|
: (under9 ? xc9 + ex9a : xc9 - ex9a);
|
|
|
// Determine which fraction to denormalize (the one with the
|
// Determine which fraction to denormalize (the one with the
|
// smaller exponent is denormalized).
|
// smaller exponent is denormalized). If the exponents are equal
|
|
// denormalize the smaller fraction.
|
|
always @(posedge clk)
|
|
if (ce) mfs <=
|
|
xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9)
|
|
: ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) mfs <= ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
|
if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
|
|
: (ex_gt_xc9 ? 1'b1 : 1'b0);
|
|
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
// Clock #11
|
// Clock #11
|
// Limit the size of the shifter to only bits needed.
|
// Limit the size of the shifter to only bits needed.
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
Line 588... |
Line 627... |
begin
|
begin
|
if (WID==128)
|
if (WID==128)
|
redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
else if (WID==96)
|
else if (WID==96)
|
redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
|
else if (WID==84)
|
|
redor84 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
else if (WID==80)
|
else if (WID==80)
|
redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
else if (WID==64)
|
else if (WID==64)
|
redor64 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
redor64 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
|
else if (WID==32)
|
else if (WID==32)
|
Line 610... |
Line 651... |
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
reg [FX+2:0] mfs13;
|
reg [FX+2:0] mfs13;
|
wire [FX:0] mo13;
|
wire [FX:0] mo13;
|
wire ex_gt_xc13;
|
wire ex_gt_xc13;
|
wire [FMSB+1:0] fractc13;
|
wire [FMSB+1:0] fractc13;
|
|
wire ops13;
|
|
|
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
|
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
|
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
|
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
|
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
|
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
|
|
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13));
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;
|
if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;
|
|
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
Line 628... |
Line 671... |
wire a_gt_b14;
|
wire a_gt_b14;
|
|
|
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));
|
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));
|
|
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) oa <= ex_gt_xc13 ? {mo13,2'b00} : mfs13;
|
if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13;
|
always @(posedge clk)
|
always @(posedge clk)
|
if (ce) ob <= ex_gt_xc13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
|
if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
|
|
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
// Clock #15
|
// Clock #15
|
// - Sort operands
|
// - Sort operands
|
// -----------------------------------------------------------
|
// -----------------------------------------------------------
|
Line 691... |
Line 734... |
casez({aInf16&cInf16,Nan16,cNan16,exinf16})
|
casez({aInf16&cInf16,Nan16,cNan16,exinf16})
|
4'b1???: mo17 <= {1'b0,op16,{FMSB-1{1'b0}},op16,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add
|
4'b1???: mo17 <= {1'b0,op16,{FMSB-1{1'b0}},op16,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add
|
4'b01??: mo17 <= {1'b0,mo16};
|
4'b01??: mo17 <= {1'b0,mo16};
|
4'b001?: mo17 <= {1'b0,fractc16[FMSB+1:0],{FMSB{1'b0}}};
|
4'b001?: mo17 <= {1'b0,fractc16[FMSB+1:0],{FMSB{1'b0}}};
|
4'b0001: mo17 <= 1'd0;
|
4'b0001: mo17 <= 1'd0;
|
default: mo17 <= mab[FX+3:2]; // mab has an extra lead bit and two trailing bits
|
default: mo17 <= mab[FX+3:2]; // mab has two extra lead bits and two trailing bits
|
endcase
|
endcase
|
|
|
assign o = {so17,ex17,mo17};
|
assign o = {so17,ex17,mo17};
|
|
|
// The following are from the multiplier!!!
|
// The following are from the multiplier!!!
|
Line 708... |
Line 751... |
|
|
// Multiplier with normalization and rounding.
|
// Multiplier with normalization and rounding.
|
|
|
module fpFMAnr(clk, ce, op, rm, a, b, c, o, sign_exe, inf, overflow, underflow);
|
module fpFMAnr(clk, ce, op, rm, a, b, c, o, sign_exe, inf, overflow, underflow);
|
parameter WID=32;
|
parameter WID=32;
|
localparam MSB = WID-1;
|
`include "fpSize.sv"
|
localparam EMSB = WID==128 ? 14 :
|
|
WID==96 ? 14 :
|
|
WID==80 ? 14 :
|
|
WID==64 ? 10 :
|
|
WID==52 ? 10 :
|
|
WID==48 ? 11 :
|
|
WID==44 ? 10 :
|
|
WID==42 ? 10 :
|
|
WID==40 ? 9 :
|
|
WID==32 ? 7 :
|
|
WID==24 ? 6 : 4;
|
|
localparam FMSB = WID==128 ? 111 :
|
|
WID==96 ? 79 :
|
|
WID==80 ? 63 :
|
|
WID==64 ? 51 :
|
|
WID==52 ? 39 :
|
|
WID==48 ? 34 :
|
|
WID==44 ? 31 :
|
|
WID==42 ? 29 :
|
|
WID==40 ? 28 :
|
|
WID==32 ? 22 :
|
|
WID==24 ? 15 : 9;
|
|
|
|
localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction
|
|
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
|
|
input clk;
|
input clk;
|
input ce;
|
input ce;
|
input op;
|
input op;
|
input [2:0] rm;
|
input [2:0] rm;
|
input [MSB:0] a, b, c;
|
input [MSB:0] a, b, c;
|