Line 8... |
Line 8... |
//// Description ////
|
//// Description ////
|
//// Wrapper for floating point unit. ////
|
//// Wrapper for floating point unit. ////
|
//// Interface based on MULT/MAC unit. ////
|
//// Interface based on MULT/MAC unit. ////
|
//// ////
|
//// ////
|
//// To Do: ////
|
//// To Do: ////
|
//// - remainder instruction implementation ////
|
//// - lf.rem.s and lf.madd.s instruction support ////
|
//// - registering in/around compare unit ////
|
//// - implement FP SPRs as needed ////
|
//// ////
|
//// ////
|
//// Author(s): ////
|
//// Author(s): ////
|
//// - Julius Baxter, julius@opencores.org ////
|
//// - Julius Baxter, julius@opencores.org ////
|
//// ////
|
//// ////
|
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
//// ////
|
//// ////
|
//// Copyright (C) 2009 Authors and OPENCORES.ORG ////
|
//// Copyright (C) 2009,2010 Authors and OPENCORES.ORG ////
|
//// ////
|
//// ////
|
//// This source file may be used and distributed without ////
|
//// This source file may be used and distributed without ////
|
//// restriction provided that this copyright statement is not ////
|
//// restriction provided that this copyright statement is not ////
|
//// removed from the file and that any derivative work contains ////
|
//// removed from the file and that any derivative work contains ////
|
//// the original copyright notice and the associated disclaimer. ////
|
//// the original copyright notice and the associated disclaimer. ////
|
Line 51... |
Line 51... |
module or1200_fpu(
|
module or1200_fpu(
|
// Clock and reset
|
// Clock and reset
|
clk, rst,
|
clk, rst,
|
|
|
// FPU interface
|
// FPU interface
|
ex_freeze, a, b, fpu_op, result,
|
ex_freeze, a, b, fpu_op, result, done,
|
|
|
// Flag controls
|
// Flag controls
|
flagforw, flag_we,
|
flagforw, flag_we,
|
|
|
// Exception signal
|
// Exception signal
|
sig_fp, except_started,
|
sig_fp, except_started,
|
|
|
// SPR interface
|
// FPCSR system register
|
fpcsr_we, fpcsr,
|
fpcsr_we, fpcsr,
|
|
|
|
// SPR interface -- currently unused
|
spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
|
spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
|
);
|
);
|
|
|
parameter width = `OR1200_OPERAND_WIDTH;
|
parameter width = `OR1200_OPERAND_WIDTH;
|
|
|
Line 84... |
Line 86... |
input ex_freeze;
|
input ex_freeze;
|
input [width-1:0] a;
|
input [width-1:0] a;
|
input [width-1:0] b;
|
input [width-1:0] b;
|
input [`OR1200_FPUOP_WIDTH-1:0] fpu_op;
|
input [`OR1200_FPUOP_WIDTH-1:0] fpu_op;
|
output [width-1:0] result;
|
output [width-1:0] result;
|
|
output done;
|
|
|
//
|
//
|
// Flag signals
|
// Flag signals
|
//
|
//
|
output flagforw;
|
output flagforw;
|
output flag_we;
|
output flag_we;
|
|
|
|
|
//
|
//
|
// FPCSR interface
|
// FPCSR interface
|
//
|
//
|
input fpcsr_we;
|
input fpcsr_we;
|
output [`OR1200_FPCSR_WIDTH-1:0] fpcsr;
|
output [`OR1200_FPCSR_WIDTH-1:0] fpcsr;
|
Line 123... |
Line 125... |
assign result = 0;
|
assign result = 0;
|
assign flagforw = 0;
|
assign flagforw = 0;
|
assign flag_we = 0;
|
assign flag_we = 0;
|
assign sig_fp = 0;
|
assign sig_fp = 0;
|
assign spr_dat_o = 0;
|
assign spr_dat_o = 0;
|
|
assign fpcsr = 0;
|
|
assign done = 1;
|
`else
|
`else
|
|
|
|
|
//
|
//
|
// Internals
|
// Internals
|
//
|
//
|
reg [2:0] fpu_op_count;
|
wire fpu_op_is_arith, fpu_op_is_conv,
|
|
fpu_op_is_comp;
|
|
wire fpu_op_r_is_arith, fpu_op_r_is_conv,
|
|
fpu_op_r_is_comp;
|
|
wire fpu_arith_done, fpu_conv_done,
|
|
fpu_comp_done;
|
|
wire [width-1:0] result_arith, result_conv;
|
|
|
reg [`OR1200_FPUOP_WIDTH:0] fpu_op_r;
|
reg [`OR1200_FPUOP_WIDTH:0] fpu_op_r;
|
reg [`OR1200_FPCSR_WIDTH-1:0] fpcsr_r;
|
reg [`OR1200_FPCSR_WIDTH-1:0] fpcsr_r;
|
reg fpu_latch_operand;
|
wire fpu_op_valid;
|
|
reg fpu_op_valid_re;
|
wire fpu_check_op;
|
wire fpu_check_op;
|
wire fpu_latch_op;
|
wire inf, inv_inf_op_in,snan, snan_in,qnan,
|
wire inf, snan, qnan, ine, overflow,
|
ine, overflow, underflow, zero, dbz,
|
underflow, zero, div_by_zero;
|
dbz_in, mul_z_inf, nan_in;
|
wire fpu_op_is_comp, fpu_op_r_is_comp;
|
wire altb, blta, aeqb, inf_cmp, zero_cmp,
|
wire altb, blta, aeqb, cmp_inf, cmp_zero,
|
|
unordered ;
|
unordered ;
|
|
wire snan_conv, ine_conv, inv_conv,
|
|
zero_conv, underflow_conv,
|
|
overflow_conv;
|
|
wire inv_comp;
|
reg flag;
|
reg flag;
|
|
|
|
|
|
assign spr_dat_o = 0;
|
|
|
assign fpcsr = fpcsr_r;
|
assign fpcsr = fpcsr_r;
|
|
|
assign sig_fp = fpcsr_r[`OR1200_FPCSR_FPEE]
|
assign sig_fp = fpcsr_r[`OR1200_FPCSR_FPEE]
|
& (|fpcsr_r[`OR1200_FPCSR_WIDTH-1:`OR1200_FPCSR_OVF]);
|
& (|fpcsr_r[`OR1200_FPCSR_WIDTH-1:`OR1200_FPCSR_OVF]);
|
|
|
|
// Top bit indicates FPU instruction
|
|
assign fpu_op_valid = fpu_op[`OR1200_FPUOP_WIDTH-1];
|
|
|
|
assign fpu_check_op = !ex_freeze & fpu_op_valid;
|
|
|
// Generate signals to latch fpu_op from decode instruction, then latch
|
// Generate signals to latch fpu_op from decode instruction, then latch
|
// operands when they appear during execute stage
|
// operands when they appear during execute stage
|
|
|
assign fpu_check_op = (!ex_freeze & fpu_op[`OR1200_FPUOP_WIDTH-1]);
|
assign fpu_op_is_arith = !(|fpu_op[3:2]);
|
|
assign fpu_op_is_conv = fpu_op[2] & !fpu_op[3];
|
assign fpu_op_is_comp = fpu_op[3];
|
assign fpu_op_is_comp = fpu_op[3];
|
|
|
|
assign fpu_op_r_is_arith = !(|fpu_op_r[3:2]);
|
|
assign fpu_op_r_is_conv = fpu_op_r[2] & !fpu_op_r[3];
|
assign fpu_op_r_is_comp = fpu_op_r[3];
|
assign fpu_op_r_is_comp = fpu_op_r[3];
|
|
|
assign fpu_latch_op = fpu_check_op & !fpu_op_is_comp;
|
assign done = (fpu_op_r_is_arith & fpu_arith_done) |
|
|
(fpu_op_r_is_conv & fpu_conv_done) |
|
|
(fpu_op_r_is_comp & fpu_comp_done) ;
|
|
|
|
// Register fpu_op (remove FPU op valid bit [7], replace with 0)
|
always @(posedge clk)
|
always @(posedge clk)
|
fpu_latch_operand <= fpu_check_op & !fpu_op_is_comp;
|
if (fpu_check_op)
|
|
fpu_op_r <= {1'b0,fpu_op[`OR1200_FPUOP_WIDTH-2:0]};
|
|
|
// Register fpu_op on comparisons, clear otherwise, remove top bit
|
// Indicate new FPU op
|
always @(posedge clk)
|
always @(posedge clk or posedge rst)
|
fpu_op_r <= (fpu_check_op & fpu_op_is_comp) ?
|
|
{1'b0,fpu_op[`OR1200_FPUOP_WIDTH-2:0]} : !ex_freeze ?
|
|
0 : fpu_op_r;
|
|
|
|
//
|
|
// Counter for each FPU operation
|
|
// Loaded at start, counts down
|
|
//
|
|
always @(posedge clk or posedge rst) begin
|
|
if (rst)
|
if (rst)
|
fpu_op_count <= 0;
|
fpu_op_valid_re <= 0;
|
else
|
else if (fpu_op_valid_re)
|
if (|fpu_op_count)
|
fpu_op_valid_re <= 0;
|
fpu_op_count <= fpu_op_count - 1;
|
|
else if(fpu_check_op)
|
else if(fpu_check_op)
|
fpu_op_count <= 5;
|
fpu_op_valid_re <= 1;
|
end
|
|
|
|
//
|
//
|
// FPCSR register
|
// FPCSR system group register implementation
|
//
|
//
|
always @(posedge clk or posedge rst) begin
|
always @(posedge clk or posedge rst) begin
|
if (rst)
|
if (rst)
|
fpcsr_r <= 0;
|
fpcsr_r <= 0;
|
else
|
else
|
begin
|
begin
|
if (fpcsr_we)
|
if (fpcsr_we)
|
fpcsr_r <= b[`OR1200_FPCSR_WIDTH-1:0];
|
fpcsr_r <= b[`OR1200_FPCSR_WIDTH-1:0];
|
else if (fpu_op_count == 1)
|
else if (done)
|
begin
|
begin
|
fpcsr_r[`OR1200_FPCSR_OVF] <= overflow;
|
fpcsr_r[`OR1200_FPCSR_OVF] <= (overflow & fpu_op_r_is_arith);
|
fpcsr_r[`OR1200_FPCSR_UNF] <= underflow;
|
fpcsr_r[`OR1200_FPCSR_UNF] <= (underflow & fpu_op_r_is_arith) |
|
fpcsr_r[`OR1200_FPCSR_SNF] <= snan;
|
(underflow_conv & fpu_op_r_is_conv);
|
fpcsr_r[`OR1200_FPCSR_QNF] <= qnan;
|
fpcsr_r[`OR1200_FPCSR_SNF] <= (snan & fpu_op_r_is_arith)|
|
fpcsr_r[`OR1200_FPCSR_ZF] <= zero |
|
(snan_conv & fpu_op_r_is_conv);
|
(cmp_zero & fpu_op_r_is_comp);
|
fpcsr_r[`OR1200_FPCSR_QNF] <= (qnan & fpu_op_r_is_arith);
|
fpcsr_r[`OR1200_FPCSR_IXF] <= ine;
|
fpcsr_r[`OR1200_FPCSR_ZF] <= (zero & fpu_op_r_is_arith) |
|
fpcsr_r[`OR1200_FPCSR_IVF] <= 0; // Not used by this FPU
|
(zero_cmp & fpu_op_r_is_comp) |
|
fpcsr_r[`OR1200_FPCSR_INF] <= inf |
|
(zero_conv & fpu_op_r_is_conv);
|
(cmp_inf & fpu_op_r_is_comp);
|
fpcsr_r[`OR1200_FPCSR_IXF] <= (ine & fpu_op_r_is_arith) |
|
fpcsr_r[`OR1200_FPCSR_DZF] <= div_by_zero;
|
(ine_conv & fpu_op_r_is_conv);
|
end // if (fpu_op_count == 1)
|
fpcsr_r[`OR1200_FPCSR_IVF] <=
|
|
((snan_in | dbz_in | inv_inf_op_in | mul_z_inf) &
|
|
fpu_op_r_is_arith) |
|
|
((inv_conv | snan_conv) & fpu_op_r_is_conv) |
|
|
(inv_comp & fpu_op_r_is_comp);
|
|
fpcsr_r[`OR1200_FPCSR_INF] <= (inf & fpu_op_r_is_arith) |
|
|
(inf_cmp & fpu_op_r_is_comp);
|
|
fpcsr_r[`OR1200_FPCSR_DZF] <= (dbz & fpu_op_r_is_arith);
|
|
end // if (done)
|
if (except_started)
|
if (except_started)
|
fpcsr_r[`OR1200_FPCSR_FPEE] <= 0;
|
fpcsr_r[`OR1200_FPCSR_FPEE] <= 0;
|
end // else: !if(rst)
|
end // else: !if(rst)
|
end // always @ (posedge clk or posedge rst)
|
end // always @ (posedge clk or posedge rst)
|
|
|
//
|
//
|
// Comparison flag generation
|
// Comparison flag generation
|
//
|
//
|
always@(posedge clk)
|
always @*
|
begin
|
begin
|
if (fpu_op_r_is_comp)
|
// Get rid of top bit - is FPU op valid bit
|
begin
|
case({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]})
|
case(fpu_op_r)
|
|
`OR1200_FPCOP_SFEQ: begin
|
`OR1200_FPCOP_SFEQ: begin
|
flag <= aeqb;
|
flag = aeqb;
|
end
|
end
|
`OR1200_FPCOP_SFNE: begin
|
`OR1200_FPCOP_SFNE: begin
|
flag <= !aeqb;
|
flag = !aeqb;
|
end
|
end
|
`OR1200_FPCOP_SFGT: begin
|
`OR1200_FPCOP_SFGT: begin
|
flag <= blta & !aeqb;
|
flag = blta & !aeqb;
|
end
|
end
|
`OR1200_FPCOP_SFGE: begin
|
`OR1200_FPCOP_SFGE: begin
|
flag <= blta | aeqb;
|
flag = blta | aeqb;
|
end
|
end
|
`OR1200_FPCOP_SFLT: begin
|
`OR1200_FPCOP_SFLT: begin
|
flag <= altb & !aeqb;
|
flag = altb & !aeqb;
|
end
|
end
|
`OR1200_FPCOP_SFLE: begin
|
`OR1200_FPCOP_SFLE: begin
|
flag <= altb | aeqb;
|
flag = altb | aeqb;
|
end
|
end
|
default: begin
|
default: begin
|
flag <= 0;
|
flag = 0;
|
end
|
end
|
endcase // case (fpu_op_r)
|
endcase // case (fpu_op_r)
|
end // if (fpu_op_r_is_comp)
|
|
else
|
|
flag <= 0;
|
|
end // always@ (posedge clk)
|
end // always @*
|
|
|
assign flagforw = flag;
|
assign flagforw = flag;
|
|
|
// Determine here where we do the write, ie how much we pipeline the
|
// Determine here where we do the write, ie how much we pipeline the
|
// comparison
|
// comparison
|
assign flag_we = fpu_op_r_is_comp & (fpu_op_count == 2);
|
assign flag_we = fpu_op_r_is_comp & fpu_comp_done;
|
|
|
|
// MUX for outputs from arith and conversion modules
|
|
assign result = fpu_op_r_is_conv ? result_conv : result_arith;
|
|
|
|
//
|
|
// Instantiate FPU modules
|
|
//
|
|
|
|
`ifdef OR1200_FPU_ARITH_FPU100
|
|
|
|
// FPU 100 VHDL core from OpenCores.org: http://opencores.org/project,fpu100
|
|
// Used only for add,sub,mul,div
|
|
or1200_fpu_arith fpu_arith
|
|
(
|
|
.clk_i(clk),
|
|
.opa_i(a),
|
|
.opb_i(b),
|
|
.fpu_op_i({1'b0,fpu_op_r[1:0]}), // Only bottom 2 bits
|
|
.rmode_i(fpcsr_r[`OR1200_FPCSR_RM]),
|
|
.output_o(result_arith),
|
|
.start_i(fpu_op_valid_re & fpu_op_r_is_arith),
|
|
.ready_o(fpu_arith_done),
|
|
.ine_o(ine),
|
|
.overflow_o(overflow),
|
|
.underflow_o(underflow),
|
|
.div_zero_o(dbz),
|
|
.inf_o(inf),
|
|
.zero_o(zero),
|
|
.qnan_o(qnan),
|
|
.snan_o(snan)
|
|
);
|
|
|
// FP arithmetic module
|
`endif // `ifdef OR1200_FPU_ARITH_FPU100
|
fpu fpu0
|
|
|
// Logic for detection of signaling NaN on input
|
|
// signaling NaN: exponent is 8hff, [22] is zero, rest of fract is non-zero
|
|
// quiet NaN: exponent is 8hff, [22] is 1
|
|
reg a_is_snan, b_is_snan;
|
|
reg a_is_qnan, b_is_qnan;
|
|
|
|
always @(posedge clk)
|
|
begin
|
|
a_is_snan <= (a[30:23]==8'hff) & !a[22] & (|a[21:0]);
|
|
b_is_snan <= (b[30:23]==8'hff) & !b[22] & (|b[21:0]);
|
|
a_is_qnan <= (a[30:23]==8'hff) & a[22];
|
|
b_is_qnan <= (b[30:23]==8'hff) & b[22];
|
|
end
|
|
// Signal to indicate there was a signaling NaN on input
|
|
assign snan_in = a_is_snan | b_is_snan;
|
|
|
|
// Check for, add with opposite signed infinities, or subtract with
|
|
// same signed infinities.
|
|
reg a_is_inf, b_is_inf, a_b_sign_xor;
|
|
|
|
always @(posedge clk) // Register infinity status and sign difference of operands a and b
|

begin
|

a_is_inf <= (a[30:23]==8'hff) & !(|a[22:0]); // a is +/-inf: exponent all ones, fraction zero
|

b_is_inf <= (b[30:23]==8'hff) & !(|b[22:0]); // b is +/-inf: must test b's own fraction (was a[22:0])
|

a_b_sign_xor <= a[31] ^ b[31]; // 1 when the operand sign bits differ
|

end
|
|
|
|
assign inv_inf_op_in = (a_is_inf & b_is_inf) &
|
|
((a_b_sign_xor &
|
|
({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} ==
|
|
`OR1200_FPUOP_ADD)) |
|
|
(!a_b_sign_xor &
|
|
({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} ==
|
|
`OR1200_FPUOP_SUB))) ;
|
|
|
|
// Check if it's 0.0/0.0 to generate invalid signal (ignore sign bit)
|
|
reg a_is_zero, b_is_zero;
|
|
|
|
always @(posedge clk)
|
|
begin
|
|
a_is_zero <= !(|a[30:0]);
|
|
b_is_zero <= !(|b[30:0]);
|
|
end
|
|
assign dbz_in = ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} ==
|
|
`OR1200_FPUOP_DIV) & (a_is_zero & b_is_zero);
|
|
|
|
|
|
assign mul_z_inf = ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} ==
|
|
`OR1200_FPUOP_MUL) &
|
|
((a_is_zero & b_is_inf) | (b_is_zero & a_is_inf));
|
|
|
|
assign nan_in = (a_is_snan | b_is_snan | a_is_qnan | b_is_qnan);
|
|
|
|
// 32-bit integer <-> single precision floating point conversion unit
|
|
or1200_fpu_intfloat_conv fpu_intfloat_conv
|
(
|
(
|
.clk(clk),
|
.clk(clk),
|
.rmode(fpcsr_r[`OR1200_FPCSR_RM]),
|
.rmode(fpcsr_r[`OR1200_FPCSR_RM]),
|
.fpu_op(fpu_op[2:0]),
|
.fpu_op(fpu_op_r[2:0]),
|
.opa(a),
|
.opa(a),
|
.opb(b),
|
.out(result_conv),
|
.out(result),
|
.snan(snan_conv),
|
.latch_operand(fpu_latch_operand),
|
.ine(ine_conv),
|
.latch_op(fpu_latch_op),
|
.inv(inv_conv),
|
.inf(inf),
|
.overflow(overflow_conv),
|
.snan(snan),
|
.underflow(underflow_conv),
|
.qnan(qnan),
|
.zero(zero_conv)
|
.ine(ine),
|
|
.overflow(overflow),
|
|
.underflow(underflow),
|
|
.zero(zero),
|
|
.div_by_zero(div_by_zero)
|
|
);
|
);
|
|
|
// FP comparator
|
// 7-bit shift reg used as conversion ready counter (done is taken from bit 6)
|
fcmp fcmp0
|
reg [6:0] fpu_conv_shr;
|
|
always @(posedge clk)
|
|
fpu_conv_shr <= {fpu_conv_shr[5:0],fpu_check_op & fpu_op_is_conv};
|
|
assign fpu_conv_done = fpu_conv_shr[6];
|
|
|
|
// Single precision floating point number comparison module
|
|
or1200_fpu_fcmp fpu_fcmp
|
(
|
(
|
.opa(a),
|
.opa(a),
|
.opb(b),
|
.opb(b),
|
.unordered(unordered),
|
.unordered(unordered),
|
// I am convinced the comparison logic is wrong way around in this
|
// I am convinced the comparison logic is wrong way around in this
|
// module, simplest to swap them on output -- julius
|
// module, simplest to swap them on output -- julius
|
|
|
.altb(blta),
|
.altb(blta),
|
.blta(altb),
|
.blta(altb),
|
.aeqb(aeqb),
|
.aeqb(aeqb),
|
.inf(cmp_inf),
|
.inf(inf_cmp),
|
.zero(cmp_zero));
|
.zero(zero_cmp));
|
|
|
|
reg fpu_op_valid_re_r; // fpu_op_valid_re delayed by one clock
|

always @(posedge clk)
|

fpu_op_valid_re_r <= fpu_op_valid_re; // no reset: simply follows fpu_op_valid_re one cycle later
|

|

assign fpu_comp_done = fpu_op_valid_re_r & fpu_op_r_is_comp; // comparison done: delayed op-valid pulse while registered op has fpu_op_r[3] set
|
|
|
|
// Comparison invalid when sNaN in on an equal comparison, or any NaN
|
|
// for any other comparison.
|
|
assign inv_comp = (snan_in & ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]}
|
|
== `OR1200_FPCOP_SFEQ)) |
|
|
(nan_in & ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]}
|
|
!= `OR1200_FPCOP_SFEQ));
|
|
|
`endif // !`ifndef OR1200_FPU_IMPLEMENTED
|
`endif // !`ifndef OR1200_FPU_IMPLEMENTED
|
|
|
endmodule // or1200_fpu
|
endmodule // or1200_fpu
|
|
|
No newline at end of file
|
No newline at end of file
|