Line 8... |
Line 8... |
//// Description ////
//// Description ////
//// Wrapper for floating point unit. ////
//// Wrapper for floating point unit. ////
//// Interface based on MULT/MAC unit. ////
//// Interface based on MULT/MAC unit. ////
//// ////
//// ////
//// To Do: ////
//// To Do: ////
//// - remainder instruction implementation ////
//// - lf.rem.s and lf.madd.s instruction support ////
//// - registering in/around compare unit ////
//// - implement FP SPRs as needed ////
//// ////
//// ////
//// Author(s): ////
//// Author(s): ////
//// - Julius Baxter, ////
//// - Julius Baxter, ////
//// ////
//// ////
//// ////
//// ////
//// Copyright (C) 2009 Authors and OPENCORES.ORG ////
//// Copyright (C) 2009,2010 Authors and OPENCORES.ORG ////
//// ////
//// ////
//// This source file may be used and distributed without ////
//// This source file may be used and distributed without ////
//// restriction provided that this copyright statement is not ////
//// restriction provided that this copyright statement is not ////
//// removed from the file and that any derivative work contains ////
//// removed from the file and that any derivative work contains ////
//// the original copyright notice and the associated disclaimer. ////
//// the original copyright notice and the associated disclaimer. ////
Line 51... |
Line 51... |
module or1200_fpu(
module or1200_fpu(
// Clock and reset
// Clock and reset
clk, rst,
clk, rst,
// FPU interface
// FPU interface
ex_freeze, a, b, fpu_op, result,
ex_freeze, a, b, fpu_op, result, done,
// Flag controls
// Flag controls
flagforw, flag_we,
flagforw, flag_we,
// Exception signal
// Exception signal
sig_fp, except_started,
sig_fp, except_started,
// SPR interface
// FPCSR system register
fpcsr_we, fpcsr,
fpcsr_we, fpcsr,
// SPR interface -- currently unused
spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
parameter width = `OR1200_OPERAND_WIDTH;
parameter width = `OR1200_OPERAND_WIDTH;
Line 84... |
Line 86... |
input ex_freeze;
input ex_freeze;
input [width-1:0] a;
input [width-1:0] a;
input [width-1:0] b;
input [width-1:0] b;
input [`OR1200_FPUOP_WIDTH-1:0] fpu_op;
input [`OR1200_FPUOP_WIDTH-1:0] fpu_op;
output [width-1:0] result;
output [width-1:0] result;
output done;
// Flag signals
// Flag signals
output flagforw;
output flagforw;
output flag_we;
output flag_we;
// FPCSR interface
// FPCSR interface
input fpcsr_we;
input fpcsr_we;
output [`OR1200_FPCSR_WIDTH-1:0] fpcsr;
output [`OR1200_FPCSR_WIDTH-1:0] fpcsr;
Line 123... |
Line 125... |
assign result = 0;
assign result = 0;
assign flagforw = 0;
assign flagforw = 0;
assign flag_we = 0;
assign flag_we = 0;
assign sig_fp = 0;
assign sig_fp = 0;
assign spr_dat_o = 0;
assign spr_dat_o = 0;
assign fpcsr = 0;
assign done = 1;
// Internals
// Internals
reg [2:0] fpu_op_count;
wire fpu_op_is_arith, fpu_op_is_conv,
wire fpu_op_r_is_arith, fpu_op_r_is_conv,
wire fpu_arith_done, fpu_conv_done,
wire [width-1:0] result_arith, result_conv;
reg [`OR1200_FPUOP_WIDTH:0] fpu_op_r;
reg [`OR1200_FPUOP_WIDTH:0] fpu_op_r;
reg [`OR1200_FPCSR_WIDTH-1:0] fpcsr_r;
reg [`OR1200_FPCSR_WIDTH-1:0] fpcsr_r;
reg fpu_latch_operand;
wire fpu_op_valid;
reg fpu_op_valid_re;
wire fpu_check_op;
wire fpu_check_op;
wire fpu_latch_op;
wire inf, inv_inf_op_in,snan, snan_in,qnan,
wire inf, snan, qnan, ine, overflow,
ine, overflow, underflow, zero, dbz,
underflow, zero, div_by_zero;
dbz_in, mul_z_inf, nan_in;
wire fpu_op_is_comp, fpu_op_r_is_comp;
wire altb, blta, aeqb, inf_cmp, zero_cmp,
wire altb, blta, aeqb, cmp_inf, cmp_zero,
unordered ;
unordered ;
wire snan_conv, ine_conv, inv_conv,
zero_conv, underflow_conv,
wire inv_comp;
reg flag;
reg flag;
assign spr_dat_o = 0;
assign fpcsr = fpcsr_r;
assign fpcsr = fpcsr_r;
assign sig_fp = fpcsr_r[`OR1200_FPCSR_FPEE]
assign sig_fp = fpcsr_r[`OR1200_FPCSR_FPEE]
& (|fpcsr_r[`OR1200_FPCSR_WIDTH-1:`OR1200_FPCSR_OVF]);
& (|fpcsr_r[`OR1200_FPCSR_WIDTH-1:`OR1200_FPCSR_OVF]);
// Top bit indicates FPU instruction
assign fpu_op_valid = fpu_op[`OR1200_FPUOP_WIDTH-1];
assign fpu_check_op = !ex_freeze & fpu_op_valid;
// Generate signals to latch fpu_op from decode instruction, then latch
// Generate signals to latch fpu_op from decode instruction, then latch
// operands when they appear during execute stage
// operands when they appear during execute stage
assign fpu_check_op = (!ex_freeze & fpu_op[`OR1200_FPUOP_WIDTH-1]);
assign fpu_op_is_arith = !(|fpu_op[3:2]);
assign fpu_op_is_conv = fpu_op[2] & !fpu_op[3];
assign fpu_op_is_comp = fpu_op[3];
assign fpu_op_is_comp = fpu_op[3];
assign fpu_op_r_is_arith = !(|fpu_op_r[3:2]);
assign fpu_op_r_is_conv = fpu_op_r[2] & !fpu_op_r[3];
assign fpu_op_r_is_comp = fpu_op_r[3];
assign fpu_op_r_is_comp = fpu_op_r[3];
assign fpu_latch_op = fpu_check_op & !fpu_op_is_comp;
assign done = (fpu_op_r_is_arith & fpu_arith_done) |
(fpu_op_r_is_conv & fpu_conv_done) |
(fpu_op_r_is_comp & fpu_comp_done) ;
// Register fpu_op (remove FPU op valid bit [7], replace with 0)
always @(posedge clk)
always @(posedge clk)
fpu_latch_operand <= fpu_check_op & !fpu_op_is_comp;
if (fpu_check_op)
fpu_op_r <= {1'b0,fpu_op[`OR1200_FPUOP_WIDTH-2:0]};
// Register fpu_op on comparisons, clear otherwise, remove top bit
// Indicate new FPU op
always @(posedge clk)
always @(posedge clk or posedge rst)
fpu_op_r <= (fpu_check_op & fpu_op_is_comp) ?
{1'b0,fpu_op[`OR1200_FPUOP_WIDTH-2:0]} : !ex_freeze ?
0 : fpu_op_r;
// Counter for each FPU operation
// Loaded at start, counts down
always @(posedge clk or posedge rst) begin
if (rst)
if (rst)
fpu_op_count <= 0;
fpu_op_valid_re <= 0;
else if (fpu_op_valid_re)
if (|fpu_op_count)
fpu_op_valid_re <= 0;
fpu_op_count <= fpu_op_count - 1;
else if(fpu_check_op)
else if(fpu_check_op)
fpu_op_count <= 5;
fpu_op_valid_re <= 1;
// FPCSR register
// FPCSR system group register implementation
always @(posedge clk or posedge rst) begin
always @(posedge clk or posedge rst) begin
if (rst)
if (rst)
fpcsr_r <= 0;
fpcsr_r <= 0;
if (fpcsr_we)
if (fpcsr_we)
fpcsr_r <= b[`OR1200_FPCSR_WIDTH-1:0];
fpcsr_r <= b[`OR1200_FPCSR_WIDTH-1:0];
else if (fpu_op_count == 1)
else if (done)
fpcsr_r[`OR1200_FPCSR_OVF] <= overflow;
fpcsr_r[`OR1200_FPCSR_OVF] <= (overflow & fpu_op_r_is_arith);
fpcsr_r[`OR1200_FPCSR_UNF] <= underflow;
fpcsr_r[`OR1200_FPCSR_UNF] <= (underflow & fpu_op_r_is_arith) |
fpcsr_r[`OR1200_FPCSR_SNF] <= snan;
(underflow_conv & fpu_op_r_is_conv);
fpcsr_r[`OR1200_FPCSR_QNF] <= qnan;
fpcsr_r[`OR1200_FPCSR_SNF] <= (snan & fpu_op_r_is_arith)|
fpcsr_r[`OR1200_FPCSR_ZF] <= zero |
(snan_conv & fpu_op_r_is_conv);
(cmp_zero & fpu_op_r_is_comp);
fpcsr_r[`OR1200_FPCSR_QNF] <= (qnan & fpu_op_r_is_arith);
fpcsr_r[`OR1200_FPCSR_IXF] <= ine;
fpcsr_r[`OR1200_FPCSR_ZF] <= (zero & fpu_op_r_is_arith) |
fpcsr_r[`OR1200_FPCSR_IVF] <= 0; // Not used by this FPU
(zero_cmp & fpu_op_r_is_comp) |
fpcsr_r[`OR1200_FPCSR_INF] <= inf |
(zero_conv & fpu_op_r_is_conv);
(cmp_inf & fpu_op_r_is_comp);
fpcsr_r[`OR1200_FPCSR_IXF] <= (ine & fpu_op_r_is_arith) |
fpcsr_r[`OR1200_FPCSR_DZF] <= div_by_zero;
(ine_conv & fpu_op_r_is_conv);
end // if (fpu_op_count == 1)
fpcsr_r[`OR1200_FPCSR_IVF] <=
((snan_in | dbz_in | inv_inf_op_in | mul_z_inf) &
fpu_op_r_is_arith) |
((inv_conv | snan_conv) & fpu_op_r_is_conv) |
(inv_comp & fpu_op_r_is_comp);
fpcsr_r[`OR1200_FPCSR_INF] <= (inf & fpu_op_r_is_arith) |
(inf_cmp & fpu_op_r_is_comp);
fpcsr_r[`OR1200_FPCSR_DZF] <= (dbz & fpu_op_r_is_arith);
end // if (fpu_arith_done | fpu_conv_done)
if (except_started)
if (except_started)
fpcsr_r[`OR1200_FPCSR_FPEE] <= 0;
fpcsr_r[`OR1200_FPCSR_FPEE] <= 0;
end // else: !if(rst)
end // else: !if(rst)
end // always @ (posedge clk or posedge rst)
end // always @ (posedge clk or posedge rst)
// Comparison flag generation
// Comparison flag generation
always@(posedge clk)
always @*
if (fpu_op_r_is_comp)
// Get rid of top bit - is FPU op valid bit
`OR1200_FPCOP_SFEQ: begin
`OR1200_FPCOP_SFEQ: begin
flag <= aeqb;
flag = aeqb;
`OR1200_FPCOP_SFNE: begin
`OR1200_FPCOP_SFNE: begin
flag <= !aeqb;
flag = !aeqb;
`OR1200_FPCOP_SFGT: begin
`OR1200_FPCOP_SFGT: begin
flag <= blta & !aeqb;
flag = blta & !aeqb;
`OR1200_FPCOP_SFGE: begin
`OR1200_FPCOP_SFGE: begin
flag <= blta | aeqb;
flag = blta | aeqb;
`OR1200_FPCOP_SFLT: begin
`OR1200_FPCOP_SFLT: begin
flag <= altb & !aeqb;
flag = altb & !aeqb;
`OR1200_FPCOP_SFLE: begin
`OR1200_FPCOP_SFLE: begin
flag <= altb | aeqb;
flag = altb | aeqb;
default: begin
default: begin
flag <= 0;
flag = 0;
endcase // case (fpu_op_r)
endcase // case (fpu_op_r)
end // if (fpu_op_r_is_comp)
flag <= 0;
end // always@ (posedge clk)
end // always@ (posedge clk)
assign flagforw = flag;
assign flagforw = flag;
// Determine here where we do the write, ie how much we pipeline the
// Determine here where we do the write, ie how much we pipeline the
// comparison
// comparison
assign flag_we = fpu_op_r_is_comp & (fpu_op_count == 2);
assign flag_we = fpu_op_r_is_comp & fpu_comp_done;
// MUX for outputs from arith and conversion modules
assign result = fpu_op_r_is_conv ? result_conv : result_arith;
// Instantiate FPU modules
`ifdef OR1200_FPU_ARITH_FPU100
// FPU 100 VHDL core from,fpu100
// Used only for add,sub,mul,div
or1200_fpu_arith fpu_arith
.fpu_op_i({1'b0,fpu_op_r[1:0]}), // Only bottom 2 bits
.start_i(fpu_op_valid_re & fpu_op_r_is_arith),
// FP arithmetic module
`endif // `ifdef OR1200_FPU_ARITH_FPU100
fpu fpu0
// Logic for detection of signaling NaN on input
// signaling NaN: exponent is 8hff, [22] is zero, rest of fract is non-zero
// quiet NaN: exponent is 8hff, [22] is 1
// Registered NaN classification of the raw operands a and b.
// Both kinds require bits [30:23] to equal 8'hff. Bit [22] then selects:
//   signaling: [22] clear and [21:0] non-zero
//   quiet:     [22] set
// The flags are sampled on every rising clock edge, so they lag the
// operand inputs by one cycle.
reg a_is_snan, b_is_snan;
reg a_is_qnan, b_is_qnan;
always @(posedge clk)
  begin
    // begin/end is required: without it only the first assignment
    // belongs to the always block.
    a_is_snan <= (a[30:23]==8'hff) & !a[22] & (|a[21:0]);
    b_is_snan <= (b[30:23]==8'hff) & !b[22] & (|b[21:0]);
    a_is_qnan <= (a[30:23]==8'hff) & a[22];
    b_is_qnan <= (b[30:23]==8'hff) & b[22];
  end
// Signal to indicate there was a signaling NaN on input
assign snan_in = a_is_snan | b_is_snan;
// Check for, add with opposite signed infinities, or subtract with
// same signed infinities.
// Registered infinity detection for operands a and b, plus the XOR of
// their sign bits. An operand is flagged infinite when bits [30:23] equal
// 8'hff and its own bits [22:0] are all zero.
reg a_is_inf, b_is_inf, a_b_sign_xor;
always @(posedge clk)
  begin
    a_is_inf <= (a[30:23]==8'hff) & !(|a[22:0]);
    // Test b's own fraction field. Checking a[22:0] here would classify b
    // from a's fraction and misreport b as (non-)infinite.
    b_is_inf <= (b[30:23]==8'hff) & !(|b[22:0]);
    a_b_sign_xor <= a[31] ^ b[31];
  end
// Invalid infinity operation: both registered operands are infinite and
// either a SUB sees matching signs or an ADD sees differing signs.
assign inv_inf_op_in =
   a_is_inf & b_is_inf &
   ((!a_b_sign_xor &
     ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} == `OR1200_FPUOP_SUB)) |
    (a_b_sign_xor &
     ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} == `OR1200_FPUOP_ADD)));
// Check if it's 0.0/0.0 to generate invalid signal (ignore sign bit)
// Registered zero detection for operands a and b. Only bits [30:0] are
// examined, so bit [31] does not affect the result.
reg a_is_zero, b_is_zero;
always @(posedge clk)
  begin
    // begin/end keeps both assignments inside the clocked block.
    a_is_zero <= !(|a[30:0]);
    b_is_zero <= !(|b[30:0]);
  end
// DIV issued while both registered operands are zero.
assign dbz_in =
   (a_is_zero & b_is_zero) &
   ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} == `OR1200_FPUOP_DIV);

// MUL issued with one registered operand zero and the other infinite.
assign mul_z_inf =
   ((b_is_zero & a_is_inf) | (a_is_zero & b_is_inf)) &
   ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} == `OR1200_FPUOP_MUL);

// Any NaN (quiet or signaling) on either registered operand.
assign nan_in = a_is_qnan | b_is_qnan | a_is_snan | b_is_snan;
// 32-bit integer <-> single precision floating point conversion unit
or1200_fpu_intfloat_conv fpu_intfloat_conv
// FP comparator
// 7-stage shift reg ([6:0], tapped at bit 6) for conversion ready counter
fcmp fcmp0
// Conversion-done delay line. A one is shifted into bit 0 on the clock
// edge where a conversion op is accepted (fpu_check_op & fpu_op_is_conv).
// It reaches bit 6, which drives fpu_conv_done, on the seventh edge.
reg [6:0] fpu_conv_shr;
always @(posedge clk)
  begin
    fpu_conv_shr <= {fpu_conv_shr[5:0], (fpu_check_op & fpu_op_is_conv)};
  end
assign fpu_conv_done = fpu_conv_shr[6];
// Single precision floating point number comparison module
or1200_fpu_fcmp fpu_fcmp
// I am convinced the comparison logic is wrong way around in this
// I am convinced the comparison logic is wrong way around in this
// module, simplest to swap them on output -- julius
// module, simplest to swap them on output -- julius
// One-cycle delayed copy of fpu_op_valid_re. The compare-done strobe is
// that delayed pulse, gated by the registered op being a comparison.
reg fpu_op_valid_re_r;
always @(posedge clk)
  begin
    fpu_op_valid_re_r <= fpu_op_valid_re;
  end
assign fpu_comp_done = fpu_op_r_is_comp & fpu_op_valid_re_r;
// Comparison invalid when sNaN in on an equal comparison, or any NaN
// for any other comparison.
// Invalid comparison: any NaN input when the registered op is not SFEQ,
// or a signaling NaN input when it is SFEQ.
assign inv_comp =
   (nan_in &
    ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} != `OR1200_FPCOP_SFEQ)) |
   (snan_in &
    ({1'b0,fpu_op_r[`OR1200_FPUOP_WIDTH-2:0]} == `OR1200_FPCOP_SFEQ));
`endif // !`ifndef OR1200_FPU_IMPLEMENTED
`endif // !`ifndef OR1200_FPU_IMPLEMENTED
endmodule // or1200_fpu
endmodule // or1200_fpu
No newline at end of file
No newline at end of file