//////////////////////////////////////////////////////////////////////
////                                                              ////
////  OR1200's FPU Wrapper                                        ////
////                                                              ////
////  This file is part of the OpenRISC 1200 project              ////
////  http://opencores.org/project,or1k                           ////
////                                                              ////
////  Description                                                 ////
////  Wrapper for floating point unit.                            ////
////  Interface based on MULT/MAC unit.                           ////
////                                                              ////
////  To Do:                                                      ////
////   - remainder instruction implementation                     ////
////   - registering in/around compare unit                       ////
////                                                              ////
////  Author(s):                                                  ////
////      - Julius Baxter, julius@opencores.org                   ////
////                                                              ////
//////////////////////////////////////////////////////////////////////
////                                                              ////
////  Copyright (C) 2009 Authors and OPENCORES.ORG                ////
////                                                              ////
////  This source file may be used and distributed without        ////
////  restriction provided that this copyright statement is not   ////
////  removed from the file and that any derivative work contains ////
////  the original copyright notice and the associated disclaimer.////
////                                                              ////
////  This source file is free software; you can redistribute it  ////
////  and/or modify it under the terms of the GNU Lesser General  ////
////  Public License as published by the Free Software Foundation;////
////  either version 2.1 of the License, or (at your option) any  ////
////  later version.                                              ////
////                                                              ////
////  This source is distributed in the hope that it will be      ////
////  useful, but WITHOUT ANY WARRANTY; without even the implied  ////
////  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR     ////
////  PURPOSE. See the GNU Lesser General Public License for more ////
////  details.                                                    ////
////                                                              ////
////  You should have received a copy of the GNU Lesser General   ////
////  Public License along with this source; if not, download it  ////
////  from http://www.opencores.org/lgpl.shtml                    ////
////                                                              ////
//////////////////////////////////////////////////////////////////////

// synopsys translate_off
`include "timescale.v"
// synopsys translate_on
`include "or1200_defines.v"

//
// or1200_fpu
//
// Wraps the FP arithmetic unit (fpu) and the FP comparator (fcmp) and
// maintains the FPCSR register. When OR1200_FPU_IMPLEMENTED is not
// defined the module is a stub that ties every output to zero.
//
module or1200_fpu(
    // Clock and reset
    clk, rst,

    // FPU interface
    ex_freeze, a, b, fpu_op, result,

    // Flag controls
    flagforw, flag_we,

    // Exception signal
    sig_fp, except_started,

    // SPR interface
    fpcsr_we, fpcsr,
    spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
    );

   parameter width = `OR1200_OPERAND_WIDTH;

   //
   // I/O
   //

   //
   // Clock and reset
   //
   input                             clk;
   input                             rst;        // Asynchronous, active high

   //
   // FPU interface
   //
   input                             ex_freeze;  // Op is only accepted while low
   input [width-1:0]                 a;          // Operand A
   input [width-1:0]                 b;          // Operand B, also FPCSR write data
   input [`OR1200_FPUOP_WIDTH-1:0]   fpu_op;     // Top bit marks an FPU op,
                                                 // bit 3 marks a comparison
   output [width-1:0]                result;     // Arithmetic result

   //
   // Flag signals
   //
   output                            flagforw;   // Comparison result
   output                            flag_we;    // Comparison result write enable

   //
   // FPCSR interface
   //
   input                             fpcsr_we;   // Load FPCSR from operand b
   output [`OR1200_FPCSR_WIDTH-1:0]  fpcsr;      // Current FPCSR value

   //
   // Exception signal
   //
   output                            sig_fp;         // FP exception request
   input                             except_started; // Clears FPCSR[FPEE]

   //
   // SPR interface
   //
   // Only spr_dat_o is driven (tied to zero); the SPR inputs are not
   // read anywhere in this module.
   //
   input                             spr_cs;
   input                             spr_write;
   input [31:0]                      spr_addr;
   input [31:0]                      spr_dat_i;
   output [31:0]                     spr_dat_o;

`ifndef OR1200_FPU_IMPLEMENTED

   //
   // No FPU needed: tie off every output so that nothing is left
   // floating in the parent design.
   //
   assign result    = 0;
   assign flagforw  = 0;
   assign flag_we   = 0;
   assign sig_fp    = 0;
   assign spr_dat_o = 0;
   assign fpcsr     = 0;

`else

   //
   // Internals
   //
   reg [2:0]                         fpu_op_count;      // Down-counter per FPU op
   reg [`OR1200_FPUOP_WIDTH:0]       fpu_op_r;          // Registered comparison op
   reg [`OR1200_FPCSR_WIDTH-1:0]     fpcsr_r;           // FPCSR storage
   reg                               fpu_latch_operand; // fpu_latch_op, one clock later
   wire                              fpu_check_op;
   wire                              fpu_latch_op;
   wire                              inf, snan, qnan, ine, overflow,
                                     underflow, zero, div_by_zero;
   wire                              fpu_op_is_comp, fpu_op_r_is_comp;
   wire                              altb, blta, aeqb, cmp_inf, cmp_zero,
                                     unordered; // unordered is not used below
   reg                               flag;

   // The SPR read port carries no data in this implementation.
   assign spr_dat_o = 0;

   assign fpcsr = fpcsr_r;

   // Request an FP exception when FPCSR[FPEE] is set and any bit in
   // FPCSR[WIDTH-1:OVF] is set.
   assign sig_fp = fpcsr_r[`OR1200_FPCSR_FPEE]
                   & (|fpcsr_r[`OR1200_FPCSR_WIDTH-1:`OR1200_FPCSR_OVF]);

   // Generate signals to latch fpu_op from decode instruction, then latch
   // operands when they appear during execute stage

   // An FPU op is present when the pipeline is not frozen and the top
   // bit of fpu_op is set.
   assign fpu_check_op = (!ex_freeze & fpu_op[`OR1200_FPUOP_WIDTH-1]);

   // Bit 3 selects the comparison ops, both for the incoming op and
   // for the registered copy.
   assign fpu_op_is_comp = fpu_op[3];

   assign fpu_op_r_is_comp = fpu_op_r[3];

   // Arithmetic (non-comparison) op: tell the arithmetic unit to latch it.
   assign fpu_latch_op = fpu_check_op & !fpu_op_is_comp;

   // Operand latch strobe, the same condition delayed by one clock.
   always @(posedge clk)
     fpu_latch_operand <= fpu_check_op & !fpu_op_is_comp;

   // Register fpu_op on comparisons, clear otherwise, remove top bit.
   // While ex_freeze is high the previous value is held.
   always @(posedge clk)
     fpu_op_r <= (fpu_check_op & fpu_op_is_comp) ?
                 {1'b0,fpu_op[`OR1200_FPUOP_WIDTH-2:0]} : !ex_freeze ?
                 0 : fpu_op_r;

   //
   // Counter for each FPU operation
   // Loaded at start, counts down
   //
   always @(posedge clk or posedge rst) begin
      if (rst)
        fpu_op_count <= 0;
      else
        if (|fpu_op_count)
          fpu_op_count <= fpu_op_count - 1;
        else if(fpu_check_op)
          fpu_op_count <= 5;
   end

   //
   // FPCSR register
   //
   // Priority: an explicit write (fpcsr_we) loads the register from
   // operand b; otherwise the status flags are captured when the op
   // counter reaches 1. except_started clears FPEE, and because it is
   // the last assignment in the block it wins over both of the above
   // for that bit.
   //
   always @(posedge clk or posedge rst) begin
      if (rst)
        fpcsr_r <= 0;
      else
        begin
           if (fpcsr_we)
             fpcsr_r <= b[`OR1200_FPCSR_WIDTH-1:0];
           else if (fpu_op_count == 1)
             begin
                fpcsr_r[`OR1200_FPCSR_OVF] <= overflow;
                fpcsr_r[`OR1200_FPCSR_UNF] <= underflow;
                fpcsr_r[`OR1200_FPCSR_SNF] <= snan;
                fpcsr_r[`OR1200_FPCSR_QNF] <= qnan;
                fpcsr_r[`OR1200_FPCSR_ZF]  <= zero |
                                              (cmp_zero & fpu_op_r_is_comp);
                fpcsr_r[`OR1200_FPCSR_IXF] <= ine;
                fpcsr_r[`OR1200_FPCSR_IVF] <= 0; // Not used by this FPU
                fpcsr_r[`OR1200_FPCSR_INF] <= inf |
                                              (cmp_inf & fpu_op_r_is_comp);
                fpcsr_r[`OR1200_FPCSR_DZF] <= div_by_zero;
             end // if (fpu_op_count == 1)
           if (except_started)
             fpcsr_r[`OR1200_FPCSR_FPEE] <= 0;
        end // else: !if(rst)
   end // always @ (posedge clk or posedge rst)

   //
   // Comparison flag generation
   //
   // The flag is registered from the comparator outputs according to
   // the registered comparison op; it is zero whenever no comparison
   // is registered or the op code is not recognised.
   //
   always@(posedge clk)
     begin
        if (fpu_op_r_is_comp)
          begin
             case(fpu_op_r)
               `OR1200_FPCOP_SFEQ: begin
                  flag <= aeqb;
               end
               `OR1200_FPCOP_SFNE: begin
                  flag <= !aeqb;
               end
               `OR1200_FPCOP_SFGT: begin
                  flag <= blta & !aeqb;
               end
               `OR1200_FPCOP_SFGE: begin
                  flag <= blta | aeqb;
               end
               `OR1200_FPCOP_SFLT: begin
                  flag <= altb & !aeqb;
               end
               `OR1200_FPCOP_SFLE: begin
                  flag <= altb | aeqb;
               end
               default: begin
                  flag <= 0;
               end
             endcase // case (fpu_op_r)
          end // if (fpu_op_r_is_comp)
        else
          flag <= 0;
     end // always@ (posedge clk)

   assign flagforw = flag;

   // Determine here where we do the write, ie how much we pipeline the
   // comparison
   assign flag_we = fpu_op_r_is_comp & (fpu_op_count == 2);

   // FP arithmetic module
   fpu fpu0
     (
      .clk(clk),
      .rmode(fpcsr_r[`OR1200_FPCSR_RM]),
      .fpu_op(fpu_op[2:0]),
      .opa(a),
      .opb(b),
      .out(result),
      .latch_operand(fpu_latch_operand),
      .latch_op(fpu_latch_op),
      .inf(inf),
      .snan(snan),
      .qnan(qnan),
      .ine(ine),
      .overflow(overflow),
      .underflow(underflow),
      .zero(zero),
      .div_by_zero(div_by_zero)
      );

   // FP comparator
   fcmp fcmp0
     (
      .opa(a),
      .opb(b),
      .unordered(unordered),
      // I am convinced the comparison logic is wrong way around in this
      // module, simplest to swap them on output -- julius
      .altb(blta),
      .blta(altb),
      .aeqb(aeqb),
      .inf(cmp_inf),
      .zero(cmp_zero));

`endif // !`ifndef OR1200_FPU_IMPLEMENTED

endmodule // or1200_fpu