URL https://opencores.org/ocsvn/or1200_hp/or1200_hp/trunk
Subversion Repositories or1200_hp

[/] [or1200_hp/] [trunk/] [rtl/] [rtl_cm3/] [verilog/] [or1200_mult_mac.v] - Rev 2

Compare with Previous | Blame | View Log
//////////////////////////////////////////////////////////////////////
////                                                              ////
////  OR1200's Top level multiplier and MAC                       ////
////                                                              ////
////  This file is part of the OpenRISC 1200 project              ////
////  http://www.opencores.org/cores/or1k/                        ////
////                                                              ////
////  Description                                                 ////
////  Multiplier is 32x32 however multiply instructions only      ////
////  use lower 32 bits of the result. MAC is 32x32=64+64.        ////
////                                                              ////
////  To Do:                                                      ////
////   - make signed division better, w/o negating the operands   ////
////                                                              ////
////  Author(s):                                                  ////
////      - Damjan Lampret, lampret@opencores.org                 ////
////                                                              ////
//////////////////////////////////////////////////////////////////////
////                                                              ////
//// Copyright (C) 2000 Authors and OPENCORES.ORG                 ////
////                                                              ////
//// This source file may be used and distributed without         ////
//// restriction provided that this copyright statement is not    ////
//// removed from the file and that any derivative work contains  ////
//// the original copyright notice and the associated disclaimer. ////
////                                                              ////
//// This source file is free software; you can redistribute it   ////
//// and/or modify it under the terms of the GNU Lesser General   ////
//// Public License as published by the Free Software Foundation; ////
//// either version 2.1 of the License, or (at your option) any   ////
//// later version.                                               ////
////                                                              ////
//// This source is distributed in the hope that it will be       ////
//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
//// PURPOSE.  See the GNU Lesser General Public License for more ////
//// details.                                                     ////
////                                                              ////
//// You should have received a copy of the GNU Lesser General    ////
//// Public License along with this source; if not, download it   ////
//// from http://www.opencores.org/lgpl.shtml                     ////
////                                                              ////
//////////////////////////////////////////////////////////////////////
//
// CVS Revision History
//
// $Log: not supported by cvs2svn $
// Revision 1.4  2004/06/08 18:17:36  lampret
// Non-functional changes. Coding style fixes.
//
// Revision 1.3  2003/04/24 00:16:07  lampret
// No functional changes. Added defines to disable implementation of multiplier/MAC
//
// Revision 1.2  2002/09/08 05:52:16  lampret
// Added optional l.div/l.divu insns. By default they are disabled.
//
// Revision 1.1  2002/01/03 08:16:15  lampret
// New prefixes for RTL files, prefixed module names. Updated cache controllers and MMUs.
//
// Revision 1.3  2001/10/21 17:57:16  lampret
// Removed params from generic_XX.v. Added translate_off/on in sprs.v and id.v. Removed spr_addr from dc.v and ic.v. Fixed CR+LF.
//
// Revision 1.2  2001/10/14 13:12:09  lampret
// MP3 version.
//
// Revision 1.1.1.1  2001/10/06 10:18:38  igorm
// no message
//
//
 
// synopsys translate_off
`include "timescale.v"
// synopsys translate_on
`include "or1200_defines.v"
 
//
// or1200_mult_mac_cm3
//
// Multiplier / MAC unit with an optional iterative divider.
//
// Besides the original clk/rst pair the module takes two extra clocks,
// clk_i_cml_1 and clk_i_cml_2. They clock the *_cml_1 and *_cml_2
// registers declared below, which form delay chains on selected inputs
// and on the module's own state registers (see the two always blocks at
// the end of the module). The "SynEDA CoreMultiplier" comments in front
// of each assignment list which signals were replaced by their delayed
// copies.
//
// NOTE(review): only the code selected by the default-looking
// configuration seems to have been fully converted to the delayed
// signals. For example, div_cntr (OR1200_IMPL_DIV) has no _cml_ copies,
// and the OR1200_LOWPWR_MULT variant of `y` reads `b` directly while
// the regular variant reads b_cml_2. Confirm before enabling those
// options.
//
// Ports:
//   clk_i_cml_1, clk_i_cml_2  clocks of the first / second delay stage
//   clk, rst                  clock and asynchronous, active-high reset
//   ex_freeze                 gates the update of mul_prod_r and the
//                             clearing of mac_r
//   id_macrc_op, macrc_op     l.macrc indication (see mac_stall_r and
//                             mac_r below)
//   a, b                      operands
//   mac_op, alu_op            operation selects
//   result                    selected multiplier/divider/MAC result
//   mac_stall_r               registered stall request
//   spr_*                     SPR access to the MAC accumulator
//
module or1200_mult_mac_cm3(
		clk_i_cml_1,
		clk_i_cml_2,
 
	// Clock and reset
	clk, rst,
 
	// Multiplier/MAC interface
	ex_freeze, id_macrc_op, macrc_op, a, b, mac_op, alu_op, result, mac_stall_r,
 
	// SPR interface
	spr_cs, spr_write, spr_addr, spr_dat_i, spr_dat_o
);
 
 
//
// Delay-stage clocks and delay registers.
// <name>_cml_1 samples <name> on clk_i_cml_1, <name>_cml_2 samples
// <name>_cml_1 on clk_i_cml_2. Exceptions: ex_freeze_cml_2 and
// spr_write_cml_2 sample their source signal directly on clk_i_cml_2,
// and `a` only has a first-stage copy.
//
input clk_i_cml_1;
input clk_i_cml_2;
reg  ex_freeze_cml_2;
reg  macrc_op_cml_2;
reg  macrc_op_cml_1;
reg [ 32 - 1 : 0 ] a_cml_1;
reg [ 32 - 1 : 0 ] b_cml_2;
reg [ 32 - 1 : 0 ] b_cml_1;
reg [ 2 - 1 : 0 ] mac_op_cml_2;
reg [ 2 - 1 : 0 ] mac_op_cml_1;
reg  mac_stall_r_cml_2;
reg  mac_stall_r_cml_1;
reg  spr_write_cml_2;
reg [ 31 : 0 ] spr_addr_cml_2;
reg [ 31 : 0 ] spr_addr_cml_1;
reg [ 31 : 0 ] spr_dat_i_cml_2;
reg [ 31 : 0 ] spr_dat_i_cml_1;
reg [ 2 * 32 - 1 : 0 ] mul_prod_r_cml_2;
reg [ 2 * 32 - 1 : 0 ] mul_prod_r_cml_1;
reg [ 2 - 1 : 0 ] mac_op_r1_cml_2;
reg [ 2 - 1 : 0 ] mac_op_r1_cml_1;
reg [ 2 - 1 : 0 ] mac_op_r2_cml_2;
reg [ 2 - 1 : 0 ] mac_op_r2_cml_1;
reg [ 2 - 1 : 0 ] mac_op_r3_cml_2;
reg [ 2 - 1 : 0 ] mac_op_r3_cml_1;
reg [ 2 * 32 - 1 : 0 ] mac_r_cml_2;
reg [ 2 * 32 - 1 : 0 ] mac_r_cml_1;
reg  div_free_cml_2;
reg  div_free_cml_1;
 
 
 
parameter width = `OR1200_OPERAND_WIDTH;
 
//
// I/O
//
 
//
// Clock and reset
//
input				clk;
input				rst;
 
//
// Multiplier/MAC interface
//
input				ex_freeze;
input				id_macrc_op;
input				macrc_op;
input	[width-1:0]		a;
input	[width-1:0]		b;
input	[`OR1200_MACOP_WIDTH-1:0]	mac_op;
input	[`OR1200_ALUOP_WIDTH-1:0]	alu_op;
output	[width-1:0]		result;
output				mac_stall_r;
 
//
// SPR interface
//
input				spr_cs;
input				spr_write;
input	[31:0]			spr_addr;
input	[31:0]			spr_dat_i;
output	[31:0]			spr_dat_o;
 
//
// Internal wires and regs
//
// Signals are regs when the corresponding unit is implemented and
// wires tied to zero otherwise (see the `else branches further down).
//
`ifdef OR1200_MULT_IMPLEMENTED
reg	[width-1:0]		result;
reg	[2*width-1:0]		mul_prod_r;
`else
wire	[width-1:0]		result;
wire	[2*width-1:0]		mul_prod_r;
`endif
wire	[2*width-1:0]		mul_prod;
wire	[`OR1200_MACOP_WIDTH-1:0]	mac_op;
`ifdef OR1200_MAC_IMPLEMENTED
reg	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r1;
reg	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r2;
reg	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r3;
reg				mac_stall_r;
reg	[2*width-1:0]		mac_r;
`else
wire	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r1;
wire	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r2;
wire	[`OR1200_MACOP_WIDTH-1:0]	mac_op_r3;
wire				mac_stall_r;
wire	[2*width-1:0]		mac_r;
`endif
wire	[width-1:0]		x;
wire	[width-1:0]		y;
wire				spr_maclo_we;
wire				spr_machi_we;
wire				alu_op_div_divu;
wire				alu_op_div;
reg				div_free;
`ifdef OR1200_IMPL_DIV
wire	[width-1:0]		div_tmp;
reg	[5:0]			div_cntr;
`endif
 
//
// Combinatorial logic
//
`ifdef OR1200_MAC_IMPLEMENTED
 
// Write enable for the low accumulator word: selected when the
// OR1200_MAC_ADDR bit of the (delayed) SPR address is set.
// SynEDA CoreMultiplier
// assignment(s): spr_maclo_we
// replace(s): spr_write, spr_addr
assign spr_maclo_we = spr_cs & spr_write_cml_2 & spr_addr_cml_2[`OR1200_MAC_ADDR];
 
// Write enable for the high accumulator word: same address bit cleared.
// SynEDA CoreMultiplier
// assignment(s): spr_machi_we
// replace(s): spr_write, spr_addr
assign spr_machi_we = spr_cs & spr_write_cml_2 & !spr_addr_cml_2[`OR1200_MAC_ADDR];
 
// SPR read data: low or high word of the first-stage copy of mac_r.
// SynEDA CoreMultiplier
// assignment(s): spr_dat_o
// replace(s): spr_addr, mac_r
assign spr_dat_o = spr_addr_cml_1[`OR1200_MAC_ADDR] ? mac_r_cml_1[31:0] : mac_r_cml_1[63:32];
`else
assign spr_maclo_we = 1'b0;
assign spr_machi_we = 1'b0;
assign spr_dat_o = 32'h0000_0000;
`endif

//
// Multiplier operands. For a signed divide (alu_op_div) a negative
// operand is replaced by its two's complement.
//
`ifdef OR1200_LOWPWR_MULT
// Low-power variant: operands are forced to zero unless a divide,
// multiply or MAC operation is selected.
// NOTE(review): `y` reads `b` here, whereas the regular variant below
// reads b_cml_2 - confirm the intended delay before using this option.
assign x = (alu_op_div & a_cml_1[31]) ? ~a_cml_1 + 1'b1 : alu_op_div_divu | (alu_op == `OR1200_ALUOP_MUL) | (|mac_op) ? a_cml_1 : 32'h0000_0000;
assign y = (alu_op_div & b[31]) ? ~b + 1'b1 : alu_op_div_divu | (alu_op == `OR1200_ALUOP_MUL) | (|mac_op) ? b : 32'h0000_0000;
`else
 
// SynEDA CoreMultiplier
// assignment(s): x
// replace(s): a
assign x = alu_op_div & a_cml_1[31] ? ~a_cml_1 + 32'b1 : a_cml_1;
 
// SynEDA CoreMultiplier
// assignment(s): y
// replace(s): b
assign y = alu_op_div & b_cml_2[31] ? ~b_cml_2 + 32'b1 : b_cml_2;
`endif

//
// Divide decode. Without OR1200_IMPL_DIV both selects are tied low,
// which also disables the operand negation above.
//
`ifdef OR1200_IMPL_DIV
assign alu_op_div = (alu_op == `OR1200_ALUOP_DIV);
assign alu_op_div_divu = alu_op_div | (alu_op == `OR1200_ALUOP_DIVU);
// Trial subtraction of the divisor from the upper half of mul_prod_r;
// bit 31 of the difference decides which divide step is taken.
assign div_tmp = mul_prod_r[63:32] - y;
`else
assign alu_op_div = 1'b0;
assign alu_op_div_divu = 1'b0;
`endif
 
`ifdef OR1200_MULT_IMPLEMENTED
 
//
// Select result of current ALU operation to be forwarded
// to next instruction and to WB stage
//
// Signed divide negates the low word of mul_prod_r when the operand
// signs differ; unsigned divide and multiply return the low word of
// mul_prod_r as is; every other opcode returns a 32-bit window of the
// MAC accumulator.
//
always @(alu_op or mul_prod_r or mac_r or a or b)
	casex(alu_op)	// synopsys parallel_case
`ifdef OR1200_IMPL_DIV
		`OR1200_ALUOP_DIV:
			result = a[31] ^ b[31] ? ~mul_prod_r[31:0] + 1'b1 : mul_prod_r[31:0];
		`OR1200_ALUOP_DIVU,
`endif
		`OR1200_ALUOP_MUL: begin
			result = mul_prod_r[31:0];
		end
		default:
`ifdef OR1200_MAC_SHIFTBY
			result = mac_r[`OR1200_MAC_SHIFTBY+31:`OR1200_MAC_SHIFTBY];
`else
			result = mac_r[31:0];
`endif
	endcase
 
//
// Instantiation of the multiplier
//
`ifdef OR1200_ASIC_MULTP2_32X32
or1200_amultp2_32x32 or1200_amultp2_32x32(
	.X(x),
	.Y(y),
	.RST(rst),
	.CLK(clk),
	.P(mul_prod)
);
`else // OR1200_ASIC_MULTP2_32X32
or1200_gmultp2_32x32_cm3 or1200_gmultp2_32x32(
		.clk_i_cml_1(clk_i_cml_1),
		.clk_i_cml_2(clk_i_cml_2),
	.X(x),
	.Y(y),
	.RST(rst),
	.CLK(clk),
	.P(mul_prod)
);
`endif // OR1200_ASIC_MULTP2_32X32
 
//
// Registered output from the multiplier and
// an optional divider
//
// With OR1200_IMPL_DIV, mul_prod_r doubles as the divider's working
// register: a divide is started by loading the dividend (x) shifted
// left by one and setting div_cntr to 32; while div_cntr is non-zero
// each clock performs one shift/subtract step.
//
 
// SynEDA CoreMultiplier
// assignment(s): mul_prod_r, div_free
// replace(s): ex_freeze, mul_prod_r, div_free
always @(posedge rst or posedge clk)
	if (rst) begin
		mul_prod_r <= #1 64'h0000_0000_0000_0000;
		div_free <= #1 1'b1;
`ifdef OR1200_IMPL_DIV
		div_cntr <= #1 6'b00_0000;
`endif
	end else begin
		// Default: reload both registers from their second delay
		// stage; the branches below override this where needed.
		div_free <= div_free_cml_2;
		mul_prod_r <= mul_prod_r_cml_2;
`ifdef OR1200_IMPL_DIV
		if (|div_cntr) begin
			// Divide step: shift in 0 if the trial subtraction
			// went negative, otherwise keep the difference and
			// shift in 1.
			if (div_tmp[31])
				mul_prod_r <= #1 {mul_prod_r_cml_2[62:0], 1'b0};
			else
				mul_prod_r <= #1 {div_tmp[30:0], mul_prod_r_cml_2[31:0], 1'b1};
			div_cntr <= #1 div_cntr - 1'b1;
		end
		else if (alu_op_div_divu && div_free_cml_2) begin
			// Start of a new divide.
			mul_prod_r <= #1 {31'b0, x[31:0], 1'b0};
			div_cntr <= #1 6'b10_0000;
			div_free <= #1 1'b0;
		end else
`endif // OR1200_IMPL_DIV
		if (div_free_cml_2 | !ex_freeze_cml_2) begin
			// Capture the multiplier output and mark the divider
			// as free again.
			mul_prod_r <= #1 mul_prod[63:0];
			div_free <= #1 1'b1;
		end
	end
 
`else // OR1200_MULT_IMPLEMENTED
assign result = {width{1'b0}};
assign mul_prod = {2*width{1'b0}};
assign mul_prod_r = {2*width{1'b0}};
`endif // OR1200_MULT_IMPLEMENTED
 
`ifdef OR1200_MAC_IMPLEMENTED
 
//
// Propagation of l.mac opcode
//
// mac_op_r1..r3 carry the MAC opcode from stage to stage; each stage
// is fed from the second delay stage (_cml_2) of the previous one.
// In each block the first non-blocking assignment is a tool-inserted
// default that is overridden by the delayed (#1) assignment after it.
//
 
// SynEDA CoreMultiplier
// assignment(s): mac_op_r1
// replace(s): mac_op, mac_op_r1
always @(posedge clk or posedge rst)
	if (rst)
		mac_op_r1 <= #1 `OR1200_MACOP_WIDTH'b0;
	else begin
		mac_op_r1 <= mac_op_r1_cml_2;
		mac_op_r1 <= #1 mac_op_cml_2;
	end
 
//
// Propagation of l.mac opcode
//
 
// SynEDA CoreMultiplier
// assignment(s): mac_op_r2
// replace(s): mac_op_r1, mac_op_r2
always @(posedge clk or posedge rst)
	if (rst)
		mac_op_r2 <= #1 `OR1200_MACOP_WIDTH'b0;
	else begin
		mac_op_r2 <= mac_op_r2_cml_2;
		mac_op_r2 <= #1 mac_op_r1_cml_2;
	end
 
//
// Propagation of l.mac opcode
//
 
// SynEDA CoreMultiplier
// assignment(s): mac_op_r3
// replace(s): mac_op_r2, mac_op_r3
always @(posedge clk or posedge rst)
	if (rst)
		mac_op_r3 <= #1 `OR1200_MACOP_WIDTH'b0;
	else begin
		mac_op_r3 <= mac_op_r3_cml_2;
		mac_op_r3 <= #1 mac_op_r2_cml_2;
	end
 
//
// Implementation of MAC
//
// Priority (highest first): SPR write of the low / high word (only
// with OR1200_MAC_SPR_WE), accumulate (MAC), subtract (MSB), clear on
// macrc_op when not frozen. If none applies, mac_r is reloaded from
// mac_r_cml_2.
//
// The "else begin" and the default assignment are deliberately kept
// outside the `ifdef so that the block stays well-formed (matching
// begin/end) and keeps its default when OR1200_MAC_SPR_WE is not
// defined.
//
 
// SynEDA CoreMultiplier
// assignment(s): mac_r
// replace(s): ex_freeze, macrc_op, spr_dat_i, mul_prod_r, mac_op_r3, mac_r
always @(posedge rst or posedge clk)
	if (rst)
		mac_r <= #1 64'h0000_0000_0000_0000;
	else begin
		mac_r <= mac_r_cml_2;
`ifdef OR1200_MAC_SPR_WE
		if (spr_maclo_we)
			mac_r[31:0] <= #1 spr_dat_i_cml_2;
		else if (spr_machi_we)
			mac_r[63:32] <= #1 spr_dat_i_cml_2;
		else
`endif
		if (mac_op_r3_cml_2 == `OR1200_MACOP_MAC)
			mac_r <= #1 mac_r_cml_2 + mul_prod_r_cml_2;
		else if (mac_op_r3_cml_2 == `OR1200_MACOP_MSB)
			mac_r <= #1 mac_r_cml_2 - mul_prod_r_cml_2;
		else if (macrc_op_cml_2 & !ex_freeze_cml_2)
			mac_r <= #1 64'h0000_0000_0000_0000;
	end
 
//
// Stall CPU if l.macrc is in ID and MAC still has to process l.mac instructions
// in EX stage (e.g. inside multiplier)
// This stall signal is also used by the divider.
//
// The stall is raised while id_macrc_op is set and any of mac_op,
// mac_op_r1 or mac_op_r2 (delayed copies) is non-zero, or - with
// OR1200_IMPL_DIV - while div_cntr is non-zero.
//
 
// SynEDA CoreMultiplier
// assignment(s): mac_stall_r
// replace(s): mac_op, mac_stall_r, mac_op_r1, mac_op_r2
always @(posedge rst or posedge clk)
	if (rst)
		mac_stall_r <= #1 1'b0;
	else begin
		mac_stall_r <= mac_stall_r_cml_2;
		mac_stall_r <= #1 (|mac_op_cml_2 | (|mac_op_r1_cml_2) | (|mac_op_r2_cml_2)) & id_macrc_op
`ifdef OR1200_IMPL_DIV
				| (|div_cntr)
`endif
				;
	end
`else // OR1200_MAC_IMPLEMENTED
assign mac_stall_r = 1'b0;
assign mac_r = {2*width{1'b0}};
assign mac_op_r1 = `OR1200_MACOP_WIDTH'b0;
assign mac_op_r2 = `OR1200_MACOP_WIDTH'b0;
assign mac_op_r3 = `OR1200_MACOP_WIDTH'b0;
`endif // OR1200_MAC_IMPLEMENTED
 
 
//
// First delay stage: sample inputs and state registers on clk_i_cml_1.
//
always @ (posedge clk_i_cml_1) begin
macrc_op_cml_1 <= macrc_op;
a_cml_1 <= a;
b_cml_1 <= b;
mac_op_cml_1 <= mac_op;
mac_stall_r_cml_1 <= mac_stall_r;
spr_addr_cml_1 <= spr_addr;
spr_dat_i_cml_1 <= spr_dat_i;
mul_prod_r_cml_1 <= mul_prod_r;
mac_op_r1_cml_1 <= mac_op_r1;
mac_op_r2_cml_1 <= mac_op_r2;
mac_op_r3_cml_1 <= mac_op_r3;
mac_r_cml_1 <= mac_r;
div_free_cml_1 <= div_free;
end

//
// Second delay stage: sample the first stage on clk_i_cml_2.
// ex_freeze and spr_write have no first stage and are sampled directly.
//
always @ (posedge clk_i_cml_2) begin
ex_freeze_cml_2 <= ex_freeze;
macrc_op_cml_2 <= macrc_op_cml_1;
b_cml_2 <= b_cml_1;
mac_op_cml_2 <= mac_op_cml_1;
mac_stall_r_cml_2 <= mac_stall_r_cml_1;
spr_write_cml_2 <= spr_write;
spr_addr_cml_2 <= spr_addr_cml_1;
spr_dat_i_cml_2 <= spr_dat_i_cml_1;
mul_prod_r_cml_2 <= mul_prod_r_cml_1;
mac_op_r1_cml_2 <= mac_op_r1_cml_1;
mac_op_r2_cml_2 <= mac_op_r2_cml_1;
mac_op_r3_cml_2 <= mac_op_r3_cml_1;
mac_r_cml_2 <= mac_r_cml_1;
div_free_cml_2 <= div_free_cml_1;
end
endmodule
Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories or1200_hp

[/] [or1200_hp/] [trunk/] [rtl/] [rtl_cm3/] [verilog/] [or1200_mult_mac.v] - Rev 2