URL https://opencores.org/ocsvn/openarty/openarty/trunk
Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [zipcpuhs.v] - Rev 53

Go to most recent revision | Compare with Previous | Blame | View Log
///////////////////////////////////////////////////////////////////////////////
//
// Filename:	zipcpuhs.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	This is the top level module holding the core of the Zip CPU
//		together.  The Zip CPU is designed to be as simple as possible.
//	(actual implementation aside ...)  The instruction set is about as
//	RISC as you can get, there are only 26 instruction types supported, not
//	including the floating point instruction set.  Please see the
//	accompanying spec.pdf file for a description of these instructions.
//
//	All instructions are 32-bits wide.  All bus accesses, both address and
//	data, are 32-bits over a wishbone bus.
//
//
//	This version of the ZipCPU has been modified for "high speed" operation.
//	By that I mean, it has been modified so that it can handle a high speed
//	system clock.  The nominal five stage pipeline has therefore been
//	broken into nine pieces, as outlined below:
//
//		1. Prefetch, returns the instruction from memory. 
//
//		2. Instruction Decode: triplet instructions, VLIW upper half,
//			VLIW lower half, and normal instructions
//
//		3. Instruction Decode: Select among the four types of 
//			instructions
//
//		4. Read Operand B
//
//		5. Read Operand A, add the immediate offset to Operand B
//
//		6. 16 ALU operations
//
//		7. Select among ALU results
//
//		8. Select from ALU, Memory, Divide, FPU results
//
//		9. Write-back Results
//
//	Further information about the ZipCPU may be found in the spec.pdf file.
//	(The documentation within this file is likely to become out of date
//	and out of sync with the spec.pdf, so look to the spec.pdf for
//	accurate and up to date information.)
//
//
//	A note about pipelining.  The approach used to accommodate pipelining
//	in this implementation assumes that it will be impossible to tell if
//	a particular stage will stall until the logic for that stage completes.
//	There is no time, therefore, for the stall logic to ripple from the
//	end of the pipeline to the beginning.  At best, it can ripple from
//	one stage to the next.  Stall logic, therefore, is latched in a 
//	FLIP-FLOP, rather than done in a combinatorial fashion.  Hopefully,
//	you'll have a copy of the stall logic slides.  If not, here's the
//	outline of how stalls will be done:
//
//	assign	(n)_slp = // stall logic for location n, based upon the prior
//			//	stages info
//	assign	(n)_slc = // stall logic for location n, based upon a copy of
//			//	what was in the prior stage
//
//
//	// We'll shorten _valid to _v, _stall to _s, _copy to _c
//	always @(posedge i_clk)
//		if ((i_rst)||(clear_pipeline))
//			(n)_v = 0;
//		else if (!(n)_stall)
//			(n)_v = ( (n-1)_v && (!(n)_slp) );
//		else
//			(n)_v = ( !(n)_slc );
//
//	always @(posedge i_clk)
//		if ((i_rst)||(clear_pipeline))
//			(n)_s = 1'b0;
//		else if (!(n)_s)
//			(n)_s = ((n-1)_v) && ( (n)_slp || (n+1)_s );
//		else
//			(n)_s = ( (n)_slc || (n+1)_s );
//			
//	always @(posedge i_clk)
//		if ((n)_s)
//			(n)_data  = PROCESS[(n)_c];
//			// Can't change copy if not stalled
//		else
//			(n)_data = PROCESS[(n-1)_data];
//			(n)_c <= (n-1)_data;
//
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////////
//
// We can either pipeline our fetches, or issue one fetch at a time.  Pipelined
// fetches are more complicated and therefore use more FPGA resources, while
// single fetches will cause the CPU to stall for about 5 clocks each
// instruction cycle, effectively reducing the instruction count per clock to
// about 0.2.  However, the area cost may be worth it.  Consider:
//
//	Slice LUTs		ZipSystem	ZipCPU
//	Single Fetching		2521		1734
//	Pipelined fetching	2796		2046
//
//
//
// Register IDs (low four bits of a five-bit register index) and bit positions
// within the condition-code (CC) word.
`define	CPU_CC_REG	4'he	// Low nibble of the CC register's index
`define	CPU_PC_REG	4'hf	// Low nibble of the PC register's index
`define	CPU_CLRCACHE_BIT 14	// Presumably the clear-instruction-cache bit -- TODO confirm
`define	CPU_PHASE_BIT	13	// Position of the [ui]halt_phase bit in the flags word
`define	CPU_FPUERR_BIT	12	// Floating point error flag, set on error
`define	CPU_DIVERR_BIT	11	// Divide error flag, set on divide by zero
`define	CPU_BUSERR_BIT	10	// Bus error flag, set on error
`define	CPU_TRAP_BIT	9	// User TRAP has taken place
`define	CPU_ILL_BIT	8	// Illegal instruction
`define	CPU_BREAK_BIT	7	// Break enable (loaded into break_en on a supervisor CC write)
`define	CPU_STEP_BIT	6	// Will step one or two (VLIW) instructions
`define	CPU_GIE_BIT	5	// Reads 1 in the user flags word, 0 in the supervisor one
`define	CPU_SLEEP_BIT	4	// Position of the sleep bit in the flags word
// Compile time defines
//
`include "cpudefs.v"
//
//
module	zipcpuhs(i_clk, i_rst, i_interrupt,
		// Debug interface
		i_halt, i_clear_pf_cache, i_dbg_reg, i_dbg_we, i_dbg_data,
			o_dbg_stall, o_dbg_reg, o_dbg_cc,
			o_break,
		// CPU interface to the wishbone bus
		o_wb_gbl_cyc, o_wb_gbl_stb,
			o_wb_lcl_cyc, o_wb_lcl_stb,
			o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_data,
			i_wb_err,
		// Accounting/CPU usage interface
		o_op_stall, o_pf_stall, o_i_count
`ifdef	DEBUG_SCOPE
		, o_debug
`endif
		);
	parameter	RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24,
			LGICACHE=6;
`ifdef	OPT_MULTIPLY
	parameter	IMPLEMENT_MPY = `OPT_MULTIPLY;
`else
	parameter	IMPLEMENT_MPY = 0;
`endif
`ifdef	OPT_DIVIDE
	parameter	IMPLEMENT_DIVIDE = 1;
`else
	parameter	IMPLEMENT_DIVIDE = 0;
`endif
`ifdef	OPT_IMPLEMENT_FPU
	parameter	IMPLEMENT_FPU = 1,
`else
	parameter	IMPLEMENT_FPU = 0,
`endif
			IMPLEMENT_LOCK=1;
`ifdef	OPT_EARLY_BRANCHING
	parameter	EARLY_BRANCHING = 1;
`else
	parameter	EARLY_BRANCHING = 0;
`endif
	parameter	AW=ADDRESS_WIDTH;
	input			i_clk, i_rst, i_interrupt;
	// Debug interface -- inputs
	input			i_halt, i_clear_pf_cache;
	input		[4:0]	i_dbg_reg;
	input			i_dbg_we;
	input		[31:0]	i_dbg_data;
	// Debug interface -- outputs
	output	wire		o_dbg_stall;
	output	reg	[31:0]	o_dbg_reg;
	output	reg	[3:0]	o_dbg_cc;
	output	wire		o_break;
	// Wishbone interface -- outputs
	output	wire		o_wb_gbl_cyc, o_wb_gbl_stb;
	output	wire		o_wb_lcl_cyc, o_wb_lcl_stb, o_wb_we;
	output	wire	[(AW-1):0]	o_wb_addr;
	output	wire	[31:0]	o_wb_data;
	// Wishbone interface -- inputs
	input			i_wb_ack, i_wb_stall;
	input		[31:0]	i_wb_data;
	input			i_wb_err;
	// Accounting outputs ... to help us count stalls and usage
	output	wire		o_op_stall;
	output	wire		o_pf_stall;
	output	wire		o_i_count;
	//
`ifdef	DEBUG_SCOPE
	output	reg	[31:0]	o_debug;
`endif
 
 
	// Registers
	//
	//	The distributed RAM style comment is necessary on the
	// SPARTAN6 with XST to prevent XST from oversimplifying the register
	// set and in the process ruining everything else.  It basically
	// optimizes logic away, to where it no longer works.  The logic
	// as described herein will work, this just makes sure XST implements
	// that logic.
	//
	(* ram_style = "distributed" *)
	reg	[31:0]	regset [0:31];
 
	// Condition codes
	// (BUS, TRAP,ILL,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
	reg	[3:0]	flags, iflags;
	wire	[14:0]	w_uflags, w_iflags;
	reg		trap, break_en, step, gie, sleep, r_halted,
			break_pending;
	wire		w_clear_icache;
`ifdef	OPT_ILLEGAL_INSTRUCTION
	reg		ill_err_u, ill_err_i;
`else
	wire		ill_err_u, ill_err_i;
`endif
	reg		ubreak;
	reg		ibus_err_flag, ubus_err_flag;
	wire		idiv_err_flag, udiv_err_flag;
	wire		ifpu_err_flag, ufpu_err_flag;
	wire		ihalt_phase, uhalt_phase;
 
	// The master chip enable
	wire		master_ce;
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Variable declarations
	//
	reg	[(AW-1):0]	pf_pc;
	reg	new_pc;
	wire	clear_pipeline;
	assign	clear_pipeline = new_pc;
 
	wire		dcd_stalled;
	wire		pf_cyc, pf_stb, pf_we, pf_busy, pf_ack, pf_stall, pf_err;
	wire	[(AW-1):0]	pf_addr;
	wire	[31:0]		pf_data;
	wire	[31:0]		instruction;
	wire	[(AW-1):0]	instruction_pc;
	wire	pf_v, instruction_gie, pf_illegal;
 
	//
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Variable declarations
	//
	//
	wire		op_stall, dcd_ce, dcd_phase;
	wire	[3:0]	dcdOp;
	wire	[4:0]	dcd_iA, dcd_iB, dcd_iR;
	wire		dcdA_cc, dcdB_cc, dcdA_pc, dcdB_pc, dcdR_cc, dcdR_pc;
	wire	[3:0]	dcdF;
	wire		dcd_wR, dcd_rA, dcd_rB,
				dcdALU, dcdM, dcdDV, dcdFP,
				dcdF_wr, dcd_gie, dcd_break, dcd_lock,
				dcd_pipe, dcd_ljmp;
	reg	[1:0]	r_dcdvalid;
	wire		dcd_v;
	wire	[(AW-1):0]	dcd_pc;
	wire	[31:0]	dcd_I;
	wire		dcd_zI;	// true if dcdI == 0
	wire	dcdA_stall, dcdB_stall, dcdF_stall;
 
	wire	dcd_illegal;
	wire			dcd_early_branch;
	wire	[(AW-1):0]	dcd_branch_pc;
 
 
	//
	//
	//	PIPELINE STAGE #3a :: Read Operands
	//		Variable declarations
	//
	//
	//
	// Now, let's read our operands
	reg		opa_v, opa_DV, opa_FP, opa_ALU, opa_M,
			opa_rA, opa_rB;
	reg	[4:0]	alu_reg;
	reg	[3:0]	opa_opn;
	reg	[4:0]	opa_R, opa_iA;
	reg	[31:0]	r_opa_B;
	reg	[(AW-1):0]	opa_pc;
	wire	[31:0]	opA_nowait, opa_Bnowait, opa_A, opa_B, opa_I;
	reg		opa_wR, opa_ccR, opa_wF, opa_gie;
	wire	[13:0]	opa_Fl;
	reg	[5:0]	r_opa_F;
	wire	[7:0]	opa_F;
	wire		opa_ce;
	// opa_phase is loaded from a clocked always block, so it has to be a
	// reg rather than a wire.  opa_pipe is declared (as a reg) next to the
	// always block that loads it, so it must not be declared here as well.
	reg		opa_phase;
	// Some pipeline control wires
	reg	opa_A_alu, opa_A_mem;
	reg	opa_B_alu, opa_B_mem;
	reg	opa_illegal;
	reg	opa_break;
	reg	opa_lock;
 
	//
	//
	//	PIPELINE STAGE #3b :: Read Operands
	//		Variable declarations
	//
	//
	//
	// Now, let's read our operands
	reg	[3:0]	opb_opn;
	reg		opb_v, opb_v_mem, opb_v_alu;
	reg		opb_v_div, opb_v_fpu;
	reg	[4:0]	opb_R;
	reg	[31:0]	r_opb_A, r_opb_B;
	reg	[(AW-1):0]	opb_pc;
	wire	[31:0]	opb_A_nowait, opb_B_nowait, opb_A, opb_B;
	reg		opb_wR, opb_ccR, opb_wF, opb_gie;
	wire	[13:0]	opb_Fl;
	reg	[5:0]	r_opb_F;
	wire	[7:0]	opb_F;
	wire		opb_ce, opb_pipe;
	// opb_phase is loaded from a clocked always block, so it has to be a
	// reg rather than a wire.
	reg		opb_phase;
	// Some pipeline control wires
	reg	opb_A_alu, opb_A_mem;
	reg	opb_B_alu, opb_B_mem;
	reg	opb_illegal;
	reg	opb_break;
	reg	opb_lock;
 
 
	//
	//
	//	PIPELINE STAGE #4 :: ALU / Memory / Divide
	//		Variable declarations
	//
	//
	// Execution-stage bookkeeping.  alu_phase and alu_illegal are both
	// given initial values and loaded from clocked always blocks further
	// down, so they are declared as regs here (they cannot be wires).
	reg		stage_busy;
	reg	[(AW-1):0]	alu_pc;
	reg		r_alu_pc_v, mem_pc_v;
	wire		alu_pc_v;
	reg		alu_phase;
	wire		alu_ce, alu_stall;
	wire	[31:0]	alu_result;
	wire	[3:0]	alu_flags;
	wire		alu_v, alu_busy;
	wire		set_cond;
	reg		alu_wr, alF_wr, alu_gie;
	wire		alu_illegal_op;
	reg		alu_illegal;
 
 
 
	wire	mem_ce, mem_stalled;
	wire	mem_pipe_stalled;
	wire	mem_v, mem_ack, mem_stall, mem_err, bus_err,
		mem_cyc_gbl, mem_cyc_lcl, mem_stb_gbl, mem_stb_lcl, mem_we;
	wire	[4:0]		mem_wreg;
 
	wire			mem_busy, mem_rdbusy;
	wire	[(AW-1):0]	mem_addr;
	wire	[31:0]		mem_data, mem_result;
 
	wire	div_ce, div_error, div_busy, div_v;
	wire	[31:0]	div_result;
	wire	[3:0]	div_flags;
 
	// Start a divide only when the CPU is enabled, the pipeline is not
	// being cleared, a divide is waiting in the opb stage, the execution
	// stage was idle on the previous clock, and the condition is met.
	assign	div_ce = master_ce && !clear_pipeline && opb_v_div
				&& !stage_busy && set_cond;
 
	wire	fpu_ce, fpu_error, fpu_busy, fpu_v;
	wire	[31:0]	fpu_result;
	wire	[3:0]	fpu_flags;
 
	// Same gating as div_ce, but for an FPU operation waiting in opb.
	assign	fpu_ce = master_ce && !clear_pipeline && opb_v_fpu
				&& !stage_busy && set_cond;
 
	//
	//
	//	PIPELINE STAGE #5 :: Write-back
	//		Variable declarations
	//
	wire		wr_reg_ce, wr_flags_ce, wr_write_pc, wr_write_cc;
	wire	[4:0]	wr_reg_id;
	wire	[31:0]	wr_gpreg_vl, wr_spreg_vl;
	wire	w_switch_to_interrupt, w_release_from_interrupt;
	reg	[(AW-1):0]	upc, ipc;
 
 
 
	//
	//	MASTER: clock enable.
	//
	// The CPU runs unless it is halted externally, stopped at a break,
	// or asleep.
	assign	master_ce = !(i_halt || o_break || sleep);
 
 
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Calculate stall conditions
	//
	//	These are calculated externally, within the prefetch module.
	//
 
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Calculate stall conditions
	// Decode accepts a new instruction when the pipeline is not being
	// cleared and it is not both valid and stalled.
	assign		dcd_ce = !clear_pipeline && !(dcd_v && dcd_stalled);

	// Decode is stalled when it holds a valid instruction while the first
	// read-operands stage is stalled.
	assign		dcd_stalled = dcd_v && opa_stall;
	//
	//	PIPELINE STAGE #3 :: Read Operands
	//		Calculate stall conditions
	// op_lock_stall is assigned later in the file from r_op_lock_stall.
	wire	op_lock_stall;
	// opa_stall_slp: stall request for the first read-operands stage,
	// computed from the instruction currently in decode.  It is the OR of:
	//   (a) operand B is read with a non-zero immediate, combined with the
	//       in-flight checks listed below, and
	//   (b) dcdF[3] is low, or operand A or B reads a CC register, while
	//       the opb stage holds a valid instruction with opb_ccR set.
	// NOTE(review): opa_stall_slp, dcdB_rd, dcdB, div_R, fpu_R, alua_v,
	// alua_R, alub_v, alub_R and opF_wr have no declaration in the part of
	// the file visible here -- confirm they exist.
	// NOTE(review): the parenthesis opened before (opa_v) is only closed
	// at the end of term (a), so the "&&(" group binds to (alu_busy) alone
	// and not to the whole list of in-flight checks -- confirm intent.
	assign	opa_stall_slp = (
				// Likewise for B, also includes logic
				// regarding immediate offset (register must
				// be in register file if we need to add to
				// an immediate)
				(((dcdB_rd)&&(~dcd_zI))
					&&((opa_v)&&(opb_R == dcdB)
						||(mem_rdbusy)
						||((div_busy)&&(div_R == dcdB))
						||((fpu_busy)&&(fpu_R == dcdB))
						||((alua_v)&&(alua_R==dcdB))
						||((alub_v)&&(alub_R==dcdB))
						||((alu_busy))
					&&(
					// 1.
					((~dcd_zI)&&(
						((opb_R == dcdB)&&(opb_wR))
						||((mem_rdbusy)&&(~dcd_pipe))
						))
					// 2.
					||((opF_wr)&&(dcdB_cc))
					)))
				// Or if we need to wait on flags to work on the
				// CC register
				||(((~dcdF[3])
						||((dcd_rA)&&(dcdA_cc))
						||((dcd_rB)&&(dcdB_cc)))
					&&(opb_v)&&(opb_ccR))
			);
 
	//
	//	PIPELINE STAGE #4 :: ALU / Memory
	//		Calculate stall conditions
	//
	// 1. Basic stall is if the previous stage is valid and the next is
	//	busy.  
	// 2. Also stall if the prior stage is valid and the master clock enable
	//	is de-selected
	// 3. Stall if someone on the other end is writing the CC register,
	//	since we don't know if it'll put us to sleep or not.
	// 4. Last case: Stall if we would otherwise move a break instruction
	//	through the ALU.  Break instructions are not allowed through
	//	the ALU.
	// Both ALU stall terms reduce to "the CPU is not enabled".
	assign	alu_stall_clp = !master_ce;
	assign	alu_stall_cls = !master_ce;
	// stage_busy: registered flag, set whenever any execution unit was
	// started or was busy on this clock.
	always @(posedge i_clk)
	begin
		stage_busy <= |{ alu_ce, mem_ce, fpu_ce, div_ce,
				alu_busy, mem_rdbusy, fpu_busy, div_busy };
	end
	// stage_ce: no execution unit (nor a memory read) is busy.
	assign	stage_ce = !(div_busy || alu_busy || mem_rdbusy || fpu_busy);
	//
 
	//
	// Note: if you change the conditions for mem_ce, you must also change
	// alu_pc_v.
	//
	// Issue a memory operation when the CPU is enabled, one is waiting in
	// opb, the memory unit is not stalled, and the pipeline is not being
	// cleared.
	assign	mem_ce = master_ce && opb_v_mem && !mem_stalled
			&& !clear_pipeline;
	// Memory stall, computed from the live opb stage ...
	assign	mem_stall_clp = !master_ce || alu_busy || div_busy || fpu_busy
				|| wr_write_pc || wr_write_cc
				|| (opb_v_mem
					&& (mem_pipe_stalled
						|| (!opb_pipe && mem_busy)));
	// ... and the same expression computed from the cp_ copies.
	assign	mem_stall_cls = !master_ce || alu_busy || div_busy || fpu_busy
				|| wr_write_pc || wr_write_cc
				|| (cp_opb_v_mem
					&& (mem_pipe_stalled
						|| (!cp_opb_pipe && mem_busy)));
 
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//
	//
	// Instruction cache / prefetch unit.  The third argument is asserted
	// on new_pc, or on an early branch found by the decoder while the
	// pipeline is not already being cleared; in the early-branch case the
	// address argument is dcd_branch_pc, otherwise it is pf_pc.
	// NOTE(review): ports are connected by position -- check the order
	// against the fastcache module definition.
	fastcache #(LGICACHE, ADDRESS_WIDTH)
		pf(i_clk, i_rst, (new_pc)||((dcd_early_branch)&&(~clear_pipeline)),
					i_clear_pf_cache,
				// dcd_pc,
				~dcd_stalled,
				((dcd_early_branch)&&(~clear_pipeline))
					? dcd_branch_pc:pf_pc,
				instruction, instruction_pc, pf_v,
				pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
					pf_ack, pf_stall, pf_err, i_wb_data,
				pf_illegal);
	// The instruction handed to the decoder is tagged with the current gie.
	assign	instruction_gie = gie;
 
	//
	// The ifastdec decoder takes two clocks to decode an instruction.
	// Therefore, to determine if a decoded instruction is valid, we
	// need to wait two clocks from pf_v.  Hence, we dump this into
	// a pipeline below.
	//
	// Two-deep shift register of pf_v; its older bit is the decode-valid
	// flag.  It is emptied on reset, on a pipeline clear or on a cache
	// clear, and also when the next stage advances while decode does not.
	initial	r_dcdvalid = 2'b00;
	always @(posedge i_clk)
	begin
		if (i_rst || clear_pipeline || w_clear_icache)
			r_dcdvalid <= 2'b00;
		else if (dcd_ce)
			r_dcdvalid <= { r_dcdvalid[0], pf_v };
		else if (opa_ce)
			r_dcdvalid <= 2'b00;
	end
	assign	dcd_v = r_dcdvalid[1];
 
	// The instruction decoder.  It is reset by i_rst or clear_pipeline and
	// receives the prefetched instruction, its PC, and its gie tag.
	// NOTE(review): ports are connected by position -- check the order
	// against the ifastdec module definition.
	// NOTE(review): this instance connects dcd_Rcc, dcd_Rpc, dcd_Acc,
	// dcd_Apc, dcd_Bcc, dcd_Bpc, dcd_F and dcd_wF, whereas the wires
	// declared above for this stage are spelled dcdR_cc, dcdR_pc, dcdA_cc,
	// dcdA_pc, dcdB_cc, dcdB_pc, dcdF ([3:0]) and dcdF_wr.  Unless the
	// names used here are declared elsewhere they become implicit one-bit
	// nets and the declared wires stay undriven -- confirm which spelling
	// is intended.
	ifastdec #(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE,
			IMPLEMENT_FPU)
		instruction_decoder(i_clk, (i_rst)||(clear_pipeline),
			dcd_ce, dcd_stalled, instruction, instruction_gie,
			instruction_pc, pf_v, pf_illegal, dcd_phase,
			dcd_illegal, dcd_pc, dcd_gie, 
			{ dcd_Rcc, dcd_Rpc, dcd_iR },
			{ dcd_Acc, dcd_Apc, dcd_iA },
			{ dcd_Bcc, dcd_Bpc, dcd_iB },
			dcd_I, dcd_zI, dcd_F, dcd_wF, dcdOp,
			dcdALU, dcdM, dcdDV, dcdFP, dcd_break, dcd_lock,
			dcd_wR,dcd_rA, dcd_rB,
			dcd_early_branch,
			dcd_branch_pc, dcd_ljmp,
			dcd_pipe);
 
	//
	//
	//	PIPELINE STAGE #3 :: Read Operands (Registers)
	//
	//
 
	reg		opa_pipe;
	initial	opa_pipe = 1'b0;
	// To be a pipeable operation, there must be 
	//	two valid adjacent instructions
	//	Both must be memory instructions
	//	Both must be writes, or both must be reads
	//	Both operations must be to the same identical address,
	//		or at least a single (one) increment above that address
	//
	// However ... we need to know this before this clock, hence this is
	// calculated in the instruction decoder.
	// First read-operands stage, following the stall outline in the file
	// header: while not stalled the stage works from the decoder outputs
	// and keeps a copy (cp_opa_*) of what it consumed; while stalled it
	// re-evaluates from that copy.
	//
	// Fixes relative to the previous revision:
	//  - the decode-valid flag is dcd_v (dcdvalid is not a signal of this
	//    module),
	//  - the upc/ipc choice tests bit 4 of the register ID; testing the
	//    whole ID was always true for a PC register, so ipc was never
	//    selected,
	//  - the stalled branch tests the copied cp_opa_Acc rather than the
	//    live dcd_Acc, like every other term of that branch,
	//  - cp_opa_rB, which the stalled branch reads, is now captured.
	//
	// NOTE(review): opa_stall, opa_vA, opa_vB, the opa_?cc/opa_?pc/opa_iB
	// targets, the cp_opa_* copies, user_flags_reg, supervisor_flags_reg
	// and opa_stall_slc are not declared in the part of the file visible
	// here, there is no reset term, and opa_v, opa_wR, opa_rA and opa_rB
	// are also assigned by other always blocks in this file.
	always @(posedge i_clk)
		if (!opa_stall)
		begin
			opa_v <= (dcd_v)&&(~opa_stall_slp);
			opa_stall <= (dcd_v)&&(opa_stall_slp);
			opa_pipe <= dcd_pipe;

			opa_wR <= dcd_wR;
			{ opa_Acc, opa_Apc, opa_iA, opa_rA }
				<= { dcd_Acc, dcd_Apc, dcd_iA, dcd_rA };
			{ opa_Bcc, opa_Bpc, opa_iB, opa_rB }
				<= { dcd_Bcc, dcd_Bpc, dcd_iB, dcd_rB };

			// Register A
			if (dcd_Apc)
				opa_vA <= (dcd_iA[4]==dcd_gie) ? dcd_pc
						: (dcd_iA[4])?upc : ipc;
			else if (dcd_Acc)
				opa_vA <= (dcd_iA[4])?user_flags_reg
						: supervisor_flags_reg;
			else
				opa_vA <= regset[dcd_iA];

			// Register B
			if (!dcd_rB)
				opa_vB <= 32'h00;
			else if (dcd_Bpc)
				opa_vB <= (dcd_iB[4]==dcd_gie) ? dcd_pc
						: (dcd_iB[4])?upc : ipc;
			else if (dcd_Bcc)
				opa_vB <= (dcd_iB[4])?user_flags_reg
						: supervisor_flags_reg;
			else
				opa_vB <= regset[dcd_iB];

			// Copy, for use while stalled
			cp_opa_pc   <= dcd_pc;
			cp_opa_gie  <= dcd_gie;
			cp_opa_pipe <= dcd_pipe;
			cp_opa_rB   <= dcd_rB;
			{ cp_opa_Rcc, cp_opa_Rpc, cp_opa_iR }
				<= { dcd_Rcc, dcd_Rpc, dcd_iR };
			{ cp_opa_Acc, cp_opa_Apc, cp_opa_iA }
				<= { dcd_Acc, dcd_Apc, dcd_iA };
			{ cp_opa_Bcc, cp_opa_Bpc, cp_opa_iB }
				<= { dcd_Bcc, dcd_Bpc, dcd_iB };
		end else begin
			opa_v     <= (~opa_stall_slc);
			opa_stall <= (opa_stall_slc);
			opa_pipe  <= cp_opa_pipe;

			// Register A
			if (cp_opa_Apc)
				opa_vA <= (cp_opa_iA[4]==cp_opa_gie) ? cp_opa_pc
						: (cp_opa_iA[4])?upc : ipc;
			else if (cp_opa_Acc)
				opa_vA <= (cp_opa_iA[4])?user_flags_reg
						: supervisor_flags_reg;
			else
				opa_vA <= regset[cp_opa_iA];

			// Register B
			if (!cp_opa_rB)
				opa_vB <= 32'h00;
			else if (cp_opa_Bpc)
				opa_vB <= (cp_opa_iB[4]==cp_opa_gie) ? cp_opa_pc
						: (cp_opa_iB[4])?upc : ipc;
			else if (cp_opa_Bcc)
				opa_vB <= (cp_opa_iB[4])?user_flags_reg
						: supervisor_flags_reg;
			else
				opa_vB <= regset[cp_opa_iB];
		end
 
	// w_cpu_info: nine build-configuration bits.  They are placed in the
	// top bits of the word returned when a CC register is read as an
	// operand.  From bit 8 down to bit 0:
	//   [8] OPT_ILLEGAL_INSTRUCTION defined	[7] constant 1
	//   [6] OPT_DIVIDE defined		[5] OPT_IMPLEMENT_FPU defined
	//   [4] constant 1			[3] constant 1
	//   [2] OPT_EARLY_BRANCHING defined	[1] constant 1
	//   [0] OPT_VLIW defined
	// NOTE(review): the meaning of the four constant bits is not stated
	// in this file.
	wire	[8:0]	w_cpu_info;
	assign	w_cpu_info = {
`ifdef	OPT_ILLEGAL_INSTRUCTION
	1'b1,
`else
	1'b0,
`endif
	1'b1,
`ifdef	OPT_DIVIDE
	1'b1,
`else
	1'b0,
`endif
`ifdef	OPT_IMPLEMENT_FPU
	1'b1,
`else
	1'b0,
`endif
	1'b1, 1'b1,
`ifdef	OPT_EARLY_BRANCHING
	1'b1,
`else
	1'b0,
`endif
	1'b1,
`ifdef	OPT_VLIW
	1'b1
`else
	1'b0
`endif
	};
 
	// Operand A register.  When the stage advances (opa_ce) the value is
	// taken, in priority order, from: the value being written back this
	// clock to the same register; the PC value w_pcA_v; a CC word built
	// from w_cpu_info, w_opA[22:15] and the user or supervisor flags
	// (9+8+15 = 32 bits); or the plain value w_opA.  While the stage holds,
	// a write-back to the register named by opa_iA is still captured when
	// opa_rA is set.
	// NOTE(review): r_opA, w_pcA_v and w_opA are not declared in the part
	// of the file visible here.
	always @(posedge i_clk)
		if (opa_ce)
		begin
			if ((wr_reg_ce)&&(wr_reg_id == dcd_iA))
				r_opA <= wr_gpreg_vl;
			else if (dcdA_pc)
				r_opA <= w_pcA_v;
			else if (dcdA_cc)
				r_opA <= { w_cpu_info, w_opA[22:15], (dcd_iA[4])?w_uflags:w_iflags };
			else
				r_opA <= w_opA;
		end else if ((wr_reg_ce)&&(wr_reg_id == opa_iA)&&(opa_rA))
				r_opA <= wr_gpreg_vl;
 
	// w_pcB_v: the value supplied when operand B names a PC register --
	// dcd_pc when bit 4 of the B register ID equals dcd_gie, upc otherwise.
	// The generate picks a zero-extended form when the address is narrower
	// than 32 bits, since a zero-width replication would not be legal.
	// NOTE(review): w_opBnI is declared here but not used in the visible
	// code, and dcdB has no declaration in the part of the file shown.
	wire	[31:0]	w_opBnI, w_pcB_v;
	generate
	if (AW < 32)
		assign	w_pcB_v = {{(32-AW){1'b0}}, (dcdB[4] == dcd_gie)?dcd_pc:upc };
	else
		assign	w_pcB_v = (dcdB[4] == dcd_gie)?dcd_pc:upc;
	endgenerate
 
	// Operand B and immediate capture for the first read-operands stage.
	// Priority: zero when B is not read; the value being written back this
	// clock when it targets the same register; the PC value; a CC word; or
	// the plain register value w_opB.
	//
	// The CC word is { w_cpu_info (9), w_opB[22:15] (8), flags (15) }, the
	// same 32-bit layout used for operand A.  The previous slice,
	// w_opB[22:14], made the concatenation 33 bits wide, so the top bit of
	// w_cpu_info was lost when the result was stored in 32 bits.
	//
	// NOTE(review): opa_B and opa_I are declared as wires earlier in the
	// file although they are assigned here, and dcdB, dcdB_rd and w_opB
	// are not declared in the part of the file visible here.
	always @(posedge i_clk)
		if (opa_ce)
		begin
			opa_B <= (~dcdB_rd) ? 32'h00
			: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_gpreg_vl
			: ((dcdB_pc) ? w_pcB_v
			: ((dcdB_cc) ? { w_cpu_info, w_opB[22:15],
				(dcdB[4])?w_uflags:w_iflags}
			: w_opB)));
			opa_I <= dcd_I;
		end
 
//
//	B-Inflight
//
//	We cannot read the B register if it is "in-flight", that is if the
//	result register of any previous instruction still needs to be written.
//
//	reg	[31:0]	opa_b_inflight;
//	always @(posedge i_clk)
//		if ((i_reset)||(clear_pipeline))
//			opa_b_inflight <= 32'h00;
//		else begin
//			if (wr_reg_ce)
//				opa_b_inflight[wr_reg_id] <= 1'b0;
//			if (opb_ce)
//				opa_b_inflight[opa_Rid] <= 1'b1;
//		end
//			
//	always @(posedge i_clk)
//		if (opa_b_invalid)
//			opa_b_invalid <= opa_b_inflight[opa_A];
//		else
//			opa_b_invalid <= opa_b_inflight[dcd_iA];
//
 
	// Second read-operands stage, operand B: when the stage advances
	// (opb_ce) the immediate is added to the B value read one stage
	// earlier.  Otherwise a write-back to register opa_Bid replaces the
	// held value when opa_Brd is set.
	// NOTE(review): the forwarded write-back value is stored without
	// opa_I added -- confirm that is intended.
	// NOTE(review): opb_B is declared as a wire earlier in the file, and
	// opa_Bid / opa_Brd are not declared in the part visible here.
	always @(posedge i_clk)
		if (opb_ce)
			opb_B <= opa_B + opa_I;
		else if ((wr_reg_ce)&&(opa_Bid == wr_reg_id)&&(opa_Brd))
			opb_B <= wr_gpreg_vl;
 
	// Condition-code pipeline.  The decoder's condition field is captured
	// on opa_ce; on opb_ce its low three bits are expanded into a six-bit
	// code, which opb_F then spreads into an eight-bit { mask, value }
	// pair.  set_cond (below in this file) is true when
	// (opb_F[7:4] & flags) == opb_F[3:0], with flags ordered V,N,C,Z.
	// NOTE(review): opa_F is declared as an eight-bit wire earlier in the
	// file although it is assigned here from the four-bit dcdF.
	always @(posedge i_clk)
		if (opa_ce)
			opa_F <= dcdF;
	always @(posedge i_clk)
		if (opb_ce)
		begin
			case(opa_F[2:0])
			3'h0:	r_opb_F <= 6'h00;	// Always
			// These were remapped as part of the new instruction
			// set in order to make certain that the low order
			// two bits contained the most commonly used 
			// conditions: Always, LT, Z, and NZ.
			3'h1:	r_opb_F <= 6'h24;	// LT
			3'h2:	r_opb_F <= 6'h11;	// Z
			3'h3:	r_opb_F <= 6'h10;	// NE
			3'h4:	r_opb_F <= 6'h30;	// GT (!N&!Z)
			3'h5:	r_opb_F <= 6'h20;	// GE (!N)
			3'h6:	r_opb_F <= 6'h02;	// C
			3'h7:	r_opb_F <= 6'h08;	// V
			endcase
		end // Bit order is { (flags_not_used), VNCZ mask, VNCZ value }
	// Mask bits: V = r_opb_F[3], N = r_opb_F[5], C = r_opb_F[1],
	// Z = r_opb_F[4]; value bits are r_opb_F[3:0].  (V and C share one bit
	// for mask and value, so they can only be tested for "set".)
	assign	opb_F = { r_opb_F[3], r_opb_F[5], r_opb_F[1], r_opb_F[4:0] };
 
	wire	w_opa_v;

	// Valid flag and execution-unit selects for the first read-operands
	// stage.
	//
	// Fixes relative to the previous revision:
	//  - opa_v was assigned by two separate always blocks in this spot.
	//    The first block (reset on i_rst only, load of
	//    (dcd_v||dcd_illegal)&&~clear_pipeline on opa_ce) has been folded
	//    away: every clock on which it wrote opa_v, the block kept here
	//    wrote it as well.
	//  - opa_M, opa_DV and opa_FP were loaded from themselves and from the
	//    previous instruction's opa_illegal, so they could never become
	//    set.  They now take the decoder's unit selects for the
	//    instruction being accepted.
	//  - opa_ALU, which the next stage reads alongside them, is now
	//    loaded here too.
	//
	// NOTE(review): the removed block also treated dcd_illegal alone as
	// making the stage valid; if an illegal instruction can arrive without
	// dcd_v set, that term needs to come back -- confirm against the
	// decoder.  opa_v is additionally assigned by the stall-aware
	// read-operands always block earlier in this file.
	always @(posedge i_clk)
		if ((i_rst)||(clear_pipeline))
		begin
			opa_v <= 1'b0;
		end else if (opa_ce)
		begin
			opa_v     <= (dcd_v);
			opa_ALU   <= (dcd_v)&&(dcdALU);
			opa_M     <= (dcd_v)&&(dcdM  )&&(~dcd_illegal);
			opa_DV    <= (dcd_v)&&(dcdDV )&&(~dcd_illegal);
			opa_FP    <= (dcd_v)&&(dcdFP )&&(~dcd_illegal);
		end else if (opb_ce)
			opa_v <= 1'b0;
 
	// Valid flags for the second read-operands stage: one overall flag and
	// one per execution unit.  All are cleared on reset or pipeline clear,
	// loaded from the previous stage when this stage advances, and dropped
	// again once the execution stage is free to take the instruction.
	initial	opb_v     = 1'b0;
	initial	opb_v_alu = 1'b0;
	initial	opb_v_mem = 1'b0;
	initial	opb_v_div = 1'b0;
	initial	opb_v_fpu = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst || clear_pipeline)
			{ opb_v, opb_v_alu, opb_v_mem, opb_v_div, opb_v_fpu }
				<= 5'b00000;
		else if (opb_ce)
		begin
			// An illegal instruction is routed to the ALU path and
			// kept away from the memory, divide and FPU units.
			opb_v     <= opa_v;
			opb_v_alu <= opa_v && (opa_ALU || opa_illegal);
			opb_v_mem <= opa_v && opa_M  && !opa_illegal;
			opb_v_div <= opa_v && opa_DV && !opa_illegal;
			opb_v_fpu <= opa_v && opa_FP && !opa_illegal;
		end else if (clear_pipeline || stage_ce)
			{ opb_v, opb_v_alu, opb_v_mem, opb_v_div, opb_v_fpu }
				<= 5'b00000;
	end
 
	// Break flag for the second read-operands stage.  A break from the
	// previous stage is honoured when breaks are enabled or when that
	// instruction has gie clear; the flag is dropped when the pipeline is
	// cleared or the stage is no longer valid.
	// (The initial statement used to name op_break, which is not a
	// register of this module; the register driven here is opb_break.)
	initial	opb_break = 1'b0;
	always @(posedge i_clk)
		if (i_rst)	opb_break <= 1'b0;
		else if (opb_ce)
			opb_break <= (opa_break)&&((break_en)||(~opa_gie));
		else if ((clear_pipeline)||(~opb_v))
				opb_break <= 1'b0;
 
	reg	r_op_lock, r_op_lock_stall;

	// Registered lock stall: low only on the clock after opb holds a
	// valid lock instruction and every earlier stage (opa, decode,
	// prefetch) is valid as well.
	initial	r_op_lock_stall = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst)
			r_op_lock_stall <= 1'b0;
		else
			r_op_lock_stall <= !(opb_v && opb_lock
					&& opa_v && dcd_v && pf_v);
	end

	assign	op_lock_stall = r_op_lock_stall;
 
	// Lock flag, first read-operands stage: cleared on reset or pipeline
	// clear, otherwise taken from the decoder when the stage advances.
	initial	opa_lock = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst || clear_pipeline)
			opa_lock <= 1'b0;
		else if (opa_ce)
			opa_lock <= dcd_lock && !clear_pipeline;
	end
	// Lock flag, second read-operands stage: cleared on reset or pipeline
	// clear, otherwise taken from the previous stage when this stage
	// advances.  (It used to be loaded from opb_lock itself, so having
	// started at zero it could never become set.)
	initial	opb_lock = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(clear_pipeline))
			opb_lock <= 1'b0;
		else if (opb_ce)
			opb_lock <= (opa_lock)&&(~clear_pipeline);
 
	// Illegal-instruction flag, carried through both read-operands stages.
	// Each stage takes the flag from the stage before it when it advances;
	// both are cleared on reset or pipeline clear.
	initial	opa_illegal = 1'b0;
	initial	opb_illegal = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst || clear_pipeline)
		begin
			opa_illegal <= 1'b0;
			opb_illegal <= 1'b0;
		end else begin
			if (opa_ce)
				opa_illegal <= dcd_illegal;
			if (opb_ce)
				opb_illegal <= opa_illegal;
		end
	end
 
	// Write-enable pipeline for flags (wF) and result register (wR).
	// Neither is set for an early branch or an illegal instruction, and
	// flags are not written by an instruction that writes a CC register.
	always @(posedge i_clk)
	begin
		if (opa_ce)
		begin
			opa_wF <= dcdF_wr && !(dcdR_cc && dcd_wR)
				&& !dcd_early_branch && !dcd_illegal;
			opa_wR <= dcd_wR && !dcd_early_branch && !dcd_illegal;
		end
		if (opb_ce)
		begin
			opb_wF <= opa_wF;
			opb_wR <= opa_wR;
		end
	end
 
	// Control fields captured by the first read-operands stage whenever it
	// advances.
	// (opa_pc used to be loaded from the one-bit dcd_v, and opa_rA/opa_rB
	// from the incomplete name "dcd_"; they now take the decoder's PC and
	// its read-A / read-B flags.)
	always @(posedge i_clk)
		if (opa_ce)
		begin
			opa_opn  <= dcdOp;	// Which ALU operation?
			opa_R    <= dcd_iR;
			// Set when this instruction writes the CC register of
			// the register set selected by its own gie
			opa_ccR  <= (dcdR_cc)&&(dcd_wR)&&(dcd_iR[4]==dcd_gie);
			opa_gie <= dcd_gie;
			//
			opa_pc  <= dcd_pc;
			opa_rA  <= dcd_rA;
			opa_rB  <= dcd_rB;
		end
	// The second read-operands stage simply carries the control fields of
	// the first stage forward whenever it advances.
	always @(posedge i_clk)
	begin
		if (opb_ce)
		begin
			opb_pc  <= opa_pc;
			opb_gie <= opa_gie;
			opb_ccR <= opa_ccR;
			opb_R   <= opa_R;
			opb_opn <= opa_opn;
		end
	end
	// Flags seen by this instruction: the user set when its gie is set,
	// the supervisor set otherwise.
	assign	opb_Fl = opb_gie ? w_uflags : w_iflags;
 
	// Phase bit, carried through both read-operands stages; cleared on
	// reset or pipeline clear.
	always @(posedge i_clk)
	begin
		if (i_rst || clear_pipeline)
		begin
			opa_phase <= 1'b0;
			opb_phase <= 1'b0;
		end else begin
			if (opa_ce)
				opa_phase <= dcd_phase;
			if (opb_ce)
				opb_phase <= opa_phase;
		end
	end
 
	// Operand A as seen by later logic is the registered value.
	// NOTE(review): opA is not declared in the part of the file visible
	// here.
	assign	opA = r_opA;

	// Decode-stage stall terms.
	// NOTE(review): opF_wr, dcdB_rd and dcdB are not declared in the part
	// of the file visible here.
	//
	// A: operand A is read from a CC register while flags are about to be
	// written (opF_wr) and something is in flight (opa valid, memory read,
	// divide or FPU busy).
	assign	dcdA_stall = (dcd_rA) // &&(dcdvalid) is checked for elsewhere
				&&((opa_v)||(mem_rdbusy)
					||(div_busy)||(fpu_busy))
				&&((opF_wr)&&(dcdA_cc));

	// B: operand B is read while something is in flight (here also
	// counting a busy ALU), and either (1) the immediate is non-zero and
	// opb will write the same register or a non-pipelined memory read is
	// outstanding, or (2) B is a CC register and flags are about to be
	// written.
	assign	dcdB_stall = (dcdB_rd)
				&&((opa_v)||(mem_rdbusy)
					||(div_busy)||(fpu_busy)||(alu_busy))
				&&(
				// 1.
				((~dcd_zI)&&(
					((opb_R == dcdB)&&(opb_wR))
					||((mem_rdbusy)&&(~dcd_pipe))
					))
				// 2.
				||((opF_wr)&&(dcdB_cc))
				);
	// F: dcdF[3] is low, or a CC register is read, while opb holds a valid
	// instruction that has opb_ccR set.
	assign	dcdF_stall = ((~dcdF[3])
					||((dcd_rA)&&(dcdA_cc))
					||((dcd_rB)&&(dcdB_cc)))
				&&(opb_v)&&(opb_ccR);
	//
	//
	//	PIPELINE STAGE #4 :: Apply Instruction
	//
	//
	// The ALU.  It is handed the opb-stage valid flag, operation and
	// operands, and returns a result, flags, a valid strobe, an
	// illegal-operation indication and a busy flag.
	// NOTE(review): ports are connected by position -- check the order
	// against the fastops module definition.
	fastops	fastalu(i_clk, i_rst, alu_ce,
			(opb_v_alu), opb_opn, opb_A, opb_B,
			alu_result, alu_flags, alu_v, alu_illegal_op,
			alu_busy);

	// The divide unit.  It is reset by i_rst or a pipeline clear, started
	// by div_ce, and given bit 0 of the operation along with both operands.
	// NOTE(review): instantiated regardless of IMPLEMENT_DIVIDE; ports are
	// connected by position -- check against the div module definition.
	div thedivide(i_clk, (i_rst)||(clear_pipeline), div_ce, opb_opn[0],
			opb_A, opb_B, div_busy, div_v, div_error, div_result,
			div_flags);
 
	// Floating point unit placeholder.  Both branches of this generate tie
	// every FPU output to zero: the instantiation in the IMPLEMENT_FPU
	// branch is commented out, so no FPU is actually built either way.
	generate
	if (IMPLEMENT_FPU != 0)
	begin
		//
		// sfpu thefpu(i_clk, i_rst, fpu_ce,
		//	opA, opB, fpu_busy, fpu_v, fpu_err, fpu_result,
		//	fpu_flags);
		//
		assign	fpu_error = 1'b0; // Must only be true if fpu_v
		assign	fpu_busy  = 1'b0;
		assign	fpu_v = 1'b0;
		assign	fpu_result= 32'h00;
		assign	fpu_flags = 4'h0;
	end else begin
		assign	fpu_error = 1'b0;
		assign	fpu_busy  = 1'b0;
		assign	fpu_v = 1'b0;
		assign	fpu_result= 32'h00;
		assign	fpu_flags = 4'h0;
	end endgenerate
 
 
	// The instruction's condition holds when the masked flags equal the
	// required value (opb_F is { mask, value }).
	assign	set_cond = ((opb_F[7:4] & opb_Fl[3:0]) == opb_F[3:0]);

	// Register-write and flag-write strobes out of the execution stage.
	initial	alu_wr   = 1'b0;
	initial	alF_wr   = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst)
			{ alu_wr, alF_wr } <= 2'b00;
		else if (alu_ce)
			// alu_reg <= opR;
			{ alu_wr, alF_wr } <= { (opb_wR && set_cond),
						(opb_wF && set_cond) };
		else if (!alu_busy)
			// These are strobes: once the ALU is idle they fall
			// back to zero, except that a debugger write while
			// halted raises the register-write strobe.
			{ alu_wr, alF_wr } <= { (i_halt && i_dbg_we), 1'b0 };
	end
 
	// Phase bit of the instruction entering the execution stage.
	initial	alu_phase = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst)
			alu_phase <= 1'b0;
		else if (adf_ce_unconditional || mem_ce)
			alu_phase <= opb_phase;
	end

	// Destination register: that of the instruction entering the stage,
	// or the debugger's register on a debug write while halted.
	always @(posedge i_clk)
	begin
		if (adf_ce_unconditional)
			alu_reg <= opb_R;
		else if (i_halt && i_dbg_we)
			alu_reg <= i_dbg_reg;
	end
 
	//
	// DEBUG Register write access starts here
	//
	reg		dbgv;
	reg	[31:0]	dbg_val;
	// dbgv strobes for one clock when the debugger writes a register while
	// the CPU is out of reset and halted; dbg_val holds the debugger's
	// data, registered on every clock.
	initial	dbgv = 1'b0;
	always @(posedge i_clk)
	begin
		dbgv    <= !i_rst && i_halt && i_dbg_we && r_halted;
		dbg_val <= i_dbg_data;
	end
	// Whenever the execution stage is free (stage_ce), latch the gie bit
	// and the PC of the instruction waiting in the opb stage.
	// (alu_gie used to be loaded from op_gie, which is not a signal of
	// this module; the opb stage's gie register is opb_gie.)
	always @(posedge i_clk)
		if (stage_ce)
		begin
			alu_gie <= opb_gie;
			alu_pc  <= opb_pc;
		end
 
	// Illegal-instruction flag for the execution stage: dropped on a
	// pipeline clear, otherwise taken from opb whenever the stage is free.
	initial	alu_illegal = 1'b0;
	always @(posedge i_clk)
	begin
		if (clear_pipeline)
			alu_illegal <= 1'b0;
		else if (stage_ce)
			alu_illegal <= opb_illegal;
	end
 
	// r_alu_pc_v is raised when an ALU/divide/FPU instruction is accepted
	// and dropped once none of those units is busy, or on a pipeline
	// clear.  alu_pc_v qualifies it with "no unit busy right now".
	initial	r_alu_pc_v = 1'b0;
	initial	mem_pc_v = 1'b0;
	always @(posedge i_clk)
	begin
		if (i_rst)
			r_alu_pc_v <= 1'b0;
		else if (adf_ce_unconditional)//Includes&&(~alu_clear_pipeline)
			r_alu_pc_v <= 1'b1;
		else if (clear_pipeline
				|| !(alu_busy || div_busy || fpu_busy))
			r_alu_pc_v <= 1'b0;
	end
	assign	alu_pc_v = r_alu_pc_v && !(alu_busy || div_busy || fpu_busy);

	// mem_pc_v follows mem_ce by one clock.
	always @(posedge i_clk)
	begin
		if (i_rst)
			mem_pc_v <= 1'b0;
		else
			mem_pc_v <= mem_ce;
	end
 
	wire	bus_lock;

	// Bus lock counter: loaded with 3 when a lock instruction enters the
	// opb stage, then counted down on every clock on which a memory
	// operation is not being accepted into that stage.  The bus stays
	// locked while the count is non-zero.
	reg	[1:0]	r_bus_lock;
	initial	r_bus_lock = 2'b00;
	always @(posedge i_clk)
	begin
		if (i_rst)
			r_bus_lock <= 2'b00;
		else if (opb_ce && opb_lock)
			r_bus_lock <= 2'b11;
		else if ((|r_bus_lock) && !(opb_v_mem && opb_ce))
			r_bus_lock <= r_bus_lock - 2'b01;
	end
	assign	bus_lock = |r_bus_lock;
 
	// The memory unit.  It is started when a memory operation is issued
	// (mem_ce) and its condition holds, and is given the bus-lock request,
	// bit 0 of the operation, opb_B, opb_A and the destination register.
	// NOTE(review): opb_B / opb_A are presumably address and store data,
	// in that order -- ports are connected by position, so check against
	// the pipemem module definition.
	pipemem	#(AW,IMPLEMENT_LOCK) domem(i_clk, i_rst,(mem_ce)&&(set_cond), bus_lock,
				(opb_opn[0]), opb_B, opb_A, opb_R,
				mem_busy, mem_pipe_stalled,
				mem_v, bus_err, mem_wreg, mem_result,
			mem_cyc_gbl, mem_cyc_lcl,
				mem_stb_gbl, mem_stb_lcl,
				mem_we, mem_addr, mem_data,
				mem_ack, mem_stall, mem_err, i_wb_data);
 
	// A read is outstanding when the memory unit is busy with an
	// operation that is not a write.
	assign	mem_rdbusy = ((mem_busy)&&(~mem_we));
 
	// Either the prefetch or the instruction gets the memory bus, but 
	// never both.
	// Bus arbiter between the memory unit (listed first, in the priority
	// position) and the prefetch.  The prefetch never uses the local bus,
	// so its local cycle and strobe inputs are tied low.
	// NOTE(review): ports are connected by position -- check the order
	// against the wbdblpriarb module definition.
	wbdblpriarb	#(32,AW) pformem(i_clk, i_rst,
		// Memory access to the arbiter, priority position
		mem_cyc_gbl, mem_cyc_lcl, mem_stb_gbl, mem_stb_lcl,
			mem_we, mem_addr, mem_data, mem_ack, mem_stall, mem_err,
		// Prefetch access to the arbiter
		pf_cyc, 1'b0, pf_stb, 1'b0, pf_we, pf_addr, pf_data,
			pf_ack, pf_stall, pf_err,
		// Common wires, in and out, of the arbiter
		o_wb_gbl_cyc, o_wb_lcl_cyc, o_wb_gbl_stb, o_wb_lcl_stb, 
			o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_err);
 
 
 
	//
	//
	//
	//
	//
	//
	//
	//
	//	PIPELINE STAGE #5 :: Write-back results
	//
	//
 
	// Unlike previous versions of the writeback routine(s), this version
	// requires that everything be registered and clocked as soon as it is
	// valid.  So, let's start by clocking in our results.
	reg	[4:0]	r_wr_reg;
	reg	[31:0]	r_wr_val;
	reg		r_wr_ce, r_wr_err;

	// 1. Will a register be written?  Yes for a debugger write or a
	//    completed memory read; also for a valid ALU result with its write
	//    strobe set, or a finished divide/FPU operation, provided the
	//    pipeline is not being cleared and the instruction was legal.
	always @(posedge i_clk)
	begin
		r_wr_ce <= dbgv || mem_v
			|| (!clear_pipeline && !alu_illegal
				&& ((alu_wr && alu_v) || div_v || fpu_v));
	end
	assign	wr_reg_ce = r_wr_ce;
 
	// 2. Did the ALU/MEM/DIV/FPU stage produce an error of any type?
	//	a. Illegal instruction
	//	b. Division by zero
	// 	c. Floating point error
	//	d. Bus Error
	// these will be causes for an interrupt on the next clock after this
	// one.
	reg	r_wr_illegal;
	// Register the error conditions: a bus error, an illegal instruction
	// reaching the end of the execution stage, or a divide/FPU operation
	// finishing with its error flag set.  The illegal-instruction case is
	// also kept on its own.
	always @(posedge i_clk)
	begin
		r_wr_illegal <= alu_pc_v && alu_illegal;
		r_wr_err     <= bus_err
				|| (alu_pc_v && alu_illegal)
				|| (div_v && div_error)
				|| (fpu_v && fpu_error);
	end
 
	// Which register shall be written?
	//	Note that the alu_reg is the register to write on a divide or
	//	FPU operation.
	// ALU, divide and FPU results go to alu_reg; anything else is a
	// memory result and goes to the register named by the memory unit.
	always @(posedge i_clk)
	begin
		r_wr_reg <= (alu_wr || div_v || fpu_v) ? alu_reg : mem_wreg;
	end
	assign	wr_reg_id = r_wr_reg;

	// Is the target a CC register (either set, supervisor, or user)?
	assign	wr_write_cc  = (wr_reg_id[3:0] == `CPU_CC_REG);
	assign	wr_write_scc = (wr_reg_id[4:0] == { 1'b0, `CPU_CC_REG });
	assign	wr_write_ucc = (wr_reg_id[4:0] == { 1'b1, `CPU_CC_REG });
	// Is the target a PC register?
	assign	wr_write_pc  = (wr_reg_id[3:0] == `CPU_PC_REG);
 
	// What value to write?
	// Result selection, highest priority first: memory, divide, FPU,
	// debugger write, and finally the ALU.
	always @(posedge i_clk)
	begin
		r_wr_val <= (mem_v) ? mem_result
			: (div_v) ? div_result
			: (fpu_v) ? fpu_result
			: (dbgv)  ? dbg_val
			: alu_result;
	end
	// The same value is offered to general and special purpose registers.
	assign	wr_gpreg_vl = r_wr_val;
	assign	wr_spreg_vl = r_wr_val;
 
	// Do we write back our flags?
	// Flags are written back for an ALU flag-write strobe or a finished
	// divide/FPU operation, unless the pipeline is being cleared or the
	// instruction was illegal.
	reg	r_wr_flags_ce;
	initial	r_wr_flags_ce = 1'b0;
	always @(posedge i_clk)
	begin
		r_wr_flags_ce <= !clear_pipeline && !alu_illegal
					&& (alF_wr || div_v || fpu_v);
	end
	assign	wr_flags_ce = r_wr_flags_ce;
 
	// New flag values come from whichever unit just finished; the ALU is
	// the default.
	reg	[3:0]	r_wr_newflags;
	always @(posedge i_clk)
	begin
		if (div_v)
			r_wr_newflags <= div_flags;
		else if (fpu_v)
			r_wr_newflags <= fpu_flags;
		else
			r_wr_newflags <= alu_flags;
	end
 
	// Registered copies for the write-back clock: the gie of the
	// instruction (forced low for a debugger write), whether a PC value is
	// valid (from the ALU path unless the pipeline is being cleared, or
	// from the memory path), and the PC itself.
	reg			r_wr_gie;
	reg			r_wr_pc_v;
	reg	[(AW-1):0]	r_wr_pc;
	initial	r_wr_pc_v = 1'b0;
	always @(posedge i_clk)
	begin
		r_wr_gie  <= !dbgv && alu_gie;
		r_wr_pc_v <= mem_pc_v || (alu_pc_v && !clear_pipeline);
		r_wr_pc   <= alu_pc; // (alu_pc_v)?alu_pc : mem_pc;
	end
 
	////
	//
	// Write back, second clock
	//
	//	Commit the registered value to the register file whenever the
	//	registered write enable is set.
	//
	////
	always @(posedge i_clk)
	begin
		if (wr_reg_ce)
			regset[wr_reg_id] <= wr_gpreg_vl;
	end
 
 
	// Assemble the two views of the condition code register.  Reading
	// the concatenations from the right (least significant bit first):
	//	[3:0]	The condition flags.  If a flag write is in progress
	//		for the matching mode, the new flags are forwarded in
	//		place of the stored ones.
	//	[4]	sleep
	//	[5]	Constant: 1 in the user view, 0 in the supervisor view
	//	[6]	step (user view), constant 0 (supervisor view)
	//	[7]	Constant 0 (user view), break_en (supervisor view)
	//	[8]	Illegal instruction flag for that mode
	//	[9]	trap (shared by both views)
	//	[10]	Bus error flag for that mode
	//	[11]	Divide error flag for that mode
	//	[12]	FPU error flag for that mode
	//	[13]	Halt phase bit for that mode
	//
	// NOTE(review): wr_flags_ce and r_wr_newflags are both registered,
	// yet they are qualified here by the unregistered alu_gie rather
	// than by r_wr_gie -- confirm that the two always refer to the same
	// instruction.
	assign	w_uflags = { uhalt_phase, ufpu_err_flag,
			udiv_err_flag, ubus_err_flag, trap, ill_err_u,
			1'b0, step, 1'b1, sleep,
			((wr_flags_ce)&&(alu_gie))?r_wr_newflags:flags };
	assign	w_iflags = { ihalt_phase, ifpu_err_flag,
			idiv_err_flag, ibus_err_flag, trap, ill_err_i,
			break_en, 1'b0, 1'b0, sleep,
			((wr_flags_ce)&&(~alu_gie))?r_wr_newflags:iflags };
 
 
	// The two sets of condition flags: flags (user) and iflags
	// (supervisor).  Each follows the same two rules, in priority order:
	//	1. An explicit write to that mode's CC register loads the
	//		low four bits of the value written.
	//	2. Otherwise, a flag write from an instruction running in
	//		that mode loads the registered new flags.
	always @(posedge i_clk)
	begin
		// User flags
		if ((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce))
			flags <= wr_gpreg_vl[3:0];
		else if ((alu_gie)&&(wr_flags_ce))
			flags <= r_wr_newflags;

		// Supervisor flags
		if ((wr_write_cc)&&(~wr_reg_id[4])&&(wr_reg_ce))
			iflags <= wr_gpreg_vl[3:0];
		else if ((~alu_gie)&&(wr_flags_ce))
			iflags <= r_wr_newflags;
	end
 
	// The 'break' enable bit.  This bit can only be set from supervisor
	// mode.  It controls what the CPU does upon encountering a break
	// instruction.
	//
	// Upon encountering a break, the goal is for the CPU to stop without
	// executing the break instruction, and to either halt or enter
	// interrupt mode first:
	//	if ((break_en) AND (break_instruction)) // user mode or not
	//		HALT CPU
	//	else if (break_instruction) // only in user mode
	//		set an interrupt flag, set the user break bit,
	//		go to supervisor mode, allow supervisor to step the CPU.
	//	Any break condition is reset upon a CPU halt.  It is then up
	//	to the external debugger to deal with whatever condition has
	//	taken place.
	initial	break_en = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_halt)||(i_rst))
			break_en <= 1'b0;
		else if ((wr_write_cc)&&(~wr_reg_id[4])&&(wr_reg_ce))
			// A write to the supervisor CC register
			break_en <= wr_spreg_vl[`CPU_BREAK_BIT];
	end
 
	// pipe_busy: a registered indication that one of the execution units
	// (memory, ALU, divide, or FPU) either accepted an operation on the
	// previous clock, or was still busy with one.  The o_break logic uses
	// it to hold off reporting a break until these units have gone idle.
	reg	pipe_busy;
	// A blocking assignment is used here, matching the other initial
	// statements nearby.  Nonblocking assignments within an initial block
	// draw lint warnings from some tools.
	initial	pipe_busy = 1'b0;
	always @(posedge i_clk)
		pipe_busy <= ((mem_ce)||(alu_ce)||(div_ce)||(fpu_ce))
			||((alu_busy)||(mem_busy)||(div_busy)||(fpu_busy));
 
	// o_break is raised in either of two cases:
	//	1. An error has been registered (r_wr_err) while r_wr_gie is
	//		clear.
	//	2. op_break is set while pipe_busy is low and no pipeline
	//		clear is in progress.
	assign	o_break = ((r_wr_err)&&(~r_wr_gie))
			||((~clear_pipeline)&&(~pipe_busy)&&(op_break));
 
 
	// Interrupt requests.  These are the signals that clear the GIE bit.
	//
	// The key difference between the two kinds is that a fast interrupt
	// requires the pipeline to be cleared, whereas a slow interrupt does
	// not.
	reg	slow_interrupt, fast_interrupt;
	initial	slow_interrupt = 1'b0;
	initial	fast_interrupt = 1'b0;
	always @(posedge i_clk)
	begin
		// Slow interrupts, only possible while GIE is set:
		//   - a break instruction while break enable is clear.
		//     This one is slow because the logic ahead of the ALU
		//     keeps the break from moving forward.
		//   - the external interrupt line
		slow_interrupt <= (gie)&&(
				((~break_en)&&(op_break))
				||(i_interrupt));

		// Fast interrupts, possible while either gie or alu_gie is
		// set:
		//   - some form of error was encountered in our instruction
		//   - a write to the user CC register having the GIE bit
		//     clear
		//   - a stepped instruction has completed (outside of a bus
		//     lock, and with alu_phase clear)
		fast_interrupt <= ((alu_gie)||(gie))&&(
			(r_wr_err)
			||((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce)
				&&(~wr_spreg_vl[`CPU_GIE_BIT]))
			||((~bus_lock)&&(~alu_phase)&&(step)&&(r_wr_pc_v)));
	end

	assign	w_switch_to_interrupt = fast_interrupt;
 
	// Release from interrupt (supervisor) mode takes place when, with GIE
	// clear and no interrupt pending, a write made with r_wr_gie clear
	// sets the GIE bit of the supervisor CC register.
	assign	w_release_from_interrupt = (~i_interrupt)&&(~gie)
			&&(wr_write_cc)&&(~wr_reg_id[4])
			&&(wr_spreg_vl[`CPU_GIE_BIT])
			&&(~r_wr_gie)&&(wr_reg_ce);

	// The GIE bit itself.  A reset or any interrupt clears it, and takes
	// priority over a release from interrupt, which sets it.
	always @(posedge i_clk)
	begin
		if ((i_rst)||(slow_interrupt)||(fast_interrupt))
			gie <= 1'b0;
		else if (w_release_from_interrupt)
			gie <= 1'b1;
	end
 
	// The trap bit.
	//	Cleared on reset and upon any release from interrupt.
	//	Set when a write made with r_wr_gie set clears the GIE bit of a
	//	CC register (that the register is the user's is implied).
	//	Otherwise loaded from the trap bit of any other write to the
	//	user CC register.
	initial	trap = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_rst)||(w_release_from_interrupt))
			trap <= 1'b0;
		else if ((wr_write_cc)&&(wr_reg_ce)&&(r_wr_gie)
				&&(~wr_spreg_vl[`CPU_GIE_BIT]))
			trap <= 1'b1;
		else if ((wr_reg_id[4])&&(wr_write_cc)&&(wr_reg_ce))
			trap <= wr_spreg_vl[`CPU_TRAP_BIT];
	end
 
	// The sleep register.  Setting the sleep register causes the CPU to
	// sleep until the next interrupt.  Setting the sleep register within
	// interrupt mode causes the processor to halt until a reset.  This is
	// a panic/fault halt.  The trick is that you cannot be allowed to
	// set the sleep bit and switch to supervisor mode in the same 
	// instruction: users are not allowed to halt the CPU.
	//
	// Priority, highest first:
	//	1. Reset or a slow interrupt clears the bit.  (A fast
	//		interrupt is not part of this condition.)
	//	2. A CC write made with r_wr_gie clear.  Note that this
	//		branch does not examine wr_reg_id[4], so it applies to
	//		a write to either CC register.
	//	3. A CC write made with r_wr_gie set that leaves the GIE bit
	//		set.
	always @(posedge i_clk)
		if ((i_rst)||(slow_interrupt))
			sleep <= 1'b0;
		else if ((wr_reg_ce)&&(wr_write_cc)&&(~r_wr_gie))
			// In supervisor mode, we have no protections.  The
			// supervisor can set the sleep bit however he wants.
			// Well ... not quite.  Switching to user mode and
			// sleep mode should only be possible if the interrupt
			// flag isn't set.
			//	Thus: if (i_interrupt)&&(wr_spreg_vl[GIE])
			//		don't set the sleep bit
			//	otherwise, set it however it would otherwise
			//	have been set
			sleep <= (wr_spreg_vl[`CPU_SLEEP_BIT])
				&&((~i_interrupt)||(~wr_spreg_vl[`CPU_GIE_BIT]));
		else if ((wr_reg_ce)&&(wr_write_cc)&&(wr_spreg_vl[`CPU_GIE_BIT]))
			// In user mode, however, you can only set the sleep
			// mode while remaining in user mode.  You can't switch
			// to sleep mode *and* supervisor mode at the same
			// time, lest you halt the CPU.
			sleep <= wr_spreg_vl[`CPU_SLEEP_BIT];
 
	// The step bit.
	//	Cleared on reset or on any fast interrupt.
	//	Loaded from a write to the user CC register made while alu_gie
	//	is clear.
	//	Cleared again once an instruction's PC becomes valid while both
	//	step and gie are set.
	always @(posedge i_clk)
	begin
		if ((fast_interrupt)||(i_rst))
			step <= 1'b0;
		else if ((wr_write_cc)&&(wr_reg_id[4])
				&&(~alu_gie)&&(wr_reg_ce))
			step <= wr_spreg_vl[`CPU_STEP_BIT];
		else if ((gie)&&(step)&&((mem_pc_v)||(alu_pc_v)))
			step <= 1'b0;
	end
 
 
	// Supervisor illegal instruction flag.  Apart from a reset, only
	// the debug interface can clear this bit, by writing a zero to the
	// illegal instruction bit of the supervisor CC register.  The bit is
	// set by any illegal instruction registered with r_wr_gie clear.
	initial	ill_err_i = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_rst)||((dbgv)&&(~wr_spreg_vl[`CPU_ILL_BIT])
				&&(wr_reg_id == {1'b0, `CPU_CC_REG})))
			ill_err_i <= 1'b0;
		else if ((~r_wr_gie)&&(r_wr_illegal))
			ill_err_i <= 1'b1;
	end

	// User illegal instruction flag.  This bit is cleared
	//   - on reset,
	//   - automatically, on any release from interrupt, and
	//   - whenever the supervisor writes a zero to this bit of the
	//	user CC register.
	// It is set by any illegal instruction registered with r_wr_gie set.
	initial	ill_err_u = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_rst)||(w_release_from_interrupt)
			||((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce)
				&&(~wr_spreg_vl[`CPU_ILL_BIT])&&(~r_wr_gie)))
			ill_err_u <= 1'b0;
		else if ((r_wr_illegal)&&(r_wr_gie))
			ill_err_u <= 1'b1;
	end
	// Supervisor/interrupt bus error flag -- should this ever be set, the
	// CPU will crash.  It is cleared by reset, or by the debug interface
	// writing a zero to the bus error bit of the supervisor CC register.
	initial	ibus_err_flag = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_rst)||((dbgv)&&(~wr_spreg_vl[`CPU_BUSERR_BIT])
				&&(wr_reg_id == {1'b0, `CPU_CC_REG})))
			ibus_err_flag <= 1'b0;
		else if ((~alu_gie)&&(bus_err))
			ibus_err_flag <= 1'b1;
	end

	// User bus error flag -- should this ever be set, an interrupt to
	// supervisor mode will follow.  It is cleared by reset, by a release
	// from interrupt, or by a write (made either with alu_gie clear or by
	// the debug interface) of a zero to this bit of the user CC register.
	initial	ubus_err_flag = 1'b0;
	always @(posedge i_clk)
	begin
		if ((i_rst)||(w_release_from_interrupt)
			||((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce)
				&&(~wr_spreg_vl[`CPU_BUSERR_BIT])
				&&((dbgv)||(~alu_gie))))
			ubus_err_flag <= 1'b0;
		else if ((alu_gie)&&(bus_err))
			ubus_err_flag <= 1'b1;
	end
 
	reg	r_idiv_err_flag, r_udiv_err_flag;
	initial	r_idiv_err_flag = 1'b0;
	initial	r_udiv_err_flag = 1'b0;

	// Supervisor/interrupt divide (by zero) error flag -- should this
	// ever be set, the CPU will crash.  The bit is kept so that we can
	// later tell if, and why, the CPU crashed.  Only a reset, or the
	// debug interface writing a zero to this bit of the supervisor CC
	// register, will clear it.
	always @(posedge i_clk)
	begin
		if ((i_rst)||((dbgv)&&(~wr_spreg_vl[`CPU_DIVERR_BIT])
				&&(wr_reg_id == {1'b0, `CPU_CC_REG})))
			r_idiv_err_flag <= 1'b0;
		else if ((~r_wr_gie)&&(div_v)&&(div_error))
			r_idiv_err_flag <= 1'b1;
	end

	// User divide (by zero) error flag -- should this ever be set, a
	// sudden switch (interrupt) to supervisor mode will follow.  It is
	// cleared by reset, by a release from interrupt, or by a write (made
	// either with r_wr_gie clear or by the debug interface) of a zero to
	// this bit of the user CC register.
	always @(posedge i_clk)
	begin
		if ((i_rst)||(w_release_from_interrupt)
			||((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce)
				&&(~wr_spreg_vl[`CPU_DIVERR_BIT])
				&&((dbgv)||(~r_wr_gie))))
			r_udiv_err_flag <= 1'b0;
		else if ((div_v)&&(r_wr_gie)&&(div_error))
			r_udiv_err_flag <= 1'b1;
	end

	assign	idiv_err_flag = r_idiv_err_flag;
	assign	udiv_err_flag = r_udiv_err_flag;
 
	generate
	if (IMPLEMENT_FPU !=0)
	begin
		reg		r_ifpu_err_flag, r_ufpu_err_flag;
		initial	r_ifpu_err_flag = 1'b0;
		initial	r_ufpu_err_flag = 1'b0;

		// Supervisor/interrupt floating point error flag -- should
		// this ever be set, the CPU will crash.  Only a reset, or the
		// debug interface writing a zero to this bit of the supervisor
		// CC register, will clear it.
		always @(posedge i_clk)
		begin
			if ((i_rst)||((dbgv)&&(~wr_spreg_vl[`CPU_FPUERR_BIT])
					&&(wr_reg_id == {1'b0, `CPU_CC_REG})))
				r_ifpu_err_flag <= 1'b0;
			else if ((~r_wr_gie)&&(fpu_v)&&(fpu_error))
				r_ifpu_err_flag <= 1'b1;
		end

		// User floating point error flag -- should this ever be set,
		// a sudden switch (interrupt) to supervisor mode will follow.
		// It is cleared by reset, by a release from interrupt, or by
		// a write (made either with r_wr_gie clear or by the debug
		// interface) of a zero to this bit of the user CC register.
		always @(posedge i_clk)
		begin
			if ((i_rst)||(w_release_from_interrupt)
				||((wr_write_cc)&&(wr_reg_id[4])&&(wr_reg_ce)
					&&(~wr_spreg_vl[`CPU_FPUERR_BIT])
					&&((dbgv)||(~r_wr_gie))))
				r_ufpu_err_flag <= 1'b0;
			else if ((fpu_v)&&(r_wr_gie)&&(fpu_error))
				r_ufpu_err_flag <= 1'b1;
		end

		assign	ifpu_err_flag = r_ifpu_err_flag;
		assign	ufpu_err_flag = r_ufpu_err_flag;
	end else begin
		// Without an FPU, neither error flag can ever be set.
		assign	ifpu_err_flag = 1'b0;
		assign	ufpu_err_flag = 1'b0;
	end endgenerate
 
`ifdef	OPT_VLIW
	// The halt phase bits, one per mode.  Each holds a registered copy
	// of alu_phase.  These are the bits that appear at the top of
	// w_iflags and w_uflags.
	reg		r_ihalt_phase, r_uhalt_phase;
 
	initial	r_ihalt_phase = 0;
	initial	r_uhalt_phase = 0;
	// Supervisor copy: cleared on reset, and otherwise updated from
	// alu_phase whenever alu_pc_v is set with alu_gie clear and no
	// pipeline clear in progress.
	always @(posedge i_clk)
		if (i_rst)
			r_ihalt_phase <= 1'b0;
		else if ((~alu_gie)&&(alu_pc_v)&&(~clear_pipeline))
			r_ihalt_phase <= alu_phase;
	// User copy: follows alu_phase on every clock that r_wr_gie is set,
	// and is cleared on a release from interrupt.
	// NOTE(review): unlike the supervisor copy, this one has no reset
	// term, is not qualified by alu_pc_v or clear_pipeline, and keys off
	// of r_wr_gie rather than alu_gie -- confirm this is intentional.
	always @(posedge i_clk)
		if (r_wr_gie)
			r_uhalt_phase <= alu_phase;
		else if (w_release_from_interrupt)
			r_uhalt_phase <= 1'b0;
 
	assign	ihalt_phase = r_ihalt_phase;
	assign	uhalt_phase = r_uhalt_phase;
`else
	// Without OPT_VLIW, both halt phase bits are tied low.
	assign	ihalt_phase = 1'b0;
	assign	uhalt_phase = 1'b0;
`endif
 
	//
	// Write backs to the PC register, and general increments of it
	//	Two program counters are kept: upc and ipc.  A normal (user)
	// instruction advances upc, while an interrupt level instruction
	// advances ipc.  An instruction that writes the PC writes whichever
	// of the two its register ID selects.
	//
	// Open questions:
	//	Do we need to clear all of our partial results from the
	//	pipeline?  What happens when the pipeline holds both gie and
	//	~gie instructions?  Do we clear both?  What if a gie
	//	instruction tries to clear a non-gie instruction?
	//
	// User program counter.  Note that it has no reset value.
	always @(posedge i_clk)
	begin
		if ((wr_write_pc)&&(wr_reg_id[4])&&(wr_reg_ce))
			upc <= wr_spreg_vl[(AW-1):0];
		else if ((r_wr_gie)
				&&((mem_pc_v)
				||((~clear_pipeline)&&(alu_pc_v))))
			upc <= alu_pc;
	end

	// Supervisor program counter, starting from RESET_ADDRESS.
	always @(posedge i_clk)
	begin
		if (i_rst)
			ipc <= RESET_ADDRESS;
		else if ((wr_write_pc)&&(~wr_reg_id[4])&&(wr_reg_ce))
			ipc <= wr_spreg_vl[(AW-1):0];
		else if ((~r_wr_gie)
				&&((mem_pc_v)
				||((~clear_pipeline)&&(alu_pc_v))))
			ipc <= alu_pc;
	end
 
	// The prefetch program counter, pf_pc.  In priority order:
	//	1. Reset loads RESET_ADDRESS.
	//	2. A switch to interrupt mode, or an instruction cache clear
	//		while gie is low, reloads from ipc.
	//	3. A release from interrupt, or an instruction cache clear
	//		while gie is high, reloads from upc.
	//	4. A write to the PC register of the currently active
	//		register set loads the value written.
	//	5. (Pipelined)  An early branch found in the decoder loads
	//		the address following the branch target, and otherwise
	//		the PC steps forward by one upon a new PC or whenever a
	//		valid prefetch result is accepted by the decoder.
	//	   (Non-pipelined)  The PC follows alu_pc as instructions of
	//		the current mode complete.
	always @(posedge i_clk)
		if (i_rst)
			pf_pc <= RESET_ADDRESS;
		else if ((w_switch_to_interrupt)||((~gie)&&(w_clear_icache)))
			pf_pc <= ipc;
		else if ((w_release_from_interrupt)||((gie)&&(w_clear_icache)))
			pf_pc <= upc;
		else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
			pf_pc <= wr_spreg_vl[(AW-1):0];
`ifdef	OPT_PIPELINED
		else if ((dcd_early_branch)&&(~clear_pipeline))
			// The increment is written as an AW-bit constant, as
			// it is in the branch below, rather than as an
			// unsized (32-bit) literal.  The value loaded is the
			// same, only without an implicit truncation.
			pf_pc <= dcd_branch_pc + {{(AW-1){1'b0}},1'b1};
		else if ((new_pc)||((~dcd_stalled)&&(pf_v)))
			pf_pc <= pf_pc + {{(AW-1){1'b0}},1'b1};
`else
		else if ((alu_gie==gie)&&(
				((alu_pc_v)&&(~clear_pipeline))
				||(mem_pc_v)))
			pf_pc <= alu_pc;
`endif
 
	// new_pc is high for the one clock following any event that
	// redirects the prefetch: a reset, a prefetch cache clear request, a
	// switch into or release from interrupt mode, or a write to the PC
	// register of the currently active register set.
	initial	new_pc = 1'b1;
	always @(posedge i_clk)
	begin
		if ((i_rst)||(i_clear_pf_cache)
				||(w_switch_to_interrupt)
				||(w_release_from_interrupt)
				||((wr_reg_ce)&&(wr_reg_id[4] == gie)
					&&(wr_write_pc)))
			new_pc <= 1'b1;
		else
			new_pc <= 1'b0;
	end
 
`ifdef	OPT_PIPELINED
	// Instruction cache clear request.  It is asserted on reset or upon
	// an external request (i_clear_pf_cache), may be requested via the
	// clear cache bit of a write to the supervisor CC register, and
	// drops again on any clock where none of those apply.
	reg	r_clear_icache;
	initial	r_clear_icache = 1'b1;
	always @(posedge i_clk)
	begin
		if ((i_clear_pf_cache)||(i_rst))
			r_clear_icache <= 1'b1;
		else if ((wr_write_scc)&&(wr_reg_ce))
			r_clear_icache <=  wr_spreg_vl[`CPU_CLRCACHE_BIT];
		else
			r_clear_icache <= 1'b0;
	end
	assign	w_clear_icache = r_clear_icache;
`else
	// No cache clear request is ever generated when not pipelined.
	assign	w_clear_icache = 1'b0;
`endif
 
	//
	// The debug interface
	//	Registered read of the register selected by i_dbg_reg.  The
	//	default is the value found in the register file.  The later
	//	nonblocking assignments within the same block then override
	//	that default for the two special registers:
	//	  PC:	upc or ipc, as selected by i_dbg_reg[4]
	//	  CC:	the low bits are replaced with w_uflags or w_iflags,
	//		the top nine bits with w_cpu_info, and the GIE bit
	//		with gie.  The remaining bits keep the register file's
	//		contents.
	//	Two copies of the logic are needed since, when AW is 32, the
	//	zero padding placed in front of the PC would have a width of
	//	zero.
	//	NOTE(review): the w_uflags and w_iflags concatenations appear
	//	to be fewer than 15 bits wide -- confirm their declared width
	//	against the [14:0] slice assigned below.
	generate
	if (AW<32)
	begin
		always @(posedge i_clk)
		begin
			o_dbg_reg <= regset[i_dbg_reg];
			if (i_dbg_reg[3:0] == `CPU_PC_REG)
				// Zero extend the AW bit PC out to 32 bits
				o_dbg_reg <= {{(32-AW){1'b0}},(i_dbg_reg[4])?upc:ipc};
			else if (i_dbg_reg[3:0] == `CPU_CC_REG)
			begin
				o_dbg_reg[14:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
				o_dbg_reg[31:23] <= w_cpu_info;
				o_dbg_reg[`CPU_GIE_BIT] <= gie;
			end
		end
	end else begin
		always @(posedge i_clk)
		begin
			o_dbg_reg <= regset[i_dbg_reg];
			if (i_dbg_reg[3:0] == `CPU_PC_REG)
				// No padding is taken in this branch
				o_dbg_reg <= (i_dbg_reg[4])?upc:ipc;
			else if (i_dbg_reg[3:0] == `CPU_CC_REG)
			begin
				o_dbg_reg[14:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
				o_dbg_reg[31:23] <= w_cpu_info;
				o_dbg_reg[`CPU_GIE_BIT] <= gie;
			end
		end
	end endgenerate
 
	// Debug status outputs
	always @(posedge i_clk)
	begin
		// A four bit status summary for the debugger
		o_dbg_cc <= { o_break, bus_err, gie, sleep };

		// The CPU is only considered halted once, with a halt being
		// requested,
		//   - every long lasting operation has completed: no
		//	prefetch bus cycle, and no busy memory, ALU, divide,
		//	or FPU unit,
		//   - the operands are either valid, or the decoded
		//	instruction was illegal (or we are in reset), and
		//   - the decode stage is either valid, or the prefetched
		//	instruction was illegal (or we are in reset).
		r_halted <= (i_halt)
			&&(~fpu_busy)&&(~div_busy)
			&&(~alu_busy)&&(~mem_busy)&&(~pf_cyc)
			&&((dcd_illegal)||(i_rst)||(opb_v))
			&&((pf_illegal)||(i_rst)||(dcdvalid));
	end
	// The debugger is stalled until the CPU has come to a halt.
	assign	o_dbg_stall = ~r_halted;
 
	//
	//
	// Accounting outputs: these report on CPU stalls and on completed
	// instructions, so that we may later evaluate how well we are doing.
	//	o_i_count	An instruction's PC is valid, and is not being
	//			discarded by a pipeline clear
	//	o_pf_stall	The CPU is enabled, but the prefetch has no
	//			valid instruction to offer
	//	o_op_stall	The CPU is enabled, but the operand stage is
	//			stalled
	//
	//
	assign	o_i_count  = (~clear_pipeline)&&(alu_pc_v);
	assign	o_pf_stall = (~pf_v)&&(master_ce);
	assign	o_op_stall = (op_stall)&&(master_ce);
 
`ifdef	DEBUG_SCOPE
	// Debug scope output, only built when DEBUG_SCOPE is defined.  One
	// word of internal state is registered on every clock.  The several
	// commented out field lists below are alternate selections kept for
	// reference; only the uncommented list is active.  Its fields, from
	// the most significant end, are:
	//	5 bits:  i_wb_err, gie, alu_illegal, a new PC or early branch
	//		indication, and mem_busy
	//	11 bits: while the memory unit is busy, the bus strobe, the
	//		write enable, and the low nine address bits;
	//		otherwise the top eleven bits of the instruction
	//	1 bit:   pf_v
	//	15 bits: when pf_v is set, the low bits of alu_pc; otherwise
	//		pf_cyc, pf_stb, and the low thirteen bits of pf_pc
	always @(posedge i_clk)
		o_debug <= {
		/*
			o_break, i_wb_err, pf_pc[1:0],
			flags,
			pf_v, dcdvalid, opvalid, alu_v, mem_v,
			op_ce, alu_ce, mem_ce,
			//
			master_ce, opvalid_alu, opvalid_mem,
			//
			alu_stall, mem_busy, op_pipe, mem_pipe_stalled,
			mem_we,
			// ((opvalid_alu)&&(alu_stall))
			// ||((opvalid_mem)&&(~op_pipe)&&(mem_busy))
			// ||((opvalid_mem)&&( op_pipe)&&(mem_pipe_stalled)));
			// opA[23:20], opA[3:0],
			gie, sleep, wr_reg_ce, wr_gpreg_vl[4:0]
		*/
		/*
			i_rst, master_ce, (new_pc),
			((dcd_early_branch)&&(dcdvalid)),
			pf_v, pf_illegal,
			op_ce, dcd_ce, dcdvalid, dcd_stalled,
			pf_cyc, pf_stb, pf_we, pf_ack, pf_stall, pf_err,
			pf_pc[7:0], pf_addr[7:0]
		*/
 
			i_wb_err, gie, alu_illegal,
			      (new_pc)||((dcd_early_branch)&&(~clear_pipeline)),
			mem_busy,
				(mem_busy)?{ (o_wb_gbl_stb|o_wb_lcl_stb), o_wb_we,
					o_wb_addr[8:0] }
					: { instruction[31:21] },
			pf_v, (pf_v) ? alu_pc[14:0]
				:{ pf_cyc, pf_stb, pf_pc[12:0] }
 
		/*
			i_wb_err, gie, new_pc, dcd_early_branch,	// 4
			pf_v, pf_cyc, pf_stb, instruction_pc[0],	// 4
			instruction[30:27],				// 4
			dcd_gie, mem_busy, o_wb_gbl_cyc, o_wb_gbl_stb,	// 4
			dcdvalid,
			((dcd_early_branch)&&(~clear_pipeline))		// 15
					? dcd_branch_pc[14:0]:pf_pc[14:0]
		*/
			};
`endif
 
endmodule
Go to most recent revision | Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [zipcpuhs.v] - Rev 53