OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [zipcpu.v] - Rev 11

Go to most recent revision | Compare with Previous | Blame | View Log

///////////////////////////////////////////////////////////////////////////////
//
// Filename:	zipcpu.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	This is the top level module holding the core of the Zip CPU
//		together.  The Zip CPU is designed to be as simple as possible.
//		The instruction set is about as RISC as you can get, there are
//		only 16 instruction types supported (of which one isn't yet
//		supported ...)  Please see the accompanying iset.html file
//		for a description of these instructions.
//
//		All instructions are 32-bits wide.  All bus accesses, both
//		address and data, are 32-bits over a wishbone bus.
//
//	The Zip CPU is fully pipelined with the following pipeline stages:
//
//		1. Prefetch, returns the instruction from memory.  On the
//		Basys board that I'm working on, one instruction may be
//		issued every 20 clocks or so, unless and until I implement a
//		cache or local memory.
//
//		2. Instruction Decode
//
//		3. Read Operands
//
//		4. Apply Instruction
//
//		5. Write-back Results
//
//	A lot of difficult work has been placed into the pipeline stall
//	handling.  My original proposal was not to allow pipeline stalls at all.
//	The idea would be that the CPU would just run every clock and whatever
//	stalled answer took place would just get fixed a clock or two later,
//	meaning that the compiler could just schedule everything out.
//	This idea died at the memory interface, which can take a variable
//	amount of time to read or write any value, thus the whole CPU needed
//	to stall on a stalled memory access.
//
//	My next idea was to just let things complete.  I.e., once an instruction
//	starts, it continues to completion no matter what and we go on.  This
//	failed at writing the PC.  If the PC gets written in something such as
//	a MOV PC,PC+5 instruction, 3 (or however long the pipeline is) clocks
//	later, if whether or not something happens in those clocks depends
//	upon the instruction fetch filling the pipeline, then the CPU has a
//	non-deterministic behavior.
//
//	This leads to two possibilities: either *everything* stalls upon a 
//	stall condition, or partial results need to be destroyed before
//	they are written.  This is made more difficult by the fact that
//	once a command is written to the memory unit, whether it be a
//	read or a write, there is no undoing it--since peripherals on the
//	bus may act upon the answer with whatever side effects they might
//	have.  (For example, writing a '1' to the interrupt register will
//	clear certain interrupts ...)  Further, since the memory ops depend
//	upon conditions, we'll need to wait for the condition codes to
//	be available before executing a memory op.  Thus, memory ops can 
//	proceed without stalling whenever either the previous instruction
//	doesn't write the flags register, or when the memory instruction doesn't
//	depend upon the flags register.
//
//	The other possibility is that we leave independent instruction
//	execution behind, so that the pipeline is always full and stalls,
//	or moves forward, together on every clock.
//
//	For now, we pick the first approach: independent instruction execution.
//	Thus, if stage 2 stalls, stages 3-5 may still complete the instructions
//	in their pipeline.  This leaves another problem: what happens on a
//	MOV -1+PC,PC instruction?  There will be four instructions behind this
//	one (or is it five?) that will need to be 'cancelled'.  So here's
//	the plan: Anything can be cancelled before the ALU/MEM stage,
//	since memory ops cannot be canceled after being issued.  Thus, the
//	ALU/MEM stage must stall if any prior instruction is going to write
//	the PC register (i.e. JMP).
//
//	Further, let's define a "STALL" as a reason to not execute a stage
//	due to some condition at or beyond the stage, and let's define
//	a VALID flag to mean that this stage has completed.  Thus, the clock
//	enable for a stage is (STG[n-1]VALID)&&((~STG[n]VALID)||(~STG[n]STALL)).
//	The ALU/MEM stages will also depend upon a master clock enable
//	(~SLEEP) condition as well.
//
//
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////////
//
// Index (low four bits of a 5-bit register id) of the program counter
`define	CPU_PC_REG	4'hf
// Index (low four bits of a 5-bit register id) of the condition code register
`define	CPU_CC_REG	4'he
// Bit positions, within a 32-bit CC register value, of the control bits that
// sit above the four condition flags held in bits [3:0]
`define	CPU_BREAK_BIT	7
`define	CPU_STEP_BIT	6
`define	CPU_GIE_BIT	5
`define	CPU_SLEEP_BIT	4
// zipcpu: the CPU core.  Ports are grouped as clock/reset/interrupt, the
// debug interface, the wishbone master interface, and accounting outputs.
module	zipcpu(i_clk, i_rst, i_interrupt,
		// Debug interface
		i_halt, i_dbg_reg, i_dbg_we, i_dbg_data,
			o_dbg_stall, o_dbg_reg,
			o_break,
		// CPU interface to the wishbone bus
		o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_data,
		// Accounting/CPU usage interface
		o_op_stall, o_pf_stall, o_i_count);
	// Value loaded into ipc and pf_pc on reset (also handed to pipefetch)
	parameter	RESET_ADDRESS=32'h0100000;
	input			i_clk, i_rst, i_interrupt;
	// Debug interface -- inputs
	//   i_halt stops instruction issue (it gates master_ce); the debug
	//   write paths in this module are only honoured while it is high.
	input			i_halt;
	//   5-bit register id: { bank, register number }
	input		[4:0]	i_dbg_reg;
	input			i_dbg_we;
	input		[31:0]	i_dbg_data;
	// Debug interface -- outputs
	output	reg		o_dbg_stall;
	output	reg	[31:0]	o_dbg_reg;
	//   High when a break instruction is pending and break_en is set
	output	wire		o_break;
	// Wishbone interface -- outputs
	output	wire		o_wb_cyc, o_wb_stb, o_wb_we;
	output	wire	[31:0]	o_wb_addr, o_wb_data;
	// Wishbone interface -- inputs
	input			i_wb_ack, i_wb_stall;
	input		[31:0]	i_wb_data;
	// Accounting outputs ... to help us count stalls and usage
	output	wire		o_op_stall;
	output	wire		o_pf_stall;
	output	wire		o_i_count;
 
 
	// Registers
	//   32 entries addressed by a 5-bit id.  The decoder forms ids as
	//   { gie, 4-bit register number }, so the top bit selects the bank.
	//   The PC and CC entries are shadowed by upc/ipc and flags/iflags.
	reg	[31:0]	regset [0:31];
 
	// Condition codes
	//   flags/iflags hold only the four condition bits (V, N, C, Z), one
	//   copy per bank.  w_uflags/w_iflags are the 8-bit views that put
	//   the control bits (BREAKEN,STEP,GIE,SLEEP) above those four bits.
	reg	[3:0]	flags, iflags;	// V, N, C, Z
	wire	[7:0]	w_uflags, w_iflags;
	reg		break_en, step, gie, sleep;
 
	// The master chip enable
	wire		master_ce;
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Variable declarations
	//
	reg	[31:0]	pf_pc;
	// Set for one clock whenever pf_pc is redirected; flushes the pipeline
	reg		new_pc;
 
	wire		dcd_stalled;
	// NOTE: pf_busy is declared but not referenced elsewhere in this file
	wire		pf_cyc, pf_stb, pf_we, pf_busy, pf_ack, pf_stall;
	wire	[31:0]	pf_addr, pf_data;
	wire	[31:0]	instruction, instruction_pc;
	wire	pf_valid, instruction_gie;
 
	//
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Variable declarations
	//
	//
	reg		opvalid, op_wr_pc, op_break;
	wire		op_stall, dcd_ce;
	reg	[3:0]	dcdOp;
	reg	[4:0]	dcdA, dcdB;
	// { unconditional, 3-bit condition code }
	reg	[3:0]	dcdF;
	reg		dcdA_rd, dcdA_wr, dcdB_rd, dcdvalid,
				dcdM, dcdF_wr, dcd_gie, dcd_break;
	reg	[31:0]	dcd_pc;
	// 24-bit immediate; sign extended to 32 bits as dcdI when used
	reg	[23:0]	r_dcdI;
	wire	dcdA_stall, dcdB_stall, dcdF_stall;
 
 
 
	//
	//
	//	PIPELINE STAGE #3 :: Read Operands
	//		Variable declarations
	//
	//
	//
	// Now, let's read our operands
	reg	[4:0]	alu_reg;
	reg	[3:0]	opn;
	reg	[4:0]	opR;
	// { bank, operand-is-the-CC-register } for each operand
	reg	[1:0]	opA_cc, opB_cc;
	reg	[31:0]	r_opA, r_opB, op_pc;
	// NOTE: opA_nowait and opB_nowait are declared but not referenced
	// elsewhere in this file
	wire	[31:0]	opA_nowait, opB_nowait, opA, opB;
	reg		opR_wr, opM, opF_wr, op_gie,
			opA_rd, opB_rd;
	// Full 8-bit flags view for the bank of the instruction in this stage
	wire	[7:0]	opFl;
	// Compact 7-bit encoding of the condition; expanded into opF
	reg	[6:0]	r_opF;
	// { always, 4-bit flag mask, 4-bit required value }
	wire	[8:0]	opF;
	wire		op_ce;
 
 
 
	//
	//
	//	PIPELINE STAGE #4 :: ALU / Memory
	//		Variable declarations
	//
	//
	// PC of the instruction most recently issued to the ALU or memory
	// unit, and a one-clock flag marking that an instruction was issued.
	reg	[31:0]	alu_pc;
	reg		alu_pc_valid;
	wire		alu_ce, alu_stall;
	wire	[31:0]	alu_result;
	wire	[3:0]	alu_flags;
	wire		alu_valid;
	// True when the current flags satisfy the condition encoded in opF
	wire		set_cond;
	// alu_wr/alF_wr are single-clock strobes: write the register result /
	// write the flags.  alu_gie records the bank of the issued instruction.
	reg		alu_wr, alF_wr, alu_gie;
 
 
 
	// Memory unit handshake and its private wishbone connection (routed
	// through the arbiter together with the prefetch unit's connection)
	wire	mem_ce, mem_stalled;
	wire	mem_valid, mem_ack, mem_stall,
		mem_cyc, mem_stb, mem_we;
	// Register id the memory unit will write its result into
	wire	[4:0]	mem_wreg;
 
	// mem_rdbusy: a memory cycle is active and it is not a write
	wire		mem_busy, mem_rdbusy;
	wire	[31:0]	mem_addr, mem_data, mem_result;
 
 
 
	//
	//
	//	PIPELINE STAGE #5 :: Write-back
	//		Variable declarations
	//
	wire		wr_reg_ce, wr_flags_ce, wr_write_pc;
	wire	[4:0]	wr_reg_id;
	wire	[31:0]	wr_reg_vl;
	wire	w_switch_to_interrupt, w_release_from_interrupt;
	// Program counters: upc for bank 1 (gie set), ipc for bank 0
	reg	[31:0]	upc, ipc;
 
 
 
	//
	//	MASTER: clock enable.
	//
	// Nothing new is issued to the ALU/memory stage while the debugger
	// holds the CPU halted, a break is being reported, the CPU sleeps, or
	// a memory read is still outstanding.
	assign	master_ce = (~i_halt)&&(~o_break)&&(~sleep)&&(~mem_rdbusy);
 
 
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Calculate stall conditions
 
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Calculate stall conditions
	// Accept a new instruction when the prefetch has one and the decode
	// stage is free to be overwritten.
	assign		dcd_ce = (pf_valid)&&(~dcd_stalled);
	// The decoded instruction must wait if the operand stage is stalled,
	// if one of its register or flag operands is not yet available, or if
	// the instruction ahead of it is going to write the PC.
	assign		dcd_stalled = (dcdvalid)&&(
					(op_stall)
					||((dcdA_stall)||(dcdB_stall)||(dcdF_stall))
					||((opvalid)&&(op_wr_pc)));
	//
	//	PIPELINE STAGE #3 :: Read Operands
	//		Calculate stall conditions
	// The operand stage is stalled by whichever unit (memory or ALU) its
	// current instruction is headed for.
	assign	op_stall = (opvalid)&&(
				((mem_stalled)&&(opM))
				||((alu_stall)&&(~opM)));
	assign	op_ce = (dcdvalid)&&((~opvalid)||(~op_stall));
 
	//
	//	PIPELINE STAGE #4 :: ALU / Memory
	//		Calculate stall conditions
	// The ALU waits while the master enable is low, and while a write to
	// the PC of this instruction's own bank is being committed.
	// (The mem_rdbusy term is already contained in ~master_ce.)
	assign	alu_stall = (((~master_ce)||(mem_rdbusy))&&(opvalid)&&(~opM))
			||((opvalid)&&(wr_reg_ce)&&(wr_reg_id == { op_gie, `CPU_PC_REG }));
	assign	alu_ce = (master_ce)&&(opvalid)&&(~opM)&&(~alu_stall)&&(~new_pc);
	//
	// A memory operation is only issued once its condition is known to
	// hold (set_cond), since a bus access cannot be undone afterwards.
	assign	mem_ce = (master_ce)&&(opvalid)&&(opM)&&(~mem_stalled)&&(~new_pc)&&(set_cond);
	assign	mem_stalled = (mem_busy)||((opvalid)&&(opM)&&(
				(~master_ce)
				// Stall waiting for flags to be valid
				// (only if the operation is conditional, i.e.
				// the "always" bit opF[8] is clear)
				||((~opF[8])&&(
					((wr_reg_ce)&&(wr_reg_id[4:0] == {op_gie,`CPU_CC_REG}))))
					// Do I need this last condition?
					//||((wr_flags_ce)&&(alu_gie==op_gie))))
				// Or waiting for a write to the PC register
				||((wr_reg_ce)&&(wr_reg_id[4] == op_gie)&&(wr_write_pc))));
 
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//
	//
// Two interchangeable prefetch units (both defined outside this file).
// With SINGLE_FETCH defined the unit is told the current gie and reports
// instruction_gie itself; it is given neither new_pc nor RESET_ADDRESS.
// Otherwise the pipelined fetch is used: it receives new_pc and mem_cyc,
// and instruction_gie is simply tied to the current gie.
`ifdef	SINGLE_FETCH
	wire		pf_ce;
 
	assign		pf_ce = (~dcd_stalled);
	prefetch	pf(i_clk, i_rst, (pf_ce), pf_pc, gie,
				instruction, instruction_pc, instruction_gie,
					pf_valid,
				pf_cyc, pf_stb, pf_we, pf_addr,
					pf_data,
				pf_ack, pf_stall, i_wb_data);
`else // Pipe fetch
	pipefetch	#(RESET_ADDRESS)
			pf(i_clk, i_rst, new_pc, ~dcd_stalled, pf_pc,
					instruction, instruction_pc, pf_valid,
				pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
					pf_ack, pf_stall, i_wb_data,
				mem_cyc);
	assign	instruction_gie = gie;
`endif
 
	// Decode stage valid flag.  A reset or a pipeline flush (new_pc)
	// always empties the stage.  Otherwise the stage becomes valid when
	// it accepts an instruction, and empties again once its contents
	// have been free to move on without a replacement arriving.
	always @(posedge i_clk)
	begin
		if ((i_rst)||(new_pc))
			dcdvalid <= 1'b0;
		else if (dcd_ce)
			dcdvalid <= 1'b1;
		else if (~dcd_stalled)
			dcdvalid <= 1'b0;
	end
 
	// Instruction decoder.  On every accepted instruction (dcd_ce) the
	// 32-bit word is split into:
	//	[31:28]	opcode			-> dcdOp
	//	[27:24]	register A (result)	-> dcdA
	//	[23:21]	condition		-> dcdF
	//	[20]	operand B is a register	-> dcdB_rd (most formats)
	//	[19:16]	register B		-> dcdB
	//	low bits immediate		-> r_dcdI (width varies by format)
	// together with the read/write/memory/flag control bits used by the
	// later stages.
	always @(posedge i_clk)
		if (dcd_ce)
		begin
			// PC value seen by this instruction: the address
			// following its own
			dcd_pc <= instruction_pc+1;
 
			// Record what operation we are doing
			dcdOp <= instruction[31:28];
 
			// Default values
			dcdA[4:0] <= { instruction_gie, instruction[27:24] };
			dcdB[4:0] <= { instruction_gie, instruction[19:16] };
			dcdM    <= 1'b0;
			dcdF_wr <= 1'b1;
			dcd_break <= 1'b0;
 
			// Set the condition under which we do this operation.
			// dcdF[2:0] is the condition code from the instruction
			// and dcdF[3] is set when that code is zero, i.e. when
			// the instruction is unconditional.
			dcdF <= { (instruction[23:21]==3'h0), instruction[23:21] };
			casez(instruction[31:28])
			4'h2: begin // Move instruction
				// Outside of user mode (gie clear), bits 20
				// and 15 select the bank of each register
				if (~instruction_gie)
				begin
					dcdA[4] <= instruction[20];
					dcdB[4] <= instruction[15];
				end
				dcdA_wr <= 1'b1;
				dcdA_rd <= 1'b0;
				dcdB_rd <= 1'b1;
				// 15-bit signed offset
				r_dcdI <= { {(9){instruction[14]}}, instruction[14:0] };
				dcdF_wr <= 1'b0; // Don't write flags
				end
			4'h3: begin // Load immediate
				dcdA_wr <= 1'b1;
				dcdA_rd <= 1'b0;
				dcdB_rd <= 1'b0;
				// All 24 low bits are immediate, so there is
				// no room for a condition field
				r_dcdI <= { instruction[23:0] };
				dcdF_wr <= 1'b0; // Don't write flags
				dcdF    <= 4'h8; // This is unconditional
				// Executed using the move operation
				dcdOp <= 4'h2;
				end
			4'h4: begin // Load immediate special
				dcdF_wr <= 1'b0; // Don't write flags
				r_dcdI <= { 8'h00, instruction[15:0] };
				if (instruction[27:24] == 4'he)
				begin
					// NOOP instruction
					dcdA_wr <= 1'b0;
					dcdA_rd <= 1'b0;
					dcdB_rd <= 1'b0;
					dcdOp <= 4'h2;
					// The operand stage turns this into a
					// break when the immediate is 16'h0001
					dcd_break <= 1'b1;//Could be a break ins
				end else if (instruction[27:24] == 4'hf)
				begin // Load partial immediate(s)
					// The target register comes from bits
					// [19:16]; it is both read and written
					dcdA_wr <= 1'b1;
					dcdA_rd <= 1'b1;
					dcdB_rd <= 1'b0;
					dcdA[4:0] <= { instruction_gie, instruction[19:16] };
					dcdOp <= { 3'h3, instruction[20] };
				end else begin
					// NOTE(review): dcdA_wr, dcdA_rd and
					// dcdB_rd are not assigned on this
					// path, so they keep the values left
					// by the previous instruction.
					; // Multiply instruction place holder
				end end
			4'b011?: begin // Load/Store
				dcdF_wr <= 1'b0; // Don't write flags
				dcdA_wr <= (~instruction[28]); // Write on loads
				dcdA_rd <= (instruction[28]); // Read on stores
				dcdB_rd <= instruction[20];
				// 16-bit signed offset alongside a register,
				// or a 20-bit signed immediate on its own
				if (instruction[20])
					r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
				else
					r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
				dcdM <= 1'b1; // Memory operation
				end
			default: begin
				// Remaining opcodes: two-operand operations
				// that read register A.  The result is written
				// back for opcodes 8-15 and for opcode 5 only.
				dcdA <= { instruction_gie, instruction[27:24] };
				dcdB <= { instruction_gie, instruction[19:16] };
				dcdA_wr <= (instruction[31])||(instruction[31:28]==4'h5);
				dcdA_rd <= 1'b1;
				dcdB_rd <= instruction[20];
				if (instruction[20])
					r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
				else
					r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
				end
			endcase
 
 
			dcd_gie <= instruction_gie;
		end
 
 
	//
	//
	//	PIPELINE STAGE #3 :: Read Operands (Registers)
	//
	//
 
	// Operand A.  In priority order: a value being written back this
	// very clock to the same register, the instruction's own PC (dcd_pc)
	// when it names the PC of its own bank, the stored PC of the other
	// bank, or the register file.
	always @(posedge i_clk)
		if (op_ce) // &&(dcdvalid))
		begin
			if ((wr_reg_ce)&&(wr_reg_id == dcdA))
				r_opA <= wr_reg_vl;
			else if (dcdA == { dcd_gie, `CPU_PC_REG })
				r_opA <= dcd_pc;
			else if (dcdA[3:0] == `CPU_PC_REG)
				r_opA <= (dcdA[4])?upc:ipc;
			else
				r_opA <= regset[dcdA];
		end
	// The 24-bit immediate, sign extended to 32 bits
	wire	[31:0]	dcdI;
	assign	dcdI = { {(8){r_dcdI[23]}}, r_dcdI };
	// Operand B.  Either the immediate alone (no register read), or the
	// same register selection as for operand A with the immediate added.
	always @(posedge i_clk)
		if (op_ce) // &&(dcdvalid))
		begin
			if (~dcdB_rd)
				r_opB <= dcdI;
			else if ((wr_reg_ce)&&(wr_reg_id == dcdB))
				r_opB <= wr_reg_vl + dcdI;
			else if (dcdB == { dcd_gie, `CPU_PC_REG })
				r_opB <= dcd_pc + dcdI;
			else if (dcdB[3:0] == `CPU_PC_REG)
				r_opB <= ((dcdB[4])?upc:ipc) + dcdI;
			else
				r_opB <= regset[dcdB] + dcdI;
		end
 
	// The logic here has become more complex than it should be, no thanks
	// to Xilinx's Vivado trying to help.  The condition is supposed to be
	// an "always" bit followed by two sets of four bits: the upper four
	// specify which flag bits matter, the lower four what those bits must
	// equal.  However, two of the conditions check whether bits are on,
	// and those are the only two conditions checking those bits.
	// Therefore, Vivado complains that these two bits are redundant.
	// Hence the convoluted expression below: a compact seven bit register
	// (r_opF) that is expanded into what we finally want in the (now wire
	// net) opF:
	//
	//	opF[8]   = r_opF[6]	"always" (unconditional)
	//	opF[7:4] = { r_opF[3], r_opF[5], r_opF[1], r_opF[4] }	mask
	//	opF[3:0] = r_opF[3:0]					value
	//
	// The "always" code must therefore set r_opF[6], i.e. 7'h40.  (The
	// value 7'h80 does not fit in seven bits: it truncates to zero, which
	// leaves opF[8] clear and makes every unconditional memory operation
	// take the "waiting for flags" stall in mem_stalled.)
`define	NEWCODE
`ifdef	NEWCODE
	always @(posedge i_clk)
		if (op_ce)
		begin // Set the flag condition codes
			case(dcdF[2:0])
			3'h0:	r_opF <= 7'h40;	// Always
			3'h1:	r_opF <= 7'h11;	// Z
			3'h2:	r_opF <= 7'h10;	// NE
			3'h3:	r_opF <= 7'h20;	// GE (!N)
			3'h4:	r_opF <= 7'h30;	// GT (!N&!Z)
			3'h5:	r_opF <= 7'h24;	// LT
			3'h6:	r_opF <= 7'h02;	// C
			3'h7:	r_opF <= 7'h08;	// V
			endcase
		end
	assign	opF = { r_opF[6], r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
`else
	// NOTE(review): this branch assigns opF procedurally, yet opF is
	// declared as a wire; it cannot be enabled without changing that
	// declaration.
	always @(posedge i_clk)
		if (op_ce)
		begin // Set the flag condition codes
			case(dcdF[2:0])
			3'h0:	opF <= 9'h100;	// Always
			3'h1:	opF <= 9'h011;	// Z
			3'h2:	opF <= 9'h010;	// NE
			3'h3:	opF <= 9'h040;	// GE (!N)
			3'h4:	opF <= 9'h050;	// GT (!N&!Z)
			3'h5:	opF <= 9'h044;	// LT
			3'h6:	opF <= 9'h022;	// C
			3'h7:	opF <= 9'h088;	// V
			endcase
		end
`endif
 
	// Operand stage valid flag
	always @(posedge i_clk)
		if (i_rst)
			opvalid <= 1'b0;
		else if (op_ce)
			// Do we have a valid instruction?
			//   The decoder may vote to stall one of its
			//   instructions based upon something we currently
			//   have in our queue.  This instruction must then
			//   move forward, and get a stall cycle inserted.
			//   Hence, the test on dcd_stalled here.  If we must
			//   wait until our operands are valid, then we aren't
			//   valid yet until then.
			opvalid<= (~new_pc)&&(dcdvalid)&&(~dcd_stalled);
		else if ((~op_stall)||(new_pc))
			// Nothing new arrived: the stage empties once its
			// instruction has moved on, or upon a pipeline flush
			opvalid <= 1'b0;
 
	// Here's part of our debug interface.  When we recognize a break
	// instruction, we set the op_break flag.  That'll prevent this
	// instruction from entering the ALU, and cause an interrupt before
	// this instruction.  Thus, returning to this code will cause the
	// break to repeat and continue upon return.  To get out of this
	// condition, replace the break instruction with what it is supposed
	// to be, step through it, and then replace it back.  In this fashion,
	// a debugger can step through code.
	//
	// A break is the NOOP encoding (dcd_break) whose sixteen bit
	// immediate equals 16'h0001.
	always @(posedge i_clk)
		if (i_rst)
			op_break <= 1'b0;
		else if (op_ce)
			op_break <= (dcd_break)&&(r_dcdI[15:0] == 16'h0001);
		else if ((~op_stall)||(new_pc))
			op_break <= 1'b0;
 
	// Carry the decoded instruction's control information forward into
	// the operand stage whenever that stage accepts a new instruction.
	always @(posedge i_clk)
	begin
		if (op_ce)
		begin
			// Which instruction this is: its PC and its bank
			// (user level (1) vs supervisor (0))
			op_pc  <= dcd_pc;
			op_gie <= dcd_gie;

			// What it does: the ALU operation, and whether it is
			// a memory operation instead
			opn <= dcdOp;
			opM <= dcdM;

			// Where its results go: the destination register,
			// whether that register and/or the flags get written,
			// and whether the destination is a program counter
			opR      <= dcdA;
			opR_wr   <= dcdA_wr;
			opF_wr   <= dcdF_wr;
			op_wr_pc <= (dcdA[3:0] == `CPU_PC_REG)&&(dcdA_wr);

			// What it reads.  The CC register is not taken from
			// the register file: the *_cc fields remember, per
			// operand, the bank and whether the operand is the CC
			// register, so the unclocked assigns for opA and opB
			// can substitute the live flags without a wait state.
			opA_rd    <= dcdA_rd;
			opB_rd    <= dcdB_rd;
			opA_cc[1] <= dcdA[4];
			opA_cc[0] <= (dcdA[3:0] == `CPU_CC_REG);
			opB_cc[1] <= dcdB[4];
			opB_cc[0] <= (dcdB[3:0] == `CPU_CC_REG);
		end
	end

	// The flags this instruction's condition is evaluated against are
	// those belonging to its own bank
	assign	opFl = op_gie ? w_uflags : w_iflags;
 
	// This is tricky.  First, the PC and Flags registers aren't kept in
	// register set but in special registers of their own.  So step one
	// is to select the right register.  Step two is to replace that
	// register with the results of an ALU or memory operation, if such
	// results are now available.  Otherwise, we'd need to insert a wait
	// state of some type.
	//
	// The alternative approach would be to define some sort of
	// op_stall wire, which would stall any upstream stage.
	// We'll create a flag here to start our coordination.  Once we
	// define this flag to something other than just plain zero, then
	// the stalls will already be in place.

	// Operand A hazard: the decoded instruction reads a register that
	// the instruction in the operand stage will write, or that a memory
	// read in progress (or just completing) will write.
	assign	dcdA_stall = (dcdvalid)&&(dcdA_rd)&&
				(((opvalid)&&(opR_wr)&&(opR == dcdA))
					||((mem_busy)&&(~mem_we)&&(mem_wreg == dcdA))
					||((mem_valid)&&(mem_wreg == dcdA)));
	// Operand B hazard: same test for the second register operand
	assign	dcdB_stall = (dcdvalid)&&(dcdB_rd)
				&&(((opvalid)&&(opR_wr)&&(opR == dcdB))
					||((mem_busy)&&(~mem_we)&&(mem_wreg == dcdB))
					||((mem_valid)&&(mem_wreg == dcdB)));
	// Flags hazard.  dcdF[3] is set for unconditional instructions, so
	// an instruction depends upon the flags when dcdF[3] is clear, or
	// when it names the CC register as an operand.  Such an instruction
	// must wait
	//   - while the instruction in the operand stage targets the CC
	//     register, and
	//   - if it is a conditional memory operation, while the instruction
	//     in the operand stage is going to write the flags, since a
	//     memory operation cannot be undone once issued.
	assign	dcdF_stall = (dcdvalid)&&(
			(((~dcdF[3])
					||(dcdA[3:0]==`CPU_CC_REG)
					||(dcdB[3:0]==`CPU_CC_REG))
				&&(opvalid)&&(opR[3:0] == `CPU_CC_REG))
			||((~dcdF[3])&&(dcdM)&&(opvalid)&&(opF_wr)));
	// Operands as seen by the ALU/memory units.  When an operand is the
	// CC register, its low eight bits are replaced by the live flags of
	// the selected bank (which include flags being written this clock).
	assign	opA = { r_opA[31:8], ((opA_cc[0]) ?
			((opA_cc[1])?w_uflags:w_iflags) : r_opA[7:0]) };
	assign	opB = { r_opB[31:8], ((opB_cc[0]) ?
			((opB_cc[1])?w_uflags:w_iflags) : r_opB[7:0]) };
 
	//
	//
	//	PIPELINE STAGE #4 :: Apply Instruction
	//
	//
	// The ALU (cpuops is defined outside this file).  It is handed the
	// issue enable, a qualifier marking a valid non-memory instruction,
	// the operation and both operands, and returns a result, four flags
	// and a valid indication.
	cpuops	doalu(i_clk, i_rst, alu_ce,
			(opvalid)&&(~opM), opn, opA, opB,
			alu_result, alu_flags, alu_valid);
 
	// Condition test: the flags selected by the mask opF[7:4] must equal
	// opF[3:0].  A zero mask and value therefore always pass.
	assign	set_cond = ((opF[7:4]&opFl[3:0])==opF[3:0]);
	initial	alF_wr   = 1'b0;
	initial	alu_wr   = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
		begin
			alu_wr   <= 1'b0;
			alF_wr   <= 1'b0;
		end else if (alu_ce)
		begin
			// An instruction whose condition fails still issues,
			// but writes neither its register nor the flags
			alu_reg <= opR;
			alu_wr  <= (opR_wr)&&(set_cond);
			alF_wr  <= (opF_wr)&&(set_cond);
		end else begin
			// These are strobe signals, so clear them if not
			// set for any particular clock
			alu_wr <= 1'b0;
			alF_wr <= 1'b0;
		end
	// Remember the bank and PC of whatever was issued last, whether to
	// the ALU or to the memory unit
	always @(posedge i_clk)
		if ((alu_ce)||(mem_ce))
			alu_gie  <= op_gie;
	always @(posedge i_clk)
		if ((alu_ce)||(mem_ce))
			alu_pc  <= op_pc;
	// Marks, one clock later, that an instruction left the operand stage.
	// Unlike mem_ce this does not test set_cond, so a memory instruction
	// whose condition fails is still counted.
	initial	alu_pc_valid = 1'b0;
	always @(posedge i_clk)
		alu_pc_valid <= (~i_rst)&&(master_ce)&&(opvalid)&&(~new_pc)
					&&((~opM)
						||(~mem_stalled));
 
	// The memory unit (memops is defined outside this file).  opn[0] is
	// set for stores (the decoder writes register A on loads, when
	// instruction[28] is clear).  opB, register plus offset, is passed
	// ahead of opA -- presumably as address and store data; confirm
	// against memops.
	memops	domem(i_clk, i_rst, mem_ce,
				(opn[0]), opB, opA, opR,
				mem_busy, mem_valid, mem_wreg, mem_result,
			mem_cyc, mem_stb, mem_we, mem_addr, mem_data,
				mem_ack, mem_stall, i_wb_data);
	// A memory read is outstanding
	assign	mem_rdbusy = ((mem_cyc)&&(~mem_we));
 
	// Either the prefetch or the instruction gets the memory bus, but 
	// never both.  The arbiter (wbarbiter, defined outside this file)
	// joins the two internal wishbone connections onto the CPU's single
	// external wishbone port.
	wbarbiter	#(32,32) pformem(i_clk, i_rst,
		// Prefetch access to the arbiter
		pf_addr, pf_data, pf_we, pf_stb, pf_cyc, pf_ack, pf_stall,
		// Memory access to the arbiter
		mem_addr, mem_data, mem_we, mem_stb, mem_cyc, mem_ack, mem_stall,
		// Common wires, in and out, of the arbiter
		o_wb_addr, o_wb_data, o_wb_we, o_wb_stb, o_wb_cyc, i_wb_ack,
			i_wb_stall);
 
	//
	//
	//	PIPELINE STAGE #5 :: Write-back results
	//
	//
	// Write-back never stalls.  Whatever result is ready gets written,
	// regardless of whether the CPU is asleep, halted or being debugged;
	// master_ce only matters here in so far as it determined whether
	// there is anything available to write back at all.
	//
	// Write back to the generic register set.  There is something to
	// write when the ALU completes an instruction that writes a register,
	// or when the memory unit returns a value.  Conditions need not be
	// tested again: alu_wr already includes set_cond, and a memory
	// operation is not issued unless its condition holds.  The ALU takes
	// precedence in choosing the register and the value.
	assign	wr_reg_ce   = (mem_valid)||((alu_valid)&&(alu_wr));
	assign	wr_reg_id   = (alu_wr) ? alu_reg    : mem_wreg;
	assign	wr_reg_vl   = (alu_wr) ? alu_result : mem_result;
	// Is the register being written a program counter (of either bank)?
	assign	wr_write_pc = (wr_reg_id[3:0] == `CPU_PC_REG);

	always @(posedge i_clk)
	begin
		if (wr_reg_ce)
			regset[wr_reg_id] <= wr_reg_vl;
	end
 
	//
	// Write back to the condition codes/flags register ...
	// When shall we write to our flags register?  alF_wr already
	// includes the set condition ...
	assign	wr_flags_ce = (alF_wr)&&(alu_valid);
	// The 8-bit CC views.  Flags being written by the ALU this clock are
	// passed straight through, so that the next instruction sees them
	// without a wait state.  The user view always reads GIE as one and
	// BREAKEN as zero; the supervisor view reads STEP and GIE as zero.
	assign	w_uflags = { 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
	assign	w_iflags = { break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
	// The user-mode flags.  In priority order:
	always @(posedge i_clk)
		// If explicitly writing the register itself
		if ((wr_reg_ce)&&(wr_reg_id[4:0] == { 1'b1, `CPU_CC_REG }))
			flags <= wr_reg_vl[3:0];
		// Otherwise if we're setting the flags from an ALU operation
		else if ((wr_flags_ce)&&(alu_gie))
			flags <= alu_flags;
		// Otherwise a write from the debugger, while the CPU is halted
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			flags <= i_dbg_data[3:0];
 
	// The supervisor-mode flags, with the same three sources
	always @(posedge i_clk)
		if ((wr_reg_ce)&&(wr_reg_id[4:0] == { 1'b0, `CPU_CC_REG }))
			iflags <= wr_reg_vl[3:0];
		else if ((wr_flags_ce)&&(~alu_gie))
			iflags <= alu_flags;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b0, `CPU_CC_REG }))
			iflags <= i_dbg_data[3:0];
 
	// The 'break' enable bit.  This bit can only be set from supervisor
	// mode, by writing the supervisor CC register.  It controls what the
	// CPU does upon encountering a break instruction.
	//
	// The goal, upon encountering a break is that the CPU should stop and
	// not execute the break instruction, choosing instead to enter into
	// either interrupt mode or halt first.  
	//	if ((break_en) AND (break_instruction)) // user mode or not
	//		HALT CPU
	//	else if (break_instruction) // only in user mode
	//		set an interrupt flag, go to supervisor mode
	//		allow supervisor to step the CPU.
	//	Upon a CPU halt, any break condition will be reset.  The
	//	external debugger will then need to deal with whatever
	//	condition has taken place.
	initial	break_en = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(i_halt))
			break_en <= 1'b0;
		else if ((wr_reg_ce)&&(wr_reg_id[4:0] == {1'b0, `CPU_CC_REG}))
			break_en <= wr_reg_vl[`CPU_BREAK_BIT];
	// Reported to the outside, and used to hold master_ce low
	assign	o_break = (break_en)&&(op_break);
 
 
	// The sleep register.  Setting the sleep register causes the CPU to
	// sleep until the next interrupt.  Setting the sleep register within
	// interrupt mode causes the processor to halt until a reset.  This is
	// a panic/fault halt.
	//
	// The bit is cleared by reset, or by an interrupt arriving while gie
	// is set.  It is written from any write-back to a CC register (of
	// either bank), or by the debugger through the user CC register.
	always @(posedge i_clk)
		if ((i_rst)||((i_interrupt)&&(gie)))
			sleep <= 1'b0;
		else if ((wr_reg_ce)&&(wr_reg_id[3:0] == `CPU_CC_REG))
			sleep <= wr_reg_vl[`CPU_SLEEP_BIT];
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			sleep <= i_dbg_data[`CPU_SLEEP_BIT];
 
	// The step bit.  Cleared on reset and on every switch to interrupt
	// mode.  It is written through the user CC register, either by an
	// instruction issued with alu_gie clear or by the debugger, and it
	// is cleared again once an instruction has been issued with both
	// step and gie set.
	always @(posedge i_clk)
		if ((i_rst)||(w_switch_to_interrupt))
			step <= 1'b0;
		else if ((wr_reg_ce)&&(~alu_gie)&&(wr_reg_id[4:0] == {1'b1,`CPU_CC_REG}))
			step <= wr_reg_vl[`CPU_STEP_BIT];
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			step <= i_dbg_data[`CPU_STEP_BIT];
		else if ((master_ce)&&(alu_pc_valid)&&(step)&&(gie))
			step <= 1'b0;
 
	// The GIE register.  Only interrupts can disable the interrupt register
	assign	w_switch_to_interrupt = (gie)&&(
			// On interrupt (obviously)
			(i_interrupt)
			// If we are stepping the CPU
			||((master_ce)&&(alu_pc_valid)&&(step))
			// If we encounter a break instruction while the break
			//	enable isn't set.  (With break_en set, o_break
			//	holds master_ce low and the CPU halts instead.)
			||((master_ce)&&(op_break))
			// If we write to the CC register
			||((wr_reg_ce)&&(~wr_reg_vl[`CPU_GIE_BIT])
				&&(wr_reg_id[4:0] == { 1'b1, `CPU_CC_REG }))
			// Or if, in debug mode, we write to the CC register
			||((i_halt)&&(i_dbg_we)&&(~i_dbg_data[`CPU_GIE_BIT])
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG}))
			);
	// Leaving interrupt mode requires that no interrupt be pending, and
	// that the GIE bit be set by a write to the supervisor CC register
	assign	w_release_from_interrupt = (~gie)&&(~i_interrupt)
			// Then if we write the CC register
			&&(((wr_reg_ce)&&(wr_reg_vl[`CPU_GIE_BIT])
				&&(wr_reg_id[4:0] == { 1'b0, `CPU_CC_REG }))
			// Or if, in debug mode, we write the CC register
			  ||((i_halt)&&(i_dbg_we)&&(i_dbg_data[`CPU_GIE_BIT])
				&&(i_dbg_reg == { 1'b0, `CPU_CC_REG}))
			);
	// The CPU comes out of reset with gie clear
	always @(posedge i_clk)
		if (i_rst)
			gie <= 1'b0;
		else if (w_switch_to_interrupt)
			gie <= 1'b0;
		else if (w_release_from_interrupt)
			gie <= 1'b1;
 
	//
	// Write backs to the PC register, and general increments of it
	//	We support two: upc and ipc.  If the instruction is normal,
	// we increment upc, if interrupt level we increment ipc.  If
	// the instruction writes the PC, we write whichever PC is appropriate.
	//
	// Do we need to clear all our partial results from the pipeline?
	// What happens when the pipeline has gie and ~gie instructions within
	// it?  Do we clear both?  What if a gie instruction tries to clear
	// a non-gie instruction?
	//
	// Priority for each PC: an explicit write-back to it, then the PC of
	// the instruction just issued in its bank (alu_pc, which already
	// points at the following instruction), then a debugger write.
	// Note that upc has no reset value.
	always @(posedge i_clk)
		if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc))
			upc <= wr_reg_vl;
		else if ((alu_gie)&&(alu_pc_valid))
			upc <= alu_pc;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_PC_REG }))
			upc <= i_dbg_data;
 
	always @(posedge i_clk)
		if (i_rst)
			ipc <= RESET_ADDRESS;
		else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc))
			ipc <= wr_reg_vl;
		else if ((~alu_gie)&&(alu_pc_valid))
			ipc <= alu_pc;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b0, `CPU_PC_REG }))
			ipc <= i_dbg_data;
 
	// The prefetch program counter: the address the prefetch unit is
	// asked to fetch from.  In priority order it is loaded with
	//	- the reset address,
	//	- ipc, when switching to interrupt mode,
	//	- upc, when returning from interrupt mode,
	//	- a value written back to the PC of the running bank (a jump),
	//	- a value the debugger writes to the PC of the running bank
	//	  while the CPU is halted,
	// and otherwise it advances by one for every instruction the decoder
	// accepts.
	//
	// The debugger case is selected by the debugger's register address,
	// i_dbg_reg, just as for upc and ipc.  (Testing wr_reg_id here would
	// compare against whatever register write-back last named, which has
	// nothing to do with the debugger's request.)
	always @(posedge i_clk)
		if (i_rst)
			pf_pc <= RESET_ADDRESS;
		else if (w_switch_to_interrupt)
			pf_pc <= ipc;
		else if (w_release_from_interrupt)
			pf_pc <= upc;
		else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
			pf_pc <= wr_reg_vl;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { gie, `CPU_PC_REG}))
			pf_pc <= i_dbg_data;
		else if (dcd_ce)
			pf_pc <= pf_pc + 1;

	// new_pc is raised for one clock after every one of the redirections
	// above (but not after the plain increment).  It invalidates the
	// instructions already in the decode and operand stages, and keeps
	// anything from being issued to the ALU or memory unit.
	initial	new_pc = 1'b1;
	always @(posedge i_clk)
		if (i_rst)
			new_pc <= 1'b1;
		else if (w_switch_to_interrupt)
			new_pc <= 1'b1;
		else if (w_release_from_interrupt)
			new_pc <= 1'b1;
		else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
			new_pc <= 1'b1;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { gie, `CPU_PC_REG}))
			new_pc <= 1'b1;
		else
			new_pc <= 1'b0;
 
	//
	// The debug interface
	// Debug read port, registered.  The register file value is the
	// default; the two later assignments override it for the PC and CC
	// registers, which live outside the register file.  The CC value is
	// assembled as { zeros, step, gie, sleep, four flags }; break_en is
	// not part of it.
	always @(posedge i_clk)
		begin
			o_dbg_reg <= regset[i_dbg_reg];
			if (i_dbg_reg[3:0] == `CPU_PC_REG)
				o_dbg_reg <= (i_dbg_reg[4])?upc:ipc;
			else if (i_dbg_reg[3:0] == `CPU_CC_REG)
				o_dbg_reg <= { 25'h00, step, gie, sleep,
					((i_dbg_reg[4])?flags:iflags) };
		end
	// The debugger is told to wait while the CPU is not halted, while
	// either bus master is mid-cycle or the memory unit is busy, or
	// (outside of reset) while the operand or decode stage is empty.
	always @(posedge i_clk)
		o_dbg_stall <= (~i_halt)||(pf_cyc)||(mem_cyc)||(mem_busy)
			||((~opvalid)&&(~i_rst))
			||((~dcdvalid)&&(~i_rst));
 
	//
	//
	// Produce accounting outputs: Account for any CPU stalls, so we can
	// later evaluate how well we are doing.
	//
	//
	// While the CPU is enabled: the operand stage has nothing to offer,
	// or cannot hand over what it has
	assign	o_op_stall = master_ce && ((op_stall) || (!opvalid));
	// While the CPU is enabled: the prefetch has no instruction ready
	assign	o_pf_stall = master_ce && (!pf_valid);
	// One strobe per instruction issued
	assign	o_i_count  = alu_pc_valid;
endmodule
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.