OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [zipcpu.v] - Rev 25

Go to most recent revision | Compare with Previous | Blame | View Log

///////////////////////////////////////////////////////////////////////////////
//
// Filename:	zipcpu.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	This is the top level module holding the core of the Zip CPU
//		together.  The Zip CPU is designed to be as simple as possible.
//		The instruction set is about as RISC as you can get, there are
//		only 16 instruction types supported (of which one isn't yet
//		supported ...)  Please see the accompanying iset.html file
//		for a description of these instructions.
//
//		All instructions are 32-bits wide.  All bus accesses, both
//		address and data, are 32-bits over a wishbone bus.
//
//	The Zip CPU is fully pipelined with the following pipeline stages:
//
//		1. Prefetch, returns the instruction from memory.  On the
//		Basys board that I'm working on, one instruction may be
//		issued every 20 clocks or so, unless and until I implement a
//		cache or local memory.
//
//		2. Instruction Decode
//
//		3. Read Operands
//
//		4. Apply Instruction
//
//		5. Write-back Results
//
//	A lot of difficult work has been placed into the pipeline stall
//	handling.  My original proposal was not to allow pipeline stalls at all.
//	The idea would be that the CPU would just run every clock and whatever
//	stalled answer took place would just get fixed a clock or two later,
//	meaning that the compiler could just schedule everything out.
//	This idea died at the memory interface, which can take a variable
//	amount of time to read or write any value, thus the whole CPU needed
//	to stall on a stalled memory access.
//
//	My next idea was to just let things complete.  I.e., once an instruction
//	starts, it continues to completion no matter what and we go on.  This
//	failed at writing the PC.  If the PC gets written in something such as
//	a MOV PC,PC+5 instruction, 3 (or however long the pipeline is) clocks
//	later, if whether or not something happens in those clocks depends
//	upon the instruction fetch filling the pipeline, then the CPU has a
//	non-deterministic behavior.
//
//	This leads to two possibilities: either *everything* stalls upon a 
//	stall condition, or partial results need to be destroyed before
//	they are written.  This is made more difficult by the fact that
//	once a command is written to the memory unit, whether it be a
//	read or a write, there is no undoing it--since peripherals on the
//	bus may act upon the answer with whatever side effects they might
//	have.  (For example, writing a '1' to the interrupt register will
//	clear certain interrupts ...)  Further, since the memory ops depend
//	upon conditions, we'll need to wait for the condition codes to
//	be available before executing a memory op.  Thus, memory ops can 
//	proceed without stalling whenever either the previous instruction
//	doesn't write the flags register, or when the memory instruction doesn't
//	depend upon the flags register.
//
//	The other possibility is that we leave independent instruction
//	execution behind, so that the pipeline is always full and stalls,
//	or moves forward, together on every clock.
//
//	For now, we pick the first approach: independent instruction execution.
//	Thus, if stage 2 stalls, stages 3-5 may still complete the instructions
//	in their pipeline.  This leaves another problem: what happens on a
//	MOV -1+PC,PC instruction?  There will be four instructions behind this
//	one (or is it five?) that will need to be 'cancelled'.  So here's
//	the plan: Anything can be cancelled before the ALU/MEM stage,
//	since memory ops cannot be canceled after being issued.  Thus, the
//	ALU/MEM stage must stall if any prior instruction is going to write
//	the PC register (i.e. JMP).
//
//	Further, let's define a "STALL" as a reason to not execute a stage
//	due to some condition at or beyond the stage, and let's define
//	a VALID flag to mean that this stage has completed.  Thus, the clock
//	enable for a stage is (STG[n-1]VALID)&&((~STG[n]VALID)||(~STG[n]STALL)).
//	The ALU/MEM stages will also depend upon a master clock enable
//	(~SLEEP) condition as well.
//
//
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////////
//
// Indices of the two special-purpose registers.  These are the low four bits
// of a five-bit register id; in this file the fifth (top) bit selects between
// the user (1) and supervisor/interrupt (0) register sets.
`define	CPU_CC_REG	4'he
`define	CPU_PC_REG	4'hf
// Bit positions of the control fields within the CC register word.  They
// match the layout assembled into w_uflags/w_iflags further down in this
// file (bits 3:0 of that word hold the ALU flags).
`define	CPU_TRAP_BIT	9
`define	CPU_BREAK_BIT	7
`define	CPU_STEP_BIT	6
`define	CPU_GIE_BIT	5
`define	CPU_SLEEP_BIT	4
// zipcpu: top level of the CPU core.  The decode, read-operands and
// write-back logic live directly in this module; the prefetch, ALU, memory
// unit and bus arbiter are instantiated as sub-modules.  Prefetch and data
// accesses share the single wishbone master port declared here.
module	zipcpu(i_clk, i_rst, i_interrupt,
		// Debug interface
		i_halt, i_clear_pf_cache, i_dbg_reg, i_dbg_we, i_dbg_data,
			o_dbg_stall, o_dbg_reg, o_dbg_cc,
			o_break,
		// CPU interface to the wishbone bus
		o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_data,
		// Accounting/CPU usage interface
		o_op_stall, o_pf_stall, o_i_count);
	// Value loaded into the supervisor PC (ipc) and the prefetch PC
	// (pf_pc) on reset; also passed as a parameter to the pipefetch unit.
	parameter	RESET_ADDRESS=32'h0100000;
	// i_rst is only ever tested inside posedge-clocked blocks, i.e. it is
	// a synchronous reset.  i_interrupt forces a switch to interrupt
	// (supervisor) mode while gie is set, and clears the sleep bit.
	input			i_clk, i_rst, i_interrupt;
	// Debug interface -- inputs
	//	i_halt		removes the master clock enable and permits
	//			debugger register writes
	//	i_clear_pf_cache flushes the pipeline and restarts the fetch
	//	i_dbg_reg	register selected by the debugger; bit 4 picks
	//			the user (1) or supervisor (0) register set
	//	i_dbg_we, i_dbg_data	write strobe and value, honored only
	//			while i_halt is asserted
	input			i_halt, i_clear_pf_cache;
	input		[4:0]	i_dbg_reg;
	input			i_dbg_we;
	input		[31:0]	i_dbg_data;
	// Debug interface -- outputs
	//	o_dbg_stall	registered; set while halted but the pipeline
	//			or bus has not yet settled
	//	o_dbg_reg	registered read-back of the register i_dbg_reg
	//	o_dbg_cc	registered { gie, sleep }
	//	o_break		a break instruction is waiting in the operand
	//			stage and should halt the CPU
	output	reg		o_dbg_stall;
	output	reg	[31:0]	o_dbg_reg;
	output	reg	[1:0]	o_dbg_cc;
	output	wire		o_break;
	// Wishbone interface -- outputs
	output	wire		o_wb_cyc, o_wb_stb, o_wb_we;
	output	wire	[31:0]	o_wb_addr, o_wb_data;
	// Wishbone interface -- inputs
	input			i_wb_ack, i_wb_stall;
	input		[31:0]	i_wb_data;
	// Accounting outputs ... to help us count stalls and usage
	//	o_op_stall	the CPU is enabled, but the operand stage is
	//			either empty or stalled
	//	o_pf_stall	the CPU is enabled, but the prefetch has no
	//			valid instruction to offer
	//	o_i_count	mirrors alu_pc_valid, one pulse per instruction
	//			accepted by the ALU/MEM stage
	output	wire		o_op_stall;
	output	wire		o_pf_stall;
	output	wire		o_i_count;
 
 
	// Registers
	//	32 words: indices 0-15 are the supervisor set, 16-31 the user
	//	set (bit 4 of a register id is the user/supervisor select).
	reg	[31:0]	regset [0:31];
 
	// Condition codes
	//	flags/iflags hold only the four ALU flag bits for user and
	//	supervisor mode respectively.  The remaining CC fields are kept
	//	in the individual registers below and are merged into the
	//	10-bit w_uflags/w_iflags views.
	reg	[3:0]	flags, iflags;	// (TRAP,FPEN,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
	wire	[9:0]	w_uflags, w_iflags;
	reg		trap, break_en, step, gie, sleep;
 
	// The master chip enable
	wire		master_ce;
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Variable declarations
	//
	reg	[31:0]	pf_pc;
	reg		new_pc, op_break;
	// Anything that invalidates instructions already in flight: a PC
	// change, an external cache clear, or a break held in the op stage.
	wire	clear_pipeline;
	assign	clear_pipeline = new_pc || i_clear_pf_cache || op_break;
 
	wire		dcd_stalled;
	// NOTE(review): pf_busy is declared here but not referenced anywhere
	// else in this module.
	wire		pf_cyc, pf_stb, pf_we, pf_busy, pf_ack, pf_stall;
	wire	[31:0]	pf_addr, pf_data;
	wire	[31:0]	instruction, instruction_pc;
	wire	pf_valid, instruction_gie;
 
	//
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Variable declarations
	//
	//
	//	dcdA/dcdB	five-bit ids of the A and B operand registers
	//	dcdX_cc/_pc	the operand names the CC or the PC register
	//	dcdF		condition field: { unconditional, cond[2:0] }
	//	dcdX_rd/_wr	the operand is read / A is written
	//	dcdM		memory (load/store) instruction
	//	dcdF_wr		the instruction updates the ALU flags
	//	r_dcdI		24-bit immediate, sign extended when used
	reg		opvalid, opvalid_mem, opvalid_alu, op_wr_pc;
	wire		op_stall, dcd_ce;
	reg	[3:0]	dcdOp;
	reg	[4:0]	dcdA, dcdB;
	reg		dcdA_cc, dcdB_cc, dcdA_pc, dcdB_pc;
	reg	[3:0]	dcdF;
	reg		dcdA_rd, dcdA_wr, dcdB_rd, dcdvalid,
				dcdM, dcdF_wr, dcd_gie, dcd_break;
	reg	[31:0]	dcd_pc;
	reg	[23:0]	r_dcdI;
	wire	dcdA_stall, dcdB_stall, dcdF_stall;
 
 
 
	//
	//
	//	PIPELINE STAGE #3 :: Read Operands
	//		Variable declarations
	//
	//
	//
	// Now, let's read our operands
	//	opn		ALU operation handed to the ALU
	//	opR		destination register id
	//	r_opA/r_opB	operand values as captured from the register
	//			file; opA/opB are the possibly forwarded
	//			versions actually consumed
	//	r_opF/opF	encoded / expanded execution condition
	//	opFl		CC word of the mode the instruction runs in
	// NOTE(review): opA_nowait and opB_nowait are declared but not used
	// elsewhere in this module.
	reg	[4:0]	alu_reg;
	reg	[3:0]	opn;
	reg	[4:0]	opR;
	reg	[31:0]	r_opA, r_opB, op_pc;
	wire	[31:0]	w_opA, w_opB;
	wire	[31:0]	opA_nowait, opB_nowait, opA, opB;
	reg		opR_wr, opR_cc, opF_wr, op_gie,
			opA_rd, opB_rd;
	wire	[9:0]	opFl;
	reg	[6:0]	r_opF;
	wire	[8:0]	opF;
	wire		op_ce;
 
 
	//
	//
	//	PIPELINE STAGE #4 :: ALU / Memory
	//		Variable declarations
	//
	//
	//	alu_pc		PC value associated with the instruction most
	//			recently accepted by the ALU or memory unit
	//	alu_pc_valid	one-clock strobe: an instruction was accepted
	//	alu_wr/alF_wr	one-clock strobes: the ALU result should be
	//			written to a register / to the flags
	//	alu_gie		mode (user=1) of the instruction in this stage
	//	set_cond	the instruction's execution condition holds
	reg	[31:0]	alu_pc;
	// (A stray second semicolon used to follow this declaration.  An
	// empty module item is not legal Verilog-2001, so it was removed.)
	reg		alu_pc_valid;
	wire		alu_ce, alu_stall;
	wire	[31:0]	alu_result;
	wire	[3:0]	alu_flags;
	wire		alu_valid;
	wire		set_cond;
	reg		alu_wr, alF_wr, alu_gie;
 
 
 
	// Memory unit handshake plus the memory side of the bus arbiter
	wire	mem_ce, mem_stalled;
	wire	mem_valid, mem_ack, mem_stall,
		mem_cyc, mem_stb, mem_we;
	wire	[4:0]	mem_wreg;
 
	// mem_rdbusy is defined below as a memory bus cycle that is not a
	// write, i.e. a load whose result is still outstanding.
	wire		mem_busy, mem_rdbusy;
	wire	[31:0]	mem_addr, mem_data, mem_result;
 
 
 
	//
	//
	//	PIPELINE STAGE #5 :: Write-back
	//		Variable declarations
	//
	//	wr_reg_ce/_id/_vl	register write strobe, target and value
	//	wr_flags_ce		ALU flags write strobe
	//	wr_write_pc/_cc		the write targets a PC / CC register
	//	upc, ipc		user and supervisor program counters
	wire		wr_reg_ce, wr_flags_ce, wr_write_pc, wr_write_cc;
	wire	[4:0]	wr_reg_id;
	wire	[31:0]	wr_reg_vl;
	wire	w_switch_to_interrupt, w_release_from_interrupt;
	reg	[31:0]	upc, ipc;
 
 
 
	//
	//	MASTER: clock enable.
	//
	// The execution stages only advance when the CPU is not halted by the
	// debugger, not stopped on a break, not asleep, and not waiting for
	// an outstanding memory read.
	assign	master_ce = (~i_halt)&&(~o_break)&&(~sleep)&&(~mem_rdbusy);
 
 
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//		Calculate stall conditions
 
	//
	//	PIPELINE STAGE #2 :: Instruction Decode
	//		Calculate stall conditions
	// The decoder accepts a new instruction whenever the prefetch offers
	// one and the instruction it currently holds (if any) can move on.
	assign		dcd_ce = (pf_valid)&&(~dcd_stalled);
	assign		dcd_stalled = (dcdvalid)&&(
					(op_stall)
					||((dcdA_stall)||(dcdB_stall)||(dcdF_stall))
					||((opvalid)&&((op_wr_pc)||(opR_cc))));
	//
	//	PIPELINE STAGE #3 :: Read Operands
	//		Calculate stall conditions
	assign	op_stall = ((mem_stalled)&&(opvalid_mem))
				||((alu_stall)&&(opvalid_alu));
	assign	op_ce = (dcdvalid)&&((~opvalid)||(~op_stall));
 
	//
	//	PIPELINE STAGE #4 :: ALU / Memory
	//		Calculate stall conditions
	//
	// The ALU stalls when
	//	1. the CPU is not enabled (or a read is outstanding), or
	//	2. a write to the PC or CC register of the mode this
	//	   instruction runs in is taking place, since that write may
	//	   redirect the instruction stream.
	//
	// The PC/CC test in case 2 must be parenthesized as a unit.  && binds
	// tighter than ||, so without the parentheses (wr_write_cc) alone
	// would stall the ALU whenever the write-back register id merely
	// named a CC register, whether or not a write was taking place.
	// This form matches the equivalent term in mem_stalled below.
	assign	alu_stall = (((~master_ce)||(mem_rdbusy))&&(opvalid_alu))
			||((opvalid)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie)
				&&((wr_write_pc)||(wr_write_cc)));
	assign	alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline);
	//
	// A memory operation is only issued once its condition is known to
	// hold (set_cond), since a bus access cannot be undone.
	assign	mem_ce = (master_ce)&&(opvalid_mem)&&(~mem_stalled)&&(~clear_pipeline)&&(set_cond);
	assign	mem_stalled = (mem_busy)||((opvalid_mem)&&(
				(~master_ce)
				// Stall waiting for flags to be valid
				||((~opF[8])&&(
					((wr_reg_ce)&&(wr_reg_id[4:0] == {op_gie,`CPU_CC_REG}))
					// Do I need this last condition?
					||(wr_flags_ce)))
				// Or waiting for a write to the PC register
				// Or CC register, since that can change the
				//  PC as well
				||((wr_reg_ce)&&(wr_reg_id[4] == op_gie)&&((wr_write_pc)||(wr_write_cc)))));
 
 
	//
	//
	//	PIPELINE STAGE #1 :: Prefetch
	//
	//
	// Two alternative fetch units are supported, chosen at compile time.
	// Both are asked for a new instruction whenever the decoder is not
	// stalled, and both hand back the instruction word together with its
	// address and a valid flag.
`ifdef	SINGLE_FETCH
	wire		pf_ce;
 
	assign		pf_ce = (~dcd_stalled);
	// This unit is given the current gie and returns the mode alongside
	// the instruction (instruction_gie).
	prefetch	pf(i_clk, i_rst, (pf_ce), pf_pc, gie,
				instruction, instruction_pc, instruction_gie,
					pf_valid,
				pf_cyc, pf_stb, pf_we, pf_addr,
					pf_data,
				pf_ack, pf_stall, i_wb_data);
`else // Pipe fetch
	// The pipelined fetch is additionally told about PC changes, cache
	// clears and memory unit bus cycles (mem_cyc).
	pipefetch	#(RESET_ADDRESS)
			pf(i_clk, i_rst, new_pc, i_clear_pf_cache, ~dcd_stalled, pf_pc,
					instruction, instruction_pc, pf_valid,
				pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
					pf_ack, pf_stall, i_wb_data,
				mem_cyc);
	// This unit does not report a mode, so the current one is assumed.
	assign	instruction_gie = gie;
`endif
 
	// Decode stage valid flag.  A synchronous reset clears it.  Accepting
	// a new instruction sets it, unless the pipeline is being flushed in
	// that same clock.  Otherwise it drops as soon as the held instruction
	// is free to move on, or a flush arrives; while stalled without a
	// flush it keeps its value.
	always @(posedge i_clk)
	begin
		if (i_rst)
			dcdvalid <= 1'b0;
		else if (dcd_ce)
			dcdvalid <= !clear_pipeline;
		else if (!((dcd_stalled)&&(!clear_pipeline)))
			dcdvalid <= 1'b0;
	end
 
	// Instruction decoder.  Whenever a new instruction is accepted
	// (dcd_ce), the fields below are registered for use by the
	// read-operands stage on the following clock:
	//	instruction[31:28]	opcode
	//	instruction[27:24]	register A (usually the destination)
	//	instruction[23:21]	execution condition
	//	instruction[20]		B operand is a register (plus offset)
	//	instruction[19:16]	register B
	// Later non-blocking assignments within the case statement override
	// the defaults made ahead of it.
	always @(posedge i_clk)
		if (dcd_ce)
		begin
			dcd_pc <= instruction_pc+1;
 
			// Record what operation we are doing
			dcdOp <= instruction[31:28];
 
			// Default values
			dcdA[4:0] <= { instruction_gie, instruction[27:24] };
			dcdB[4:0] <= { instruction_gie, instruction[19:16] };
			dcdA_cc <=  (instruction[27:24] == `CPU_CC_REG);
			dcdB_cc <=  (instruction[19:16] == `CPU_CC_REG);
			dcdA_pc <=  (instruction[27:24] == `CPU_PC_REG);
			dcdB_pc <=  (instruction[19:16] == `CPU_PC_REG);
			dcdM    <= 1'b0;
			dcdF_wr <= 1'b1;
 
			// Set the condition under which we do this operation.
			// dcdF[3] is set when the instruction is
			// unconditional, dcdF[2:0] is the condition code taken
			// straight from the instruction.  The operand stage
			// expands this into a flag mask and expected value.
			dcdF <= { (instruction[23:21]==3'h0), instruction[23:21] };
			casez(instruction[31:28])
			4'h2: begin // Move instruction
				// Only the supervisor may name registers of
				// the other (user) register set
				if (~instruction_gie)
				begin
					dcdA[4] <= instruction[20];
					dcdB[4] <= instruction[15];
				end
				dcdA_wr <= 1'b1;
				dcdA_rd <= 1'b0;
				dcdB_rd <= 1'b1;
				r_dcdI <= { {(9){instruction[14]}}, instruction[14:0] };
				dcdF_wr <= 1'b0; // Don't write flags
				end
			4'h3: begin // Load immediate
				dcdA_wr <= 1'b1;
				dcdA_rd <= 1'b0;
				dcdB_rd <= 1'b0;
				r_dcdI <= { instruction[23:0] };
				dcdF_wr <= 1'b0; // Don't write flags
				dcdF    <= 4'h8; // This is unconditional
				dcdOp <= 4'h2;
				end
			4'h4: begin // Multiply, LDI[HI|LO], or NOOP/BREAK
				// Don't write flags except for multiplies
				dcdF_wr <= (instruction[27:25] != 3'h7);
				// All three variants share a 16-bit unsigned
				// immediate
				r_dcdI <= { 8'h00, instruction[15:0] };
				if (instruction[27:24] == 4'he)
				begin
					// NOOP instruction
					dcdA_wr <= 1'b0;
					dcdA_rd <= 1'b0;
					dcdB_rd <= 1'b0;
					dcdOp <= 4'h2;
				end else if (instruction[27:24] == 4'hf)
				begin // Load partial immediate(s)
					// The target register is found in the
					// B field of these instructions
					dcdA_wr <= 1'b1;
					dcdA_rd <= 1'b1;
					dcdB_rd <= 1'b0;
					dcdA[4:0] <= { instruction_gie, instruction[19:16] };
					dcdA_cc <= (instruction[19:16] == `CPU_CC_REG);
					dcdA_pc <= (instruction[19:16] == `CPU_PC_REG);
					dcdOp <= { 3'h3, instruction[20] };
				end else begin
					// Actual multiply instruction
					//
					// Every other branch of this case sets
					// dcdA_wr.  It must be set here too,
					// or a multiply inherits whatever the
					// previously decoded instruction left
					// behind and may never write its
					// result.
					dcdA_wr <= 1'b1;
					dcdA_rd <= 1'b1;
					dcdB_rd <= (instruction[19:16] != 4'hf);
					dcdOp[3:0] <= (instruction[20])? 4'h4:4'h3;
				end end
			4'b011?: begin // Load/Store
				dcdF_wr <= 1'b0; // Don't write flags
				dcdA_wr <= (~instruction[28]); // Write on loads
				dcdA_rd <= (instruction[28]); // Read on stores
				dcdB_rd <= instruction[20];
				if (instruction[20])
					r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
				else
					r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
				dcdM <= 1'b1; // Memory operation
				end
			default: begin
				// Opcodes 0 and 1 produce flags only; opcode 5
				// and opcodes 8 and up write register A
				dcdA_wr <= (instruction[31])||(instruction[31:28]==4'h5);
				dcdA_rd <= 1'b1;
				dcdB_rd <= instruction[20];
				if (instruction[20])
					r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
				else
					r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
				end
			endcase
 
 
			dcd_gie <= instruction_gie;
		end
	// Flag the break instruction (the exact word 32'h4e000001) as it is
	// decoded.  The flag lasts only for the clock following dcd_ce; it is
	// cleared on any clock in which no new instruction was accepted.
	always @(posedge i_clk)
		if (dcd_ce)
			dcd_break <= (instruction[31:0] == 32'h4e000001);
		else
			dcd_break <= 1'b0;
 
 
	//
	//
	//	PIPELINE STAGE #3 :: Read Operands (Registers)
	//
	//
	// Operand A.  In priority order: a value being written back on this
	// very clock is forwarded; the PC and CC registers are not kept in
	// regset and come from their own sources; anything else is read from
	// the register file.
	assign	w_opA = regset[dcdA];
	assign	w_opB = regset[dcdB];
	always @(posedge i_clk)
		if (op_ce) // &&(dcdvalid))
		begin
			if ((wr_reg_ce)&&(wr_reg_id == dcdA))
				r_opA <= wr_reg_vl;
			else if ((dcdA_pc)&&(dcdA[4] == dcd_gie))
				r_opA <= dcd_pc;
			else if (dcdA_pc)
				r_opA <= upc;
			else if (dcdA_cc)
				// Select the CC word by the register set the
				// operand names (bit 4 of its id), not by the
				// mode of the instruction.  The two only
				// differ for a supervisor MOV naming a user
				// register.
				r_opA <= { w_opA[31:10], (dcdA[4])?w_uflags:w_iflags };
			else
				r_opA <= w_opA;
		end
	// The 24-bit immediate, sign extended to a full word
	wire	[31:0]	dcdI;
	assign	dcdI = { {(8){r_dcdI[23]}}, r_dcdI };
	// Operand B.  Either the immediate alone, or a register value chosen
	// just as for operand A with the immediate added to it.
	always @(posedge i_clk)
		if (op_ce) // &&(dcdvalid))
		begin
			if (~dcdB_rd)
				r_opB <= dcdI;
			else if ((wr_reg_ce)&&(wr_reg_id == dcdB))
				r_opB <= wr_reg_vl + dcdI;
			else if ((dcdB_pc)&&(dcdB[4] == dcd_gie))
				r_opB <= dcd_pc + dcdI;
			else if (dcdB_pc) // & dcdB[4] != dcd_gie thus is user
				r_opB <= upc + dcdI;
			else if (dcdB_cc)
				// As above: a supervisor "MOV uCC,Rx" names the
				// user CC register (dcdB[4] set) while running
				// with dcd_gie clear.  Keying on dcd_gie
				// returned the supervisor flags instead.
				r_opB <= { w_opB[31:10], (dcdB[4])?w_uflags:w_iflags} + dcdI;
			else
				r_opB <= w_opB + dcdI;
		end
 
	// The logic here has become more complex than it should be, no thanks
	// to Xilinx's Vivado trying to help.  The conditions are supposed to
	// be two sets of four bits: the top bits specify what bits matter, the
	// bottom specify what those top bits must equal.  However, two of
	// conditions check whether bits are on, and those are the only two
	// conditions checking those bits.  Therefore, Vivado complains that
	// these two bits are redundant.  Hence the convoluted expression
	// below, arriving at what we finally want in the (now wire net)
	// opF.
	//
	// r_opF is laid out as
	//	[6]	the instruction is unconditional
	//	[5:4]	mask bits for the N and Z flags
	//	[3:0]	the value the masked flags (V,N,C,Z) must equal; bits
	//		3 and 1 double as the mask bits for V and C
	// and opF expands it to { always, mask[3:0], value[3:0] }.
	//
	// The "always" code used to be written 7'h80.  That constant does not
	// fit in seven bits and is truncated to zero, which left opF[8]
	// permanently clear, so every memory operation was treated as though
	// it were conditional.  Bit 6 is the bit that feeds opF[8].
	always @(posedge i_clk)
		if (op_ce)
		begin // Set the flag condition codes
			case(dcdF[2:0])
			3'h0:	r_opF <= 7'h40;	// Always
			3'h1:	r_opF <= 7'h11;	// Z
			3'h2:	r_opF <= 7'h10;	// NE
			3'h3:	r_opF <= 7'h20;	// GE (!N)
			3'h4:	r_opF <= 7'h30;	// GT (!N&!Z)
			3'h5:	r_opF <= 7'h24;	// LT
			3'h6:	r_opF <= 7'h02;	// C
			3'h7:	r_opF <= 7'h08;	// V
			endcase
		end
	assign	opF = { r_opF[6], r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
 
	// Valid flags for the read-operands stage.  opvalid_alu and
	// opvalid_mem are the same flag, split by whether the instruction is
	// destined for the ALU or for the memory unit.  All three are cleared
	// on reset, and whenever the stage is not reloaded and either is free
	// to drain (~op_stall) or is being flushed.
	always @(posedge i_clk)
		if (i_rst)
		begin
			opvalid     <= 1'b0;
			opvalid_alu <= 1'b0;
			opvalid_mem <= 1'b0;
		end else if (op_ce)
		begin
			// Do we have a valid instruction?
			//   The decoder may vote to stall one of its
			//   instructions based upon something we currently
			//   have in our queue.  This instruction must then
			//   move forward, and get a stall cycle inserted.
			//   Hence, the test on dcd_stalled here.  If we must
			//   wait until our operands are valid, then we aren't
			//   valid yet until then.
			opvalid<= (~clear_pipeline)&&(dcdvalid)&&(~dcd_stalled);
			opvalid_alu <= (~dcdM)&&(~clear_pipeline)&&(dcdvalid)&&(~dcd_stalled);
			opvalid_mem <= (dcdM)&&(~clear_pipeline)&&(dcdvalid)&&(~dcd_stalled);
		end else if ((~op_stall)||(clear_pipeline))
		begin
			opvalid     <= 1'b0;
			opvalid_alu <= 1'b0;
			opvalid_mem <= 1'b0;
		end
 
	// Here's part of our debug interface.  When we recognize a break
	// instruction, we set the op_break flag.  That'll prevent this
	// instruction from entering the ALU, and cause an interrupt before
	// this instruction.  Thus, returning to this code will cause the
	// break to repeat and continue upon return.  To get out of this
	// condition, replace the break instruction with what it is supposed
	// to be, step through it, and then replace it back.  In this fashion,
	// a debugger can step through code.
	//
	// Since op_break is itself one of the terms of clear_pipeline, the
	// flag clears itself on the clock after it is raised unless op_ce
	// reloads it.
	// assign w_op_break = (dcd_break)&&(r_dcdI[15:0] == 16'h0001);
	initial	op_break = 1'b0;
	always @(posedge i_clk)
		if (i_rst)	op_break <= 1'b0;
		else if (op_ce)	op_break <= (dcd_break);
		else if ((clear_pipeline)||(~opvalid))
				op_break <= 1'b0;
 
	// Carry the decoded control fields along with the operands into the
	// read-operands stage.
	always @(posedge i_clk)
		if (op_ce)
		begin
			opn    <= dcdOp;	// Which ALU operation?
			// opM  <= dcdM;	// Is this a memory operation?
			// Will we write the flags/CC Register with our result?
			// (Not when the result itself is written to the CC
			// register, for then the explicit write takes over.)
			opF_wr <= (dcdF_wr)&&((~dcdA_cc)||(~dcdA_wr));
			// Will we be writing our results into a register?
			opR_wr <= dcdA_wr;
			// What register will these results be written into?
			opR    <= dcdA;
			opR_cc <= (dcdA_wr)&&(dcdA_cc);
			// User level (1), vs supervisor (0)/interrupts disabled
			op_gie <= dcd_gie;
 
			// We're not done with these yet--we still need them
			// for the unclocked assign.  We need the unclocked
			// assign so that there's no wait state between an
			// ALU or memory result and the next register that may
			// use that value.
			opA_rd <= dcdA_rd;
			opB_rd <= dcdB_rd;
			op_pc  <= dcd_pc;
			//
			// This instruction writes the PC, i.e. it is a jump
			op_wr_pc <= ((dcdA_wr)&&(dcdA_pc));
		end
	// The CC word of the mode this instruction executes in; its low four
	// bits are the flags the execution condition is tested against.
	assign	opFl = (op_gie)?(w_uflags):(w_iflags);
 
	// This is tricky.  First, the PC and Flags registers aren't kept in
	// register set but in special registers of their own.  So step one
	// is to select the right register.  Step two is to replace that
	// register with the results of an ALU or memory operation, if such
	// results are now available.  Otherwise, we'd need to insert a wait
	// state of some type.
	//
	// The alternative approach would be to define some sort of
	// op_stall wire, which would stall any upstream stage.
	// We'll create a flag here to start our coordination.  Once we
	// define this flag to something other than just plain zero, then
	// the stalls will already be in place.
	//
	// With DONT_STALL_ON_OPx defined, an operand produced by the
	// instruction immediately ahead is taken straight from alu_result
	// rather than waiting for it to reach the register file.  Forwarding
	// only makes sense if that instruction really writes its register
	// (opR_wr).  Compare and test instructions name a register A without
	// writing it, and must not have their ALU output substituted for the
	// operand of the instruction that follows.
	//
	// NOTE(review): a conditional ALU instruction whose condition fails
	// still appears to be forwarded from here -- confirm whether a stall
	// elsewhere covers that case.
`define	DONT_STALL_ON_OPA
`define	DONT_STALL_ON_OPB
`ifdef	DONT_STALL_ON_OPA
	reg	opA_alu;
	always @(posedge i_clk)
		if (op_ce)
			opA_alu <= (opvalid_alu)&&(opR_wr)&&(opR == dcdA)&&(dcdA_rd);
	assign	opA = (opA_alu) ? alu_result : r_opA;
`else
	assign	opA = r_opA;
`endif
 
	// Operand A must wait in the decoder for a register that is still on
	// its way.  The relaxed form belongs with operand A forwarding, hence
	// it is guarded by DONT_STALL_ON_OPA (it used to test the OPB macro,
	// which only worked because both macros are defined together).
	assign	dcdA_stall = (dcdvalid)&&(dcdA_rd)&&(
`ifdef	DONT_STALL_ON_OPA
		// Skip the requirement on writing back opA
		// Stall on memory, since we'll always need to stall for a 
		// memory access anyway
				((opvalid_mem)&&(opR_wr)&&(opR == dcdA))||
`else
				((opvalid)&&(opR_wr)&&(opR == dcdA))||
`endif
					((mem_busy)&&(~mem_we)&&(mem_wreg == dcdA)));
`ifdef	DONT_STALL_ON_OPB
	// Operand B can only be forwarded when no immediate needs to be added
	// to it, since there is no adder after the forwarding multiplexer.
	reg	opB_alu;
	always @(posedge i_clk)
		if (op_ce)
			opB_alu <= (opvalid_alu)&&(opR_wr)&&(opR == dcdB)&&(dcdB_rd)&&(dcdI == 0);
	assign	opB = (opB_alu) ? alu_result : r_opB;
`else
	assign	opB = r_opB;
`endif
	assign	dcdB_stall = (dcdvalid)&&(dcdB_rd)&&(
				((opvalid)&&(opR_wr)&&(opR == dcdB)
`ifdef	DONT_STALL_ON_OPB
					&&((opvalid_mem)||(dcdI != 0))
`endif
				)||
				((mem_busy)&&(~mem_we)&&(mem_wreg == dcdB)));
	// Stall in the decoder on the flags: either this instruction needs
	// them (it is conditional, or reads the CC register) while the
	// instruction ahead is about to change them, or this is a memory
	// instruction following one that writes the flags.
	assign	dcdF_stall = (dcdvalid)&&(
				(((~dcdF[3]) ||(dcdA_cc) ||(dcdB_cc))
					&&(opvalid)&&((opR_cc)||(opF_wr)))
			||((dcdF[3])&&(dcdM)&&(opvalid)&&(opF_wr)));
	//
	//
	//
	//	PIPELINE STAGE #4 :: Apply Instruction
	//
	//
	cpuops	doalu(i_clk, i_rst, alu_ce,
			(opvalid_alu), opn, opA, opB,
			alu_result, alu_flags, alu_valid);
 
	// The execution condition holds when the flags selected by the mask
	// (opF[7:4]) equal the expected value (opF[3:0]).
	assign	set_cond = ((opF[7:4]&opFl[3:0])==opF[3:0]);
	initial	alF_wr   = 1'b0;
	initial	alu_wr   = 1'b0;
	// Write-back strobes for the instruction just handed to the ALU.  A
	// failed condition suppresses both the register and the flags write.
	// Note that alu_reg is not reset, and keeps its value between
	// instructions.
	always @(posedge i_clk)
		if (i_rst)
		begin
			alu_wr   <= 1'b0;
			alF_wr   <= 1'b0;
		end else if (alu_ce)
		begin
			alu_reg <= opR;
			alu_wr  <= (opR_wr)&&(set_cond);
			alF_wr  <= (opF_wr)&&(set_cond);
		end else begin
			// These are strobe signals, so clear them if not
			// set for any particular clock
			alu_wr <= 1'b0;
			alF_wr <= 1'b0;
		end
	// The mode and the PC of whichever instruction, ALU or memory, was
	// accepted last
	always @(posedge i_clk)
		if ((alu_ce)||(mem_ce))
			alu_gie  <= op_gie;
	always @(posedge i_clk)
		if ((alu_ce)||(mem_ce))
			alu_pc  <= op_pc;
	// One-clock strobe marking that alu_pc describes a freshly accepted
	// instruction.  It is used further down to advance upc/ipc, to
	// single step, and as the instruction count output.
	initial	alu_pc_valid = 1'b0;
	always @(posedge i_clk)
		alu_pc_valid <= (~i_rst)&&(master_ce)&&(opvalid)&&(~clear_pipeline)
					&&((opvalid_alu)||(~mem_stalled));
 
	// The memory unit.  opn[0] distinguishes the two load/store opcodes
	// (4'b0110 and 4'b0111); per the decoder, bit 0 set is the store that
	// reads register A.  opB is passed ahead of opA.
	// NOTE(review): presumably opB is the address and opA the data to be
	// stored -- confirm against the memops port list.
	memops	domem(i_clk, i_rst, mem_ce,
				(opn[0]), opB, opA, opR,
				mem_busy, mem_valid, mem_wreg, mem_result,
			mem_cyc, mem_stb, mem_we, mem_addr, mem_data,
				mem_ack, mem_stall, i_wb_data);
	// A memory bus cycle that is not a write: a load is outstanding
	assign	mem_rdbusy = ((mem_cyc)&&(~mem_we));
 
	// Either the prefetch or the instruction gets the memory bus, but 
	// never both.
	wbarbiter	#(32,32) pformem(i_clk, i_rst,
		// Prefetch access to the arbiter
		pf_addr, pf_data, pf_we, pf_stb, pf_cyc, pf_ack, pf_stall,
		// Memory access to the arbiter
		mem_addr, mem_data, mem_we, mem_stb, mem_cyc, mem_ack, mem_stall,
		// Common wires, in and out, of the arbiter
		o_wb_addr, o_wb_data, o_wb_we, o_wb_stb, o_wb_cyc, i_wb_ack,
			i_wb_stall);
 
	//
	//
	//	PIPELINE STAGE #5 :: Write-back results
	//
	//
	// This stage is not allowed to stall.  If results are ready to be
	// written back, they are written back at all cost.  Sleepy CPU's
	// won't prevent write back, nor debug modes, halting the CPU, nor
	// anything else.  Indeed, the (master_ce) bit is only as relevant
	// as knowing something is available for writeback.
 
	//
	// Write back to our generic register set ...
	// When shall we write back?  On one of two conditions
	//	Note that the flags needed to be checked before issuing the
	//	bus instruction, so they don't need to be checked here.
	//	Further, alu_wr includes (set_cond), so we don't need to
	//	check for that here either.
	assign	wr_reg_ce = ((alu_wr)&&(alu_valid))||(mem_valid);
	// Which register shall be written?
	assign	wr_reg_id = (alu_wr)?alu_reg:mem_wreg;
	// Are we writing to the CC register?
	// (Only the register number is compared here; users of this wire
	// check wr_reg_ce and the register set bit for themselves.)
	assign	wr_write_cc = (wr_reg_id[3:0] == `CPU_CC_REG);
	// Are we writing to the PC?
	assign	wr_write_pc = (wr_reg_id[3:0] == `CPU_PC_REG);
	// What value to write?
	assign	wr_reg_vl = (alu_wr)?alu_result:mem_result;
	// A pipeline write-back takes priority over a write from the
	// debugger, which is accepted only while the CPU is halted.
	always @(posedge i_clk)
		if (wr_reg_ce)
			regset[wr_reg_id] <= wr_reg_vl;	
		else if ((i_halt)&&(i_dbg_we))
			regset[i_dbg_reg] <= i_dbg_data[31:0];
 
	//
	// Write back to the condition codes/flags register ...
	// When shall we write to our flags register?  alF_wr already
	// includes the set condition ...
	assign	wr_flags_ce = (alF_wr)&&(alu_valid);
	// The ten-bit CC words as seen by software and by the debugger:
	//	{ trap, 0, break_en, step, gie, sleep, V, N, C, Z }
	// The user view always shows gie set and break_en clear, the
	// supervisor view always shows gie and step clear.  Flags being
	// written by the ALU on this clock are shown already updated.
	assign	w_uflags = { trap, 1'b0, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
	assign	w_iflags = { trap, 1'b0, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
	// What value to write?
	// User mode flags
	always @(posedge i_clk)
		// If explicitly writing the register itself
		if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_cc))
			flags <= wr_reg_vl[3:0];
		// Otherwise if we're setting the flags from an ALU operation
		else if ((wr_flags_ce)&&(alu_gie))
			flags <= alu_flags;
		// Lastly, the debugger may set them while the CPU is halted
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			flags <= i_dbg_data[3:0];
 
	// Supervisor mode flags, with the same three sources and priorities
	always @(posedge i_clk)
		if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_cc))
			iflags <= wr_reg_vl[3:0];
		else if ((wr_flags_ce)&&(~alu_gie))
			iflags <= alu_flags;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b0, `CPU_CC_REG }))
			iflags <= i_dbg_data[3:0];
 
	// The 'break' enable  bit.  This bit can only be set from supervisor
	// mode.  It controls what the CPU does upon encountering a break
	// instruction.
	//
	// The goal, upon encountering a break is that the CPU should stop and
	// not execute the break instruction, choosing instead to enter into
	// either interrupt mode or halt first.  
	//	if ((break_en) AND (break_instruction)) // user mode or not
	//		HALT CPU
	//	else if (break_instruction) // only in user mode
	//		set an interrupt flag, go to supervisor mode
	//		allow supervisor to step the CPU.
	//	Upon a CPU halt, any break condition will be reset.  The
	//	external debugger will then need to deal with whatever
	//	condition has taken place.
	initial	break_en = 1'b0;
	// Cleared by reset and by a debugger halt; set or cleared by a write
	// to the supervisor CC register.
	always @(posedge i_clk)
		if ((i_rst)||(i_halt))
			break_en <= 1'b0;
		else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_cc))
			break_en <= wr_reg_vl[`CPU_BREAK_BIT];
	// Report the break to the outside world only once everything ahead of
	// the break instruction has completed: no ALU or memory result being
	// written back, and no memory operation still in progress.
	assign	o_break = ((break_en)||(~op_gie))&&(op_break)&&(~alu_valid)&&(~mem_valid)&&(~mem_busy);
 
 
	// The sleep register.  Setting the sleep register causes the CPU to
	// sleep until the next interrupt.  Setting the sleep register within
	// interrupt mode causes the processor to halt until a reset.  This is
	// a panic/fault halt.  The trick is that you cannot be allowed to
	// set the sleep bit and switch to supervisor mode in the same 
	// instruction: users are not allowed to halt the CPU.
	//
	// Note that sleep is one of the terms of master_ce, so a set sleep
	// bit stops the execution stages.
	always @(posedge i_clk)
		if ((i_rst)||((i_interrupt)&&(gie)))
			sleep <= 1'b0;
		else if ((wr_reg_ce)&&(wr_write_cc)&&(~alu_gie))
			// In supervisor mode, we have no protections.  The
			// supervisor can set the sleep bit however he wants.
			sleep <= wr_reg_vl[`CPU_SLEEP_BIT];
		else if ((wr_reg_ce)&&(wr_write_cc)&&(wr_reg_vl[`CPU_GIE_BIT]))
			// In user mode, however, you can only set the sleep
			// mode while remaining in user mode.  You can't switch
			// to sleep mode *and* supervisor mode at the same
			// time, lest you halt the CPU.
			sleep <= wr_reg_vl[`CPU_SLEEP_BIT];
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			// The debugger may set or clear it through the user
			// CC register
			sleep <= i_dbg_data[`CPU_SLEEP_BIT];
 
	// The single-step bit.  It is set by the supervisor, or by the
	// debugger, writing the user CC register.  It is cleared by reset, by
	// any switch to interrupt mode, and once one user mode instruction
	// has been accepted (alu_pc_valid) while stepping.
	always @(posedge i_clk)
		if ((i_rst)||(w_switch_to_interrupt))
			step <= 1'b0;
		else if ((wr_reg_ce)&&(~alu_gie)&&(wr_reg_id[4])&&(wr_write_cc))
			step <= wr_reg_vl[`CPU_STEP_BIT];
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG }))
			step <= i_dbg_data[`CPU_STEP_BIT];
		else if ((master_ce)&&(alu_pc_valid)&&(step)&&(gie))
			step <= 1'b0;
 
	// The GIE register.  Only interrupts can disable the interrupt register
	//
	// gie set means user mode with interrupts enabled; gie clear means
	// supervisor (interrupt) mode.  The two wires below are also used
	// further down to reload the prefetch PC on a mode change.
	assign	w_switch_to_interrupt = (gie)&&(
			// On interrupt (obviously)
			(i_interrupt)
			// If we are stepping the CPU
			||((master_ce)&&(alu_pc_valid)&&(step))
			// If we encounter a break instruction, if the break
			//	enable isn't set.
			||((master_ce)&&(op_break)&&(~break_en))
			// If we write to the CC register
			||((wr_reg_ce)&&(~wr_reg_vl[`CPU_GIE_BIT])
				&&(wr_reg_id[4])&&(wr_write_cc))
			// Or if, in debug mode, we write to the CC register
			||((i_halt)&&(i_dbg_we)&&(~i_dbg_data[`CPU_GIE_BIT])
				&&(i_dbg_reg == { 1'b1, `CPU_CC_REG}))
			);
	// Leaving interrupt mode requires that no interrupt be pending, and
	// that the GIE bit be written into the supervisor CC register.
	assign	w_release_from_interrupt = (~gie)&&(~i_interrupt)
			// Then if we write the CC register
			&&(((wr_reg_ce)&&(wr_reg_vl[`CPU_GIE_BIT])
				&&(~wr_reg_id[4])&&(wr_write_cc))
			// Or if, in debug mode, we write the CC register
			  ||((i_halt)&&(i_dbg_we)&&(i_dbg_data[`CPU_GIE_BIT])
				&&(i_dbg_reg == { 1'b0, `CPU_CC_REG}))
			);
	// The CPU comes out of reset in supervisor mode
	always @(posedge i_clk)
		if (i_rst)
			gie <= 1'b0;
		else if (w_switch_to_interrupt)
			gie <= 1'b0;
		else if (w_release_from_interrupt)
			gie <= 1'b1;
 
	// The trap bit records that user code itself asked for supervisor
	// mode by clearing the GIE bit of its own CC register.  It is cleared
	// again on the return to user mode.  The debugger may load it while
	// writing either CC register with the GIE bit clear.
	initial	trap = 1'b0;
	always @(posedge i_clk)
		if (i_rst)
			trap <= 1'b0;
		else if ((gie)&&(wr_reg_ce)&&(~wr_reg_vl[`CPU_GIE_BIT])
				&&(wr_reg_id[4])&&(wr_write_cc))
			trap <= 1'b1;
		else if ((i_halt)&&(i_dbg_we)&&(i_dbg_reg[3:0] == `CPU_CC_REG)
				&&(~i_dbg_data[`CPU_GIE_BIT]))
			trap <= i_dbg_data[`CPU_TRAP_BIT];
		else if (w_release_from_interrupt)
			trap <= 1'b0;
 
	//
	// Write backs to the PC register, and general increments of it
	//	We support two: upc and ipc.  If the instruction is normal,
	// we increment upc, if interrupt level we increment ipc.  If
	// the instruction writes the PC, we write whichever PC is appropriate.
	//
	// Do we need to clear all our partial results from the pipeline?
	// What happens when the pipeline has gie and ~gie instructions within
	// it?  Do we clear both?  What if a gie instruction tries to clear
	// a non-gie instruction?
	//
	// The user PC.  In priority order: an explicit register write, the PC
	// value that accompanies each accepted user mode instruction, and
	// finally a debugger write.  Note that upc has no reset value.
	always @(posedge i_clk)
		if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc))
			upc <= wr_reg_vl;
		else if ((alu_gie)&&(alu_pc_valid))
			upc <= alu_pc;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b1, `CPU_PC_REG }))
			upc <= i_dbg_data;
 
	// The supervisor PC: as above, but starting from RESET_ADDRESS
	always @(posedge i_clk)
		if (i_rst)
			ipc <= RESET_ADDRESS;
		else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc))
			ipc <= wr_reg_vl;
		else if ((~alu_gie)&&(alu_pc_valid))
			ipc <= alu_pc;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg == { 1'b0, `CPU_PC_REG }))
			ipc <= i_dbg_data;
 
	// The prefetch PC: the address of the next instruction to request.
	// It restarts from the other mode's PC on a mode switch, follows any
	// write to the PC of the currently active mode (whether from the
	// pipeline or from the debugger), and otherwise advances by one for
	// each instruction the decoder accepts.
	//
	// The debugger's register number arrives on i_dbg_reg.  These two
	// blocks used to test wr_reg_id there instead, i.e. whatever register
	// the pipeline had last written back, so a debugger write to the PC
	// only redirected the fetch by coincidence.
	always @(posedge i_clk)
		if (i_rst)
			pf_pc <= RESET_ADDRESS;
		else if (w_switch_to_interrupt)
			pf_pc <= ipc;
		else if (w_release_from_interrupt)
			pf_pc <= upc;
		else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
			pf_pc <= wr_reg_vl;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg[4:0] == { gie, `CPU_PC_REG}))
			pf_pc <= i_dbg_data;
		else if (dcd_ce)
			pf_pc <= pf_pc + 1;
 
	// new_pc is a one-clock strobe raised under exactly the conditions
	// that load pf_pc with something other than its own increment, plus
	// an external request to clear the prefetch cache.  It is one of the
	// terms of clear_pipeline, and so flushes the instructions in flight.
	initial	new_pc = 1'b1;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_pf_cache))
			new_pc <= 1'b1;
		else if (w_switch_to_interrupt)
			new_pc <= 1'b1;
		else if (w_release_from_interrupt)
			new_pc <= 1'b1;
		else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
			new_pc <= 1'b1;
		else if ((i_halt)&&(i_dbg_we)
				&&(i_dbg_reg[4:0] == { gie, `CPU_PC_REG}))
			new_pc <= 1'b1;
		else
			new_pc <= 1'b0;
 
	//
	// The debug interface
	//
	// Register read-back, available one clock after i_dbg_reg is set.
	// The PC and CC registers are not kept in regset: the PC is taken
	// from upc/ipc, and for the CC register only the low ten bits are
	// replaced, the upper bits being whatever regset holds.
	always @(posedge i_clk)
		begin
			o_dbg_reg <= regset[i_dbg_reg];
			if (i_dbg_reg[3:0] == `CPU_PC_REG)
				o_dbg_reg <= (i_dbg_reg[4])?upc:ipc;
			else if (i_dbg_reg[3:0] == `CPU_CC_REG)
				o_dbg_reg[9:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
		end
	always @(posedge i_clk)
		o_dbg_cc <= { gie, sleep };
 
	// While halted, report a stall for as long as a bus cycle or memory
	// operation is in progress, or the operand or decode stage holds no
	// valid instruction.
	always @(posedge i_clk)
		o_dbg_stall <= (i_halt)&&(
			(pf_cyc)||(mem_cyc)||(mem_busy)
			||((~opvalid)&&(~i_rst))
			||((~dcdvalid)&&(~i_rst)));
 
	//
	//
	// Produce accounting outputs: Account for any CPU stalls, so we can
	// later evaluate how well we are doing.
	//
	//
	// o_op_stall: the CPU is enabled, yet the operand stage is either
	//	empty or unable to advance
	// o_pf_stall: the CPU is enabled, yet the prefetch has nothing valid
	// o_i_count:  one pulse for every instruction accepted by the ALU or
	//	memory stage
	assign	o_op_stall = (master_ce)&&((~opvalid)||(op_stall));
	assign	o_pf_stall = (master_ce)&&(~pf_valid);
	assign	o_i_count  = alu_pc_valid;
endmodule
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.