OpenCores
URL https://opencores.org/ocsvn/openarty/openarty/trunk

Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [pfcache.v] - Rev 43

Go to most recent revision | Compare with Previous | Blame | View Log

////////////////////////////////////////////////////////////////////////////////
//
// Filename:	pfcache.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	Keeping our CPU fed with instructions, at one per clock and
//		with no stalls.  An unusual feature of this cache is the
//	requirement that the entire cache may be cleared (if necessary).
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
// pfcache -- instruction prefetch with a small cache.
//
// Storage (all sizes follow from the parameters below):
//   cache : (1<<LGCACHELEN) words of BUSW bits, the cached instructions.
//   tags  : one entry per cache line, holding address bits [(AW-1):CW] of
//           the line most recently fetched into that slot.
//   vmask : one valid bit per cache line.
// An address selects its cache line with bits [(CW-1):PW] and the word
// within that line with bits [(PW-1):0].
//
// Ports, as used by the logic in this module:
//   i_clk         Clock; every always block here triggers on its rising edge.
//   i_rst         Ends any bus cycle, clears vmask and the bus error record,
//                 and forces o_v low.
//   i_new_pc      Forces o_v low, loads lastpc from i_pc, and makes the
//                 i_pc based lookup the one selected on the following clock.
//   i_clear_cache Ends any bus cycle, clears every valid bit in vmask and
//                 clears the bus error record.
//   i_stall_n     When high while the current lookup is valid (r_v), lastpc
//                 advances to i_pc.
//   i_pc          Address of the instruction being requested.
//   o_i, o_pc     Word read from the cache and the address it was read from.
//   o_v           High when the selected lookup hit, or when o_illegal is set
//                 with no bus cycle active; gated off by i_new_pc and i_rst.
//   o_wb_*, i_wb_*  Read-only bus master interface used to fill a cache line
//                 (the signal names follow the Wishbone naming convention).
//   o_illegal     Registered flag: i_pc lay within the line whose fetch last
//                 ended in i_wb_err.  Held low during a bus cycle.
module	pfcache(i_clk, i_rst, i_new_pc, i_clear_cache,
			// i_early_branch, i_from_addr,
			i_stall_n, i_pc, o_i, o_pc, o_v,
		o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
			o_illegal);
	// LGCACHELEN: log (base 2) of the number of words held in the cache
	// ADDRESS_WIDTH: number of bits in i_pc, o_pc and o_wb_addr
	parameter	LGCACHELEN = 8, ADDRESS_WIDTH=24,
			LGLINES=5; // Log of the number of separate cache lines
	// Note: CACHELEN is declared here but not referenced again within
	// this module; the cache array below is sized from CW directly.
	localparam	CACHELEN=(1<<LGCACHELEN); // Size of our cache memory
	localparam	CW=LGCACHELEN;	// Short hand for LGCACHELEN
	localparam	PW=LGCACHELEN-LGLINES; // Log of the words in a cache line
	localparam	BUSW = 32;	// Number of data lines on the bus
	localparam	AW=ADDRESS_WIDTH; // Shorthand for ADDRESS_WIDTH
	// CPU side interface
	input				i_clk, i_rst, i_new_pc;
	input				i_clear_cache;
	input				i_stall_n;
	input		[(AW-1):0]	i_pc;
	output	wire	[(BUSW-1):0]	o_i;
	output	wire	[(AW-1):0]	o_pc;
	output	wire			o_v;
	//
	// Bus master outputs
	output	reg		o_wb_cyc, o_wb_stb;
	output	wire		o_wb_we;
	output	reg	[(AW-1):0]	o_wb_addr;
	output	wire	[(BUSW-1):0]	o_wb_data;
	//
	// Bus returns
	input				i_wb_ack, i_wb_stall, i_wb_err;
	input		[(BUSW-1):0]	i_wb_data;
	//
	// Set when the requested address lies in a line that returned a bus
	// error
	output	reg			o_illegal;
 
	// Fixed bus outputs: we read from the bus only, never write.
	// Thus the output data is ... irrelevant and don't care.  We set it
	// to zero just to set it to something.
	assign	o_wb_we = 1'b0;
	assign	o_wb_data = 0;
 
	// r_v: the currently selected lookup hit in the cache.  (o_v adds
	// the illegal, new-PC and reset qualifiers on top of this.)
	wire			r_v;
	// The cache memory itself, its per-line tags, and per-line valid bits
	reg	[(BUSW-1):0]	cache	[0:((1<<CW)-1)];
	reg	[(AW-CW-1):0]	tags	[0:((1<<(LGLINES))-1)];
	reg	[((1<<(LGLINES))-1):0]	vmask;
 
	// lastpc: the last value of i_pc that was captured (see below)
	reg	[(AW-1):0]	lastpc;
	// rdaddr: cache address where the next word from the bus is written
	reg	[(CW-1):0]	rdaddr;
	// Registered tag reads for the lines of i_pc and lastpc respectively,
	// and the one of the two selected by tagsrc
	reg	[(AW-1):CW]	tagvalipc, tagvallst;
	wire	[(AW-1):CW]	tagval;
	// lasttag: lastpc with the within-line offset bits removed
	wire	[(AW-1):PW]	lasttag;
	// Record of the line address (if any) whose fetch ended in i_wb_err
	reg			illegal_valid;
	reg	[(AW-1):PW]	illegal_cache;
 
	// initial	o_i = 32'h76_00_00_00;	// A NOOP instruction
	// initial	o_pc = 0;
	reg	[(BUSW-1):0]	r_pc_cache, r_last_cache;
	reg	[(AW-1):0]	r_pc, r_lastpc;
	reg	isrc;
	always @(posedge i_clk)
	begin
		// We don't have the logic to select what to read, we must
		// read both the value at i_pc and lastpc.  cache[i_pc] is
		// the value we return if the cache is good, cache[lastpc] is
		// the value we return if we've been stalled, weren't valid,
		// or had to wait a clock or two.  (Remember i_pc can't stop
		// changing for a clock, so we need to keep track of the last
		// one from before it stopped.)
		//
		// Here we keep track of which answer we want/need
		isrc <= ((r_v)&&(i_stall_n))||(i_new_pc);
 
		// Here we read both, and select which was right using isrc
		// on the next clock.
		r_pc_cache <= cache[i_pc[(CW-1):0]];
		r_last_cache <= cache[lastpc[(CW-1):0]];
		r_pc <= i_pc;
		r_lastpc <= lastpc;
	end
	// isrc picks between the (address, word) pair read via i_pc and the
	// pair read via lastpc.
	assign	o_pc = (isrc) ? r_pc : r_lastpc;
	assign	o_i  = (isrc) ? r_pc_cache : r_last_cache;
 
	// tagsrc selects which of the two registered tag reads feeds tagval:
	// the one indexed by i_pc (1) or the one indexed by lastpc (0).  It
	// is set under the same condition that loads lastpc from i_pc.
	reg	tagsrc;
	always @(posedge i_clk)
		// It may be possible to recover a clock once the cache line
		// has been filled, but our prior attempt to do so has led
		// to a race condition, so we keep this logic simple.
		if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
			tagsrc <= 1'b1;
		else
			tagsrc <= 1'b0;
	// Both tag reads are registered, so each holds the tag for the
	// address that was presented on the previous clock.
	initial	tagvalipc = 0;
	always @(posedge i_clk)
		tagvalipc <= tags[i_pc[(CW-1):PW]];
	initial	tagvallst = 0;
	always @(posedge i_clk)
		tagvallst <= tags[lastpc[(CW-1):PW]];
	assign	tagval = (tagsrc)?tagvalipc : tagvallst;
 
	// i_pc will only increment when everything else isn't stalled, thus
	// we can set it without worrying about that.   Doing this enables
	// us to work in spite of stalls.  For example, if the next address
	// isn't valid, but the decoder is stalled, get the next address
	// anyway.
	initial	lastpc = 0;
	always @(posedge i_clk)
		if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
			lastpc <= i_pc;
 
	assign	lasttag = lastpc[(AW-1):PW];
 
	// Hit detection.
	// w_v_from_pc: i_pc is in the same line as lastpc, the tag read
	//	through i_pc's index matches i_pc's tag bits, and that line's
	//	valid bit is set.
	// w_v_from_last: the selected tag (tagval) matches lastpc's tag bits
	//	and lastpc's line is marked valid.
	wire	w_v_from_pc, w_v_from_last;
	assign	w_v_from_pc = ((i_pc[(AW-1):PW] == lasttag)
				&&(tagvalipc == i_pc[(AW-1):CW])
				&&(vmask[i_pc[(CW-1):PW]]));
	assign	w_v_from_last = (
				//(lastpc[(AW-1):PW] == lasttag)&&
				(tagval == lastpc[(AW-1):CW])
				&&(vmask[lastpc[(CW-1):PW]]));
 
	// delay: a small down counter that must reach zero before needload
	// may be raised.  It is reloaded with 2 whenever the lookup address
	// is (re)captured, and held at 2 for as long as a bus cycle is
	// active.
	reg	[1:0]	delay;
 
	initial	delay = 2'h3;
	// rvsrc: selects which registered hit result drives r_v and o_v,
	// r_v_from_pc (1) or r_v_from_last (0)
	reg	rvsrc;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache)||(i_new_pc)||((r_v)&&(i_stall_n)))
		begin
			// r_v <= r_v_from_pc;
			rvsrc <= 1'b1;
			delay <= 2'h2;
		end else if (~r_v) begin // Otherwise, r_v was true and we were
			// stalled, hence only if ~r_v
			rvsrc <= 1'b0;
			if (o_wb_cyc)
				delay <= 2'h2;
			else if (delay != 0)
				delay <= delay + 2'b11; // i.e. delay -= 1;
		end
	// Registered copies of the two hit results
	reg	r_v_from_pc, r_v_from_last;
	always @(posedge i_clk)
		r_v_from_pc <= w_v_from_pc;
	always @(posedge i_clk)
		r_v_from_last <= w_v_from_last;
 
	assign	r_v = ((rvsrc)?(r_v_from_pc):(r_v_from_last));
	// o_v is also raised for an illegal address once no bus cycle is
	// active, and is always suppressed by i_new_pc or i_rst.
	assign	o_v = (((rvsrc)?(r_v_from_pc):(r_v_from_last))
				||((o_illegal)&&(~o_wb_cyc)))
			&&(~i_new_pc)&&(~i_rst);
 
	// last_ack: registered look-ahead.  Set when a bus cycle is active
	// and rdaddr either already points at the final word of the line,
	// or points at the word before it while an ack is arriving (so that
	// rdaddr will point at the final word on the next clock).
	reg	last_ack;
	initial	last_ack = 1'b0;
	always @(posedge i_clk)
		last_ack <= (o_wb_cyc)&&(
				(rdaddr[(PW-1):1]=={(PW-1){1'b1}})
				&&((rdaddr[0])||(i_wb_ack)));
 
	// needload: registered request to start filling lastpc's line.
	// Raised only when the lookup is not valid, the delay counter has
	// expired, the line is either mis-tagged or not valid, and lastpc
	// is not within the line recorded as having returned a bus error.
	reg	needload;
	initial	needload = 1'b0;
	always @(posedge i_clk)
		needload <= ((~r_v)&&(delay==0)
			&&((tagvallst != lastpc[(AW-1):CW])
				||(~vmask[lastpc[(CW-1):PW]]))
			&&((~illegal_valid)
				||(lastpc[(AW-1):PW] != illegal_cache)));
 
	// last_addr: registered look-ahead on the request address, the
	// counterpart of last_ack.  Set when a bus cycle is active and the
	// upper within-line bits of o_wb_addr are all ones, with either the
	// low bit already set or the bus not stalled.
	reg	last_addr;
	initial	last_addr = 1'b0;
	always @(posedge i_clk)
		last_addr <= (o_wb_cyc)&&(o_wb_addr[(PW-1):1] == {(PW-1){1'b1}})
				&&((~i_wb_stall)|(o_wb_addr[0]));
 
	// Bus cycle control
	//	- Reset or a cache clear drops both CYC and STB at once.
	//	- During a cycle, STB drops on a bus error or once the request
	//	  flagged by last_addr has been accepted (not stalled).
	//	- CYC drops on a bus error, or on the ack flagged by last_ack.
	//	- When idle, needload starts a new cycle.
	initial	o_wb_cyc  = 1'b0;
	initial	o_wb_stb  = 1'b0;
	initial	o_wb_addr = {(AW){1'b0}};
	initial	rdaddr    = 0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
		begin
			o_wb_cyc <= 1'b0;
			o_wb_stb <= 1'b0;
		end else if (o_wb_cyc)
		begin
			if (i_wb_err)
				o_wb_stb <= 1'b0;
			else if ((o_wb_stb)&&(~i_wb_stall)&&(last_addr))
				o_wb_stb <= 1'b0;
 
			if (((i_wb_ack)&&(last_ack))||(i_wb_err))
				o_wb_cyc <= 1'b0;
 
			// else if (rdaddr[(PW-1):1] == {(PW-1){1'b1}})
			//	tags[lastpc[(CW-1):PW]] <= lastpc[(AW-1):CW];
 
		end else if (needload)
		begin
			o_wb_cyc  <= 1'b1;
			o_wb_stb  <= 1'b1;
		end
 
	// The tag for the line being filled is (re)written on every clock of
	// the bus cycle, not just upon an ack.
	always @(posedge i_clk)
		if (o_wb_cyc) // &&(i_wb_ack)
			tags[o_wb_addr[(CW-1):PW]] <= o_wb_addr[(AW-1):CW];
	// rdaddr: while no bus cycle is active, it is parked at the first
	// word of lastpc's line.  During a cycle it advances by one for each
	// ack received.
	always @(posedge i_clk)
		if ((o_wb_cyc)&&(i_wb_ack))
			rdaddr <= rdaddr + 1;
		else if (~o_wb_cyc)
			rdaddr <= { lastpc[(CW-1):PW], {(PW){1'b0}} };
 
	// o_wb_addr: while no bus cycle is active, it is parked at the first
	// word of lastpc's line.  Its within-line bits step by one each time
	// a request is accepted, other than the one flagged by last_addr.
	always @(posedge i_clk)
		if ((o_wb_stb)&&(~i_wb_stall)&&(~last_addr))
			o_wb_addr[(PW-1):0] <= o_wb_addr[(PW-1):0]+1;
		else if (~o_wb_cyc)
			o_wb_addr <= { lastpc[(AW-1):PW], {(PW){1'b0}} };
 
	// Can't initialize an array, so leave cache uninitialized
	// We'll also never get an ack without sys being active, so skip
	// that check.  Or rather, let's just use o_wb_cyc instead.  This
	// will work because multiple writes to the same address, ending with
	// a valid write, aren't a problem.
	always @(posedge i_clk)
		if (o_wb_cyc) // &&(i_wb_ack)
			cache[rdaddr] <= i_wb_data;
 
	// VMask ... is a section loaded?
	// Note "svmask".  Its purpose is to delay the vmask setting by one
	// clock, so that we can ensure the right value of the cache is loaded
	// before declaring that the cache line is valid.  Without this, the
	// cache line would get read, and the instruction would read from the
	// last cache line.
	reg	svmask;
	initial	vmask = 0;
	initial	svmask = 1'b0;
	// saddr: index of the cache line that the most recent ack wrote into
	reg	[(LGLINES-1):0]	saddr;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
		begin
			// Invalidate every line at once
			vmask <= 0;
			svmask<= 1'b0;
		end
		else begin
			// The final ack of a line fill has just arrived
			svmask <= ((o_wb_cyc)&&(i_wb_ack)&&(last_ack));
 
			// One clock later, mark that line as valid
			if (svmask)
				vmask[saddr] <= 1'b1;
			// A fill is about to start: invalidate the line that
			// is about to be overwritten
			if ((~o_wb_cyc)&&(needload))
				vmask[lastpc[(CW-1):PW]] <= 1'b0;
		end
	always @(posedge i_clk)
		if ((o_wb_cyc)&&(i_wb_ack))
			saddr <= rdaddr[(CW-1):PW];
 
	// Bus error record.  On i_wb_err during a bus cycle, remember the
	// line address being requested.  needload will not be raised again
	// for that line until the record is cleared (reset or cache clear)
	// or replaced by an error on another line.
	initial	illegal_cache = 0;
	initial	illegal_valid = 0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
		begin
			illegal_cache <= 0;
			illegal_valid <= 0;
		end else if ((o_wb_cyc)&&(i_wb_err))
		begin
			illegal_cache <= o_wb_addr[(AW-1):PW];
			illegal_valid <= 1'b1;
		end
 
	// o_illegal: registered comparison of i_pc's line address against
	// the recorded bus error line.  Held low during reset, a cache clear,
	// or any active bus cycle.
	initial o_illegal = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache)||(o_wb_cyc))
			o_illegal <= 1'b0;
		else
			o_illegal <= (illegal_valid)
				&&(illegal_cache == i_pc[(AW-1):PW]);
 
endmodule
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.