////////////////////////////////////////////////////////////////////////////////
//
// Filename:	prefetch.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	This is a very simple instruction fetch approach.  It gets
//		one instruction at a time.  Future versions should pipeline
//		fetches and perhaps even cache results--this doesn't do that.
//		It should, however, be simple enough to get things running.
//
//		The interface is fascinating.  The 'i_pc' input wire is just
//		a suggestion of what to load.  Other wires may be loaded
//		instead.  i_pc is what must be output, not necessarily input.
//
//   20150919 -- Added support for the WB error signal.  When reading an
//		instruction results in this signal being raised, the prefetch
//		module will set an illegal instruction flag to be returned to
//		the CPU together with the instruction.  Hence, the ZipCPU
//		can trap on it if necessary.
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
// Flash requires a minimum of 4 clocks per byte to read, so that would be
// 4*(4bytes/32bit word) = 16 clocks per word read---and that's in pipeline
// mode which this prefetch does not support.  In non-pipelined mode, the
// flash will require (16+6+6)*2 = 56 clocks plus 16 clocks per word read,
// or 72 clocks to fetch one instruction.
module prefetch(i_clk, i_rst, i_ce, i_pc, i_aux,
			o_i, o_pc, o_aux, o_valid, o_illegal,
		o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
	// prefetch: fetch one 32-bit instruction word per bus cycle.
	//
	// Parameters
	//   ADDRESS_WIDTH	Width of the program counter / bus address.
	//   AUX_WIDTH		Width of the auxiliary tag carried with a fetch.
	//   AW			Shorthand for ADDRESS_WIDTH.
	//
	// CPU side
	//   i_clk, i_rst	Clock and synchronous reset.
	//   i_ce		Fetch enable: a new bus cycle may start while
	//			this is high and no cycle is in progress.
	//   i_pc, i_aux	Requested address and its auxiliary tag.
	//   o_i		Instruction word captured on the last ack.
	//   o_pc		Address that o_i was read from.
	//   o_aux		Value of i_aux sampled on the last ack.
	//   o_valid		High when o_pc/o_aux match i_pc/i_aux and no
	//			bus cycle is in progress.
	//   o_illegal		High while a bus cycle is active and i_wb_err
	//			is raised (combinatorial, not latched).
	//
	// Bus side (read-only master)
	//   o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data
	//   i_wb_ack, i_wb_stall, i_wb_err, i_wb_data
	parameter		ADDRESS_WIDTH=32, AUX_WIDTH = 1, AW=ADDRESS_WIDTH;
	input				i_clk, i_rst, i_ce;
	input		[(AW-1):0]	i_pc;
	input	[(AUX_WIDTH-1):0]	i_aux;
	output	reg	[31:0]		o_i;
	output	reg	[(AW-1):0]	o_pc;
	output	reg [(AUX_WIDTH-1):0]	o_aux;
	output	wire			o_valid, o_illegal;
	// Wishbone outputs
	output	reg			o_wb_cyc, o_wb_stb;
	output	wire			o_wb_we;
	output	reg	[(AW-1):0]	o_wb_addr;
	output	wire	[31:0]		o_wb_data;
	// And return inputs
	input				i_wb_ack, i_wb_stall, i_wb_err;
	input		[31:0]		i_wb_data;

	// This master only ever reads, so the write strobe and write data
	// are tied off.
	assign	o_wb_we   = 1'b0;
	assign	o_wb_data = 32'h0000;

	// Bus cycle control.
	//
	// Let's build it simple and upgrade later: for each instruction
	// we do one bus cycle to get the instruction.  Later we should
	// pipeline this, but for now let's just do one at a time.
	//
	// Priority: reset/ack ends the cycle; otherwise an idle bus with
	// i_ce high starts one; otherwise, once the request has been accepted
	// (strobe high and not stalled), the strobe is dropped while the
	// cycle line stays up waiting for the ack.
	//
	// NOTE(review): i_wb_err does not end the bus cycle here; only
	// i_rst or i_wb_ack do.  Confirm the attached bus also acks (or the
	// CPU resets this module) after an error, else o_wb_cyc stays high.
	initial	o_wb_cyc = 1'b0;
	initial	o_wb_stb = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(i_wb_ack))
		begin
			o_wb_cyc <= 1'b0;
			o_wb_stb <= 1'b0;
		end else if ((i_ce)&&(~o_wb_cyc)) // Initiate a bus cycle
		begin
			o_wb_cyc <= 1'b1;
			o_wb_stb <= 1'b1;
		end else if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall))
			// Independent of ce: request accepted, drop the strobe
			o_wb_stb <= 1'b0;

	// Request address: cleared on reset, otherwise loaded from i_pc under
	// the same condition that starts a bus cycle, and held for the rest
	// of that cycle.
	//
	// NOTE(review): o_valid compares against o_pc, not o_wb_addr, so
	// the reset value chosen here does not by itself make the output
	// invalid -- confirm whether o_pc should be reset as well.
	initial	o_wb_addr= 0;
	always @(posedge i_clk)
		if (i_rst)
			o_wb_addr <= {(AW){1'b0}};
		else if ((i_ce)&&(~o_wb_cyc))
			o_wb_addr <= i_pc;

	// Result capture: on the ack that closes a cycle, latch the returned
	// word, the address it was requested from, and the current aux tag.
	always @(posedge i_clk)
		if ((o_wb_cyc)&&(i_wb_ack))
		begin
			o_aux <= i_aux;
			o_i   <= i_wb_data;
			o_pc  <= o_wb_addr;
		end

	// The captured word is valid for the CPU only when it matches what
	// is currently being asked for and the bus is idle.
	assign	o_valid   = (i_pc == o_pc)&&(i_aux == o_aux)&&(~o_wb_cyc);
	// Flag a bus error seen during an active fetch.
	assign	o_illegal = (o_wb_cyc)&&(i_wb_err);

endmodule
|
|