OpenCores
URL https://opencores.org/ocsvn/s6soc/s6soc/trunk

Subversion Repositories s6soc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /s6soc
    from Rev 50 to Rev 51
    Reverse comparison

Rev 50 → Rev 51

/trunk/rtl/Makefile
37,14 → 37,15
##
##
.PHONY: all
all: busmaster altbusmaster
all: busmaster altbusmaster cpudefs.h
YYMMDD=`date +%Y%m%d`
CPUD := cpu
RAWZIP := zipbones.v zipcpu.v cpudefs.v \
prefetch.v idecode.v cpuops.v memops.v \
wbdblpriarb.v
wbdblpriarb.v dblfetch.v
ZIPSRC := $(addprefix $(CPUD)/,$(RAWZIP))
BUSSRC := builddate.v llqspi.v wbscope.v memdev.v spio.v wbgpio.v wbpwmaudio.v
BUSSRC := builddate.v llqspi.v wbscope.v memdev.v spio.v wbgpio.v wbpwmaudio.v\
qflashxpress.v
MAINSRC := busmaster.v builddate.v flash_config.v wbqspiflash.v \
$(BUSSRC) $(ZIPSRC)
# toplevel.v rxuart.v txuart.v
57,26 → 58,31
verilator -trace -cc -y $(CPUD) busmaster.v
$(VOBJ)/Vbusmaster.h: $(VOBJ)/Vbusmaster.cpp
 
$(VOBJ)/Vbusmaster__ALL.a: $(VOBJ)/Vbusmaster.cpp $(VOBJ)/Vbusmaster.h
cd $(VOBJ); make --no-print-directory -f Vbusmaster.mk
 
$(VOBJ)/Valtbusmaster.cpp: $(ALTSRC)
verilator -trace -cc -y $(CPUD) altbusmaster.v
$(VOBJ)/Valtbusmaster.h: $(VOBJ)/Valtbusmaster.cpp
 
$(VOBJ)/Vbusmaster__ALL.a: $(VOBJ)/Vbusmaster.cpp $(VOBJ)/Vbusmaster.h
cd $(VOBJ); make --no-print-directory -f Vbusmaster.mk
$(VOBJ)/Valtbusmaster__ALL.a: $(VOBJ)/Valtbusmaster.cpp $(VOBJ)/Valtbusmaster.h
cd $(VOBJ); make --no-print-directory -f Valtbusmaster.mk
 
$(VOBJ)/Vdblfetch.cpp: $(ALTSRC)
verilator -trace -cc -y $(CPUD) dblfetch.v
$(VOBJ)/Vdblfetch.mk: $(VOBJ)/Vdblfetch.cpp
$(VOBJ)/Vdblfetch.h: $(VOBJ)/Vdblfetch.cpp
 
$(VOBJ)/V%__ALL.a: $(VOBJ)/V%.mk
cd $(VDIRFB); make -f V$*.mk
make -f V$*.mk -C obj_dir
 
$(VOBJ)/Valtbusmaster__ALL.a: $(VOBJ)/Valtbusmaster.cpp $(VOBJ)/Valtbusmaster.h
cd $(VOBJ); make --no-print-directory -f Valtbusmaster.mk
 
cpudefs.h: cpudefs.v
cpudefs.h: cpu/cpudefs.v
@echo "Building cpudefs.h"
@echo "// " > $@
@echo "// Do not edit this file, it is automatically generated!" >> $@
@echo "// To generate this file, \"make cpudefs.h\" in the rtl directory." >> $@
@echo "// " >> $@
@grep "^\`" $^ | sed -e '{ s/^`/#/ }' >> $@
@sed -e '{ s/^`/#/ }' $< | sed -e ' s/cpudefs.v/cpudefs.h/' >> $@
 
.PHONY: busmaster
busmaster: $(VOBJ)/Vbusmaster__ALL.a
/trunk/rtl/altbusmaster.v
43,9 → 43,10
//
// `define IMPLEMENT_ONCHIP_RAM
`define FLASH_ACCESS
// `define DBG_SCOPE // About 204 LUTs, at 2^6 addresses
`define DBG_SCOPE // About 204 LUTs, at 2^6 addresses
// `define COMPRESSED_SCOPE
`define WBUBUS
// `define LOWLOGIC_FLASH
module altbusmaster(i_clk, i_rst,
// DEPP I/O Control
i_depp_astb_n, i_depp_dstb_n, i_depp_write_n,
63,8 → 64,8
o_uart_setup,
// GPIO lines
i_gpio, o_gpio);
parameter BUS_ADDRESS_WIDTH=23,
BAW=BUS_ADDRESS_WIDTH; // 24bits->2,258,23b->2181
parameter BUS_ADDRESS_WIDTH=23;
localparam BAW=BUS_ADDRESS_WIDTH; // 24bits->2,258,23b->2181
// 2^14 bytes requires a LGMEMSZ of 14, and 12 address bits ranging from
// 0 to 11. As with many other devices, the wb_cyc line is more for
// form than anything else--it is ignored by the memory itself.
86,7 → 87,12
input i_tx_busy;
output wire o_uart_rts_n;
// SPI flash control
output wire o_qspi_cs_n, o_qspi_sck;
output wire o_qspi_cs_n;
`ifdef LOWLOGIC_FLASH
output wire [1:0] o_qspi_sck;
`else // LOWLOGIC_FLASH
output wire o_qspi_sck;
`endif // LOWLOGIC_FLASH
output wire [3:0] o_qspi_dat;
input [3:0] i_qspi_dat;
output wire [1:0] o_qspi_mod;
173,11 → 179,11
wire none_sel, many_sel;
 
wire io_sel, flash_sel, flctl_sel, scop_sel, mem_sel;
wire flash_ack, scop_ack, cfg_ack, mem_ack, many_ack;
wire io_stall, flash_stall, scop_stall, cfg_stall, mem_stall;
wire flash_ack, scop_ack, mem_ack, many_ack;
wire io_stall, flash_stall, scop_stall, mem_stall;
reg io_ack;
 
wire [31:0] flash_data, scop_data, cfg_data, mem_data, pwm_data,
wire [31:0] flash_data, scop_data, mem_data, pwm_data,
spio_data, gpio_data, uart_data;
reg [31:0] io_data;
reg [(BAW-1):0] bus_err_addr;
309,11 → 315,16
`else
assign idle_n = 1'b1;
`endif
assign io_sel =((idle_n)&&(skipaddr[3:0]==4'h1));
assign scop_sel =((idle_n)&&(skipaddr[3:1]==3'h1)); // = 4'h2
assign flctl_sel= 1'b0; // ((wb_cyc)&&(skipaddr[3:0]==4'h3));
assign mem_sel =((idle_n)&&(skipaddr[3:2]==2'h1));
assign flash_sel=((idle_n)&&(skipaddr[3]));
assign io_sel = ((idle_n)&&(skipaddr[3:0]==4'b00_01));
`ifdef LOWLOGIC_FLASH
assign scop_sel = ((idle_n)&&(skipaddr[3:1]==3'b00_1)); // = 4'h2
assign flctl_sel= 1'b0; // The lowlogic flash has no control registers
`else // LOWLOGIC_FLASH
assign scop_sel = ((idle_n)&&(skipaddr[3:0]==4'b00_10)); // = 4'h2
assign flctl_sel= ((wb_cyc)&&(skipaddr[3:0]==4'b00_11));
`endif // LOWLOGIC_FLASH
assign mem_sel = ((idle_n)&&(skipaddr[3:2]==2'b01));
assign flash_sel= ((idle_n)&&(skipaddr[3]));
 
//
// none_sel
505,13 → 516,30
// FLASH MEMORY CONFIGURATION ACCESS
//
`ifdef FLASH_ACCESS
wbqspiflash #(LGFLASHSZ) flashmem(i_clk,
wb_cyc,(wb_stb)&&(flash_sel),(wb_stb)&&(flctl_sel),wb_we,
`ifdef LOWLOGIC_FLASH
wire w_flash_ack;
qflashxpress flashmem(i_clk,
wb_cyc,(wb_stb)&&(flash_sel),
wb_addr[(LGFLASHSZ-3):0],
w_flash_ack, flash_stall, flash_data,
o_qspi_sck, o_qspi_cs_n, o_qspi_mod, o_qspi_dat, i_qspi_dat);
 
assign flash_interrupt = 1'b0;
reg r_flash_ack;
initial r_flash_ack = 1'b0;
always @(posedge i_clk)
r_flash_ack <= (wb_stb)&&(flctl_sel);
assign flash_ack = (w_flash_ack)||(r_flash_ack);
`else // LOWLOGIC_FLASH
wbqspiflashp #(LGFLASHSZ) // Use the writable interface
flashmem(i_clk,
wb_cyc,(wb_stb)&&(flash_sel),(wb_stb)&&(flctl_sel),wb_we,
wb_addr[(LGFLASHSZ-3):0], wb_data,
flash_ack, flash_stall, flash_data,
o_qspi_sck, o_qspi_cs_n, o_qspi_mod, o_qspi_dat, i_qspi_dat,
flash_interrupt);
`else
`endif // LOWLOGIC_FLASH
`else // FLASH_ACCESS
reg r_flash_ack;
initial r_flash_ack = 1'b0;
always @(posedge i_clk)
526,7 → 554,7
assign o_qspi_cs_n = 1'b1;
assign o_qspi_mod = 2'b01;
assign o_qspi_dat = 4'b1111;
`endif
`endif // FLASH_ACCESS
 
//
// ON-CHIP RAM MEMORY ACCESS
552,36 → 580,42
//
//
//
wire [31:0] scop_cfg_data;
wire scop_cfg_ack, scop_cfg_stall, scop_cfg_interrupt;
`ifdef DBG_SCOPE
wire scop_cfg_trigger;
assign scop_cfg_trigger = (wb_stb)&&(cfg_sel);
wire scop_trigger = bus_dbg;
wire scop_trigger;
// assign scop_trigger = (flash_sel)&&(wb_stb); // bus_dbg;
assign scop_trigger = (wb_stb)&&(wb_we)&&(flctl_sel);
wire [31:0] flash_debug;
wire [1:0] sck;
`ifdef LOWLOGIC_FLASH
assign sck = o_qspi_sck;
`else // LOWLOGIC_FLASH
assign sck = { o_qspi_sck, o_qspi_sck };
`endif // LOWLOGIC_FLASH
assign flash_debug = {
wb_cyc, wb_stb,
flash_sel, flctl_sel, flash_ack, flash_stall,
o_qspi_cs_n, sck, o_qspi_mod, 1'b0,
o_qspi_dat, i_qspi_dat, flash_data[11:0]
};
`ifdef COMPRESSED_SCOPE
wbscopc #(5'ha)
`else
wbscope #(5'ha)
`endif
wbcfgscope(i_clk, 1'b1, scop_trigger, bus_debug,
thescope(i_clk, 1'b1, scop_trigger, flash_debug,
// Wishbone interface
i_clk, wb_cyc, (wb_stb)&&(scop_sel),
wb_we, wb_addr[0], wb_data,
scop_cfg_ack, scop_cfg_stall, scop_cfg_data,
scop_cfg_interrupt);
scop_ack, scop_stall, scop_data,
scop_interrupt);
`else
reg r_scop_cfg_ack;
reg r_scop_ack;
always @(posedge i_clk)
r_scop_cfg_ack <= (wb_stb)&&(scop_sel);
assign scop_cfg_ack = r_scop_cfg_ack;
assign scop_cfg_data = 32'h000;
assign scop_cfg_stall= 1'b0;
r_scop_ack <= (wb_stb)&&(scop_sel);
assign scop_ack = r_scop_ack;
assign scop_data = 32'h000;
assign scop_stall= 1'b0;
`endif
 
assign scop_interrupt = scop_cfg_interrupt;
assign scop_ack = scop_cfg_ack;
assign scop_stall = scop_cfg_stall;
assign scop_data = scop_cfg_data;
 
endmodule
 
/trunk/rtl/alttop.v
43,6 → 43,7
////////////////////////////////////////////////////////////////////////////////
//
//
// `define LOWLOGIC_FLASH
module alttop(i_clk_8mhz,
o_qspi_cs_n, o_qspi_sck, io_qspi_dat,
i_btn, o_led, o_pwm, o_pwm_shutdown_n, o_pwm_gain,
89,7 → 90,7
//
// Generate a usable clock for the rest of the board to run at.
//
wire ck_zero_0, clk_s;
wire ck_zero_0, clk_s, clk_sn;
 
// Clock frequency = (20 / 2) * 8Mhz = 80 MHz
// Clock period = 12.5 ns
110,6 → 111,7
.CLK0(ck_zero_0),
.CLKFB(ck_zero_0),
.CLKFX(clk_s),
.CLKFX180(clk_sn),
.PSEN(1'b0),
.RST(1'b0));
 
126,7 → 128,7
wire tx_busy;
wire [29:0] uart_setup;
 
// Baud rate is set by clock rate / baud rate desired. Thus,
// Baud rate is set by clock rate / baud rate desired. Thus,
// 80 MHz / 9600 Baud = 8333, or about 0x208d. We choose a slow
// speed such as 9600 Baud to help the CPU keep up with the serial
// port rate.
159,6 → 161,12
// This is an alternate version of the busmaster interface,
// offering no ZipCPU and access to reprogramming via the flash.
//
`ifdef LOWLOGIC_FLASH
wire [1:0] qspi_sck;
`else
wire qspi_sck;
`endif
wire qspi_cs_n;
wire [3:0] qspi_dat;
wire [1:0] qspi_bmod;
wire [15:0] w_gpio;
174,7 → 182,7
// External UART interface
rx_stb, rx_data, tx_stb, tx_data, tx_busy, w_uart_rts_n,
// SPI/SD-card flash
o_qspi_cs_n, o_qspi_sck, qspi_dat, io_qspi_dat, qspi_bmod,
qspi_cs_n, qspi_sck, qspi_dat, io_qspi_dat, qspi_bmod,
// Board lights and switches
i_btn, o_led, o_pwm, { o_pwm_shutdown_n, o_pwm_gain },
// Keypad connections
195,10 → 203,42
// port with one wire in and one wire out. This utilizes our
// control wires (qspi_bmod) to set the output lines appropriately.
//
assign io_qspi_dat = (~qspi_bmod[1])?({2'b11,1'bz,qspi_dat[0]})
//
// 2'b0? -- Normal SPI
// 2'b10 -- Quad Output
// 2'b11 -- Quad Input
`ifdef LOWLOGIC_FLASH
reg r_qspi_cs_n;
reg [1:0] r_qspi_bmod;
reg [3:0] r_qspi_dat, r_qspi_z;
reg [1:0] r_qspi_sck;
always @(posedge clk_s)
r_qspi_sck <= qspi_sck;
xoddr xqspi_sck({clk_s, clk_sn}, r_qspi_sck, o_qspi_sck);
initial r_qspi_cs_n = 1'b1;
initial r_qspi_z = 4'b1101;
always @(posedge clk_s)
begin
r_qspi_dat <= (qspi_bmod[1]) ? qspi_dat:{ 3'b111, qspi_dat[0]};
r_qspi_z <= (!qspi_bmod[1])? 4'b1101
: ((qspi_bmod[0]) ? 4'h0 : 4'hf);
r_qspi_cs_n <= qspi_cs_n;
end
 
assign o_qspi_cs_n = r_qspi_cs_n;
assign io_qspi_dat[0] = (r_qspi_z[0]) ? r_qspi_dat[0] : 1'bz;
assign io_qspi_dat[1] = (r_qspi_z[1]) ? r_qspi_dat[1] : 1'bz;
assign io_qspi_dat[2] = (r_qspi_z[2]) ? r_qspi_dat[2] : 1'bz;
assign io_qspi_dat[3] = (r_qspi_z[3]) ? r_qspi_dat[3] : 1'bz;
`else
assign io_qspi_dat = (!qspi_bmod[1])?({2'b11,1'bz,qspi_dat[0]})
:((qspi_bmod[0])?(4'bzzzz):(qspi_dat[3:0]));
 
`else
assign o_qspi_cs_n = qspi_cs_n;
assign o_qspi_sck = qspi_sck;
`endif // LOWLOGIC_FLASH
 
`else // BYPASS_LOGIC
reg [26:0] r_counter;
always @(posedge clk_s)
r_counter <= r_counter+1;
225,7 → 265,7
assign uart_setup = 30'h080002b6;
 
assign o_uart_rts_n = 1'b0;
`endif
`endif // BYPASS_LOGIC
//
// I2C support
//
232,7 → 272,7
// Supporting I2C requires a couple quick adjustments to our
// GPIO lines. Specifically, we'll allow that when the output
// (i.e. w_gpio) pins are high, then the I2C lines float. They
// will be (need to be) pulled up by a resistor in order to
// will be (need to be) pulled up by a resistor in order to
// match the I2C protocol, but this change makes them look/act
// more like GPIO pins.
//
/trunk/rtl/builddate.v
1,7 → 272,7
`define DATESTAMP 32'h20170309
`define DATESTAMP 32'h20170321
/trunk/rtl/busmaster.v
46,7 → 46,10
`define FLASH_ACCESS
`define DBG_SCOPE // About 204 LUTs, at 2^6 addresses
// `define COMPRESSED_SCOPE
`define HAS_RXUART
`define INCLUDE_CPU_RESET_LOGIC
`define LOWLOGIC_FLASH // Saves about 154 LUTs
`define USE_LITE_UART // Saves about 55 LUTs
module busmaster(i_clk, i_rst,
i_uart, o_uart_rts_n, o_uart, i_uart_cts_n,
// The SPI Flash lines
75,7 → 78,12
input i_uart, i_uart_cts_n;
output wire o_uart, o_uart_rts_n;
// SPI flash control
output wire o_qspi_cs_n, o_qspi_sck;
output wire o_qspi_cs_n;
`ifdef LOWLOGIC_FLASH
output wire [1:0] o_qspi_sck;
`else
output wire o_qspi_sck;
`endif
output wire [3:0] o_qspi_dat;
input [3:0] i_qspi_dat;
output wire [1:0] o_qspi_mod;
140,11 → 148,11
end
assign cpu_reset = btn_reset;
`else
assign cpu_reset = 1'b0;
assign cpu_reset = (watchdog_int);
`endif
 
zipbones #(CMOD_ZIPCPU_RESET_ADDRESS,ZA,6)
swic(i_clk, btn_reset, // 1'b0,
swic(i_clk, cpu_reset, // 1'b0,
// Zippys wishbone interface
wb_cyc, wb_stb, wb_we, w_zip_addr, wb_data, wb_sel,
wb_ack, wb_stall, wb_idata, wb_err,
437,7 → 445,8
wb_we, wb_data, spio_data,
o_kp_col, i_kp_row, i_btn, w_led,
keypad_int, button_int);
assign o_led = { w_led[3]|w_interrupt,w_led[2]|zip_cpu_int,w_led[1:0] };
assign o_led = { w_led[3]|w_interrupt,w_led[2]|zip_cpu_int,
w_led[1], w_led[0] };
 
//
// General purpose (sort of) I/O: (Bottom two bits robbed in each
459,17 → 468,40
//
assign uart_setup = UART_SETUP;
//
`ifdef HAS_RXUART
`ifdef USE_LITE_UART
rxuartlite #(UART_SETUP[23:0])
rcvuart(i_clk, i_uart, rx_stb, rx_data);
assign rx_break = 1'b0;
assign rx_parity_err = 1'b0;
assign rx_frame_err = 1'b0;
assign rx_ck_uart = 1'b0;
`else
rxuart #(UART_SETUP)
rcvuart(i_clk, 1'b0, uart_setup, i_uart, rx_stb, rx_data,
rx_break, rx_parity_err, rx_frame_err, rx_ck_uart);
`endif
`else
assign rx_break = 1'b0;
assign rx_parity_err = 1'b0;
assign rx_frame_err = 1'b0;
assign rx_ck_uart = 1'b0;
assign rx_stb = 1'b0;
assign rx_data = 8'h0;
`endif
//
wire tx_break, tx_busy;
reg tx_stb;
reg [7:0] tx_data;
assign tx_break = 1'b0;
`ifdef USE_LITE_UART
txuartlite #(UART_SETUP[23:0])
tcvuart(i_clk, tx_stb, tx_data, o_uart, tx_busy);
`else
txuart #(UART_SETUP)
tcvuart(i_clk, 1'b0, uart_setup, tx_break, tx_stb, tx_data,
i_uart_cts_n, o_uart, tx_busy);
`endif
 
//
// Rudimentary serial port control
487,6 → 519,7
end
else if ((tx_stb)&&(!tx_busy))
tx_stb <= 1'b0;
`ifdef HAS_RXUART
initial rx_rdy = 1'b0;
always @(posedge i_clk)
if (rx_stb)
495,11 → 528,15
begin
if((wb_stb)&&(io_sel)&&(wb_addr[3:0]==4'h7)&&(!wb_we))
rx_rdy <= rx_stb;
else if (rx_stb)
else
rx_rdy <= (rx_rdy | rx_stb);
end
assign o_uart_rts_n = (rx_rdy);
assign uart_data = { 23'h0, !rx_rdy, r_rx_data };
`else
assign o_uart_rts_n = 1'b1;
assign uart_data = 32'h00;
`endif
//
// uart_ack gets returned as part of io_ack, since that happens when
// io_sel and wb_stb are defined
513,6 → 550,15
// FLASH MEMORY CONFIGURATION ACCESS
//
`ifdef FLASH_ACCESS
`ifdef LOWLOGIC_FLASH
qflashxpress flashmem(i_clk,
wb_cyc,(wb_stb)&&(flash_sel),
wb_addr[(LGFLASHSZ-3):0],
flash_ack, flash_stall, flash_data,
o_qspi_sck, o_qspi_cs_n, o_qspi_mod, o_qspi_dat, i_qspi_dat);
 
assign flash_interrupt = 1'b0;
`else
wbqspiflash #(LGFLASHSZ) flashmem(i_clk,
wb_cyc,(wb_stb)&&(flash_sel),(wb_stb)&&(flctl_sel),wb_we,
wb_addr[(LGFLASHSZ-3):0], wb_data,
519,6 → 565,7
flash_ack, flash_stall, flash_data,
o_qspi_sck, o_qspi_cs_n, o_qspi_mod, o_qspi_dat, i_qspi_dat,
flash_interrupt);
`endif
`else
reg r_flash_ack;
initial r_flash_ack = 1'b0;
/trunk/rtl/cpu/cpudefs.v
41,7 → 41,7
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
103,12 → 103,12
// mode.
//
//
// `define OPT_DIVIDE
`define OPT_DIVIDE
//
//
//
// OPT_IMPLEMENT_FPU will (one day) control whether or not the floating point
// unit (once I have one) is built and included into the ZipCPU by default.
// unit (once I have one) is built and included into the ZipCPU by default.
// At that time, if this option is set then a parameter will be set that
// causes the floating point unit to be included. (This parameter may
// still be overridden, as with any parameter ...) If the floating point unit
122,36 → 122,11
//
//
//
// The instruction set defines an optional compressed instruction set (CIS)
// complement. These were at one time erroneously called Very Long Instruction
// Words. They are more appropriately referred to as compressed instructions.
// The compressed instruction format allows two instructions to be packed into
// the same instruction word. Some instructions can be compressed, not all.
// Compressed instructions take the same time to complete. Set OPT_CIS to
// include these double instructions as part of the instruction set. These
// instructions are designed to get more code density from the instruction set,
// and to hopefully take some pain off of the performance of the pre-fetch and
// instruction cache.
//
// These new instructions, however, also necessitate a change in the Zip
// CPU--the Zip CPU can no longer execute instructions atomically. It must
// now execute non-CIS instructions, or CIS instruction pairs, atomically.
// This logic has been added into the ZipCPU, but it has not (yet) been
// tested thoroughly.
//
// Oh, and the debugger and the simulator also need to be updated as well
// to properly handle these.
//
// `define OPT_CIS // Adds about 80 LUTs on a Spartan 6
//
//
//
//
// OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and
// OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and
// whether or not it can issue one instruction per clock. When set, the
// prefetch has no cache, and only one instruction is fetched at a time.
// This effectively sets the CPU so that only one instruction is ever
// in the pipeline at once, and hence you may think of this as a "kill
// This effectively sets the CPU so that only one instruction is ever
// in the pipeline at once, and hence you may think of this as a "kill
// pipeline" option. However, since the pipelined fetch component uses so
// much area on the FPGA, this is an important option to use in trimming down
// used area if necessary. Hence, it needs to be maintained for that purpose.
160,7 → 135,7
//
// We can either pipeline our fetches, or issue one fetch at a time. Pipelined
// fetches are more complicated and therefore use more FPGA resources, while
// single fetches will cause the CPU to stall for about 5 stalls each
// single fetches will cause the CPU to stall for about 5 stalls each
// instruction cycle, effectively reducing the instruction count per clock to
// about 0.2. However, the area cost may be worth it. Consider:
//
172,10 → 147,66
// I recommend only defining this if you "need" to, if area is tight and
// speed isn't as important. Otherwise, just leave this undefined.
//
`define OPT_SINGLE_FETCH
//`define OPT_SINGLE_FETCH // 2047 total LUTs (savings of 181 from before)
//
//
// OPT_DOUBLE_FETCH is an alternative to OPT_SINGLE_FETCH. It is designed to
// increase performance primarily when using an instruction memory which has
// one cost for a random access, and a second (lower) cost for sequential
// access. The driving example behind this implementation was flash memory
// with 34 clocks for an initial access and 16 clocks for any subsequent access,
// but SDRAM memory with 27 clocks for an initial access and 1 clock for a
// subsequent access is also a good example. Even block RAM might be a good
// example, if there were any bus delays in getting to the RAM device. Using
// OPT_DOUBLE_FETCH also increases the pipeline speed, as it allows CIS
// instructions and therefore partial pipelining. (No work is done to resolve
// pipeline conflicts past the decode stage, as is the case with full pipeline
// mode.
//
// Do not define OPT_DOUBLE_FETCH if you wish to fully pipeline the CPU. Do
// not define both OPT_DOUBLE_FETCH and OPT_SINGLE_FETCH (the ifndef below
// should prevent that).
//
//
// // COST: about 79 LUTs over and above the SINGLE_FETCH cost [2091 LUTs]
`ifndef OPT_SINGLE_FETCH
`define OPT_DOUBLE_FETCH
`endif
//
//
//
// The ZipCPU ISA defines an optional compressed instruction set (CIS)
// complement. This compressed instruction format allows two instructions to
// be packed into the same instruction word. Some instructions can be so
// compressed, although not all. Compressed instructions take the same time to
// complete--they are just compressed within memory to spare troubles with the
// prefetch. Set OPT_CIS to include these compressed instructions as part of
// the instruction set.
//
//
// // COST: about 87 LUTs
//
`define OPT_CIS
//
//
//
//
// OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
// as possible, to avoid as many pipeline stalls on a branch as possible.
// With the OPT_TRADITIONAL_PFCACHE, BRA instructions cost only a single
// extra stall cycle, while LJMP instructions cost two (assuming the target is
// in the cache). Indeed, the result is that a BRA instruction can be used as
// the compiler's branch prediction optimizer: BRA's barely stall, while
// conditional branches will always suffer about 4 stall cycles or so.
//
// I recommend setting this flag, so as to turn early branching on---if you
// have the LUTs available to afford it.
//
// `define OPT_EARLY_BRANCHING
//
//
//
//
// The next several options are pipeline optimization options. They make no
// sense in a single instruction fetch mode, hence we #ifndef them so they
// are only defined if we are in a full pipelined mode (i.e. OPT_SINGLE_FETCH
182,12 → 213,13
// is not defined).
//
`ifndef OPT_SINGLE_FETCH
`ifndef OPT_DOUBLE_FETCH
//
//
//
// OPT_PIPELINED is the natural result and opposite of using the single
// OPT_PIPELINED is the natural result and opposite of using the single
// instruction fetch unit. If you are not using that unit, the ZipCPU will
// be pipelined. The option is defined here more for readability than
// be pipelined. The option is defined here more for readability than
// anything else, since OPT_PIPELINED makes more sense than OPT_SINGLE_FETCH,
// well ... that and it does a better job of explaining what is going on.
//
210,21 → 242,7
//
//
//
// OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
// as possible, to avoid as many pipeline stalls on a branch as possible.
// It's not tremendously successful yet--BRA's still suffer stalls,
// but I intend to keep working on this approach until the number of stalls
// gets down to one or (ideally) zero. (With the OPT_TRADITIONAL_PFCACHE, this
// gets down to a single stall cycle ...) That way a "BRA" can be used as the
// compiler's branch prediction optimizer: BRA's barely stall, while branches
// on conditions will always suffer about 4 stall cycles or so.
//
// I recommend setting this flag, so as to turn early branching on.
//
`define OPT_EARLY_BRANCHING
//
//
//
// OPT_PIPELINED_BUS_ACCESS controls whether or not LOD/STO instructions
// can take advantaged of pipelined bus instructions. To be eligible, the
// operations must be identical (cannot pipeline loads and stores, just loads
243,8 → 261,7
//
//
//
//
//
`endif // OPT_DOUBLE_FETCH
`endif // OPT_SINGLE_FETCH
//
//
/trunk/rtl/cpu/cpuops.v
128,8 → 128,8
assign mpy_result = r_mpy_a_input * r_mpy_b_input;
assign mpybusy = 1'b0;
 
reg mpypipe;
initial mpypipe = 1'b0;
reg mpypipe;
always @(posedge i_clk)
if (i_rst)
mpypipe <= 1'b0;
205,6 → 205,7
reg [2:0] mpypipe;
 
// First clock, latch in the inputs
initial mpypipe = 3'b0;
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
/trunk/rtl/cpu/dblfetch.v
0,0 → 1,232
////////////////////////////////////////////////////////////////////////////////
//
// Filename: dblfetch.v
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: This is one step beyond the simplest instruction fetch,
// prefetch.v. dblfetch.v uses memory pipelining to fetch two
// instruction words in one cycle, figuring that the unpipelined CPU can't
// go through both at once, but yet recycles itself fast enough for the
// next instruction that would follow. It is designed to be a touch
// faster than the single instruction prefetch, although not as fast as
// the prefetch and cache found elsewhere.
//
// There are some gotcha's in this logic, however. For example, it's
// illegal to switch devices mid-transaction, since the second device
// might have different timing. I.e. the first device might take 8
// clocks to create an ACK, and the second device might take 2 clocks, the
// acks might therefore come on top of each other, or even out of order.
// But ... in order to keep logic down, we keep track of the PC in the
// o_wb_addr register. Hence, this register gets changed on any i_new_pc.
// The i_pc value associated with i_new_pc will only be valid for one
// clock, hence we can't wait to change. To keep from violating the WB
// rule, therefore, we *must* immediately stop requesting any transaction,
// and then terminate the bus request as soon as possible.
//
// This has consequences in terms of logic used, leaving this routine
// anything but simple--even though the number of wires affected by
// this is small (o_wb_cyc, o_wb_stb, and last_ack).
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
module dblfetch(i_clk, i_rst, i_new_pc, i_clear_cache,
i_stall_n, i_pc, o_i, o_pc, o_v,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
o_illegal);
parameter ADDRESS_WIDTH=32, AUX_WIDTH = 1;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc, i_clear_cache,
i_stall_n;
input [(AW-1):0] i_pc;
output reg [31:0] o_i;
output reg [(AW-1):0] o_pc;
output wire o_v;
// Wishbone outputs
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
output reg [(AW-1):0] o_wb_addr;
output wire [31:0] o_wb_data;
// And return inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
// And ... the result if we got an error
output reg o_illegal;
 
assign o_wb_we = 1'b0;
assign o_wb_data = 32'h0000;
 
reg last_ack, last_stb, invalid_bus_cycle;
 
reg [31:0] cache [0:1];
reg cache_read_addr, cache_write_addr;
reg [1:0] cache_valid;
 
initial o_wb_cyc = 1'b0;
initial o_wb_stb = 1'b0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end else if (o_wb_cyc)
begin
if ((o_wb_stb)&&(!i_wb_stall))
begin
// last_stb <= 1'b1;
o_wb_stb <= !last_stb;
end
// if (i_wb_ack)
// last_ack <= 1'b1;
if ((i_new_pc)||(invalid_bus_cycle))
o_wb_stb <= 1'b0;
 
if ((i_wb_ack)&&(
// Relase the bus on the second ack
(last_ack)
// Or on the first ACK, if we've been told
// we have an invalid bus cycle
||((o_wb_stb)&&(i_wb_stall)&&(last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
 
if ((!last_stb)&&(i_wb_stall)&&((i_new_pc)||(invalid_bus_cycle)))
// Also release the bus with no acks, if we
// haven't made any requests
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
end else if ((invalid_bus_cycle)
||((o_v)&&(i_stall_n)&&(cache_read_addr))) // Initiate a bus cycle
begin
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end
 
initial last_stb = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall))
last_stb <= 1'b1;
else if (!o_wb_cyc)
last_stb <= 1'b0;
 
initial last_ack = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(i_wb_stall)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall)&&(!last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if (!o_wb_cyc)
last_ack <= 1'b0;
 
initial invalid_bus_cycle = 1'b0;
always @(posedge i_clk)
if (i_rst)
invalid_bus_cycle <= 1'b0;
else if ((i_new_pc)||(i_clear_cache))
invalid_bus_cycle <= 1'b1;
else if (!o_wb_cyc)
invalid_bus_cycle <= 1'b0;
 
initial o_wb_addr = {(AW){1'b1}};
always @(posedge i_clk)
if (i_new_pc)
o_wb_addr <= i_pc;
else if ((o_wb_stb)&&(!i_wb_stall)&&(!invalid_bus_cycle))
o_wb_addr <= o_wb_addr + 1'b1;
 
initial cache_write_addr = 1'b0;
always @(posedge i_clk)
if (!o_wb_cyc)
cache_write_addr <= 1'b0;
else if ((o_wb_cyc)&&(i_wb_ack))
cache_write_addr <= cache_write_addr + 1'b1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache[cache_write_addr] <= i_wb_data;
 
initial cache_read_addr = 1'b0;
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle)
||((o_v)&&(cache_read_addr)&&(i_stall_n)))
cache_read_addr <= 1'b0;
else if ((o_v)&&(i_stall_n))
cache_read_addr <= 1'b1;
 
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle))
cache_valid <= 2'b00;
else begin
if ((o_v)&&(i_stall_n))
cache_valid[cache_read_addr] <= 1'b0;
if ((o_wb_cyc)&&(i_wb_ack))
cache_valid[cache_write_addr] <= 1'b1;
end
 
initial o_i = {(32){1'b1}};
always @(posedge i_clk)
if ((i_stall_n)&&(o_wb_cyc)&&(i_wb_ack))
o_i <= i_wb_data;
else
o_i <= cache[cache_read_addr];
 
initial o_pc = 0;
always @(posedge i_clk)
if (i_new_pc)
o_pc <= i_pc;
else if ((o_v)&&(i_stall_n))
o_pc <= o_pc + 1'b1;
 
assign o_v = cache_valid[cache_read_addr];
 
initial o_illegal = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_err))
o_illegal <= 1'b1;
else if ((!o_wb_cyc)&&((i_new_pc)||(invalid_bus_cycle)))
o_illegal <= 1'b0;
 
endmodule
/trunk/rtl/cpu/div.v
121,12 → 121,13
 
reg r_sign, pre_sign, r_z, r_c, last_bit;
reg [(LGBW-1):0] r_bit;
 
reg zero_divisor;
initial zero_divisor = 1'b0;
always @(posedge i_clk)
zero_divisor <= (r_divisor == 0)&&(r_busy);
 
// The Divide logic begins with r_busy. We use r_busy to determine
// whether or not the divide is in progress, vs being complete.
// Here, we clear r_busy on any reset and set it on i_wr (the request
// do to a divide). The divide ends when we are on the last bit,
// or equivalently when we discover we are dividing by zero.
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
136,6 → 137,12
else if ((last_bit)||(zero_divisor))
r_busy <= 1'b0;
 
// o_busy is very similar to r_busy, save for some key differences.
// Primary among them is that o_busy needs to (possibly) be true
// for an extra clock after r_busy clears. This would be that extra
// clock where we negate the result (assuming a signed divide, and that
// the result is supposed to be negative.) Otherwise, the two are
// identical.
initial o_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
147,19 → 154,50
else if (~r_busy)
o_busy <= 1'b0;
 
// If we are asked to divide by zero, we need to halt. The sooner
// we halt and report the error, the better. Hence, here we look
// for a zero divisor while being busy. The always above us will then
// look at this and halt a divide in the middle if we are trying to
// divide by zero.
//
// Note that this works off of the 2BW-1 length vector. If we can
// simplify that, it should simplify our logic as well.
initial zero_divisor = 1'b0;
always @(posedge i_clk)
if ((i_rst)||(i_wr))
// zero_divisor <= (r_divisor == 0)&&(r_busy);
if (i_rst)
zero_divisor <= 1'b0;
else if (i_wr)
zero_divisor <= (i_denominator == 0);
else if (!r_busy)
zero_divisor <= 1'b0;
 
// o_valid is part of the ZipCPU protocol. It will be set to true
// anytime our answer is valid and may be used by the calling module.
// Indeed, the ZipCPU will halt (and ignore us) once the i_wr has been
// set until o_valid gets set.
//
// Here, we clear o_valid on a reset, and any time we are on the last
// bit while busy (provided the sign is zero, or we are dividing by
// zero). Since o_valid is self-clearing, we don't need to clear
// it on an i_wr signal.
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (r_busy)
begin
if ((last_bit)||(zero_divisor))
o_valid <= (zero_divisor)||(~r_sign);
o_valid <= (zero_divisor)||(!r_sign);
end else if (r_sign)
begin
o_valid <= (~zero_divisor); // 1'b1;
o_valid <= (!zero_divisor); // 1'b1;
end else
o_valid <= 1'b0;
 
// Division by zero error reporting. Anytime we detect a zero divisor,
// we set our output error, and then hold it until we are valid and
// everything clears.
initial o_err = 1'b0;
always @(posedge i_clk)
if((i_rst)||(o_valid))
169,91 → 207,145
else
o_err <= 1'b0;
 
// r_bit
//
// Keep track of which "bit" of our divide we are on. This number
// ranges from 31 down to zero. On any write, we set ourselves to
// 5'h1f. Otherwise, while we are busy (but not within the pre-sign
// adjustment stage), we subtract one from our value on every clock.
always @(posedge i_clk)
if ((r_busy)&&(!pre_sign))
r_bit <= r_bit + {(LGBW){1'b1}};
else
r_bit <= {(LGBW){1'b1}};
 
// last_bit
//
// This logic replaces a lot of logic that was inside our giant state
// machine with ... something simpler. In particular, we'll use this
// logic to determine we are processing our last bit. The only trick
// is, this bit needs to be set whenever (r_busy) and (r_bit == 0),
// hence we need to set on (r_busy) and (r_bit == 1) so as to be set
// when (r_bit == 0).
initial last_bit = 1'b0;
always @(posedge i_clk)
if ((i_wr)||(pre_sign)||(i_rst))
if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
else
last_bit <= 1'b0;
else if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
 
// pre_sign
//
// This is part of the state machine. pre_sign indicates that we need
// a extra clock to take the absolute value of our inputs. It need only
// be true for the one clock, and then it must clear itself.
initial pre_sign = 1'b0;
always @(posedge i_clk)
// if (i_rst) r_busy <= 1'b0;
// else
if (i_wr)
begin
//
// Set our values upon an initial command. Here's
// where we come in and start.
//
// r_busy <= 1'b1;
//
o_quotient <= 0;
r_bit <= {(LGBW){1'b1}};
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
r_dividend <= i_numerator;
r_sign <= 1'b0;
pre_sign <= i_signed;
else
pre_sign <= 1'b0;
 
// As a result of our operation, we need to set the flags. The most
// difficult of these is the "Z" flag indicating that the result is
// zero. Here, we'll use the same logic that sets the low-order
// bit to clear our zero flag, and leave the zero flag set in all
// other cases. Well ... not quite. If we need to flip the sign of
// our value, then we can't quite clear the zero flag ... yet.
always @(posedge i_clk)
if((r_busy)&&(r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
// If we are busy, the upper bits of our divisor are
// zero (i.e., we got the shift right), and the top
// (carry) bit of the difference is zero (no overflow),
// then we could subtract our divisor from our dividend
// and hence we add a '1' to the quotient, while setting
// the zero flag to false.
r_z <= 1'b0;
else if ((!r_busy)&&(!r_sign))
r_z <= 1'b1;
end else if (pre_sign)
 
// r_dividend
// This is initially the numerator. On a signed divide, it then becomes
// the absolute value of the numerator. We'll subtract from this value
// the divisor shifted as appropriate for every output bit we are
// looking for--just as with traditional long division.
always @(posedge i_clk)
if (pre_sign)
begin
//
// Note that we only come in here, for one clock, if
// our initial value may have been signed. If we are
// doing an unsigned divide, we then skip this step.
//
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
// Negate our dividend if necessary so that it becomes
// a magnitude only value
// If we are doing a signed divide, then take the
// absolute value of the dividend
if (r_dividend[BW-1])
r_dividend <= -r_dividend;
// Do the same with the divisor--rendering it into
// a magnitude only.
// The begin/end block is important so we don't lose
// the fact that on an else we don't do anything.
end else if((r_busy)&&(r_divisor[(2*BW-2):(BW)]==0)&&(!diff[BW]))
// This is the condition whereby we set a '1' in our
// output quotient, and we subtract the (current)
// divisor from our dividend. (The difference is
// already kept in the diff vector above.)
r_dividend <= diff[(BW-1):0];
else if (!r_busy)
// Once we are done, and r_busy is no longer high, we'll
// always accept new values into our dividend. This
// guarantees that, when i_wr is set, the new value
// is already set as desired.
r_dividend <= i_numerator;
 
initial r_divisor = 0;
always @(posedge i_clk)
if (pre_sign)
begin
if (r_divisor[(2*BW-2)])
r_divisor[(2*BW-2):(BW-1)] <= -r_divisor[(2*BW-2):(BW-1)];
//
// We only do this stage for a single clock, so go on
// with the rest of the divide otherwise.
pre_sign <= 1'b0;
r_divisor[(2*BW-2):(BW-1)]
<= -r_divisor[(2*BW-2):(BW-1)];
end else if (r_busy)
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
else
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
 
// r_sign
// is a flag for our state machine control(s). r_sign will be set to
// true any time we are doing a signed divide and the result must be
// negative. In that case, we take a final logic stage at the end of
// the divide to negate the output. This flag is what tells us we need
// to do that. r_busy will be true during the divide, then when r_busy
// goes low, r_sign will be checked, then the idle/reset stage will have
// been reached. For this reason, we cannot set r_sign unless we are
// up to something.
initial r_sign = 1'b0;
always @(posedge i_clk)
if (pre_sign)
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
else if (r_busy)
r_sign <= (r_sign)&&(!zero_divisor);
else
r_sign <= 1'b0;
 
always @(posedge i_clk)
if (r_busy)
begin
// While the divide is taking place, we examine each bit
// in turn here.
//
r_bit <= r_bit + {(LGBW){1'b1}}; // r_bit = r_bit - 1;
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
if (|r_divisor[(2*BW-2):(BW)])
o_quotient <= { o_quotient[(BW-2):0], 1'b0 };
if ((r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
begin
end else if (diff[BW])
begin
//
// diff = r_dividend - r_divisor[(BW-1):0];
//
// If this value was negative, there wasn't
// enough value in the dividend to support
// pulling off a bit. We'll move down a bit
// therefore and try again.
//
end else begin
//
// Put a '1' into our output accumulator.
// Subtract the divisor from the dividend,
// and then move on to the next bit
//
r_dividend <= diff[(BW-1):0];
o_quotient[r_bit[(LGBW-1):0]] <= 1'b1;
r_z <= 1'b0;
o_quotient[0] <= 1'b1;
end
r_sign <= (r_sign)&&(~zero_divisor);
end else if (r_sign)
begin
r_sign <= 1'b0;
o_quotient <= -o_quotient;
end
else
o_quotient <= 0;
 
// Set Carry on an exact divide
wire w_n;
// Perhaps nothing uses this, but ... well, I suppose we could remove
// this logic eventually, just ... not yet.
always @(posedge i_clk)
r_c <= (r_busy)&&((diff == 0)||(r_dividend == 0));
 
// The last flag: Negative. This flag is set assuming that the result
// of the divide was negative (i.e., the high order bit is set). This
// will also be true of an unsigned divide--if the high order bit is
// ever set upon completion. Indeed, you might argue that there's no
// logic involved.
wire w_n;
assign w_n = o_quotient[(BW-1)];
 
assign o_flags = { 1'b0, w_n, r_c, r_z };
/trunk/rtl/cpu/idecode.v
55,6 → 55,7
module idecode(i_clk, i_rst, i_ce, i_stalled,
i_instruction, i_gie, i_pc, i_pf_valid,
i_illegal,
o_valid,
o_phase, o_illegal,
o_pc, o_gie,
o_dcdR, o_dcdA, o_dcdB, o_I, o_zI,
72,7 → 73,7
input i_gie;
input [(AW-1):0] i_pc;
input i_pf_valid, i_illegal;
output wire o_phase;
output wire o_valid, o_phase;
output reg o_illegal;
output reg [AW:0] o_pc;
output reg o_gie;
105,13 → 106,14
 
 
wire [4:0] w_op;
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev, w_noop;
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev,
w_noop, w_lock;
wire [4:0] w_dcdR, w_dcdB, w_dcdA;
wire w_dcdR_pc, w_dcdR_cc;
wire w_dcdA_pc, w_dcdA_cc;
wire w_dcdB_pc, w_dcdB_cc;
wire [3:0] w_cond;
wire w_wF, w_mem, w_sto, w_lod, w_div, w_fpu;
wire w_wF, w_mem, w_sto, w_div, w_fpu;
wire w_wR, w_rA, w_rB, w_wR_n;
wire w_ljmp, w_ljmp_dly, w_cis_ljmp;
wire [31:0] iword;
217,9 → 219,21
// If the result register is either CC or PC, and this would otherwise
// be a floating point instruction with floating point opcode of 0,
// then this is a NOOP.
assign w_lock = (!iword[31])&&(w_op[4:0]==5'h1d)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
`ifdef OPT_PIPELINED
assign w_noop = (!iword[31])&&(w_op[4:0] == 5'h1f)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1] == 3'h7))
||(IMPLEMENT_FPU==0));
`else
// Allow w_lock's to set the w_noop bit in the case of no pipelining
assign w_noop = (!iword[31])
&&((w_op[4:0] == 5'h1f)||(w_op[4:0]==5'h1d))
&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1] == 3'h7))
||(IMPLEMENT_FPU==0));
`endif
 
// dcdB - What register is used in the opB?
//
252,7 → 266,6
// 1 LUT
assign w_mem = (w_cis_op[4:3] == 2'b10)&&(w_cis_op[2:1] !=2'b00);
assign w_sto = (w_mem)&&( w_cis_op[0]);
assign w_lod = (w_mem)&&(!w_cis_op[0]);
// 1 LUT
assign w_div = (!iword[31])&&(w_op[4:1] == 4'h7);
// 2 LUTs
387,9 → 400,9
&&(w_dcdR[3:1]==3'h7)
&&(
(w_cis_op[2:0] != 3'h4) // BREAK
`ifdef OPT_PIPELINED
// `ifdef OPT_PIPELINED
&&(w_cis_op[2:0] != 3'h5) // LOCK
`endif
// `endif
// SIM instructions are always illegal
&&(w_cis_op[2:0] != 3'h7))) // NOOP
o_illegal <= 1'b1;
403,12 → 416,16
if (!o_phase)
o_gie<= i_gie;
 
if ((iword[31])&&(!o_phase))
o_pc <= { i_pc, 1'b1 };
else if ((iword[31])&&(i_pf_valid))
o_pc <= { i_pc, 1'b0 };
else
if (iword[31])
begin
if (o_phase)
o_pc <= o_pc + 1'b1;
else if (i_pf_valid)
o_pc <= { i_pc, 1'b1 };
end else begin
// The normal, non-CIS case
o_pc <= { i_pc + 1'b1, 1'b0 };
end
`else
o_gie<= i_gie;
o_pc <= { i_pc + 1'b1, 1'b0 };
460,9 → 477,7
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
`ifdef OPT_PIPELINED
r_lock <= (!iword[31])&&(w_op[4:0]==5'h1d)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
r_lock <= w_lock;
`endif
`ifdef OPT_CIS
r_nxt_half <= { iword[31], iword[14:0] };
588,14 → 603,15
always @(posedge i_clk)
if (i_rst)
r_valid <= 1'b0;
else if ((i_ce)&&(o_ljmp))
else if (i_ce)
r_valid <= ((i_pf_valid)||(o_phase)||(i_illegal))
&&(!o_ljmp)&&(!o_early_branch);
else if (!i_stalled)
r_valid <= 1'b0;
else if ((i_ce)&&(i_pf_valid))
r_valid <= 1'b1;
else if (~i_stalled)
r_valid <= 1'b0;
 
assign o_valid = r_valid;
 
 
assign o_I = { {(32-22){r_I[22]}}, r_I[21:0] };
 
endmodule
/trunk/rtl/cpu/memops.v
163,10 → 163,10
 
initial o_valid = 1'b0;
always @(posedge i_clk)
o_valid <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
o_valid <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
initial o_err = 1'b0;
always @(posedge i_clk)
o_err <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
o_err <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
 
always @(posedge i_clk)
/trunk/rtl/cpu/prefetch.v
53,18 → 53,18
// mode which this prefetch does not support. In non--pipelined mode, the
// flash will require (16+6+6)*2 = 56 clocks plus 16 clocks per word read,
// or 72 clocks to fetch one instruction.
module prefetch(i_clk, i_rst, i_ce, i_stalled_n, i_pc, i_aux,
o_i, o_pc, o_aux, o_valid, o_illegal,
module prefetch(i_clk, i_rst, i_new_pc, i_clear_cache, i_stalled_n, i_pc,
o_i, o_pc, o_valid, o_illegal,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
parameter ADDRESS_WIDTH=32, AUX_WIDTH = 1, AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_ce, i_stalled_n;
parameter ADDRESS_WIDTH=32;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc, i_clear_cache,
i_stalled_n;
input [(AW-1):0] i_pc;
input [(AUX_WIDTH-1):0] i_aux;
output reg [31:0] o_i;
output reg [(AW-1):0] o_pc;
output reg [(AUX_WIDTH-1):0] o_aux;
output reg o_valid, o_illegal;
output wire [(AW-1):0] o_pc;
output reg o_valid;
// Wishbone outputs
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
71,8 → 71,9
output reg [(AW-1):0] o_wb_addr;
output wire [31:0] o_wb_data;
// And return inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
output reg o_illegal;
 
assign o_wb_we = 1'b0;
assign o_wb_data = 32'h0000;
84,47 → 85,54
initial o_wb_stb = 1'b0;
initial o_wb_addr= 0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_ack))
if ((i_rst)||(i_wb_ack)||(i_wb_err))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end else if ((i_ce)&&(~o_wb_cyc)) // Initiate a bus cycle
begin
end else if ((!o_wb_cyc)&&((i_stalled_n)||(!o_valid)))
begin // Initiate a bus cycle
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
end else if (o_wb_cyc) // Independent of ce
begin
if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall))
if (~i_wb_stall)
o_wb_stb <= 1'b0;
if (i_wb_ack)
o_wb_cyc <= 1'b0;
end
 
reg invalid;
initial invalid = 1'b0;
always @(posedge i_clk)
if (i_rst) // Set the address to guarantee the result is invalid
o_wb_addr <= {(AW){1'b1}};
else if ((i_ce)&&(~o_wb_cyc))
if (!o_wb_cyc)
invalid <= 1'b0;
else if ((i_new_pc)||(i_clear_cache))
invalid <= (!o_wb_stb);
 
always @(posedge i_clk)
if (i_new_pc)
o_wb_addr <= i_pc;
else if ((!o_wb_cyc)&&(i_stalled_n)&&(!invalid))
o_wb_addr <= o_wb_addr + 1'b1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
o_aux <= i_aux;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
if (i_wb_ack)
o_i <= i_wb_data;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
o_pc <= o_wb_addr;
 
initial o_valid = 1'b0;
initial o_illegal = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
if (i_rst)
begin
o_valid <= (i_pc == o_wb_addr)&&(~i_wb_err);
o_illegal <= (i_wb_err)&&(i_pc == o_wb_addr);
end else if (i_stalled_n)
o_valid <= 1'b0;
o_illegal <= 1'b0;
end else if ((o_wb_cyc)&&(i_wb_ack))
begin
o_valid <= (!i_wb_err)&&(!invalid);
o_illegal <= ( i_wb_err)&&(!invalid);
end else if ((i_stalled_n)||(i_clear_cache))
begin
o_valid <= 1'b0;
o_illegal <= 1'b0;
end
 
assign o_pc = o_wb_addr;
endmodule
/trunk/rtl/cpu/zipcpu.v
45,7 → 45,7
// as:
//
//
// assign (n)_ce = (n-1)_valid && (~(n)_stall)
// assign (n)_ce = (n-1)_valid && (!(n)_stall)
//
//
// always @(posedge i_clk)
261,7 → 261,6
dcd_ALU, dcd_M, dcd_DIV, dcd_FP,
dcd_wF, dcd_gie, dcd_break, dcd_lock,
dcd_pipe, dcd_ljmp;
reg r_dcd_valid;
wire dcd_valid;
wire [AW:0] dcd_pc /* verilator public_flat */;
wire [31:0] dcd_I;
298,10 → 297,6
wire [7:0] op_F;
wire op_ce, op_phase, op_pipe, op_change_data_ce;
// Some pipeline control wires
`ifdef OPT_PIPELINED
reg op_A_alu, op_A_mem;
reg op_B_alu, op_B_mem;
`endif
reg op_illegal;
wire op_break;
wire op_lock;
347,8 → 342,8
wire [31:0] div_result;
wire [3:0] div_flags;
 
assign div_ce = (master_ce)&&(~clear_pipeline)&&(op_valid_div)
&&(~mem_rdbusy)&&(~div_busy)&&(~fpu_busy)
assign div_ce = (master_ce)&&(!clear_pipeline)&&(op_valid_div)
&&(!mem_rdbusy)&&(!div_busy)&&(!fpu_busy)
&&(set_cond);
 
wire fpu_ce, fpu_error, fpu_busy, fpu_valid;
355,8 → 350,8
wire [31:0] fpu_result;
wire [3:0] fpu_flags;
 
assign fpu_ce = (master_ce)&&(~clear_pipeline)&&(op_valid_fpu)
&&(~mem_rdbusy)&&(~div_busy)&&(~fpu_busy)
assign fpu_ce = (master_ce)&&(!clear_pipeline)&&(op_valid_fpu)
&&(!mem_rdbusy)&&(!div_busy)&&(!fpu_busy)
&&(set_cond);
 
wire adf_ce_unconditional;
379,7 → 374,7
//
// MASTER: clock enable.
//
assign master_ce = ((~i_halt)||(alu_phase))&&(~o_break)&&(~sleep);
assign master_ce = ((!i_halt)||(alu_phase))&&(!o_break)&&(!sleep);
 
 
//
392,15 → 387,11
//
// PIPELINE STAGE #2 :: Instruction Decode
// Calculate stall conditions
assign dcd_ce = ((~dcd_valid)||(~dcd_stalled))&&(~clear_pipeline);
 
`ifdef OPT_PIPELINED
assign dcd_stalled = (dcd_valid)&&(op_stall);
`else
// If not pipelined, there will be no op_valid_ anything, and the
// op_stall will be false, dcd_X_stall will be false, thus we can simply
// do a ...
assign dcd_stalled = 1'b0;
`else // Not pipelined -- either double or single fetch
assign dcd_stalled = (dcd_valid)&&(op_stall);
`endif
//
// PIPELINE STAGE #3 :: Read Operands
418,7 → 409,7
assign op_stall = (op_valid)&&( // Only stall if we're loaded w/validins
// Stall if we're stopped, and not allowed to execute
// an instruction
// (~master_ce) // Already captured in alu_stall
// (!master_ce) // Already captured in alu_stall
//
// Stall if going into the ALU and the ALU is stalled
// i.e. if the memory is busy, or we are single
452,6 → 443,11
);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(!op_stall);
 
`else
assign op_stall = (alu_busy)||(div_busy)||(fpu_busy)||(wr_reg_ce)
||(mem_busy)||(op_valid)||(!master_ce)||(wr_flags_ce);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(!op_stall);
`endif
 
// BUT ... op_ce is too complex for many of the data operations. So
// let's make their circuit enable code simpler. In particular, if
458,12 → 454,7
// op_ doesn't need to be preserved, we can change it all we want
// ... right? The clear_pipeline code, for example, really only needs
// to determine whether op_valid is true.
assign op_change_data_ce = (~op_stall);
`else
assign op_stall = (op_valid)&&(~master_ce);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(~clear_pipeline);
assign op_change_data_ce = 1'b1;
`endif
assign op_change_data_ce = (!op_stall);
 
//
// PIPELINE STAGE #4 :: ALU / Memory
479,16 → 470,16
// through the ALU. Break instructions are not allowed through
// the ALU.
`ifdef OPT_PIPELINED
assign alu_stall = (((~master_ce)||(mem_rdbusy)||(alu_busy))&&(op_valid_alu)) //Case 1&2
assign alu_stall = (((!master_ce)||(mem_rdbusy)||(alu_busy))&&(op_valid_alu)) //Case 1&2
||(prelock_stall)
||((op_valid)&&(op_break))
||(wr_reg_ce)&&(wr_write_cc)
||(div_busy)||(fpu_busy);
assign alu_ce = (master_ce)&&(op_valid_alu)&&(~alu_stall)
&&(~clear_pipeline);
assign alu_ce = (master_ce)&&(op_valid_alu)&&(!alu_stall)
&&(!clear_pipeline);
`else
assign alu_stall = (op_valid_alu)&&((~master_ce)||(op_break));
assign alu_ce = (master_ce)&&(op_valid_alu)&&(~alu_stall)&&(~clear_pipeline);
assign alu_stall = (op_valid_alu)&&((!master_ce)||(op_break));
assign alu_ce = (master_ce)&&(op_valid_alu)&&(!alu_stall)&&(!clear_pipeline);
`endif
//
 
496,26 → 487,14
// Note: if you change the conditions for mem_ce, you must also change
// alu_pc_valid.
//
`ifdef OPT_PIPELINED
assign mem_ce = (master_ce)&&(op_valid_mem)&&(~mem_stalled)
&&(~clear_pipeline);
`else
// If we aren't pipelined, then no one will be changing what's in the
// pipeline (i.e. clear_pipeline), while our only instruction goes
// through the ... pipeline.
//
// However, in hind sight this logic didn't work. What happens when
// something gets in the pipeline and then (due to interrupt or some
// such) needs to be voided? Thus we avoid simplification and keep
// what worked here.
assign mem_ce = (master_ce)&&(op_valid_mem)&&(~mem_stalled)
&&(~clear_pipeline);
`endif
assign mem_ce = (master_ce)&&(op_valid_mem)&&(!mem_stalled)
&&(!clear_pipeline);
 
`ifdef OPT_PIPELINED_BUS_ACCESS
assign mem_stalled = (~master_ce)||(alu_busy)||((op_valid_mem)&&(
assign mem_stalled = (!master_ce)||(alu_busy)||((op_valid_mem)&&(
(mem_pipe_stalled)
||(prelock_stall)
||((~op_pipe)&&(mem_busy))
||((!op_pipe)&&(mem_busy))
||(div_busy)
||(fpu_busy)
// Stall waiting for flags to be valid
527,7 → 506,7
`else
`ifdef OPT_PIPELINED
assign mem_stalled = (mem_busy)||((op_valid_mem)&&(
(~master_ce)
(!master_ce)
// Stall waiting for flags to be valid
// Or waiting for a write to the PC register
// Or CC register, since that can change the
534,15 → 513,15
// PC as well
||((wr_reg_ce)&&(wr_reg_id[4] == op_gie)&&((wr_write_pc)||(wr_write_cc)))));
`else
assign mem_stalled = (op_valid_mem)&&(~master_ce);
assign mem_stalled = (op_valid_mem)&&(!master_ce);
`endif
`endif
 
// ALU, DIV, or FPU CE ... equivalent to the OR of all three of these
assign adf_ce_unconditional = (master_ce)&&(~clear_pipeline)&&(op_valid)
&&(~op_valid_mem)&&(~mem_rdbusy)
&&((~op_valid_alu)||(~alu_stall))&&(~op_break)
&&(~div_busy)&&(~fpu_busy)&&(~clear_pipeline);
assign adf_ce_unconditional = (master_ce)&&(!clear_pipeline)&&(op_valid)
&&(!op_valid_mem)&&(!mem_rdbusy)
&&((!op_valid_alu)||(!alu_stall))&&(!op_break)
&&(!div_busy)&&(!fpu_busy)&&(!clear_pipeline);
 
//
//
549,38 → 528,46
// PIPELINE STAGE #1 :: Prefetch
//
//
wire pf_stalled;
assign pf_stalled = (dcd_stalled)||(dcd_phase);
 
wire pf_new_pc;
assign pf_new_pc = (new_pc)||((dcd_early_branch)&&(!clear_pipeline));
 
wire [(AW-1):0] pf_request_address;
assign pf_request_address = ((dcd_early_branch)&&(!clear_pipeline))
? dcd_branch_pc:pf_pc[(AW+1):2];
assign pf_gie = gie;
`ifdef OPT_SINGLE_FETCH
wire pf_ce;
 
assign pf_ce = (~pf_valid)&&(~dcd_valid)&&(~op_valid)&&(~alu_busy)&&(~mem_busy)&&(~alu_pc_valid)&&(~mem_pc_valid);
prefetch #(ADDRESS_WIDTH)
pf(i_clk, (i_rst), (pf_ce), (~dcd_stalled), pf_pc[(AW+1):2], gie,
pf_instruction, pf_instruction_pc, pf_gie,
pf(i_clk, (i_rst), pf_new_pc, w_clear_icache,
(!pf_stalled),
pf_request_address,
pf_instruction, pf_instruction_pc,
pf_valid, pf_illegal,
pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
pf_ack, pf_stall, pf_err, i_wb_data);
 
initial r_dcd_valid = 1'b0;
always @(posedge i_clk)
if (clear_pipeline)
r_dcd_valid <= 1'b0;
else if (dcd_ce)
r_dcd_valid <= (pf_valid)||(pf_illegal);
else if (op_ce)
r_dcd_valid <= 1'b0;
assign dcd_valid = r_dcd_valid;
`else
`ifdef OPT_DOUBLE_FETCH
 
`else // Pipe fetch
wire [1:0] pf_dbg;
dblfetch #(ADDRESS_WIDTH)
pf(i_clk, i_rst, pf_new_pc,
w_clear_icache,
(!pf_stalled),
pf_request_address,
pf_instruction, pf_instruction_pc,
pf_valid,
pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
pf_ack, pf_stall, pf_err, i_wb_data,
pf_illegal);
 
wire pf_stalled;
assign pf_stalled = (dcd_stalled)||(dcd_phase);
`else // Not single fetch and not double fetch
 
`ifdef OPT_TRADITIONAL_PFCACHE
wire [(AW-1):0] pf_request_address;
assign pf_request_address = ((dcd_early_branch)&&(!clear_pipeline))
? dcd_branch_pc:pf_pc[(AW+1):2];
pfcache #(LGICACHE, ADDRESS_WIDTH)
pf(i_clk, i_rst, (new_pc)||((dcd_early_branch)&&(~clear_pipeline)),
w_clear_icache,
pf(i_clk, i_rst, pf_new_pc, w_clear_icache,
// dcd_pc,
(!pf_stalled),
pf_request_address,
590,7 → 577,7
pf_illegal);
`else
pipefetch #(RESET_BUS_ADDRESS, LGICACHE, ADDRESS_WIDTH)
pf(i_clk, i_rst, (new_pc)||(dcd_early_branch),
pf(i_clk, i_rst, pf_new_pc,
w_clear_icache, (!pf_stalled),
(new_pc)?pf_pc[(AW+1):2]:dcd_branch_pc,
pf_instruction, pf_instruction_pc, pf_valid,
598,31 → 585,19
pf_ack, pf_stall, pf_err, i_wb_data,
(mem_cyc_lcl)||(mem_cyc_gbl),
pf_illegal);
`endif
`ifdef OPT_NO_USERMODE
assign pf_gie = 1'b0;
`else
assign pf_gie = gie;
`endif
`endif // OPT_TRADITIONAL_CACHE
`endif // OPT_DOUBLE_FETCH
`endif // OPT_SINGLE_FETCH
 
initial r_dcd_valid = 1'b0;
always @(posedge i_clk)
if ((clear_pipeline)||(w_clear_icache))
r_dcd_valid <= 1'b0;
else if (dcd_ce)
r_dcd_valid <= ((dcd_phase)||(pf_valid))
&&(~dcd_ljmp)&&(~dcd_early_branch);
else if (op_ce)
r_dcd_valid <= 1'b0;
assign dcd_valid = r_dcd_valid;
`endif
 
// If not pipelined, there will be no op_valid_ anything, and the
assign dcd_ce = (!dcd_valid)||(!dcd_stalled);
idecode #(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE,
IMPLEMENT_FPU)
instruction_decoder(i_clk, (clear_pipeline),
(~dcd_valid)||(~op_stall), dcd_stalled, pf_instruction, pf_gie,
pf_instruction_pc, pf_valid, pf_illegal, dcd_phase,
instruction_decoder(i_clk,
(clear_pipeline)||(w_clear_icache),
dcd_ce,
dcd_stalled, pf_instruction, pf_gie,
pf_instruction_pc, pf_valid, pf_illegal,
dcd_valid, dcd_phase,
dcd_illegal, dcd_pc, dcd_gie,
{ dcd_Rcc, dcd_Rpc, dcd_R },
{ dcd_Acc, dcd_Apc, dcd_A },
729,9 → 704,7
`endif
 
always @(posedge i_clk)
`ifdef OPT_PIPELINED
if (op_ce)
`endif
begin
`ifdef OPT_PIPELINED
if ((wr_reg_ce)&&(wr_reg_id == dcd_A))
746,11 → 719,7
r_op_Av <= w_op_Av;
`ifdef OPT_PIPELINED
end else
begin // We were going to pick these up when they became valid,
// but for some reason we're stuck here as they became
// valid. Pick them up now anyway
// if (((op_A_alu)&&(alu_wR))||((op_A_mem)&&(mem_valid)))
// r_op_Av <= wr_gpreg_vl;
begin
if ((wr_reg_ce)&&(wr_reg_id == op_Aid)&&(op_rA))
r_op_Av <= wr_gpreg_vl;
`endif
798,11 → 767,9
// below, arriving at what we finally want in the (now wire net)
// op_F.
always @(posedge i_clk)
`ifdef OPT_PIPELINED
if (op_ce) // Cannot do op_change_data_ce here since op_F depends
// upon being either correct for a valid op, or correct
// for the last valid op
`endif
begin // Set the flag condition codes, bit order is [3:0]=VNCZ
case(dcd_F[2:0])
3'h0: r_op_F <= 7'h00; // Always
818,7 → 785,7
assign op_F = { r_op_F[3], r_op_F[6:0] };
 
wire w_op_valid;
assign w_op_valid = (~clear_pipeline)&&(dcd_valid)&&(~dcd_ljmp)&&(!dcd_early_branch);
assign w_op_valid = (!clear_pipeline)&&(dcd_valid)&&(!dcd_ljmp)&&(!dcd_early_branch);
initial op_valid = 1'b0;
initial op_valid_alu = 1'b0;
initial op_valid_mem = 1'b0;
845,9 → 812,9
op_valid<= (w_op_valid)||(dcd_illegal)&&(dcd_valid)||(dcd_early_branch);
op_valid_alu <= (w_op_valid)&&((dcd_ALU)||(dcd_illegal)
||(dcd_early_branch));
op_valid_mem <= (dcd_M)&&(~dcd_illegal)&&(w_op_valid);
op_valid_div <= (dcd_DIV)&&(~dcd_illegal)&&(w_op_valid);
op_valid_fpu <= (dcd_FP)&&(~dcd_illegal)&&(w_op_valid);
op_valid_mem <= (dcd_M)&&(!dcd_illegal)&&(w_op_valid);
op_valid_div <= (dcd_DIV)&&(!dcd_illegal)&&(w_op_valid);
op_valid_fpu <= (dcd_FP)&&(!dcd_illegal)&&(w_op_valid);
end else if ((adf_ce_unconditional)||(mem_ce))
begin
op_valid <= 1'b0;
866,7 → 833,6
// to be, step through it, and then replace it back. In this fashion,
// a debugger can step through code.
// assign w_op_break = (dcd_break)&&(r_dcd_I[15:0] == 16'h0001);
`ifdef OPT_PIPELINED
reg r_op_break;
 
initial r_op_break = 1'b0;
877,9 → 843,6
else if (!op_valid)
r_op_break <= 1'b0;
assign op_break = r_op_break;
`else
assign op_break = dcd_break;
`endif
 
`ifdef OPT_PIPELINED
generate
892,7 → 855,7
if (clear_pipeline)
r_op_lock <= 1'b0;
else if (op_ce)
r_op_lock <= (dcd_valid)&&(dcd_lock)&&(~clear_pipeline);
r_op_lock <= (dcd_valid)&&(dcd_lock)&&(!clear_pipeline);
assign op_lock = r_op_lock;
 
end else begin
925,21 → 888,27
always @(posedge i_clk)
if (op_ce)
begin
op_wF <= (dcd_wF)&&((~dcd_Rcc)||(~dcd_wR))
&&(~dcd_early_branch)&&(~dcd_illegal);
op_wR <= (dcd_wR)&&(~dcd_early_branch)&&(~dcd_illegal);
op_wF <= (dcd_wF)&&((!dcd_Rcc)||(!dcd_wR))
&&(!dcd_early_branch)&&(!dcd_illegal);
op_wR <= (dcd_wR)&&(!dcd_early_branch)&&(!dcd_illegal);
end
`else
always @(posedge i_clk)
begin
op_wF <= (dcd_wF)&&((~dcd_Rcc)||(~dcd_wR))
&&(~dcd_early_branch)&&(~dcd_illegal);
op_wR <= (dcd_wR)&&(~dcd_early_branch)&&(~dcd_illegal);
op_wF <= (dcd_wF)&&((!dcd_Rcc)||(!dcd_wR))
&&(!dcd_early_branch)&&(!dcd_illegal);
op_wR <= (dcd_wR)&&(!dcd_early_branch)&&(!dcd_illegal);
end
`endif
 
`ifdef VERILATOR
`ifdef OPT_PIPELINED
`ifdef SINGLE_FETCH
always @(*)
begin
op_sim = dcd_sim;
op_sim_immv = dcd_sim_immv;
end
`else
always @(posedge i_clk)
if (op_change_data_ce)
begin
946,20 → 915,15
op_sim <= dcd_sim;
op_sim_immv <= dcd_sim_immv;
end
`else
always @(*)
begin
op_sim = dcd_sim;
op_sim_immv = dcd_sim_immv;
end
`endif
`endif
 
`ifdef OPT_PIPELINED
reg [3:0] r_op_opn;
reg [4:0] r_op_R;
reg r_op_Rcc;
reg r_op_gie;
 
initial r_op_gie = 1'b0;
always @(posedge i_clk)
if (op_change_data_ce)
begin
978,24 → 942,9
end
assign op_opn = r_op_opn;
assign op_R = r_op_R;
`ifdef OPT_NO_USERMODE
assign op_gie = 1'b0;
`else
assign op_gie = r_op_gie;
`endif
assign op_Rcc = r_op_Rcc;
`else
assign op_opn = dcd_opn;
assign op_R = dcd_R;
`ifdef OPT_NO_USERMODE
assign op_gie = 1'b0;
`else
assign op_gie = dcd_gie;
`endif
// With no pipelining, there is no early branching. We keep it
always @(posedge i_clk)
op_pc <= (dcd_early_branch)?dcd_branch_pc:dcd_pc[AW:1];
`endif
 
assign op_Fl = (op_gie)?(w_uflags):(w_iflags);
 
`ifdef OPT_CIS
1087,9 → 1036,9
// decode circuit has told us that the hazard
// is clear, so we're okay then.
//
((~dcd_zI)&&(
((!dcd_zI)&&(
((op_R == dcd_B)&&(op_wR))
||((mem_rdbusy)&&(~dcd_pipe))
||((mem_rdbusy)&&(!dcd_pipe))
))
// Stall following any instruction that will
// set the flags, if we're going to need the
1097,10 → 1046,10
||(((op_wF)||(cc_invalid_for_dcd))&&(dcd_Bcc))
// Stall on any ongoing memory operation that
// will write to op_B -- captured above
// ||((mem_busy)&&(~mem_we)&&(mem_last_reg==dcd_B)&&(~dcd_zI))
// ||((mem_busy)&&(!mem_we)&&(mem_last_reg==dcd_B)&&(!dcd_zI))
)
||((dcd_rB)&&(dcd_Bcc)&&(cc_invalid_for_dcd));
assign dcd_F_stall = ((~dcd_F[3])
assign dcd_F_stall = ((!dcd_F[3])
||((dcd_rA)&&(dcd_Acc))
||((dcd_rB)&&(dcd_Bcc)))
&&(op_valid)&&(op_Rcc);
1169,7 → 1118,7
// alu_reg <= op_R;
alu_wR <= (op_wR)&&(set_cond);
alu_wF <= (op_wF)&&(set_cond);
end else if (~alu_busy) begin
end else if (!alu_busy) begin
// These are strobe signals, so clear them if not
// set for any particular clock
alu_wR <= (i_halt)&&(i_dbg_we);
1209,7 → 1158,7
reg dbgv;
initial dbgv = 1'b0;
always @(posedge i_clk)
dbgv <= (~i_rst)&&(i_halt)&&(i_dbg_we)&&(r_halted);
dbgv <= (!i_rst)&&(i_halt)&&(i_dbg_we)&&(r_halted);
reg [31:0] dbg_val;
always @(posedge i_clk)
dbg_val <= i_dbg_data;
1232,8 → 1181,8
reg [(AW-1):0] r_alu_pc;
always @(posedge i_clk)
if ((adf_ce_unconditional)
||((master_ce)&&(op_valid_mem)&&(~clear_pipeline)
&&(~mem_stalled)))
||((master_ce)&&(op_valid_mem)&&(!clear_pipeline)
&&(!mem_stalled)))
r_alu_pc <= op_pc;
assign alu_pc = r_alu_pc;
`else
1256,11 → 1205,11
always @(posedge i_clk)
if (clear_pipeline)
r_alu_pc_valid <= 1'b0;
else if ((adf_ce_unconditional)&&(!op_phase)) //Includes&&(~alu_clear_pipeline)
else if ((adf_ce_unconditional)&&(!op_phase))
r_alu_pc_valid <= 1'b1;
else if (((~alu_busy)&&(~div_busy)&&(~fpu_busy))||(clear_pipeline))
else if (((!alu_busy)&&(!div_busy)&&(!fpu_busy))||(clear_pipeline))
r_alu_pc_valid <= 1'b0;
assign alu_pc_valid = (r_alu_pc_valid)&&((~alu_busy)&&(~div_busy)&&(~fpu_busy));
assign alu_pc_valid = (r_alu_pc_valid)&&((!alu_busy)&&(!div_busy)&&(!fpu_busy));
always @(posedge i_clk)
if (i_rst)
mem_pc_valid <= 1'b0;
1326,7 → 1275,8
mem_ack, mem_stall, mem_err, i_wb_data);
 
`else // PIPELINED_BUS_ACCESS
memops #(AW,IMPLEMENT_LOCK,WITH_LOCAL_BUS) domem(i_clk, i_rst,(mem_ce)&&(set_cond), bus_lock,
memops #(AW,IMPLEMENT_LOCK,WITH_LOCAL_BUS) domem(i_clk, i_rst,
(mem_ce)&&(set_cond), bus_lock,
(op_opn[2:0]), op_Bv, op_Av, op_R,
mem_busy,
mem_valid, bus_err, mem_wreg, mem_result,
1336,7 → 1286,7
mem_ack, mem_stall, mem_err, i_wb_data);
assign mem_pipe_stalled = 1'b0;
`endif // PIPELINED_BUS_ACCESS
assign mem_rdbusy = ((mem_busy)&&(~mem_we));
assign mem_rdbusy = ((mem_busy)&&(!mem_we));
 
// Either the prefetch or the instruction gets the memory bus, but
// never both.
1389,7 → 1339,7
// Further, alu_wR includes (set_cond), so we don't need to
// check for that here either.
assign wr_reg_ce = (dbgv)||(mem_valid)
||((~clear_pipeline)&&(~alu_illegal)
||((!clear_pipeline)&&(!alu_illegal)
&&(((alu_wR)&&(alu_valid))
||(div_valid)||(fpu_valid)));
// Which register shall be written?
1431,7 → 1381,7
// Write back to the condition codes/flags register ...
// When shall we write to our flags register? alu_wF already
// includes the set condition ...
assign wr_flags_ce = ((alu_wF)||(div_valid)||(fpu_valid))&&(~clear_pipeline)&&(~alu_illegal);
assign wr_flags_ce = ((alu_wF)||(div_valid)||(fpu_valid))&&(!clear_pipeline)&&(!alu_illegal);
assign w_uflags = { 1'b0, uhalt_phase, ufpu_err_flag,
udiv_err_flag, ubus_err_flag, trap, ill_err_u,
ubreak, step, 1'b1, sleep,
1439,7 → 1389,7
assign w_iflags = { 1'b0, ihalt_phase, ifpu_err_flag,
idiv_err_flag, ibus_err_flag, trap, ill_err_i,
break_en, 1'b0, 1'b0, sleep,
((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
((wr_flags_ce)&&(!alu_gie))?alu_flags:iflags };
 
 
// What value to write?
1455,7 → 1405,7
always @(posedge i_clk)
if ((wr_reg_ce)&&(wr_write_scc))
iflags <= wr_gpreg_vl[3:0];
else if ((wr_flags_ce)&&(~alu_gie))
else if ((wr_flags_ce)&&(!alu_gie))
iflags <= (div_valid)?div_flags:((fpu_valid)?fpu_flags
: alu_flags);
 
1486,10 → 1436,10
 
initial r_break_pending = 1'b0;
always @(posedge i_clk)
if ((clear_pipeline)||(~op_valid))
if ((clear_pipeline)||(!op_valid))
r_break_pending <= 1'b0;
else if (op_break)
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy)&&(!wr_reg_ce);
r_break_pending <= (!alu_busy)&&(!div_busy)&&(!fpu_busy)&&(!mem_busy)&&(!wr_reg_ce);
else
r_break_pending <= 1'b0;
assign break_pending = r_break_pending;
1498,12 → 1448,12
`endif
 
 
assign o_break = ((break_en)||(~op_gie))&&(break_pending)
&&(~clear_pipeline)
||((~alu_gie)&&(bus_err))
||((~alu_gie)&&(div_error))
||((~alu_gie)&&(fpu_error))
||((~alu_gie)&&(alu_illegal)&&(!clear_pipeline));
assign o_break = ((break_en)||(!op_gie))&&(break_pending)
&&(!clear_pipeline)
||((!alu_gie)&&(bus_err))
||((!alu_gie)&&(div_error))
||((!alu_gie)&&(fpu_error))
||((!alu_gie)&&(alu_illegal)&&(!clear_pipeline));
 
// The sleep register. Setting the sleep register causes the CPU to
// sleep until the next interrupt. Setting the sleep register within
1520,7 → 1470,7
r_sleep_is_halt <= 1'b0;
else if ((wr_reg_ce)&&(wr_write_cc)
&&(wr_spreg_vl[`CPU_SLEEP_BIT])
&&(~wr_spreg_vl[`CPU_GIE_BIT]))
&&(!wr_spreg_vl[`CPU_GIE_BIT]))
r_sleep_is_halt <= 1'b1;
 
// Trying to switch to user mode, either via a WAIT or an RTU
1536,7 → 1486,7
always @(posedge i_clk)
if ((i_rst)||(w_switch_to_interrupt))
sleep <= 1'b0;
else if ((wr_reg_ce)&&(wr_write_cc)&&(~alu_gie))
else if ((wr_reg_ce)&&(wr_write_cc)&&(!alu_gie))
// In supervisor mode, we have no protections. The
// supervisor can set the sleep bit however he wants.
// Well ... not quite. Switching to user mode and
1546,7 → 1496,7
// don't set the sleep bit
// otherwise however it would o.w. be set
sleep <= (wr_spreg_vl[`CPU_SLEEP_BIT])
&&((~i_interrupt)||(~wr_spreg_vl[`CPU_GIE_BIT]));
&&((!i_interrupt)||(!wr_spreg_vl[`CPU_GIE_BIT]));
else if ((wr_reg_ce)&&(wr_write_cc)&&(wr_spreg_vl[`CPU_GIE_BIT]))
// In user mode, however, you can only set the sleep
// mode while remaining in user mode. You can't switch
1558,7 → 1508,7
always @(posedge i_clk)
if (i_rst)
step <= 1'b0;
else if ((wr_reg_ce)&&(~alu_gie)&&(wr_write_ucc))
else if ((wr_reg_ce)&&(!alu_gie)&&(wr_write_ucc))
step <= wr_spreg_vl[`CPU_STEP_BIT];
 
// The GIE register. Only interrupts can disable the interrupt register
1568,12 → 1518,12
`else
assign w_switch_to_interrupt = (gie)&&(
// On interrupt (obviously)
((i_interrupt)&&(~alu_phase)&&(~bus_lock))
((i_interrupt)&&(!alu_phase)&&(!bus_lock))
// If we are stepping the CPU
||(((alu_pc_valid)||(mem_pc_valid))&&(step)&&(~alu_phase)&&(~bus_lock))
||(((alu_pc_valid)||(mem_pc_valid))&&(step)&&(!alu_phase)&&(!bus_lock))
// If we encounter a break instruction, if the break
// enable isn't set.
||((master_ce)&&(break_pending)&&(~break_en))
||((master_ce)&&(break_pending)&&(!break_en))
// On an illegal instruction
||((alu_illegal)&&(!clear_pipeline))
// On division by zero. If the divide isn't
1587,10 → 1537,10
//
||(bus_err)
// If we write to the CC register
||((wr_reg_ce)&&(~wr_spreg_vl[`CPU_GIE_BIT])
||((wr_reg_ce)&&(!wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_reg_id[4])&&(wr_write_cc))
);
assign w_release_from_interrupt = (~gie)&&(~i_interrupt)
assign w_release_from_interrupt = (!gie)&&(!i_interrupt)
// Then if we write the sCC register
&&(((wr_reg_ce)&&(wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_write_scc))
1623,10 → 1573,10
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_trap <= 1'b0;
else if ((alu_gie)&&(wr_reg_ce)&&(~wr_spreg_vl[`CPU_GIE_BIT])
else if ((alu_gie)&&(wr_reg_ce)&&(!wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_write_ucc)) // &&(wr_reg_id[4]) implied
r_trap <= 1'b1;
else if ((wr_reg_ce)&&(wr_write_ucc)&&(~alu_gie))
else if ((wr_reg_ce)&&(wr_write_ucc)&&(!alu_gie))
r_trap <= (r_trap)&&(wr_spreg_vl[`CPU_TRAP_BIT]);
 
reg r_ubreak;
1637,7 → 1587,7
r_ubreak <= 1'b0;
else if ((op_gie)&&(break_pending)&&(w_switch_to_interrupt))
r_ubreak <= 1'b1;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ubreak <= (ubreak)&&(wr_spreg_vl[`CPU_BREAK_BIT]);
 
assign trap = r_trap;
1653,7 → 1603,7
// Only the debug interface can clear this bit
else if ((dbgv)&&(wr_write_scc))
ill_err_i <= (ill_err_i)&&(wr_spreg_vl[`CPU_ILL_BIT]);
else if ((alu_illegal)&&(~alu_gie)&&(!clear_pipeline))
else if ((alu_illegal)&&(!alu_gie)&&(!clear_pipeline))
ill_err_i <= 1'b1;
 
`ifdef OPT_NO_USERMODE
1669,7 → 1619,7
r_ill_err_u <= 1'b0;
// If the supervisor (or debugger) writes to this register,
// clearing the bit, then clear it
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ill_err_u <=((ill_err_u)&&(wr_spreg_vl[`CPU_ILL_BIT]));
else if ((alu_illegal)&&(alu_gie)&&(!clear_pipeline))
r_ill_err_u <= 1'b1;
1688,7 → 1638,7
ibus_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
ibus_err_flag <= (ibus_err_flag)&&(wr_spreg_vl[`CPU_BUSERR_BIT]);
else if ((bus_err)&&(~alu_gie))
else if ((bus_err)&&(!alu_gie))
ibus_err_flag <= 1'b1;
// User bus error flag -- if ever set, it will cause an interrupt to
// supervisor mode.
1701,7 → 1651,7
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_ubus_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ubus_err_flag <= (ubus_err_flag)&&(wr_spreg_vl[`CPU_BUSERR_BIT]);
else if ((bus_err)&&(alu_gie))
r_ubus_err_flag <= 1'b1;
1723,7 → 1673,7
r_idiv_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
r_idiv_err_flag <= (r_idiv_err_flag)&&(wr_spreg_vl[`CPU_DIVERR_BIT]);
else if ((div_error)&&(~alu_gie))
else if ((div_error)&&(!alu_gie))
r_idiv_err_flag <= 1'b1;
 
assign idiv_err_flag = r_idiv_err_flag;
1736,7 → 1686,7
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_udiv_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)
&&(wr_write_ucc))
r_udiv_err_flag <= (r_udiv_err_flag)&&(wr_spreg_vl[`CPU_DIVERR_BIT]);
else if ((div_error)&&(alu_gie))
1761,7 → 1711,7
r_ifpu_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
r_ifpu_err_flag <= (r_ifpu_err_flag)&&(wr_spreg_vl[`CPU_FPUERR_BIT]);
else if ((fpu_error)&&(fpu_valid)&&(~alu_gie))
else if ((fpu_error)&&(fpu_valid)&&(!alu_gie))
r_ifpu_err_flag <= 1'b1;
// User floating point error flag -- if ever set, it will cause
// a sudden switch interrupt to supervisor mode.
1769,7 → 1719,7
always @(posedge i_clk)
if ((i_rst)&&(w_release_from_interrupt))
r_ufpu_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)
&&(wr_write_ucc))
r_ufpu_err_flag <= (r_ufpu_err_flag)&&(wr_spreg_vl[`CPU_FPUERR_BIT]);
else if ((fpu_error)&&(alu_gie)&&(fpu_valid))
1789,7 → 1739,7
always @(posedge i_clk)
if (i_rst)
r_ihalt_phase <= 1'b0;
else if ((~alu_gie)&&(alu_pc_valid)&&(~clear_pipeline))
else if ((!alu_gie)&&(alu_pc_valid)&&(!clear_pipeline))
r_ihalt_phase <= alu_phase;
 
assign ihalt_phase = r_ihalt_phase;
1805,7 → 1755,7
r_uhalt_phase <= 1'b0;
else if ((alu_gie)&&(alu_pc_valid))
r_uhalt_phase <= alu_phase;
else if ((~alu_gie)&&(wr_reg_ce)&&(wr_write_ucc))
else if ((!alu_gie)&&(wr_reg_ce)&&(wr_write_ucc))
r_uhalt_phase <= wr_spreg_vl[`CPU_PHASE_BIT];
 
assign uhalt_phase = r_uhalt_phase;
1822,7 → 1772,7
// the instruction writes the PC, we write whichever PC is appropriate.
//
// Do we need to all our partial results from the pipeline?
// What happens when the pipeline has gie and ~gie instructions within
// What happens when the pipeline has gie and !gie instructions within
// it? Do we clear both? What if a gie instruction tries to clear
// a non-gie instruction?
`ifdef OPT_NO_USERMODE
1834,7 → 1784,7
if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc))
r_upc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
else if ((alu_gie)&&
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
(((alu_pc_valid)&&(!clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
r_upc <= { alu_pc, 2'b00 };
assign upc = r_upc;
1843,10 → 1793,10
always @(posedge i_clk)
if (i_rst)
ipc <= { RESET_BUS_ADDRESS, 2'b00 };
else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc))
else if ((wr_reg_ce)&&(!wr_reg_id[4])&&(wr_write_pc))
ipc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
else if ((!alu_gie)&&(!alu_phase)&&
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
(((alu_pc_valid)&&(!clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
ipc <= { alu_pc, 2'b00 };
 
1853,25 → 1803,20
always @(posedge i_clk)
if (i_rst)
pf_pc <= { RESET_BUS_ADDRESS, 2'b00 };
else if ((w_switch_to_interrupt)||((~gie)&&(w_clear_icache)))
else if ((w_switch_to_interrupt)||((!gie)&&(w_clear_icache)))
pf_pc <= { ipc[(AW+1):2], 2'b00 };
else if ((w_release_from_interrupt)||((gie)&&(w_clear_icache)))
pf_pc <= { upc[(AW+1):2], 2'b00 };
else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
pf_pc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
`ifdef OPT_PIPELINED
else if ((dcd_early_branch)&&(~clear_pipeline))
else if ((dcd_early_branch)&&(!clear_pipeline))
pf_pc <= { dcd_branch_pc + 1'b1, 2'b00 };
else if ((new_pc)||((!pf_stalled)&&(pf_valid)))
pf_pc <= { pf_pc[(AW+1):2] + {{(AW-1){1'b0}},1'b1}, 2'b00 };
`else
else if ((alu_gie==gie)&&(
((alu_pc_valid)&&(~clear_pipeline))
||(mem_pc_valid)))
pf_pc <= { alu_pc[(AW-1):0], 2'b00 };
`endif
 
`ifdef OPT_PIPELINED
// If we aren't pipelined, or equivalently if we have no cache, these
// instructions will get quietly (or not so quietly) ignored by the
// optimizer.
reg r_clear_icache;
initial r_clear_icache = 1'b1;
always @(posedge i_clk)
1882,9 → 1827,6
else
r_clear_icache <= 1'b0;
assign w_clear_icache = r_clear_icache;
`else
assign w_clear_icache = i_clear_pf_cache;
`endif
 
initial new_pc = 1'b1;
always @(posedge i_clk)
1949,8 → 1891,8
r_halted <= (i_halt)&&(
// To be halted, any long lasting instruction must
// be completed.
(~pf_cyc)&&(~mem_busy)&&(~alu_busy)
&&(~div_busy)&&(~fpu_busy)
(!pf_cyc)&&(!mem_busy)&&(!alu_busy)
&&(!div_busy)&&(!fpu_busy)
// Operations must either be valid, or illegal
&&((op_valid)||(i_rst)||(dcd_illegal))
// Decode stage must be either valid, in reset, or ill
1959,7 → 1901,7
always @(posedge i_clk)
r_halted <= (i_halt)&&((op_valid)||(i_rst));
`endif
assign o_dbg_stall = ~r_halted;
assign o_dbg_stall = !r_halted;
 
//
//
1968,55 → 1910,19
//
//
assign o_op_stall = (master_ce)&&(op_stall);
assign o_pf_stall = (master_ce)&&(~pf_valid);
assign o_i_count = (alu_pc_valid)&&(~clear_pipeline);
assign o_pf_stall = (master_ce)&&(!pf_valid);
assign o_i_count = (alu_pc_valid)&&(!clear_pipeline);
 
`ifdef DEBUG_SCOPE
always @(posedge i_clk)
o_debug <= {
/*
o_break, i_wb_err, pf_pc[1:0],
flags,
pf_valid, dcd_valid, op_valid, alu_valid, mem_valid,
op_ce, alu_ce, mem_ce,
//
master_ce, op_valid_alu, op_valid_mem,
//
alu_stall, mem_busy, op_pipe, mem_pipe_stalled,
mem_we,
// ((op_valid_alu)&&(alu_stall))
// ||((op_valid_mem)&&(~op_pipe)&&(mem_busy))
// ||((op_valid_mem)&&( op_pipe)&&(mem_pipe_stalled)));
// op_Av[23:20], op_Av[3:0],
gie, sleep, wr_reg_ce, wr_gpreg_vl[4:0]
*/
/*
i_rst, master_ce, (new_pc),
((dcd_early_branch)&&(dcd_valid)),
pf_valid, pf_illegal,
op_ce, dcd_ce, dcd_valid, dcd_stalled,
pf_cyc, pf_stb, pf_we, pf_ack, pf_stall, pf_err,
pf_pc[7:0], pf_addr[7:0]
*/
 
i_wb_err, gie, alu_illegal,
(new_pc)||((dcd_early_branch)&&(~clear_pipeline)),
mem_busy,
(mem_busy)?{ (o_wb_gbl_stb|o_wb_lcl_stb), o_wb_we,
o_wb_addr[8:0] }
: { pf_instruction[31:21] },
pf_valid, (pf_valid) ? alu_pc[14:0]
:{ pf_cyc, pf_stb, pf_pc[14:2] }
 
/*
i_wb_err, gie, new_pc, dcd_early_branch, // 4
pf_valid, pf_cyc, pf_stb, pf_instruction_pc[0], // 4
pf_instruction[30:27], // 4
dcd_gie, mem_busy, o_wb_gbl_cyc, o_wb_gbl_stb, // 4
dcd_valid,
((dcd_early_branch)&&(~clear_pipeline)) // 15
? dcd_branch_pc[14:0]:pf_pc[14:0]
*/
wr_reg_ce, pf_valid, new_pc,
(wr_reg_ce)?
{ wr_reg_id, wr_gpreg_vl[23:0] }
:{ op_stall,
o_wb_gbl_cyc, o_wb_gbl_stb, o_wb_we,
mem_busy,
dcd_valid, op_ce, pf_pc[21:0] }
};
`endif
 
/trunk/rtl/qflashxpress.v
0,0 → 1,251
////////////////////////////////////////////////////////////////////////////////
//
// Filename: qflashxpress.v
//
// Project: CMod S6 System on a Chip, ZipCPU demonstration project
//
// Purpose: To provide wishbone controlled read access (and read access
// *only*) to the QSPI flash, using a flash clock of 80MHz, and
// nothing more. Indeed, this is designed to be a *very* stripped down
// version of a flash driver, with the goal of providing 1) very fast
// access for 2) very low logic count.
//
// Two modes/states of operation:
//
// STARTUP
// 1. Waits for the flash to come on line
// Start out idle for 300 uS
// 2. Sends a signal to remove the flash from any QSPI read mode. In our
// case, we'll send several clocks of an empty command. In SPI
// mode, it'll get ignored. In QSPI mode, it'll remove us from
// QSPI mode.
// 3. Explicitly places and leaves the flash into QSPI mode
// 0xEB 3(0xa0) 0xa0 0xa0 0xa0 4(0x00)
// 4. All done
//
// NORMAL-OPS
// ODATA <- ?, 3xADDR, 0xa0, 0x00, 0x00 | 0x00, 0x00, 0x00, 0x00 ? (22nibs)
// STALL <- TRUE until closed at the end
// MODE <- 2'b10 for 4 clks, then 2'b11
// CLK <- 2'b10 before starting, then 2'b01 until the end
// CSN <- 0 any time CLK != 2'b11
//
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`define OPT_FLASH_PIPELINE
module qflashxpress(i_clk,
i_wb_cyc, i_wb_stb, i_wb_addr,
o_wb_ack, o_wb_stall, o_wb_data,
o_qspi_sck, o_qspi_cs_n, o_qspi_mod, o_qspi_dat, i_qspi_dat);
localparam AW=24-2;
input i_clk;
//
input i_wb_cyc, i_wb_stb;
input [(AW-1):0] i_wb_addr;
//
output reg o_wb_ack, o_wb_stall;
output reg [31:0] o_wb_data;
//
output wire [1:0] o_qspi_sck;
output wire o_qspi_cs_n;
output wire [1:0] o_qspi_mod;
output wire [3:0] o_qspi_dat;
input wire [3:0] i_qspi_dat;
 
//
//
// Maintenance / startup portion
//
//
reg maintenance;
reg [14:0] m_counter;
reg [1:0] m_state;
reg [1:0] m_mod;
reg m_cs_n;
reg [1:0] m_clk;
reg [31:0] m_data;
wire [3:0] m_dat;
 
initial maintenance = 1'b1;
initial m_counter = 0;
initial m_state = 2'b00;
always @(posedge i_clk)
begin
if (maintenance)
m_counter <= m_counter + 1'b1;
m_mod <= 2'b00; // SPI mode always for maintenance
case(m_state)
2'b00: begin
// Step one: wait for the flash device to initialize.
// Perhaps this is more for form than anything else,
// especially if we just loaded our configuration from
// the flash, but in case we did not--we do this anyway.
maintenance <= 1'b1;
if (m_counter[14:0]==15'h7fff) // 24000 is the limit
m_state <= 2'b01;
m_cs_n <= 1'b1;
m_clk <= 2'b11;
end
2'b01: begin
// Now that the flash has had a chance to start up, feed
// it with chip selects with no clocks. This is
// guaranteed to remove us from any XIP mode we might
// be in upon startup. We do this so that we might be
// placed into a known mode--albeit the wrong one, but
// a known one.
maintenance <= 1'b1;
//
// 1111 0000 1111 0000 1111 0000 1111 0000
// 1111 0000 1111 0000 1111 0000 1111 0000
// 1111 ==> 17 * 4 clocks, or 68 clocks in total
//
if (m_counter[14:0] == 15'd138)
m_state <= 2'b10;
m_cs_n <= 1'b0;
m_clk <= {(2){!m_counter[2]}};
m_data <= { 32'hfff0f0ff }; // EB command
m_data[31:28] <= 0; // just ... not yet
end
2'b10: begin
// Rest, before issuing our initial read command
maintenance <= 1'b1;
if (m_counter[14:0] == 15'd138 + 15'd48)
m_state <= 2'b11;
m_cs_n <= 1'b1; // Rest the interface
m_clk <= 2'b11;
m_data <= { 32'hfff0f0ff }; // EB command
end
2'b11: begin
if (m_counter[14:0] == 15'd138+15'd48+15'd10)
maintenance <= 1'b0;
m_cs_n <= 1'b0;
m_clk <= (m_clk == 2'b11)? 2'b10 : 2'b01;
if (m_clk == 2'b01) // EB QuadIO Read Cmd
m_data <= {m_data[27:0], 4'h0};
// We depend upon the non-maintenance code to provide
// our first (bogus) address, mode, dummy cycles, and
// data bits.
end
endcase
end
assign m_dat = m_data[31:28];
 
//
//
// Data / access portion
//
//
reg [21:0] busy_pipe;
reg [31:0] data_pipe;
reg pre_ack;
initial data_pipe = 0;
always @(posedge i_clk)
if (((i_wb_stb)&&(!o_wb_stall))||(maintenance))
data_pipe <= { i_wb_addr, 2'b00, 8'ha0 };
else if (o_qspi_sck == 2'b01)
data_pipe <= { data_pipe[27:0], 4'h0 };
assign o_qspi_dat = (maintenance)? m_dat : data_pipe[31:28];
 
`ifdef OPT_FLASH_PIPELINE
reg pipe_req;
 
reg [(AW-1):0] last_addr;
always @(posedge i_clk)
if ((i_wb_stb)&&(!o_wb_stall))
last_addr <= i_wb_addr;
 
initial pipe_req = 1'b0;
always @(posedge i_clk)
pipe_req <= (pre_ack)&&(i_wb_stb)
&&(last_addr + 1'b1 == i_wb_addr);
`else
wire pipe_req;
assign pipe_req = 1'b0;
`endif
 
 
initial pre_ack = 0;
always @(posedge i_clk)
if ((maintenance)||(!i_wb_cyc))
pre_ack <= 1'b0;
else if ((i_wb_stb)&&(!o_wb_stall))
pre_ack <= 1'b1;
else if ((o_wb_ack)&&(!pipe_req))
pre_ack <= 1'b0;
 
reg [43:0] clk_pipe;
initial clk_pipe = -1;
always @(posedge i_clk)
if (((i_wb_stb)&&(!o_wb_stall)&&(!pipe_req))||(maintenance))
clk_pipe <= { 2'b00, {(21){2'b01}}};
else if (((i_wb_stb)&&(!o_wb_stall))||(maintenance))
clk_pipe <= { {(8){2'b01}}, {(14){2'b11}} };
else
clk_pipe <= { clk_pipe[41:0], 2'b11 };
assign o_qspi_sck = (maintenance)? m_clk : clk_pipe[43:42];
assign o_qspi_cs_n= (maintenance)?m_cs_n : (clk_pipe[43:42] == 2'b11);
 
reg [9:0] mod_pipe;
always @(posedge i_clk)
if(((i_wb_stb)&&(!o_wb_stall)&&(!pipe_req))||(maintenance))
mod_pipe <= { 10'h0 }; // Always quad, but in/out
else
mod_pipe <= { mod_pipe[8:0], 1'b1 }; // Add input at end
assign o_qspi_mod = (maintenance) ? m_mod :(mod_pipe[9]? 2'b11:2'b10);
 
initial busy_pipe = 22'h3fffff;
always @(posedge i_clk)
if (((i_wb_stb)&&(!o_wb_stall)&&(!pipe_req))||(maintenance))
busy_pipe <= { 22'h3fffff };
else if ((i_wb_stb)&&(!o_wb_stall))
busy_pipe <= { 22'h3fc000 };
else
busy_pipe <= { busy_pipe[20:0], 1'b0 };
 
initial o_wb_stall = 1'b1;
always @(posedge i_clk)
o_wb_stall <= ((i_wb_stb)&&(!o_wb_stall))
||(busy_pipe[19])||((busy_pipe[20])&&(!pipe_req));
 
reg ack_pipe;
initial ack_pipe = 1'b0;
always @(posedge i_clk)
ack_pipe <= (pre_ack)&&(busy_pipe[20:19] == 2'b10);
initial o_wb_ack = 1'b0;
always @(posedge i_clk)
o_wb_ack <= (pre_ack)&&(ack_pipe);
 
always @(posedge i_clk)
o_wb_data <= { o_wb_data[27:0], i_qspi_dat };
 
endmodule
 
/trunk/rtl/rxuartlite.v
0,0 → 1,197
////////////////////////////////////////////////////////////////////////////////
//
// Filename: rxuartlite.v
//
// Project: wbuart32, a full featured UART with simulator
//
// Purpose: Receive and decode inputs from a single UART line.
//
//
// To interface with this module, connect it to your system clock,
// and a UART input. Set the parameter to the number of clocks per
// baud. When data becomes available, the o_wr line will be asserted
// for one clock cycle.
//
// This interface only handles 8N1 serial port communications. It does
// not handle the break, parity, or frame error conditions.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`define RXU_BIT_ZERO 4'h0
`define RXU_BIT_ONE 4'h1
`define RXU_BIT_TWO 4'h2
`define RXU_BIT_THREE 4'h3
`define RXU_BIT_FOUR 4'h4
`define RXU_BIT_FIVE 4'h5
`define RXU_BIT_SIX 4'h6
`define RXU_BIT_SEVEN 4'h7
// `define RXU_PARITY 4'h8 // Unused in RXUARTLITE
`define RXU_STOP 4'h8
// `define RXU_SECOND_STOP 4'ha // Unused in RXUARTLITE
// Unused 4'hb
// Unused 4'hc
// `define RXU_BREAK 4'hd // Unused in RXUARTLITE
// `define RXU_RESET_IDLE 4'he // Unused in RXUARTLITE
`define RXU_IDLE 4'hf
 
module rxuartlite(i_clk, i_uart_rx, o_wr, o_data);
parameter [23:0] CLOCKS_PER_BAUD = 24'd868;
input i_clk;
input i_uart_rx;
output reg o_wr;
output reg [7:0] o_data;
 
 
wire [23:0] clocks_per_baud, half_baud;
reg [3:0] state;
 
assign half_baud = { 1'b0, CLOCKS_PER_BAUD[23:1] } - 24'h1;
reg [23:0] baud_counter;
reg zero_baud_counter;
 
 
// Since this is an asynchronous receiver, we need to register our
// input a couple of clocks over to avoid any problems with
// metastability. We do that here, and then ignore all but the
// ck_uart wire.
reg q_uart, qq_uart, ck_uart;
initial q_uart = 1'b0;
initial qq_uart = 1'b0;
initial ck_uart = 1'b0;
always @(posedge i_clk)
begin
q_uart <= i_uart_rx;
qq_uart <= q_uart;
ck_uart <= qq_uart;
end
 
// Keep track of the number of clocks since the last change.
//
// This is used to determine if we are in either a break or an idle
// condition, as discussed further below.
reg [23:0] chg_counter;
initial chg_counter = 24'h00;
always @(posedge i_clk)
if (qq_uart != ck_uart)
chg_counter <= 24'h00;
else
chg_counter <= chg_counter + 1;
 
// Are we in the middle of a baud iterval? Specifically, are we
// in the middle of a start bit? Set this to high if so. We'll use
// this within our state machine to transition out of the IDLE
// state.
reg half_baud_time;
initial half_baud_time = 0;
always @(posedge i_clk)
half_baud_time <= (~ck_uart)&&(chg_counter >= half_baud);
 
 
initial state = `RXU_IDLE;
always @(posedge i_clk)
begin
if (state == `RXU_IDLE)
begin // Idle state, independent of baud counter
// By default, just stay in the IDLE state
state <= `RXU_IDLE;
if ((~ck_uart)&&(half_baud_time))
// UNLESS: We are in the center of a valid
// start bit
state <= `RXU_BIT_ZERO;
end else if (zero_baud_counter)
begin
if (state < `RXU_STOP)
// Data arrives least significant bit first.
// By the time this is clocked in, it's what
// you'll have.
state <= state + 1;
else // Wait for the next character
state <= `RXU_IDLE;
end
end
 
// Data bit capture logic.
//
// This is drastically simplified from the state machine above, based
// upon: 1) it doesn't matter what it is until the end of a captured
// byte, and 2) the data register will flush itself of any invalid
// data in all other cases. Hence, let's keep it real simple.
reg [7:0] data_reg;
always @(posedge i_clk)
if (zero_baud_counter)
data_reg <= { ck_uart, data_reg[7:1] };
 
// Our data bit logic doesn't need nearly the complexity of all that
// work above. Indeed, we only need to know if we are at the end of
// a stop bit, in which case we copy the data_reg into our output
// data register, o_data, and tell others (for one clock) that data is
// available.
//
initial o_data = 8'h00;
reg pre_wr;
initial pre_wr = 1'b0;
always @(posedge i_clk)
if ((zero_baud_counter)&&(state == `RXU_STOP))
begin
o_wr <= 1'b1;
o_data <= data_reg;
end else
o_wr <= 1'b0;
 
// The baud counter
//
// This is used as a "clock divider" if you will, but the clock needs
// to be reset before any byte can be decoded. In all other respects,
// we set ourselves up for clocks_per_baud counts between baud
// intervals.
always @(posedge i_clk)
if ((zero_baud_counter)|||(state == `RXU_IDLE))
baud_counter <= CLOCKS_PER_BAUD-1'b1;
else
baud_counter <= baud_counter-1'b1;
 
// zero_baud_counter
//
// Rather than testing whether or not (baud_counter == 0) within our
// (already too complicated) state transition tables, we use
// zero_baud_counter to pre-charge that test on the clock
// before--cleaning up some otherwise difficult timing dependencies.
initial zero_baud_counter = 1'b0;
always @(posedge i_clk)
if (state == `RXU_IDLE)
zero_baud_counter <= 1'b0;
else
zero_baud_counter <= (baud_counter == 24'h01);
 
 
endmodule
 
 
/trunk/rtl/spio.v
4,7 → 4,12
//
// Project: CMod S6 System on a Chip, ZipCPU demonstration project
//
// Purpose:
// Purpose: To provide a bare minimum level of access to the two on-board
// buttons, the 4 LED's, and the external 4x4 keypad. This
// routine does *nothing* to debounce either buttons or keypad. Any such
// debouncing *must* be done in software. As with the rest of the S6
// project, the goal is to keep the logic small and simple, and this
// module is no different.
//
// With the USB cord on top, the board facing you, LED[0] is on the left.
//
72,6 → 77,12
reg [3:0] x_kp_row, r_kp_row;
reg [1:0] x_btn, r_btn;
 
initial x_kp_row = 4'h0;
initial r_kp_row = 4'b0;
initial x_btn = 2'b0;
initial r_btn = 2'b0;
initial o_kp_int = 1'b0;
initial o_btn_int = 1'b0;
always @(posedge i_clk)
begin
x_kp_row <= i_kp_row;
/trunk/rtl/toplevel.v
50,6 → 50,7
////////////////////////////////////////////////////////////////////////////////
//
//
`define LOWLOGIC_FLASH
module toplevel(i_clk_8mhz,
o_qspi_cs_n, o_qspi_sck, io_qspi_dat,
i_btn, o_led, o_pwm, o_pwm_shutdown_n, o_pwm_gain,
90,7 → 91,7
//
// Generate a usable clock for the rest of the board to run at.
//
wire ck_zero_0, clk_s;
wire ck_zero_0, clk_s, clk_sn;
 
// Clock frequency = (20 / 2) * 8Mhz = 80 MHz
// Clock period = 12.5 ns
111,6 → 112,7
.CLK0(ck_zero_0),
.CLKFB(ck_zero_0),
.CLKFX(clk_s),
.CLKFX180(clk_sn),
.PSEN(1'b0),
.RST(1'b0));
 
129,6 → 131,12
// the busmaster module (i.e. the interconnect) is all that needs
// to be changed: either to add more devices, or to remove them.
//
`ifdef LOWLOGIC_FLASH
wire [1:0] qspi_sck;
`else
wire qspi_sck;
`endif
wire qspi_cs_n;
wire [3:0] qspi_dat;
wire [1:0] qspi_bmod;
wire [15:0] w_gpio;
139,7 → 147,7
// Serial port wires
i_uart, o_uart_rts_n, o_uart, i_uart_cts_n,
// SPI/SD-card flash
o_qspi_cs_n, o_qspi_sck, qspi_dat, io_qspi_dat, qspi_bmod,
qspi_cs_n, qspi_sck, qspi_dat, io_qspi_dat, qspi_bmod,
// Board lights and switches
i_btn, o_led, o_pwm, { o_pwm_shutdown_n, o_pwm_gain },
// Keypad connections
157,9 → 165,41
// port with one wire in and one wire out. This utilizes our
// control wires (qspi_bmod) to set the output lines appropriately.
//
assign io_qspi_dat = (~qspi_bmod[1])?({2'b11,1'bz,qspi_dat[0]})
//
// 2'b0? -- Normal SPI
// 2'b10 -- Quad Output
// 2'b11 -- Quad Input
`ifdef LOWLOGIC_FLASH
reg r_qspi_cs_n;
reg [1:0] r_qspi_bmod;
reg [3:0] r_qspi_dat, r_qspi_z;
reg [1:0] r_qspi_sck;
always @(posedge clk_s)
r_qspi_sck <= qspi_sck;
xoddr xqspi_sck({clk_s, clk_sn}, r_qspi_sck, o_qspi_sck);
initial r_qspi_cs_n = 1'b1;
initial r_qspi_z = 4'b1101;
always @(posedge clk_s)
begin
r_qspi_dat <= (qspi_bmod[1]) ? qspi_dat:{ 3'b111, qspi_dat[0]};
r_qspi_z <= (!qspi_bmod[1])? 4'b1101
: ((qspi_bmod[0]) ? 4'h0 : 4'hf);
r_qspi_cs_n <= qspi_cs_n;
end
 
assign o_qspi_cs_n = r_qspi_cs_n;
assign io_qspi_dat[0] = (r_qspi_z[0]) ? r_qspi_dat[0] : 1'bz;
assign io_qspi_dat[1] = (r_qspi_z[1]) ? r_qspi_dat[1] : 1'bz;
assign io_qspi_dat[2] = (r_qspi_z[2]) ? r_qspi_dat[2] : 1'bz;
assign io_qspi_dat[3] = (r_qspi_z[3]) ? r_qspi_dat[3] : 1'bz;
`else
assign io_qspi_dat = (!qspi_bmod[1])?({2'b11,1'bz,qspi_dat[0]})
:((qspi_bmod[0])?(4'bzzzz):(qspi_dat[3:0]));
 
assign o_qspi_cs_n = qspi_cs_n;
assign o_qspi_sck = qspi_sck;
`endif // LOWLOGIC_FLASH
 
//
// I2C support
//
/trunk/rtl/txuartlite.v
0,0 → 1,206
////////////////////////////////////////////////////////////////////////////////
//
// Filename: txuartlite.v
//
// Project: wbuart32, a full featured UART with simulator
//
// Purpose: Transmit outputs over a single UART line. This particular UART
// implementation has been extremely simplified: it does not handle
// generating break conditions, nor does it handle anything other than the
// 8N1 (8 data bits, no parity, 1 stop bit) UART sub-protocol.
//
// To interface with this module, connect it to your system clock, and
// pass it the byte of data you wish to transmit. Strobe the i_wr line
// high for one cycle, and your data will be off. Wait until the 'o_busy'
// line is low before strobing the i_wr line again--this implementation
// has NO BUFFER, so strobing i_wr while the core is busy will just
// get ignored. The output will be placed on the o_txuart output line.
//
// (I often set both data and strobe on the same clock, and then just leave
// them set until the busy line is low. Then I move on to the next piece
// of data.)
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`define TXU_BIT_ZERO 4'h0
`define TXU_BIT_ONE 4'h1
`define TXU_BIT_TWO 4'h2
`define TXU_BIT_THREE 4'h3
`define TXU_BIT_FOUR 4'h4
`define TXU_BIT_FIVE 4'h5
`define TXU_BIT_SIX 4'h6
`define TXU_BIT_SEVEN 4'h7
`define TXU_STOP 4'h8
`define TXU_IDLE 4'hf
//
//
module txuartlite(i_clk, i_wr, i_data, o_uart_tx, o_busy);
parameter [23:0] CLOCKS_PER_BAUD = 24'd868;
input i_clk;
input i_wr;
input [7:0] i_data;
// And the UART input line itself
output reg o_uart_tx;
// A line to tell others when we are ready to accept data. If
// (i_wr)&&(!o_busy) is ever true, then the core has accepted a byte
// for transmission.
output wire o_busy;
 
reg [23:0] baud_counter;
reg [3:0] state;
reg [7:0] lcl_data;
reg r_busy, zero_baud_counter;
 
initial r_busy = 1'b1;
initial state = `TXU_IDLE;
initial lcl_data= 8'h0;
always @(posedge i_clk)
begin
if (!zero_baud_counter)
// r_busy needs to be set coming into here
r_busy <= 1'b1;
else if (state == `TXU_IDLE) // STATE_IDLE
begin
r_busy <= 1'b0;
if ((i_wr)&&(!r_busy))
begin // Immediately start us off with a start bit
r_busy <= 1'b1;
state <= `TXU_BIT_ZERO;
end
end else begin
// One clock tick in each of these states ...
r_busy <= 1'b1;
if (state <=`TXU_STOP) // start bit, 8-d bits, stop-b
state <= state + 1;
else
state <= `TXU_IDLE;
end
end
 
// o_busy
//
// This is a wire, designed to be true is we are ever busy above.
// originally, this was going to be true if we were ever not in the
// idle state. The logic has since become more complex, hence we have
// a register dedicated to this and just copy out that registers value.
assign o_busy = (r_busy);
 
 
// lcl_data
//
// This is our working copy of the i_data register which we use
// when transmitting. It is only of interest during transmit, and is
// allowed to be whatever at any other time. Hence, if r_busy isn't
// true, we can always set it. On the one clock where r_busy isn't
// true and i_wr is, we set it and r_busy is true thereafter.
// Then, on any zero_baud_counter (i.e. change between baud intervals)
// we simple logically shift the register right to grab the next bit.
initial lcl_data = 8'hff;
always @(posedge i_clk)
if ((i_wr)&&(!r_busy))
lcl_data <= i_data;
else if (zero_baud_counter)
lcl_data <= { 1'b1, lcl_data[7:1] };
 
// o_uart_tx
//
// This is the final result/output desired of this core. It's all
// centered about o_uart_tx. This is what finally needs to follow
// the UART protocol.
//
initial o_uart_tx = 1'b1;
always @(posedge i_clk)
if ((i_wr)&&(!r_busy))
o_uart_tx <= 1'b0; // Set the start bit on writes
else if (zero_baud_counter) // Set the data bit.
o_uart_tx <= lcl_data[0];
 
 
// All of the above logic is driven by the baud counter. Bits must last
// CLOCKS_PER_BAUD in length, and this baud counter is what we use to
// make certain of that.
//
// The basic logic is this: at the beginning of a bit interval, start
// the baud counter and set it to count CLOCKS_PER_BAUD. When it gets
// to zero, restart it.
//
// However, comparing a 28'bit number to zero can be rather complex--
// especially if we wish to do anything else on that same clock. For
// that reason, we create "zero_baud_counter". zero_baud_counter is
// nothing more than a flag that is true anytime baud_counter is zero.
// It's true when the logic (above) needs to step to the next bit.
// Simple enough?
//
// I wish we could stop there, but there are some other (ugly)
// conditions to deal with that offer exceptions to this basic logic.
//
// 1. When the user has commanded a BREAK across the line, we need to
// wait several baud intervals following the break before we start
// transmitting, to give any receiver a chance to recognize that we are
// out of the break condition, and to know that the next bit will be
// a stop bit.
//
// 2. A reset is similar to a break condition--on both we wait several
// baud intervals before allowing a start bit.
//
// 3. In the idle state, we stop our counter--so that upon a request
// to transmit when idle we can start transmitting immediately, rather
// than waiting for the end of the next (fictitious and arbitrary) baud
// interval.
//
// When (i_wr)&&(!r_busy)&&(state == `TXU_IDLE) then we're not only in
// the idle state, but we also just accepted a command to start writing
// the next word. At this point, the baud counter needs to be reset
// to the number of CLOCKS_PER_BAUD, and zero_baud_counter set to zero.
//
// The logic is a bit twisted here, in that it will only check for the
// above condition when zero_baud_counter is false--so as to make
// certain the STOP bit is complete.
initial zero_baud_counter = 1'b0;
initial baud_counter = 24'h05;
always @(posedge i_clk)
begin
zero_baud_counter <= (baud_counter == 24'h01);
if (state == `TXU_IDLE)
begin
baud_counter <= 24'h0;
zero_baud_counter <= 1'b1;
if ((i_wr)&&(!r_busy))
begin
baud_counter <= CLOCKS_PER_BAUD - 24'h01;
zero_baud_counter <= 1'b0;
end
end else if (!zero_baud_counter)
baud_counter <= baud_counter - 24'h01;
else
baud_counter <= CLOCKS_PER_BAUD - 24'h01;
end
endmodule
 
/trunk/rtl/wbqspiflashp.v
8,7 → 8,7
// provides a writable interface to the flash whereas the other
// is configured to be read only.
//
// Creator: Dan Gisselquist
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
/trunk/rtl/wbscope.v
11,7 → 11,7
// reset, the scope records a copy of the input data every time the clock
// ticks with the circuit enabled. That is, it records these values up
// until the trigger. Once the trigger goes high, the scope will record
// for bw_holdoff more counts before stopping. Values may then be read
// for br_holdoff more counts before stopping. Values may then be read
// from the buffer, oldest to most recent. After reading, the scope may
// then be reset for another run.
//
87,9 → 87,11
i_wb_clk, i_wb_cyc, i_wb_stb, i_wb_we, i_wb_addr, i_wb_data,
o_wb_ack, o_wb_stall, o_wb_data,
o_interrupt);
parameter LGMEM = 5'd10, BUSW = 32, SYNCHRONOUS=1,
DEFAULT_HOLDOFF = ((1<<(LGMEM-1))-4),
HOLDOFFBITS = 20;
parameter [4:0] LGMEM = 5'd10;
parameter BUSW = 32;
parameter [0:0] SYNCHRONOUS=1;
parameter HOLDOFFBITS = 20;
parameter [(HOLDOFFBITS-1):0] DEFAULT_HOLDOFF = ((1<<(LGMEM-1))-4);
// The input signals that we wish to record
input i_clk, i_ce, i_trigger;
input [(BUSW-1):0] i_data;
110,23 → 112,25
// Our status/config register
wire bw_reset_request, bw_manual_trigger,
bw_disable_trigger, bw_reset_complete;
reg [22:0] br_config;
wire [(HOLDOFFBITS-1):0] bw_holdoff;
initial br_config = DEFAULT_HOLDOFF;
reg [2:0] br_config;
reg [(HOLDOFFBITS-1):0] br_holdoff;
initial br_config = 3'b0;
initial br_holdoff = DEFAULT_HOLDOFF;
always @(posedge i_wb_clk)
if ((i_wb_stb)&&(~i_wb_addr))
if ((i_wb_stb)&&(!i_wb_addr))
begin
if (i_wb_we)
begin
br_config <= { i_wb_data[31],
(i_wb_data[27]),
i_wb_data[26],
i_wb_data[19:0] };
i_wb_data[27],
i_wb_data[26] };
br_holdoff = i_wb_data[(HOLDOFFBITS-1):0];
end
end else if (bw_reset_complete)
br_config[22] <= 1'b1;
assign bw_reset_request = (~br_config[22]);
assign bw_manual_trigger = (br_config[21]);
assign bw_disable_trigger = (br_config[20]);
assign bw_holdoff = br_config[(HOLDOFFBITS-1):0];
br_config[2] <= 1'b1;
assign bw_reset_request = (!br_config[2]);
assign bw_manual_trigger = (br_config[1]);
assign bw_disable_trigger = (br_config[0]);
 
wire dw_reset, dw_manual_trigger, dw_disable_trigger;
generate
180,8 → 184,7
reg dr_triggered, dr_primed;
wire dw_trigger;
assign dw_trigger = (dr_primed)&&(
((i_trigger)&&(~dw_disable_trigger))
||(dr_triggered)
((i_trigger)&&(!dw_disable_trigger))
||(dw_manual_trigger));
initial dr_triggered = 1'b0;
always @(posedge i_clk)
196,12 → 199,6
// Writes take place on the data clock
// The counter is unsigned
(* ASYNC_REG="TRUE" *) reg [(HOLDOFFBITS-1):0] counter;
reg less_than_holdoff;
always @(posedge i_clk)
if (dw_reset)
less_than_holdoff <= 1'b1;
else if (i_ce)
less_than_holdoff <= (counter < bw_holdoff);
 
reg dr_stopped;
initial dr_stopped = 1'b0;
209,7 → 206,7
always @(posedge i_clk)
if (dw_reset)
counter <= 0;
else if ((i_ce)&&(dr_triggered)&&(~dr_stopped))
else if ((i_ce)&&(dr_triggered)&&(!dr_stopped))
begin // MUST BE a < and not <=, so that we can keep this w/in
// 20 bits. Else we'd need to add a bit to comparison
// here.
216,12 → 213,10
counter <= counter + 1'b1;
end
always @(posedge i_clk)
if ((~dr_triggered)||(dw_reset))
if ((!dr_triggered)||(dw_reset))
dr_stopped <= 1'b0;
else if (i_ce)
dr_stopped <= (counter+1'b1 >= bw_holdoff);
else
dr_stopped <= (counter >= bw_holdoff);
dr_stopped <= (counter >= br_holdoff);
 
//
// Actually do our writes to memory. Record, via 'primed' when
296,10 → 291,10
if ((bw_reset_request)
||((bw_cyc_stb)&&(i_wb_addr)&&(i_wb_we)))
raddr <= 0;
else if ((bw_cyc_stb)&&(i_wb_addr)&&(~i_wb_we)&&(bw_stopped))
else if ((bw_cyc_stb)&&(i_wb_addr)&&(!i_wb_we)&&(bw_stopped))
raddr <= raddr + 1'b1; // Data read, when stopped
 
if ((bw_cyc_stb)&&(~i_wb_we))
if ((bw_cyc_stb)&&(!i_wb_we))
begin // Read from the bus
br_wb_ack <= 1'b1;
end else if ((bw_cyc_stb)&&(i_wb_we))
312,11 → 307,11
reg [31:0] nxt_mem;
always @(posedge i_wb_clk)
nxt_mem <= mem[raddr+waddr+
(((bw_cyc_stb)&&(i_wb_addr)&&(~i_wb_we)) ?
(((bw_cyc_stb)&&(i_wb_addr)&&(!i_wb_we)) ?
{{(LGMEM-1){1'b0}},1'b1} : { (LGMEM){1'b0}} )];
 
wire [19:0] full_holdoff;
assign full_holdoff[(HOLDOFFBITS-1):0] = bw_holdoff;
assign full_holdoff[(HOLDOFFBITS-1):0] = br_holdoff;
generate if (HOLDOFFBITS < 20)
assign full_holdoff[19:(HOLDOFFBITS)] = 0;
endgenerate
324,7 → 319,7
wire [4:0] bw_lgmem;
assign bw_lgmem = LGMEM;
always @(posedge i_wb_clk)
if (~i_wb_addr) // Control register read
if (!i_wb_addr) // Control register read
o_wb_data <= { bw_reset_request,
bw_stopped,
bw_triggered,
334,7 → 329,7
(raddr == {(LGMEM){1'b0}}),
bw_lgmem,
full_holdoff };
else if (~bw_stopped) // read, prior to stopping
else if (!bw_stopped) // read, prior to stopping
o_wb_data <= i_data;
else // if (i_wb_addr) // Read from FIFO memory
o_wb_data <= nxt_mem; // mem[raddr+waddr];
344,12 → 339,12
 
reg br_level_interrupt;
initial br_level_interrupt = 1'b0;
assign o_interrupt = (bw_stopped)&&(~bw_disable_trigger)
&&(~br_level_interrupt);
assign o_interrupt = (bw_stopped)&&(!bw_disable_trigger)
&&(!br_level_interrupt);
always @(posedge i_wb_clk)
if ((bw_reset_complete)||(bw_reset_request))
br_level_interrupt<= 1'b0;
else
br_level_interrupt<= (bw_stopped)&&(~bw_disable_trigger);
br_level_interrupt<= (bw_stopped)&&(!bw_disable_trigger);
 
endmodule
/trunk/rtl/xoddr.v
0,0 → 1,66
////////////////////////////////////////////////////////////////////////////////
//
// Filename: xoddr.v
//
// Project: CMod S6 System on a Chip, ZipCPU demonstration project
//
// Purpose: When outputting a clock, Xilinx recommends using the ODDR
// primitive to insure the clock's stability. This is a simple
// wrapper around that primitive, although it does cost one delay.
// For the QSPI, this helps to make certain that as much of the logic
// delay as possible has been removed from the path--to get the full
// 80MHz speed of our clock. (The QSPI device on the S6 can run at 108MHz,
// here, we will run it at 80MHz.)
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
module xoddr(i_clk, i_v, o_pin);
input [1:0] i_clk;
input [1:0] i_v;
output o_pin;
 
wire w_internal;
reg last;
 
ODDR2 #(
.DDR_ALIGNMENT("C0"),
.INIT(1'b1),
.SRTYPE("ASYNC")
) ODDRi(
.Q(o_pin),
.CE(1'b1),
.C0(i_clk[0]),
.D0(i_v[0]), // Negative clock edge (goes first)
.C1(i_clk[1]),
.D1(i_v[1]), // Positive clock edge
.R(1'b0),
.S(1'b0));
 
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.