`define __3STAGE__
// read-modify-write cycle:
// Generate RMW cycles when writing in the memory. This option basically
// makes the read and write cycle symmetric and may work better in the cases
// when the 32-bit memory does not support separate write enables for
// separate 16-bit and 8-bit words. Typically, the RMW cycle results in a
// decrease of 5% in the performance (not the clock, but the instruction
// pipeline efficiency) due to memory wait-states.
// Additional note: the RMW cycle is required for -O3 compilation!
//`define __RMW_CYCLE__
// multi-threading support:
// Decreases clock performance by 10% (90MHz), but enables two contexts
// (threads) in the core. They start in the same code, but the "interrupt"
// handling is locked in a separate loop and the context switch is always
// delayed until the next pipeline flush, in order to decrease the
// performance impact. Note: threading is currently supported only in the
// 3-stage pipeline version.
// Decreases clock performance by 20% (80MHz), but enables two contexts
// (threads) in the core. The threads work in symmetrical way, which means
// that they will start with the same exactly core parameters (same initial
// PC, same initial SP, etc). The boot.s code is designed to handle this
// difference and set each thread to different applications.
// Notes:
// a) threading is currently supported only in the 3-stage pipeline version.
// b) the old experimental "interrupt mode" was removed, which means that
// the multi-thread mode does not make anything "visible" other than
// increment the gpio register.
// c) the threading in the non interrupt mode just shares the core 50%/50%,
// in a way that the single-thread performance is reduced.
//`define __THREADING__
// The performance measurement can be done in the simulation level by
// enabling the __PERFMETER__ define, in order to check how the clock cycles
// are used in the core. The value defines how many clocks are computed
// before printing the result.
// are used in the core. The report is displayed when the FINISH_REQ signal
// is activated by the UART.
//`define __PERFMETER__ 70000
`define __PERFMETER__
// mac instruction:
`define __RV32E__
// initial PC and SP
// it is possible to program the initial PC and SP. Typically, the PC is set
// to address 0, representing the start of ROM memory and the SP is set to
// the end of RAM memory. In the linker, the start of ROM memory matches
// with the .text area, which is defined in the boot.c code and the start of
// RAM memory matches with the .data and other volatile data, in a way that
// the stack can be positioned in the top of RAM and does not match with the
// .data.
`define __HARVARD__
// full harvard architecture:
// When defined, enforces that the instruction and data buses are connected
// be better allocated, but in this case it is not possible to protect the .text
// area as in the case of separate memory banks.
`define __FLEXBUZZ__
//`define __HARVARD__
// flexbuzz interface (experimental):
// in order to insert wait-states and perform the required multiplexing to fit
// the DLEN operand size in the data bus width available.
`define __FLEXBUZZ__
// initial PC and SP
// it is possible to program the initial PC and SP. Typically, the PC is set
// to address 0, representing the start of ROM memory and the SP is set to
// the end of RAM memory. In the linker, the start of ROM memory matches
// with the .text area, which is defined in the boot.c code and the start of
// RAM memory matches with the .data and other volatile data, in a way that
// the stack can be positioned in the top of RAM and does not match with the
// .data.
`define __RESETPC__ 32'd0
`define __RESETSP__ 32'd8192
input RES, // reset
input HLT, // halt
`ifdef __THREADING__
input IREQ, // irq req
//`ifdef __THREADING__
// input IREQ, // irq req
input [31:0] IDATA, // instruction data bus
output [31:0] IADDR, // instruction addr bus
output WR, // write enable
output RD, // read enable
output [3:0] DEBUG // old-school osciloscope based debug! :)
wire [31:0] ALL1 = -1;
`ifdef __THREADING__
reg XMODE = 0; // 0 = user, 1 = exception
reg XMODE = 0; // thread ptr
// pre-decode: IDATA is break apart as described in the RV32I specification
wire JREQ = (JAL||JALR||BMUX);
wire [31:0] JVAL = JALR ? DADDR : PC+SIMM; // SIMM + (JALR ? U1REG : PC);
`ifdef __PERFMETER__
integer clocks=0, user=0, super=0, halt=0, flush=0;
integer clocks=0, thread0=0, thread1=0, load=0, store=0, flush=0, halt=0;
always@(posedge CLK)
clocks = clocks+1;
if(SCC) store = store+1;
else if(LCC) load = load +1;
else halt = halt +1;
`ifdef __THREADING__
if(XMODE==0 && !HLT && !FLUSH) user = user +1;
if(XMODE==1 && !HLT && !FLUSH) super = super+1;
if(XMODE==0) thread0 = thread0+1;
if(XMODE==1) thread1 = thread1+1;
if(!HLT && !FLUSH) user = user +1;
thread0 = thread0 +1;
if(HLT) halt=halt+1;
if(FLUSH) flush=flush+1;
if(clocks && clocks%`__PERFMETER__==0)
$display("%d clocks: %0d%% user, %0d%% super, %0d%% ws, %0d%% flush",
$display("DarkRISCV Pipeline Report:");
$display("core0 clocks: %0d",clocks);
$display("core0 running: %0d%% (%0d%% thread0, %0d%% thread1)",
$display("core0 halted: %0d%% (%0d%% load, %0d%% store, %0d%% busy)",
$display("core0 stalled: %0d%%",100.0*flush/clocks);
always@(posedge CLK)
NXPC2[XMODE]+4; // normal flow
XMODE <= XRES ? 0 : HLT ? XMODE : // reset and halt
XMODE==0&& IREQ&&(JAL||JALR||BMUX) ? 1 : // wait pipeflush to switch to irq
XMODE==1&&!IREQ&&(JAL||JALR||BMUX) ? 0 : XMODE; // wait pipeflush to return from irq
XMODE==0/*&& IREQ*/&&(JAL||JALR||BMUX) ? 1 : // wait pipeflush to switch to irq
XMODE==1/*&&!IREQ*/&&(JAL||JALR||BMUX) ? 0 : XMODE; // wait pipeflush to return from irq
NXPC <= /*XRES ? `__RESETPC__ :*/ HLT ? NXPC : NXPC2;
reg [1:0] DACK = 0;
wire WHIT = 1;
wire DHIT = !((RD||WR) && DACK!=1); // the WR operatio does not need ws. in this config.
wire DHIT = !((RD
`ifdef __RMW_CYCLE__
||WR // worst code ever! but it is 3:12am...
) && DACK!=1); // the WR operatio does not need ws. in this config.
always@(posedge CLK) // stage #1.0
DACK <= RES ? 0 : DACK ? DACK-1 : (RD||WR) ? 1 : 0; // wait-states
DACK <= RES ? 0 : DACK ? DACK-1 : (RD
`ifdef __RMW_CYCLE__
||WR // 2nd worst code ever!
) ? 1 : 0; // wait-states
always@(posedge CLK)
`ifdef __3STAGE__
`ifdef __RMW_CYCLE__
// read-modify-write operation w/ 1 wait-state:
// write-only operation w/ 0 wait-states:
`ifdef __HARVARD__
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) RAM[DADDR[11:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) RAM[DADDR[11:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) RAM[DADDR[11:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) RAM[DADDR[11:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) RAM[DADDR[11:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) RAM[DADDR[11:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) RAM[DADDR[11:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) RAM[DADDR[11:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) MEM[DADDR[12:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) MEM[DADDR[12:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) MEM[DADDR[12:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8];
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) MEM[DADDR[12:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) MEM[DADDR[12:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) MEM[DADDR[12:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) MEM[DADDR[12:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8];
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) MEM[DADDR[12:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8];
`ifdef __THREADING__
//`ifdef __THREADING__
input RXD, // UART recv line
output TXD, // UART xmit line
output reg FINISH_REQ = 0,
output [3:0] DEBUG // osc debug
if(DATAI[15:8]==">") // prompt '>'
$display(" no UART input, finishing simulation...");
$display(" no UART input, end simulation request...");
UART_XREQ <= !UART_XACK; // activate UART!

