URL
https://opencores.org/ocsvn/darkriscv/darkriscv/trunk
Subversion Repositories darkriscv
Compare Revisions
- This comparison shows the changes necessary to convert path
/darkriscv/trunk/rtl
- from Rev 2 to Rev 4
- ↔ Reverse comparison
Rev 2 → Rev 4
/config.vh
66,14 → 66,32
|
`define __3STAGE__ |
|
// read-modify-write cycle: |
// |
// Generate RMW cycles when writing in the memory. This option basically |
// makes the read and write cycle symmetric and may work better in the cases |
// when the 32-bit memory does not support separate write enables for |
// separate 16-bit and 8-bit words. Typically, the RMW cycle results in a |
// decrease of 5% in the performance (not the clock, but the instruction |
// pipeline efficiency) due to memory wait-states. |
// Additional note: the RMW cycle is required for -O3 compilation! |
|
//`define __RMW_CYCLE__ |
|
// multi-threading support: |
// |
// Decreases clock performance by 10% (90MHz), but enables two contexts |
// (threads) in the core. They start in the same code, but the "interrupt" |
// handling is locked in a separate loop and the context switch is always |
// delayed until the next pipeline flush, in order to decrease the |
// performance impact. Note: threading is currently supported only in the |
// 3-stage pipeline version. |
// Decreases clock performance by 20% (80MHz), but enables two contexts |
// (threads) in the core. The threads work in a symmetrical way, which means |
// that they will start with exactly the same core parameters (same initial |
// PC, same initial SP, etc). The boot.s code is designed to handle this |
// difference and set each thread to different applications. |
// Notes: |
// a) threading is currently supported only in the 3-stage pipeline version. |
// b) the old experimental "interrupt mode" was removed, which means that |
// the multi-thread mode does not make anything "visible" other than |
// increment the gpio register. |
// c) the threading in the non interrupt mode just shares the core 50%/50%, |
// in a way that the single-thread performance is reduced. |
|
//`define __THREADING__ |
|
81,10 → 99,10
// |
// The performance measurement can be done in the simulation level by |
// enabling the __PERFMETER__ define, in order to check how the clock cycles |
// are used in the core. The value defines how many clocks are computed |
// before printing the result. |
// are used in the core. The report is displayed when the FINISH_REQ signal |
// is activated by the UART. |
|
//`define __PERFMETER__ 70000 |
`define __PERFMETER__ |
|
// mac instruction: |
// |
106,18 → 124,6
|
`define __RV32E__ |
|
// initial PC and SP |
// |
// it is possible to program the initial PC and SP. Typically, the PC is set |
// to address 0, representing the start of ROM memory and the SP is set to |
// the end of RAM memory. In the linker, the start of ROM memory matches |
// with the .text area, which is defined in the boot.c code and the start of |
// RAM memory matches with the .data and other volatile data, in a way that |
// the stack can be positioned in the top of RAM and does not match with the |
// .data. |
|
`define __HARVARD__ |
|
// full harvard architecture: |
// |
// When defined, enforces that the instruction and data buses are connected |
132,7 → 138,7
// be better allocated, but in this case is not possible protect the .text |
// area as in the case of separate memory banks. |
|
`define __FLEXBUZZ__ |
//`define __HARVARD__ |
|
// flexbuzz interface (experimental): |
// |
145,6 → 151,18
// in order to insert wait-states and perform the required multiplexing to fit |
// the DLEN operand size in the data bus width available. |
|
`define __FLEXBUZZ__ |
|
// initial PC and SP |
// |
// it is possible to program the initial PC and SP. Typically, the PC is set |
// to address 0, representing the start of ROM memory and the SP is set to |
// the end of RAM memory. In the linker, the start of ROM memory matches |
// with the .text area, which is defined in the boot.c code and the start of |
// RAM memory matches with the .data and other volatile data, in a way that |
// the stack can be positioned in the top of RAM and does not match with the |
// .data. |
|
`define __RESETPC__ 32'd0 |
`define __RESETSP__ 32'd8192 |
|
/darkriscv.v
62,9 → 62,9
input RES, // reset |
input HLT, // halt |
|
`ifdef __THREADING__ |
input IREQ, // irq req |
`endif |
//`ifdef __THREADING__ |
// input IREQ, // irq req |
//`endif |
|
input [31:0] IDATA, // instruction data bus |
output [31:0] IADDR, // instruction addr bus |
81,7 → 81,10
output WR, // write enable |
output RD, // read enable |
`endif |
|
|
`ifdef SIMULATION |
input FINISH_REQ, |
`endif |
output [3:0] DEBUG // old-school osciloscope based debug! :) |
); |
|
91,7 → 94,7
wire [31:0] ALL1 = -1; |
|
`ifdef __THREADING__ |
reg XMODE = 0; // 0 = user, 1 = exception |
reg XMODE = 0; // thread ptr |
`endif |
|
// pre-decode: IDATA is break apart as described in the RV32I specification |
340,8 → 343,9
wire JREQ = (JAL||JALR||BMUX); |
wire [31:0] JVAL = JALR ? DADDR : PC+SIMM; // SIMM + (JALR ? U1REG : PC); |
|
`ifdef SIMULATION |
`ifdef __PERFMETER__ |
integer clocks=0, user=0, super=0, halt=0, flush=0; |
integer clocks=0, thread0=0, thread1=0, load=0, store=0, flush=0, halt=0; |
|
always@(posedge CLK) |
begin |
349,29 → 353,57
begin |
clocks = clocks+1; |
|
if(HLT) |
begin |
if(SCC) store = store+1; |
else if(LCC) load = load +1; |
else halt = halt +1; |
end |
else |
begin |
if(FLUSH) |
begin |
flush=flush+1; |
end |
else |
begin |
`ifdef __THREADING__ |
|
if(XMODE==0 && !HLT && !FLUSH) user = user +1; |
if(XMODE==1 && !HLT && !FLUSH) super = super+1; |
if(XMODE==0) thread0 = thread0+1; |
if(XMODE==1) thread1 = thread1+1; |
`else |
if(!HLT && !FLUSH) user = user +1; |
thread0 = thread0 +1; |
`endif |
|
if(HLT) halt=halt+1; |
if(FLUSH) flush=flush+1; |
end |
end |
|
if(clocks && clocks%`__PERFMETER__==0) |
if(FINISH_REQ) |
begin |
$display("%d clocks: %0d%% user, %0d%% super, %0d%% ws, %0d%% flush", |
clocks, |
100*user/clocks, |
100*super/clocks, |
100*halt/clocks, |
100*flush/clocks); |
$display("****************************************************************************"); |
$display("DarkRISCV Pipeline Report:"); |
$display("core0 clocks: %0d",clocks); |
|
$display("core0 running: %0d%% (%0d%% thread0, %0d%% thread1)", |
100.0*(thread0+thread1)/clocks, |
100.0*thread0/clocks, |
100.0*thread1/clocks); |
|
$display("core0 halted: %0d%% (%0d%% load, %0d%% store, %0d%% busy)", |
100.0*(load+store)/clocks, |
100.0*load/clocks, |
100.0*store/clocks, |
100.0*halt/clocks); |
|
$display("core0 stalled: %0d%%",100.0*flush/clocks); |
$display("****************************************************************************"); |
$finish(); |
end |
end |
end |
`else |
$finish(); |
`endif |
`endif |
|
always@(posedge CLK) |
begin |
436,8 → 468,8
NXPC2[XMODE]+4; // normal flow |
|
XMODE <= XRES ? 0 : HLT ? XMODE : // reset and halt |
XMODE==0&& IREQ&&(JAL||JALR||BMUX) ? 1 : // wait pipeflush to switch to irq |
XMODE==1&&!IREQ&&(JAL||JALR||BMUX) ? 0 : XMODE; // wait pipeflush to return from irq |
XMODE==0/*&& IREQ*/&&(JAL||JALR||BMUX) ? 1 : // wait pipeflush to switch to irq |
XMODE==1/*&&!IREQ*/&&(JAL||JALR||BMUX) ? 0 : XMODE; // wait pipeflush to return from irq |
|
`else |
NXPC <= /*XRES ? `__RESETPC__ :*/ HLT ? NXPC : NXPC2; |
/darksocv.v
510,11 → 510,19
reg [1:0] DACK = 0; |
|
wire WHIT = 1; |
wire DHIT = !((RD||WR) && DACK!=1); // the WR operatio does not need ws. in this config. |
wire DHIT = !((RD |
`ifdef __RMW_CYCLE__ |
||WR // worst code ever! but it is 3:12am... |
`endif |
) && DACK!=1); // the WR operatio does not need ws. in this config. |
|
always@(posedge CLK) // stage #1.0 |
begin |
DACK <= RES ? 0 : DACK ? DACK-1 : (RD||WR) ? 1 : 0; // wait-states |
DACK <= RES ? 0 : DACK ? DACK-1 : (RD |
`ifdef __RMW_CYCLE__ |
||WR // 2nd worst code ever! |
`endif |
) ? 1 : 0; // wait-states |
end |
|
`else |
544,7 → 552,7
always@(posedge CLK) |
begin |
|
`ifdef __3STAGE__ |
`ifdef __RMW_CYCLE__ |
|
// read-modify-write operation w/ 1 wait-state: |
|
566,15 → 574,15
`else |
// write-only operation w/ 0 wait-states: |
`ifdef __HARVARD__ |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) RAM[DADDR[11:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) RAM[DADDR[11:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) RAM[DADDR[11:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) RAM[DADDR[11:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) RAM[DADDR[11:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) RAM[DADDR[11:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) RAM[DADDR[11:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) RAM[DADDR[11:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8]; |
`else |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) MEM[DADDR[12:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) MEM[DADDR[12:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) MEM[DADDR[12:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8]; |
if(WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) MEM[DADDR[12:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[3]) MEM[DADDR[12:2]][3 * 8 + 7: 3 * 8] <= DATAO[3 * 8 + 7: 3 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[2]) MEM[DADDR[12:2]][2 * 8 + 7: 2 * 8] <= DATAO[2 * 8 + 7: 2 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[1]) MEM[DADDR[12:2]][1 * 8 + 7: 1 * 8] <= DATAO[1 * 8 + 7: 1 * 8]; |
if(!HLT&&WR&&DADDR[31]==0&&/*DADDR[12]==1&&*/BE[0]) MEM[DADDR[12:2]][0 * 8 + 7: 0 * 8] <= DATAO[0 * 8 + 7: 0 * 8]; |
`endif |
`endif |
|
687,6 → 695,9
//.IRQ(BOARD_IRQ[1]), |
.RXD(UART_RXD), |
.TXD(UART_TXD), |
`ifdef SIMULATION |
.FINISH_REQ(FINISH_REQ), |
`endif |
.DEBUG(UDEBUG) |
); |
|
708,9 → 719,9
`endif |
.RES(RES), |
.HLT(HLT), |
`ifdef __THREADING__ |
.IREQ(|(IREQ^IACK)), |
`endif |
//`ifdef __THREADING__ |
// .IREQ(|(IREQ^IACK)), |
//`endif |
.IDATA(IDATA), |
.IADDR(IADDR), |
.DADDR(DADDR), |
728,6 → 739,9
.RD(RD), |
`endif |
|
`ifdef SIMULATION |
.FINISH_REQ(FINISH_REQ), |
`endif |
.DEBUG(KDEBUG) |
); |
|
/darkuart.v
88,6 → 88,10
|
input RXD, // UART recv line |
output TXD, // UART xmit line |
|
`ifdef SIMULATION |
output reg FINISH_REQ = 0, |
`endif |
|
output [3:0] DEBUG // osc debug |
); |
140,8 → 144,8
|
if(DATAI[15:8]==">") // prompt '>' |
begin |
$display(" no UART input, finishing simulation..."); |
$finish(); |
$display(" no UART input, end simulation request..."); |
FINISH_REQ <= 1; |
end |
`else |
UART_XREQ <= !UART_XACK; // activate UART! |