URL
https://opencores.org/ocsvn/altor32/altor32/trunk
Subversion Repositories altor32
Compare Revisions
- This comparison shows the changes necessary to convert path
/altor32/trunk/rtl/cpu
- from Rev 39 to Rev 40
- ↔ Reverse comparison
Rev 39 → Rev 40
/altor32_writeback.v
64,17 → 64,16
input mem_ready_i /*verilator public*/, |
|
// Multiplier result |
input mult_i /*verilator public*/, |
input [31:0] mult_result_i /*verilator public*/, |
input [63:0] mult_result_i /*verilator public*/, |
|
// Outputs |
output write_enable_o /*verilator public*/, |
output [4:0] write_addr_o /*verilator public*/, |
output [31:0] write_data_o /*verilator public*/ |
output reg write_enable_o /*verilator public*/, |
output reg [4:0] write_addr_o /*verilator public*/, |
output reg [31:0] write_data_o /*verilator public*/ |
); |
|
//----------------------------------------------------------------- |
// Registers |
// Registers / Wires |
//----------------------------------------------------------------- |
|
// Register address |
88,8 → 87,10
// Register writeback enable |
reg write_rd_q; |
|
reg [1:0] mem_offset_q; |
|
//------------------------------------------------------------------- |
// Writeback |
// Pipeline Registers |
//------------------------------------------------------------------- |
always @ (posedge clk_i or posedge rst_i) |
begin |
99,6 → 100,7
result_q <= 32'h00000000; |
rd_q <= 5'b00000; |
opcode_q <= 8'b0; |
mem_offset_q <= 2'b0; |
end |
else |
begin |
105,8 → 107,9
rd_q <= rd_i; |
result_q <= alu_result_i; |
|
opcode_q <= {2'b00,opcode_i[31:26]}; |
|
opcode_q <= {2'b00,opcode_i[31:26]}; |
mem_offset_q<= mem_offset_i; |
|
// Register writeback required? |
if (rd_i != 5'b00000) |
write_rd_q <= 1'b1; |
129,7 → 132,7
|
// Memory load result |
.mem_result_i(mem_result_i), |
.mem_offset_i(mem_offset_i), |
.mem_offset_i(mem_offset_q), |
|
// Result |
.load_result_o(load_result_w), |
137,10 → 140,24
); |
|
//------------------------------------------------------------------- |
// Assignments |
// Writeback |
//------------------------------------------------------------------- |
assign write_enable_o = load_inst_w ? (write_rd_q & mem_ready_i) : write_rd_q; |
assign write_data_o = load_inst_w ? load_result_w : (mult_i ? mult_result_i : result_q); |
assign write_addr_o = rd_q; |
always @ * |
begin |
write_addr_o = rd_q; |
|
// Load result |
if (load_inst_w) |
begin |
write_enable_o = write_rd_q & mem_ready_i; |
write_data_o = load_result_w; |
end |
// Normal ALU instruction |
else |
begin |
write_enable_o = write_rd_q; |
write_data_o = result_q; |
end |
end |
|
endmodule |
/altor32_icache.v
70,28 → 70,37
//----------------------------------------------------------------- |
parameter BOOT_VECTOR = 32'h00000000; |
|
// Option: Number of ways (supports 1 or 2) |
parameter CACHE_NUM_WAYS = 1; |
|
// Option: Number of cache lines (2^param) * line_size_bytes = cache size |
parameter CACHE_LINE_ADDR_WIDTH = 8 - (CACHE_NUM_WAYS-1); /* 256 lines total across all ways */ |
|
parameter CACHE_LINE_SIZE_WIDTH = 5; /* 5-bits -> 32 entries */ |
parameter CACHE_LINE_SIZE_BYTES = 2 ** CACHE_LINE_SIZE_WIDTH; /* 32 bytes / 4 words per line */ |
parameter CACHE_LINE_ADDR_WIDTH = 8; /* 256 lines */ |
parameter CACHE_LINE_WORDS_IDX_MAX = CACHE_LINE_SIZE_WIDTH - 2; /* 3-bit = 111 */ |
parameter CACHE_TAG_ENTRIES = 2 ** CACHE_LINE_ADDR_WIDTH ; /* 256 tag entries */ |
parameter CACHE_DSIZE = CACHE_LINE_ADDR_WIDTH * CACHE_LINE_SIZE_BYTES; /* 8KB data */ |
parameter CACHE_LINE_SIZE_BYTES = 2 ** CACHE_LINE_SIZE_WIDTH; /* 32 bytes / 8 words per line */ |
|
parameter CACHE_TAG_ENTRIES = 2 ** CACHE_LINE_ADDR_WIDTH ; /* 256 tag entries (1-way) or 128 tag entries per way (2-way) */ |
parameter CACHE_DSIZE = CACHE_NUM_WAYS * (2 ** CACHE_LINE_ADDR_WIDTH) * CACHE_LINE_SIZE_BYTES; /* 8KB data */ |
parameter CACHE_DWIDTH = CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 2; /* 11-bits (1-way) or 10-bits (2-way) */ |
|
parameter CACHE_TAG_WIDTH = 16; /* 16-bit tag entry size */ |
parameter CACHE_TAG_LINE_ADDR_WIDTH = CACHE_TAG_WIDTH - 1; /* 15 bits of data (tag entry size minus valid bit) */ |
parameter CACHE_TAG_STAT_BITS = 1 + (CACHE_NUM_WAYS-1); |
|
parameter CACHE_TAG_LINE_ADDR_WIDTH = CACHE_TAG_WIDTH - CACHE_TAG_STAT_BITS; /* 15 (1-way) or 14 (2-way) address bits (tag entry size minus valid / LRU bits) */ |
|
parameter CACHE_TAG_ADDR_LOW = CACHE_LINE_SIZE_WIDTH + CACHE_LINE_ADDR_WIDTH; |
parameter CACHE_TAG_ADDR_HIGH = CACHE_TAG_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH + CACHE_LINE_ADDR_WIDTH - 1; |
|
// Tag fields |
parameter CACHE_TAG_VALID_BIT = 15; |
parameter CACHE_TAG_LRU_BIT = 14; // If CACHE_NUM_WAYS > 1 |
parameter CACHE_TAG_ADDR_BITS = CACHE_TAG_WIDTH - CACHE_TAG_STAT_BITS; |
|
// 31 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 |
// |--------------| | | | | | | | | | | | | | | | | |
// +--------------------+ +-------------------+ +-----------+ |
// Tag entry Line address Address |
// (15-bits) (8-bits) within line |
// +-----------------+ +-------------------+ +-----------+ |
// Tag entry Line address Address |
// (14/15-bits) (7/8-bits) within line |
// (5-bits) |
|
//----------------------------------------------------------------- |
99,9 → 108,11
//----------------------------------------------------------------- |
|
// Tag read / write data |
wire [CACHE_TAG_WIDTH-1:0] tag_out_w; |
reg [CACHE_TAG_WIDTH-1:0] tag_in_r; |
reg tag_wr_r; |
wire [CACHE_TAG_WIDTH-1:0] tag_out0_w; |
reg [CACHE_TAG_WIDTH-1:0] tag_in0_r; |
wire [CACHE_TAG_WIDTH-1:0] tag_out1_w; |
reg [CACHE_TAG_WIDTH-1:0] tag_in1_r; |
|
// Tag address |
wire [CACHE_LINE_ADDR_WIDTH-1:0] tag_address_w; |
109,8 → 120,12
// Data memory read / write |
wire [CACHE_DWIDTH-1:0] address_rd_w; |
wire [CACHE_DWIDTH-1:0] address_wr_w; |
wire cache_wr_w; |
|
wire cache_wr0_w; |
wire cache_wr1_w; |
|
reg way_update_q; |
|
// Current / Miss PC |
reg [31:0] last_pc_q; |
reg [31:0] miss_pc_q; |
139,20 → 154,23
assign address_rd_w = pc_i[CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 1:2]; |
|
// Cache miss output if requested PC is not in the tag memory |
wire miss_w = ~tag_out_w[CACHE_TAG_VALID_BIT] | |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out_w[14:0]); |
wire miss0_w = ~tag_out0_w[CACHE_TAG_VALID_BIT] | |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out0_w[CACHE_TAG_ADDR_BITS-1:0]); |
|
wire miss1_w = ~tag_out1_w[CACHE_TAG_VALID_BIT] | |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out1_w[CACHE_TAG_ADDR_BITS-1:0]); |
|
// Stall the CPU if cache state machine is not idle! |
wire busy_w = (state_q != STATE_CHECK) | read_while_busy_q; |
|
// Cache output valid |
assign valid_o = busy_w ? 1'b0 : ~miss_w; |
assign valid_o = busy_w ? 1'b0 : ~(miss0_w & miss1_w); |
|
// Flushing: Last line to flush |
wire flush_last_w = (flush_addr_q == {CACHE_LINE_ADDR_WIDTH{1'b0}}); |
|
// Is this a cache miss? |
wire cache_miss_w = miss_w & // Tag lookup failed |
wire cache_miss_w = miss0_w & miss1_w & // Tag lookup failed |
!rd_i & // NOT new read request cycle |
!read_while_busy_q & // NOT pending read whilst busy |
!flush_q & // NOT flush request |
228,18 → 246,50
end |
|
//----------------------------------------------------------------- |
// Select way to be replaced |
//----------------------------------------------------------------- |
reg lru_way_r; |
|
// 2-Way |
generate |
if (CACHE_NUM_WAYS >= 2) |
begin: LRU_SELECT |
always @ * |
begin |
if (tag_out0_w[CACHE_TAG_LRU_BIT]) |
lru_way_r = 1'b0; |
else |
lru_way_r = 1'b1; |
end |
end |
// 1-Way |
else |
begin: LRU_FIXED |
wire lru_way_w = 1'b0; |
always @ * |
lru_way_r = lru_way_w; |
end |
endgenerate |
|
//----------------------------------------------------------------- |
// Flop request details |
//----------------------------------------------------------------- |
reg [CACHE_LINE_ADDR_WIDTH-1:0] tag_address_q; |
|
always @ (posedge rst_i or posedge clk_i ) |
begin |
if (rst_i == 1'b1) |
begin |
miss_pc_q <= BOOT_VECTOR + `VECTOR_RESET; |
last_pc_q <= 32'h00000000; |
miss_pc_q <= BOOT_VECTOR + `VECTOR_RESET; |
last_pc_q <= 32'h00000000; |
|
tag_address_q <= {CACHE_LINE_ADDR_WIDTH{1'b0}}; |
way_update_q <= 1'b0; |
end |
else |
begin |
last_pc_q <= pc_i; |
last_pc_q <= pc_i; |
tag_address_q <= tag_address_w; |
|
case (state_q) |
|
250,8 → 300,16
begin |
// Cache hit (or new read request), store fetch PC |
if (!cache_miss_w) |
begin |
miss_pc_q <= pc_i; |
end |
end |
// Cache miss |
else |
begin |
// Select line way to replace |
way_update_q <= lru_way_r; |
end |
end |
default: |
; |
endcase |
295,8 → 353,9
//----------------------------------------------------------------- |
always @ * |
begin |
tag_in_r = {CACHE_TAG_WIDTH{1'b0}}; |
tag_wr_r = 1'b0; |
tag_in0_r = tag_out0_w; |
tag_in1_r = tag_out1_w; |
tag_wr_r = 1'b0; |
|
case (state_q) |
|
309,9 → 368,51
if (cache_miss_w) |
begin |
// Update tag memory with this line's details |
tag_in_r = {1'b1, miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW]}; |
if (lru_way_r) |
begin |
tag_in1_r[CACHE_TAG_ADDR_BITS-1:0] = miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW]; |
tag_in1_r[CACHE_TAG_VALID_BIT] = 1'b1; |
|
if (CACHE_NUM_WAYS >= 2) |
begin |
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b0; |
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b1; |
end |
end |
else |
begin |
tag_in0_r[CACHE_TAG_ADDR_BITS-1:0] = miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW]; |
tag_in0_r[CACHE_TAG_VALID_BIT] = 1'b1; |
|
if (CACHE_NUM_WAYS >= 2) |
begin |
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b0; |
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b1; |
end |
end |
|
tag_wr_r = 1'b1; |
end |
// Update LRU (if possible) |
else if ((tag_address_q == tag_address_w) && (CACHE_NUM_WAYS >= 2)) |
begin |
// Hit Way 0 |
if (!miss0_w) |
begin |
// Least recently used way is 1 |
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b1; |
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b0; |
end |
// Hit Way 1 |
else |
begin |
// Least recently used way is 0 |
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b1; |
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b0; |
end |
|
tag_wr_r = 1'b1; |
end |
end |
default: |
; |
415,17 → 516,20
//----------------------------------------------------------------- |
// Tag memory |
//----------------------------------------------------------------- |
altor32_ram_dp |
wire [(CACHE_NUM_WAYS*CACHE_TAG_WIDTH)-1:0] tag_in; |
wire [(CACHE_NUM_WAYS*CACHE_TAG_WIDTH)-1:0] tag_out; |
|
altor32_ram_dp |
#( |
.WIDTH(CACHE_TAG_WIDTH), |
.WIDTH(CACHE_TAG_WIDTH * CACHE_NUM_WAYS), |
.SIZE(CACHE_LINE_ADDR_WIDTH) |
) |
u1_tag_mem |
u_tag_mem |
( |
// Tag read/write port |
.aclk_i(clk_i), |
.adat_o(tag_out_w), |
.adat_i(tag_in_r), |
.adat_o(tag_out), |
.adat_i(tag_in), |
.aadr_i(tag_address_w), |
.awr_i(tag_wr_r), |
|
433,24 → 537,67
.bclk_i(clk_i), |
.badr_i(flush_addr_q), |
.bdat_o(/*open*/), |
.bdat_i({CACHE_TAG_WIDTH{1'b0}}), |
.bwr_i(flush_wr_q) |
.bdat_i({(CACHE_NUM_WAYS*CACHE_TAG_WIDTH){1'b0}}), |
.bwr_i(flush_wr_q) |
); |
|
// 2-Way |
generate |
if (CACHE_NUM_WAYS >= 2) |
begin: TAG_2WAY |
assign tag_in = {tag_in1_r, tag_in0_r}; |
assign {tag_out1_w, tag_out0_w} = tag_out; |
end |
// 1-Way |
else |
begin: TAG_1WAY |
assign tag_in = tag_in0_r; |
assign tag_out0_w = tag_out; |
assign tag_out1_w = {(CACHE_TAG_WIDTH){1'b0}}; |
end |
endgenerate |
|
//----------------------------------------------------------------- |
// Data memory |
//----------------------------------------------------------------- |
altor32_ram_dp |
wire [31:0] way0_instruction_w /*verilator public*/; |
wire [31:0] way1_instruction_w /*verilator public*/; |
|
// Way 0 Instruction Memory |
altor32_ram_dp |
#( |
.WIDTH(32), |
.SIZE(CACHE_DWIDTH) |
) |
u2_data_mem |
u2_data_way0 |
( |
// Data read port |
.aclk_i(clk_i), |
.aadr_i(address_rd_w), |
.adat_o(way0_instruction_w), |
.adat_i(32'h00), |
.awr_i(1'b0), |
|
// Data write port |
.bclk_i(clk_i), |
.badr_i(address_wr_w), |
.bdat_o(/*open*/), |
.bdat_i(mem_data_w), |
.bwr_i(cache_wr0_w) |
); |
|
// Way 1 Instruction Memory |
altor32_ram_dp |
#( |
.WIDTH(32), |
.SIZE(CACHE_DWIDTH) |
) |
u2_data_way1 |
( |
// Data read port |
.aclk_i(clk_i), |
.aadr_i(address_rd_w), |
.adat_o(instruction_o), |
.adat_o(way1_instruction_w), |
.adat_i(32'h00), |
.awr_i(1'b0), |
|
459,13 → 606,16
.badr_i(address_wr_w), |
.bdat_o(/*open*/), |
.bdat_i(mem_data_w), |
.bwr_i(cache_wr_w) |
.bwr_i(cache_wr1_w) |
); |
|
// Select between ways for result |
assign instruction_o = (miss0_w == 1'b0) ? way0_instruction_w : way1_instruction_w; |
|
// Write to cache on wishbone response |
assign address_wr_w = {miss_pc_q[CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 1:CACHE_LINE_SIZE_WIDTH], mem_resp_addr_w[CACHE_LINE_SIZE_WIDTH-1:2]}; |
|
assign cache_wr_w = (state_q == STATE_FETCH) & mem_valid_w; |
assign cache_wr0_w = (state_q == STATE_FETCH) & mem_valid_w & ~way_update_q; |
assign cache_wr1_w = (state_q == STATE_FETCH) & mem_valid_w & way_update_q; |
|
endmodule |
|
/altor32_dfu.v
64,14 → 64,8
input [4:0] rd_load_i /*verilator public*/, |
|
// Multiplier status |
input mult_lo_ex_i /*verilator public*/, |
input mult_hi_ex_i /*verilator public*/, |
input mult_lo_wb_i /*verilator public*/, |
input mult_hi_wb_i /*verilator public*/, |
input mult_ex_i /*verilator public*/, |
|
// Multiplier result |
input [63:0] result_mult_i /*verilator public*/, |
|
// Result (EXEC) |
input [31:0] result_ex_i /*verilator public*/, |
|
124,7 → 118,7
else if (ra_i == rd_ex_i) |
begin |
// Multiplier has one cycle latency, stall if needed now |
if (mult_lo_ex_i | mult_hi_wb_i) |
if (mult_ex_i) |
stall_o = 1'b1; |
else |
begin |
140,12 → 134,7
//--------------------------------------------------------------- |
else if (ra_i == rd_wb_i) |
begin |
if (mult_hi_wb_i) |
result_ra_o = result_mult_i[63:32]; |
else if (mult_lo_wb_i) |
result_ra_o = result_mult_i[31:0]; |
else |
result_ra_o = result_wb_i; |
result_ra_o = result_wb_i; |
|
resolved_o = 1'b1; |
`ifdef CONF_CORE_DEBUG |
179,7 → 168,7
else if (rb_i == rd_ex_i) |
begin |
// Multiplier has one cycle latency, stall if needed now |
if (mult_lo_ex_i | mult_hi_wb_i) |
if (mult_ex_i) |
stall_o = 1'b1; |
else |
begin |
195,13 → 184,8
// RB from PC-8 (writeback) |
//--------------------------------------------------------------- |
else if (rb_i == rd_wb_i) |
begin |
if (mult_hi_wb_i) |
result_rb_o = result_mult_i[63:32]; |
else if (mult_lo_wb_i) |
result_rb_o = result_mult_i[31:0]; |
else |
result_rb_o = result_wb_i; |
begin |
result_rb_o = result_wb_i; |
|
resolved_o = 1'b1; |
|
/altor32.v
125,8 → 125,7
|
// Result from execute |
wire [31:0] ex_result_w; |
wire ex_mult_w; |
wire [31:0] ex_mult_res_w; |
wire [63:0] ex_mult_res_w; |
|
// Branch request |
wire ex_branch_w; |
432,7 → 431,6
.opcode_pc_o(/* not used */), |
.reg_rd_o(ex_rd_w), |
.reg_rd_value_o(ex_result_w), |
.mult_o(ex_mult_w), |
.mult_res_o(ex_mult_res_w), |
|
// Register write back bypass |
476,7 → 474,6
.mem_ready_i(dcache_ack_w), |
|
// Multiplier result |
.mult_i(ex_mult_w), |
.mult_result_i(ex_mult_res_w), |
|
// Outputs |
/altor32_exec.v
89,8 → 89,7
output [31:0] opcode_pc_o /*verilator public*/, |
output [4:0] reg_rd_o /*verilator public*/, |
output [31:0] reg_rd_value_o /*verilator public*/, |
output mult_o /*verilator public*/, |
output [31:0] mult_res_o /*verilator public*/, |
output [63:0] mult_res_o /*verilator public*/, |
|
// Register write back bypass |
input [4:0] wb_rd_i /*verilator public*/, |
313,14 → 312,8
.rd_load_i(load_rd_q), |
|
// Multiplier status |
.mult_lo_ex_i(1'b0), |
.mult_hi_ex_i(1'b0), |
.mult_lo_wb_i(1'b0), |
.mult_hi_wb_i(1'b0), |
.mult_ex_i(1'b0), |
|
// Multiplier result |
.result_mult_i(64'b0), |
|
// Result (EXEC) |
.result_ex_i(ex_result_w), |
|
663,12 → 656,6
write_rd_r = 1'b1; |
end |
|
inst_mul_w, // l.mul |
inst_mulu_w: // l.mulu |
begin |
write_rd_r = 1'b1; |
end |
|
inst_addi_w: // l.addi |
begin |
alu_func_r = `ALU_ADD; |
1621,8 → 1608,7
assign reg_rd_o = ex_rd_q; |
assign reg_rd_value_o = ex_result_w; |
|
assign mult_o = 1'b0; |
assign mult_res_o = 32'b0; |
assign mult_res_o = 64'b0; |
|
//------------------------------------------------------------------- |
// Hooks for debug |