OpenCores
URL https://opencores.org/ocsvn/altor32/altor32/trunk

Subversion Repositories altor32

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /altor32/trunk
    from Rev 39 to Rev 40
    Reverse comparison

Rev 39 → Rev 40

/rtl/cpu/altor32_writeback.v
64,17 → 64,16
input mem_ready_i /*verilator public*/,
 
// Multiplier result
input mult_i /*verilator public*/,
input [31:0] mult_result_i /*verilator public*/,
input [63:0] mult_result_i /*verilator public*/,
 
// Outputs
output write_enable_o /*verilator public*/,
output [4:0] write_addr_o /*verilator public*/,
output [31:0] write_data_o /*verilator public*/
output reg write_enable_o /*verilator public*/,
output reg [4:0] write_addr_o /*verilator public*/,
output reg [31:0] write_data_o /*verilator public*/
);
 
//-----------------------------------------------------------------
// Registers
// Registers / Wires
//-----------------------------------------------------------------
 
// Register address
88,8 → 87,10
// Register writeback enable
reg write_rd_q;
 
reg [1:0] mem_offset_q;
 
//-------------------------------------------------------------------
// Writeback
// Pipeline Registers
//-------------------------------------------------------------------
always @ (posedge clk_i or posedge rst_i)
begin
99,6 → 100,7
result_q <= 32'h00000000;
rd_q <= 5'b00000;
opcode_q <= 8'b0;
mem_offset_q <= 2'b0;
end
else
begin
105,8 → 107,9
rd_q <= rd_i;
result_q <= alu_result_i;
 
opcode_q <= {2'b00,opcode_i[31:26]};
opcode_q <= {2'b00,opcode_i[31:26]};
mem_offset_q<= mem_offset_i;
 
// Register writeback required?
if (rd_i != 5'b00000)
write_rd_q <= 1'b1;
129,7 → 132,7
 
// Memory load result
.mem_result_i(mem_result_i),
.mem_offset_i(mem_offset_i),
.mem_offset_i(mem_offset_q),
 
// Result
.load_result_o(load_result_w),
137,10 → 140,24
);
 
//-------------------------------------------------------------------
// Assignments
// Writeback
//-------------------------------------------------------------------
assign write_enable_o = load_inst_w ? (write_rd_q & mem_ready_i) : write_rd_q;
assign write_data_o = load_inst_w ? load_result_w : (mult_i ? mult_result_i : result_q);
assign write_addr_o = rd_q;
// Combinational selection of the register write port outputs.
// Every output is assigned on every path through the block, so no
// storage is implied by this always block.
always @ *
begin
// Destination register index comes from the flopped rd_q in both cases
write_addr_o = rd_q;
 
// Load result
if (load_inst_w)
begin
// Loads only write back in a cycle where mem_ready_i is asserted
write_enable_o = write_rd_q & mem_ready_i;
write_data_o = load_result_w;
end
// Normal ALU instruction
else
begin
// Non-load results are written from the flopped ALU result
write_enable_o = write_rd_q;
write_data_o = result_q;
end
end
 
endmodule
/rtl/cpu/altor32_icache.v
70,28 → 70,37
//-----------------------------------------------------------------
parameter BOOT_VECTOR = 32'h00000000;
 
// Option: Number of ways (supports 1 or 2)
parameter CACHE_NUM_WAYS = 1;
 
// Option: Number of cache lines (2^param) * line_size_bytes = cache size
// The per-way line index loses one bit when CACHE_NUM_WAYS is 2, so the
// total number of lines across all ways is the same for 1 or 2 ways.
parameter CACHE_LINE_ADDR_WIDTH = 8 - (CACHE_NUM_WAYS-1); /* 256 lines total across all ways */
 
parameter CACHE_LINE_SIZE_WIDTH = 5; /* 5-bit byte offset -> 32 bytes per line */
parameter CACHE_LINE_SIZE_BYTES = 2 ** CACHE_LINE_SIZE_WIDTH; /* 32 bytes / 8 x 32-bit words per line */
parameter CACHE_LINE_ADDR_WIDTH = 8; /* 256 lines */
parameter CACHE_LINE_WORDS_IDX_MAX = CACHE_LINE_SIZE_WIDTH - 2; /* 3-bit = 111 */
parameter CACHE_TAG_ENTRIES = 2 ** CACHE_LINE_ADDR_WIDTH ; /* 256 tag entries */
// NOTE(review): with a line address width of 8 this product is 8 * 32 = 256, not 8KB;
// the CACHE_DSIZE expression further down (ways * 2^width * line bytes) yields 8192.
parameter CACHE_DSIZE = CACHE_LINE_ADDR_WIDTH * CACHE_LINE_SIZE_BYTES; /* 8KB data */
parameter CACHE_LINE_SIZE_BYTES = 2 ** CACHE_LINE_SIZE_WIDTH; /* 32 bytes / 8 words per line */
 
parameter CACHE_TAG_ENTRIES = 2 ** CACHE_LINE_ADDR_WIDTH ; /* 256 tag entries with 1 way, 128 with 2 ways */
parameter CACHE_DSIZE = CACHE_NUM_WAYS * (2 ** CACHE_LINE_ADDR_WIDTH) * CACHE_LINE_SIZE_BYTES; /* 8KB data */
parameter CACHE_DWIDTH = CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 2; /* 11-bit word address with 1 way, 10-bit with 2 ways */
 
parameter CACHE_TAG_WIDTH = 16; /* 16-bit tag entry size */
parameter CACHE_TAG_LINE_ADDR_WIDTH = CACHE_TAG_WIDTH - 1; /* 15 bits of data (tag entry size minus valid bit) */
// Status bits per tag entry: valid only (1 way) or valid + LRU (2 ways)
parameter CACHE_TAG_STAT_BITS = 1 + (CACHE_NUM_WAYS-1);
 
parameter CACHE_TAG_LINE_ADDR_WIDTH = CACHE_TAG_WIDTH - CACHE_TAG_STAT_BITS; /* 15 address bits with 1 way, 14 with 2 ways (tag entry size minus valid / LRU bits) */
 
// PC bit range compared against the stored tag address in the miss0_w / miss1_w
// checks; evaluates to [27:13] with the default single-way configuration.
parameter CACHE_TAG_ADDR_LOW = CACHE_LINE_SIZE_WIDTH + CACHE_LINE_ADDR_WIDTH;
parameter CACHE_TAG_ADDR_HIGH = CACHE_TAG_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH + CACHE_LINE_ADDR_WIDTH - 1;
 
// Tag fields
parameter CACHE_TAG_VALID_BIT = 15;
parameter CACHE_TAG_LRU_BIT = 14; // If CACHE_NUM_WAYS > 1
// Number of low-order tag entry bits that hold the stored address
parameter CACHE_TAG_ADDR_BITS = CACHE_TAG_WIDTH - CACHE_TAG_STAT_BITS;
 
// 31 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
// |--------------| | | | | | | | | | | | | | | | |
// +--------------------+ +-------------------+ +-----------+
// Tag entry Line address Address
// (15-bits) (8-bits) within line
// +-----------------+ +-------------------+ +-----------+
// Tag entry Line address Address
// (14/15-bits) (7/8-bits) within line
// (5-bits)
 
//-----------------------------------------------------------------
99,9 → 108,11
//-----------------------------------------------------------------
 
// Tag read / write data
wire [CACHE_TAG_WIDTH-1:0] tag_out_w;
reg [CACHE_TAG_WIDTH-1:0] tag_in_r;
reg tag_wr_r;
wire [CACHE_TAG_WIDTH-1:0] tag_out0_w;
reg [CACHE_TAG_WIDTH-1:0] tag_in0_r;
wire [CACHE_TAG_WIDTH-1:0] tag_out1_w;
reg [CACHE_TAG_WIDTH-1:0] tag_in1_r;
 
// Tag address
wire [CACHE_LINE_ADDR_WIDTH-1:0] tag_address_w;
109,8 → 120,12
// Data memory read / write
wire [CACHE_DWIDTH-1:0] address_rd_w;
wire [CACHE_DWIDTH-1:0] address_wr_w;
wire cache_wr_w;
 
wire cache_wr0_w;
wire cache_wr1_w;
 
reg way_update_q;
 
// Current / Miss PC
reg [31:0] last_pc_q;
reg [31:0] miss_pc_q;
139,20 → 154,23
assign address_rd_w = pc_i[CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 1:2];
 
// Cache miss output if requested PC is not in the tag memory
wire miss_w = ~tag_out_w[CACHE_TAG_VALID_BIT] |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out_w[14:0]);
wire miss0_w = ~tag_out0_w[CACHE_TAG_VALID_BIT] |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out0_w[CACHE_TAG_ADDR_BITS-1:0]);
 
wire miss1_w = ~tag_out1_w[CACHE_TAG_VALID_BIT] |
(last_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW] != tag_out1_w[CACHE_TAG_ADDR_BITS-1:0]);
 
// Stall the CPU if cache state machine is not idle!
wire busy_w = (state_q != STATE_CHECK) | read_while_busy_q;
 
// Cache output valid
assign valid_o = busy_w ? 1'b0 : ~miss_w;
assign valid_o = busy_w ? 1'b0 : ~(miss0_w & miss1_w);
 
// Flushing: Last line to flush
wire flush_last_w = (flush_addr_q == {CACHE_LINE_ADDR_WIDTH{1'b0}});
 
// Is this a cache miss?
wire cache_miss_w = miss_w & // Tag lookup failed
wire cache_miss_w = miss0_w & miss1_w & // Tag lookup failed
!rd_i & // NOT new read request cycle
!read_while_busy_q & // NOT pending read whilst busy
!flush_q & // NOT flush request
228,18 → 246,50
end
 
//-----------------------------------------------------------------
// Select way to be replaced
//-----------------------------------------------------------------
// lru_way_r selects which way is refilled on a miss: 0 = way 0, 1 = way 1.
// It is captured into way_update_q and steers the tag update on a miss.
reg lru_way_r;
 
// 2-Way
generate
if (CACHE_NUM_WAYS >= 2)
begin: LRU_SELECT
always @ *
begin
// Only way 0's LRU bit is examined: set -> replace way 0,
// clear -> replace way 1.
if (tag_out0_w[CACHE_TAG_LRU_BIT])
lru_way_r = 1'b0;
else
lru_way_r = 1'b1;
end
end
// 1-Way
else
begin: LRU_FIXED
// Single way: the replacement target is always way 0.
// NOTE(review): the constant is presumably routed through a wire so the
// always @* block has a signal to be sensitive to in simulation - confirm.
wire lru_way_w = 1'b0;
always @ *
lru_way_r = lru_way_w;
end
endgenerate
 
//-----------------------------------------------------------------
// Flop request details
//-----------------------------------------------------------------
reg [CACHE_LINE_ADDR_WIDTH-1:0] tag_address_q;
 
always @ (posedge rst_i or posedge clk_i )
begin
if (rst_i == 1'b1)
begin
miss_pc_q <= BOOT_VECTOR + `VECTOR_RESET;
last_pc_q <= 32'h00000000;
miss_pc_q <= BOOT_VECTOR + `VECTOR_RESET;
last_pc_q <= 32'h00000000;
 
tag_address_q <= {CACHE_LINE_ADDR_WIDTH{1'b0}};
way_update_q <= 1'b0;
end
else
begin
last_pc_q <= pc_i;
last_pc_q <= pc_i;
tag_address_q <= tag_address_w;
case (state_q)
 
250,8 → 300,16
begin
// Cache hit (or new read request), store fetch PC
if (!cache_miss_w)
begin
miss_pc_q <= pc_i;
end
end
// Cache miss
else
begin
// Select line way to replace
way_update_q <= lru_way_r;
end
end
default:
;
endcase
295,8 → 353,9
//-----------------------------------------------------------------
always @ *
begin
tag_in_r = {CACHE_TAG_WIDTH{1'b0}};
tag_wr_r = 1'b0;
tag_in0_r = tag_out0_w;
tag_in1_r = tag_out1_w;
tag_wr_r = 1'b0;
 
case (state_q)
 
309,9 → 368,51
if (cache_miss_w)
begin
// Update tag memory with this line's details
tag_in_r = {1'b1, miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW]};
if (lru_way_r)
begin
tag_in1_r[CACHE_TAG_ADDR_BITS-1:0] = miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW];
tag_in1_r[CACHE_TAG_VALID_BIT] = 1'b1;
 
if (CACHE_NUM_WAYS >= 2)
begin
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b0;
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b1;
end
end
else
begin
tag_in0_r[CACHE_TAG_ADDR_BITS-1:0] = miss_pc_q[CACHE_TAG_ADDR_HIGH:CACHE_TAG_ADDR_LOW];
tag_in0_r[CACHE_TAG_VALID_BIT] = 1'b1;
 
if (CACHE_NUM_WAYS >= 2)
begin
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b0;
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b1;
end
end
 
tag_wr_r = 1'b1;
end
// Update LRU (if possible)
else if ((tag_address_q == tag_address_w) && (CACHE_NUM_WAYS >= 2))
begin
// Hit Way 0
if (!miss0_w)
begin
// Least recently used way is 1
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b1;
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b0;
end
// Hit Way 1
else
begin
// Least recently used way is 0
tag_in0_r[CACHE_TAG_LRU_BIT] = 1'b1;
tag_in1_r[CACHE_TAG_LRU_BIT] = 1'b0;
end
 
tag_wr_r = 1'b1;
end
end
default:
;
415,17 → 516,20
//-----------------------------------------------------------------
// Tag memory
//-----------------------------------------------------------------
altor32_ram_dp
wire [(CACHE_NUM_WAYS*CACHE_TAG_WIDTH)-1:0] tag_in;
wire [(CACHE_NUM_WAYS*CACHE_TAG_WIDTH)-1:0] tag_out;
 
altor32_ram_dp
#(
.WIDTH(CACHE_TAG_WIDTH),
.WIDTH(CACHE_TAG_WIDTH * CACHE_NUM_WAYS),
.SIZE(CACHE_LINE_ADDR_WIDTH)
)
u1_tag_mem
u_tag_mem
(
// Tag read/write port
.aclk_i(clk_i),
.adat_o(tag_out_w),
.adat_i(tag_in_r),
.adat_o(tag_out),
.adat_i(tag_in),
.aadr_i(tag_address_w),
.awr_i(tag_wr_r),
433,24 → 537,67
.bclk_i(clk_i),
.badr_i(flush_addr_q),
.bdat_o(/*open*/),
.bdat_i({CACHE_TAG_WIDTH{1'b0}}),
.bwr_i(flush_wr_q)
.bdat_i({(CACHE_NUM_WAYS*CACHE_TAG_WIDTH){1'b0}}),
.bwr_i(flush_wr_q)
);
 
// Map the per-way tag signals onto the single tag memory data port, whose
// width is CACHE_NUM_WAYS * CACHE_TAG_WIDTH.
// 2-Way
generate
if (CACHE_NUM_WAYS >= 2)
begin: TAG_2WAY
// Way 1 occupies the upper CACHE_TAG_WIDTH bits of the tag word, way 0 the lower
assign tag_in = {tag_in1_r, tag_in0_r};
assign {tag_out1_w, tag_out0_w} = tag_out;
end
// 1-Way
else
begin: TAG_1WAY
// Only way 0 is stored; tag_in1_r is not written to the tag memory
assign tag_in = tag_in0_r;
assign tag_out0_w = tag_out;
// Way 1 reads back as all zeros, so its valid bit is clear and miss1_w is always asserted
assign tag_out1_w = {(CACHE_TAG_WIDTH){1'b0}};
end
endgenerate
 
//-----------------------------------------------------------------
// Data memory
//-----------------------------------------------------------------
altor32_ram_dp
wire [31:0] way0_instruction_w /*verilator public*/;
wire [31:0] way1_instruction_w /*verilator public*/;
 
// Way 0 Instruction Memory
altor32_ram_dp
#(
.WIDTH(32),
.SIZE(CACHE_DWIDTH)
)
u2_data_mem
u2_data_way0
(
// Data read port
.aclk_i(clk_i),
.aadr_i(address_rd_w),
.adat_o(way0_instruction_w),
.adat_i(32'h00),
.awr_i(1'b0),
// Data write port
.bclk_i(clk_i),
.badr_i(address_wr_w),
.bdat_o(/*open*/),
.bdat_i(mem_data_w),
.bwr_i(cache_wr0_w)
);
 
// Way 1 Instruction Memory
altor32_ram_dp
#(
.WIDTH(32),
.SIZE(CACHE_DWIDTH)
)
u2_data_way1
(
// Data read port
.aclk_i(clk_i),
.aadr_i(address_rd_w),
.adat_o(instruction_o),
.adat_o(way1_instruction_w),
.adat_i(32'h00),
.awr_i(1'b0),
459,13 → 606,16
.badr_i(address_wr_w),
.bdat_o(/*open*/),
.bdat_i(mem_data_w),
.bwr_i(cache_wr_w)
.bwr_i(cache_wr1_w)
);
 
// Select between ways for result
assign instruction_o = (miss0_w == 1'b0) ? way0_instruction_w : way1_instruction_w;
 
// Write to cache on wishbone response
assign address_wr_w = {miss_pc_q[CACHE_LINE_ADDR_WIDTH + CACHE_LINE_SIZE_WIDTH - 1:CACHE_LINE_SIZE_WIDTH], mem_resp_addr_w[CACHE_LINE_SIZE_WIDTH-1:2]};
 
assign cache_wr_w = (state_q == STATE_FETCH) & mem_valid_w;
assign cache_wr0_w = (state_q == STATE_FETCH) & mem_valid_w & ~way_update_q;
assign cache_wr1_w = (state_q == STATE_FETCH) & mem_valid_w & way_update_q;
 
endmodule
 
/rtl/cpu/altor32_dfu.v
64,14 → 64,8
input [4:0] rd_load_i /*verilator public*/,
 
// Multiplier status
input mult_lo_ex_i /*verilator public*/,
input mult_hi_ex_i /*verilator public*/,
input mult_lo_wb_i /*verilator public*/,
input mult_hi_wb_i /*verilator public*/,
input mult_ex_i /*verilator public*/,
 
// Multiplier result
input [63:0] result_mult_i /*verilator public*/,
 
// Result (EXEC)
input [31:0] result_ex_i /*verilator public*/,
 
124,7 → 118,7
else if (ra_i == rd_ex_i)
begin
// Multiplier has one cycle latency, stall if needed now
if (mult_lo_ex_i | mult_hi_wb_i)
if (mult_ex_i)
stall_o = 1'b1;
else
begin
140,12 → 134,7
//---------------------------------------------------------------
else if (ra_i == rd_wb_i)
begin
if (mult_hi_wb_i)
result_ra_o = result_mult_i[63:32];
else if (mult_lo_wb_i)
result_ra_o = result_mult_i[31:0];
else
result_ra_o = result_wb_i;
result_ra_o = result_wb_i;
 
resolved_o = 1'b1;
`ifdef CONF_CORE_DEBUG
179,7 → 168,7
else if (rb_i == rd_ex_i)
begin
// Multiplier has one cycle latency, stall if needed now
if (mult_lo_ex_i | mult_hi_wb_i)
if (mult_ex_i)
stall_o = 1'b1;
else
begin
195,13 → 184,8
// RB from PC-8 (writeback)
//---------------------------------------------------------------
else if (rb_i == rd_wb_i)
begin
if (mult_hi_wb_i)
result_rb_o = result_mult_i[63:32];
else if (mult_lo_wb_i)
result_rb_o = result_mult_i[31:0];
else
result_rb_o = result_wb_i;
begin
result_rb_o = result_wb_i;
 
resolved_o = 1'b1;
 
/rtl/cpu/altor32.v
125,8 → 125,7
 
// Result from execute
wire [31:0] ex_result_w;
wire ex_mult_w;
wire [31:0] ex_mult_res_w;
wire [63:0] ex_mult_res_w;
 
// Branch request
wire ex_branch_w;
432,7 → 431,6
.opcode_pc_o(/* not used */),
.reg_rd_o(ex_rd_w),
.reg_rd_value_o(ex_result_w),
.mult_o(ex_mult_w),
.mult_res_o(ex_mult_res_w),
 
// Register write back bypass
476,7 → 474,6
.mem_ready_i(dcache_ack_w),
 
// Multiplier result
.mult_i(ex_mult_w),
.mult_result_i(ex_mult_res_w),
 
// Outputs
/rtl/cpu/altor32_exec.v
89,8 → 89,7
output [31:0] opcode_pc_o /*verilator public*/,
output [4:0] reg_rd_o /*verilator public*/,
output [31:0] reg_rd_value_o /*verilator public*/,
output mult_o /*verilator public*/,
output [31:0] mult_res_o /*verilator public*/,
output [63:0] mult_res_o /*verilator public*/,
 
// Register write back bypass
input [4:0] wb_rd_i /*verilator public*/,
313,14 → 312,8
.rd_load_i(load_rd_q),
 
// Multiplier status
.mult_lo_ex_i(1'b0),
.mult_hi_ex_i(1'b0),
.mult_lo_wb_i(1'b0),
.mult_hi_wb_i(1'b0),
.mult_ex_i(1'b0),
 
// Multiplier result
.result_mult_i(64'b0),
 
// Result (EXEC)
.result_ex_i(ex_result_w),
 
663,12 → 656,6
write_rd_r = 1'b1;
end
 
inst_mul_w, // l.mul
inst_mulu_w: // l.mulu
begin
write_rd_r = 1'b1;
end
 
inst_addi_w: // l.addi
begin
alu_func_r = `ALU_ADD;
1621,8 → 1608,7
assign reg_rd_o = ex_rd_q;
assign reg_rd_value_o = ex_result_w;
 
assign mult_o = 1'b0;
assign mult_res_o = 32'b0;
assign mult_res_o = 64'b0;
 
//-------------------------------------------------------------------
// Hooks for debug

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.