OpenCores
URL https://opencores.org/ocsvn/amber/amber/trunk

Subversion Repositories amber

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /amber/trunk/hw/vlog
    from Rev 15 to Rev 16
    Reverse comparison

Rev 15 → Rev 16

/amber/amber.v File deleted
/amber/fetch.v File deleted
/amber/wishbone.v File deleted
/amber/multiply.v File deleted
/amber/register_bank.v File deleted
/amber/alu.v File deleted
/amber/coprocessor.v File deleted
/amber/decompile.v File deleted
/amber/execute.v File deleted
/amber/decode.v File deleted
/amber/cache.v File deleted
/amber/barrel_shift.v File deleted
/amber25/a25_barrel_shift.v
0,0 → 1,251
//////////////////////////////////////////////////////////////////
// //
// Barrel Shifter for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Provides 32-bit shifts LSL, LSR, ASR and ROR //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// ---------------------------------------------------------------
// a25_barrel_shift
//
// Purely combinational 32-bit shifter producing a 32-bit result
// and a carry out. i_function picks one of four operations;
// LSL, LSR and ASR are matched against the localparams of the
// same name from a25_localparams.v, any other value selects ROR.
//
// For every operation:
//   * i_shift_imm_zero high selects that operation's special
//     "immediate zero" encoding (LSL #0, LSR #32, ASR #32, RRX).
//   * otherwise a shift amount of zero passes i_in and
//     i_carry_in straight through.
// ---------------------------------------------------------------
module a25_barrel_shift (

    input       [31:0]  i_in,               // operand to shift
    input               i_carry_in,         // carry flag; passed through on zero shifts, rotated in by RRX
    input       [7:0]   i_shift_amount,     // uses 8 LSBs of Rs, or a 5 bit immediate constant
    input               i_shift_imm_zero,   // high when immediate shift value of zero selected
    input       [1:0]   i_function,         // operation select, see LSL/LSR/ASR localparams

    output      [31:0]  o_out,              // shifted result
    output              o_carry_out         // shifter carry out

);

`include "a25_localparams.v"

// Per-operation results. Bit 32 is the carry out, bits 31:0 the shifted word.
wire [32:0] lsl_out;
wire [32:0] lsr_out;
wire [32:0] asr_out;
wire [32:0] ror_out;

// Shared by all four operations: a zero shift amount leaves the
// operand and the carry untouched.
wire        amount_is_zero = ( i_shift_amount == 8'd0 );
wire [32:0] pass_through   = { i_carry_in, i_in };


// ---------------------------------------------------------------
// Logical shift left
// ---------------------------------------------------------------
// The operand is zero-extended by one bit so that the last bit
// shifted out of i_in lands in bit 32, i.e. the carry position.
// Amounts of 33 and above shift everything out, giving zero for
// both result and carry.
// LSL with an immediate of zero behaves like a zero shift amount.
wire [32:0] lsl_shifted = { 1'd0, i_in } << i_shift_amount;

assign lsl_out = ( i_shift_imm_zero || amount_is_zero ) ? pass_through
                                                        : lsl_shifted;


// ---------------------------------------------------------------
// Logical shift right
// ---------------------------------------------------------------
// A guard bit is appended below bit 0; after the shift it holds the
// last bit shifted out of i_in, which is the carry. Amounts of 33
// and above give zero for both result and carry.
// The immediate-zero encoding means LSR #32: zero result, carry
// out is bit 31 of the operand.
wire [32:0] lsr_shifted = { i_in, 1'd0 } >> i_shift_amount;

assign lsr_out = i_shift_imm_zero ? { i_in[31], 32'd0 }                   :
                 amount_is_zero   ? pass_through                          :
                                    { lsr_shifted[0], lsr_shifted[32:1] } ;


// ---------------------------------------------------------------
// Arithmetic shift right
// ---------------------------------------------------------------
// Same guard-bit scheme as LSR, but the operand is first extended
// upwards with 32 copies of the sign bit. Every amount of 32 or
// more yields the same answer (result and carry all equal to the
// sign bit), so the amount is clamped to 32 to stay inside the
// sign-extended vector.
// The immediate-zero encoding means ASR #32.
wire [5:0]  asr_amount  = ( i_shift_amount > 8'd32 ) ? 6'd32
                                                     : i_shift_amount[5:0];
wire [64:0] asr_shifted = { {32{i_in[31]}}, i_in, 1'd0 } >> asr_amount;

assign asr_out = i_shift_imm_zero ? { i_in[31], {32{i_in[31]}} }          :
                 amount_is_zero   ? pass_through                          :
                                    { asr_shifted[0], asr_shifted[32:1] } ;


// ---------------------------------------------------------------
// Rotate right
// ---------------------------------------------------------------
// Only the 5 LSBs of the amount matter for the rotation itself.
// Rotating is done by shifting two back-to-back copies of the
// operand and keeping the low word. The carry out is the last bit
// rotated round, which is bit 31 of the rotated word; this also
// holds when the 5 LSBs are zero but the full amount is not
// (rotate by a multiple of 32).
// The immediate-zero encoding means RRX: a one bit rotate through
// the carry flag.
wire [63:0] ror_doubled = { i_in, i_in } >> i_shift_amount[4:0];
wire [31:0] ror_word    = ror_doubled[31:0];

assign ror_out = i_shift_imm_zero ? { i_in[0], i_carry_in, i_in[31:1] }   :
                 amount_is_zero   ? pass_through                          :
                                    { ror_word[31], ror_word }            ;


// ---------------------------------------------------------------
// Operation select
// ---------------------------------------------------------------
wire [32:0] shifter_result = ( i_function == LSL ) ? lsl_out :
                             ( i_function == LSR ) ? lsr_out :
                             ( i_function == ASR ) ? asr_out :
                                                     ror_out ;

assign o_carry_out = shifter_result[32];
assign o_out       = shifter_result[31:0];

endmodule
 
 
/amber25/a25_dcache.v
0,0 → 1,970
//////////////////////////////////////////////////////////////////
// //
// L1 Data Cache for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Synthesizable L1 Unified Data and Instruction Cache //
// Cache is 4-way, 256 line and 16 bytes per line for //
// a total of 16KB. The cache policy is write-through and //
// read allocate. For swap instructions (SWP and SWPB) the //
// location is evicted from the cache and read from main //
// memory. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
`include "a25_config_defines.v"
 
module a25_dcache
#(
 
// ---------------------------------------------------------
// Cache Configuration
 
// Limited to Linux 4k page sizes -> 256 lines
parameter CACHE_LINES = 256,
 
// This cannot be changed without some major surgery on
// this module
parameter CACHE_WORDS_PER_LINE = 4,
 
// Changing this parameter is the recommended
// way to change the overall cache size; 2, 3, 4 and 8 ways are supported.
// 2 ways -> 8KB cache
// 3 ways -> 12KB cache
// 4 ways -> 16KB cache
// 8 ways -> 32KB cache
parameter WAYS = `A25_DCACHE_WAYS ,
 
// derived configuration parameters
parameter CACHE_ADDR_WIDTH = log2 ( CACHE_LINES ), // = 8
parameter WORD_SEL_WIDTH = log2 ( CACHE_WORDS_PER_LINE ), // = 2
parameter TAG_ADDR_WIDTH = 32 - CACHE_ADDR_WIDTH - WORD_SEL_WIDTH - 2, // = 20
parameter TAG_WIDTH = TAG_ADDR_WIDTH + 1, // = 21, including Valid flag
parameter CACHE_LINE_WIDTH = CACHE_WORDS_PER_LINE * 32, // = 128
parameter TAG_ADDR32_LSB = CACHE_ADDR_WIDTH + WORD_SEL_WIDTH + 2, // = 12
parameter CACHE_ADDR32_MSB = CACHE_ADDR_WIDTH + WORD_SEL_WIDTH + 2 - 1, // = 11
parameter CACHE_ADDR32_LSB = WORD_SEL_WIDTH + 2 , // = 4
parameter WORD_SEL_MSB = WORD_SEL_WIDTH + 2 - 1, // = 3
parameter WORD_SEL_LSB = 2 // = 2
// ---------------------------------------------------------
)
 
 
(
input i_clk,
 
// Read / Write requests from core
input i_request,
input i_exclusive, // exclusive access, part of swap instruction
input [31:0] i_write_data,
input i_write_enable, // write request from execute stage
input [31:0] i_address, // registered address from execute
input [31:0] i_address_nxt, // un-registered version of address from execute stage
input [3:0] i_byte_enable,
input i_cache_enable, // from co-processor 15 configuration register
input i_cache_flush, // from co-processor 15 register
 
output [31:0] o_read_data,
input i_fetch_stall,
output o_stall,
 
// WB Read Request
output o_wb_req, // Read Request
input [31:0] i_wb_read_data, // wb bus
input i_wb_ready // wb_stb && !wb_ack
);
 
`include "a25_localparams.v"
`include "a25_functions.v"
 
// One-hot encoded
localparam C_INIT = 0,
C_CORE = 1,
C_FILL = 2,
C_INVA = 3,
C_STATES = 4;
localparam [3:0] CS_INIT = 4'd0,
CS_IDLE = 4'd1,
CS_FILL0 = 4'd2,
CS_FILL1 = 4'd3,
CS_FILL2 = 4'd4,
CS_FILL3 = 4'd5,
CS_FILL_COMPLETE = 4'd6,
CS_TURN_AROUND = 4'd7,
CS_WRITE_HIT1 = 4'd8,
CS_WRITE_HIT_WAIT_WB = 4'd8,
CS_WRITE_MISS_WAIT_WB = 4'd9,
CS_EX_DELETE = 4'd10;
 
reg [3:0] c_state = CS_IDLE;
reg [C_STATES-1:0] source_sel = 1'd1 << C_CORE;
reg [CACHE_ADDR_WIDTH:0] init_count = 'd0;
wire [TAG_WIDTH-1:0] tag_rdata_way [WAYS-1:0];
wire [CACHE_LINE_WIDTH-1:0] data_rdata_way[WAYS-1:0];
wire [WAYS-1:0] data_wenable_way;
wire [WAYS-1:0] data_hit_way;
reg [WAYS-1:0] data_hit_way_r = 'd0;
wire [WAYS-1:0] tag_wenable_way;
reg [WAYS-1:0] select_way = 'd0;
wire [WAYS-1:0] next_way;
reg [WAYS-1:0] valid_bits_r = 'd0;
 
reg [3:0] random_num = 4'hf;
 
wire [CACHE_ADDR_WIDTH-1:0] tag_address;
wire [TAG_WIDTH-1:0] tag_wdata;
wire tag_wenable;
 
wire [CACHE_LINE_WIDTH-1:0] read_miss_wdata;
wire [CACHE_LINE_WIDTH-1:0] write_hit_wdata;
reg [CACHE_LINE_WIDTH-1:0] data_wdata_r = 'd0;
wire [CACHE_LINE_WIDTH-1:0] consecutive_write_wdata;
wire [CACHE_LINE_WIDTH-1:0] data_wdata;
wire [CACHE_ADDR_WIDTH-1:0] data_address;
wire [31:0] write_data_word;
 
wire idle_hit;
wire read_miss;
wire write_miss;
wire write_hit;
wire consecutive_write;
wire fill_state;
 
reg [31:0] miss_address = 'd0;
wire [CACHE_LINE_WIDTH-1:0] hit_rdata;
 
wire read_stall;
wire write_stall;
wire cache_busy_stall;
wire access_stall;
wire write_state;
 
wire request_pulse;
wire request_hold;
reg request_r = 'd0;
wire [CACHE_ADDR_WIDTH-1:0] address;
reg [CACHE_LINE_WIDTH-1:0] wb_rdata_burst = 'd0;
 
wire exclusive_access;
wire ex_read_hit;
reg ex_read_hit_r = 'd0;
reg [WAYS-1:0] ex_read_hit_way = 'd0;
reg [CACHE_ADDR_WIDTH-1:0] ex_read_address;
wire ex_read_hit_clear;
wire ex_read_cache_busy;
 
reg [31:0] wb_address = 'd0;
wire rbuf_hit = 'd0;
wire wb_hit;
 
genvar i;
 
// ======================================
// Address to use for cache access
// ======================================
// If currently stalled then the address for the next
// cycle will be the same as it is in the current cycle
//
assign access_stall = i_fetch_stall || o_stall;
 
assign address = access_stall ? i_address [CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
i_address_nxt[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] ;
 
// ======================================
// Outputs
// ======================================
assign o_read_data = wb_hit ? i_wb_read_data :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd0 ? hit_rdata [31:0] :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd1 ? hit_rdata [63:32] :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd2 ? hit_rdata [95:64] :
hit_rdata [127:96] ;
 
// Don't allow the cache to stall the wb i/f for an exclusive access
// The cache needs a couple of cycles to flush a potential copy of the exclusive
// address, but the wb can do the access in parallel. So there is no
// stall in the state CS_EX_DELETE, even though the cache is out of action.
// This works fine as long as the wb is stalling the core
assign o_stall = request_hold && ( read_stall || write_stall || cache_busy_stall || ex_read_cache_busy );
 
assign o_wb_req = ( (read_miss || write_miss || write_hit) && c_state == CS_IDLE ) || consecutive_write;
 
// ======================================
// Cache State Machine
// ======================================
 
// Little State Machine to Flush Tag RAMS
always @ ( posedge i_clk )
if ( i_cache_flush )
begin
c_state <= CS_INIT;
source_sel <= 1'd1 << C_INIT;
init_count <= 'd0;
`ifdef A25_CACHE_DEBUG
`TB_DEBUG_MESSAGE
$display("Cache Flush");
`endif
end
else
case ( c_state )
CS_INIT :
if ( init_count < CACHE_LINES [CACHE_ADDR_WIDTH:0] )
begin
init_count <= init_count + 1'd1;
source_sel <= 1'd1 << C_INIT;
end
else
begin
source_sel <= 1'd1 << C_CORE;
c_state <= CS_TURN_AROUND;
end
CS_IDLE :
begin
source_sel <= 1'd1 << C_CORE;
if ( ex_read_hit || ex_read_hit_r )
begin
select_way <= data_hit_way | ex_read_hit_way;
c_state <= CS_EX_DELETE;
source_sel <= 1'd1 << C_INVA;
end
else if ( read_miss )
c_state <= CS_FILL0;
else if ( write_hit )
begin
if ( i_wb_ready )
c_state <= CS_WRITE_HIT1;
else
c_state <= CS_WRITE_HIT_WAIT_WB;
end
else if ( write_miss && !i_wb_ready )
c_state <= CS_WRITE_MISS_WAIT_WB;
end
CS_FILL0 :
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL1;
CS_FILL1 :
// first read of burst of 4
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL2;
 
 
CS_FILL2 :
// second read of burst of 4
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL3;
CS_FILL3 :
// third read of burst of 4
// wb read request asserted, wait for ack
if ( i_wb_ready )
begin
c_state <= CS_FILL_COMPLETE;
source_sel <= 1'd1 << C_FILL;
// Pick a way to write the cache update into
// Either pick one of the invalid caches, or if all are valid, then pick
// one randomly
select_way <= next_way;
random_num <= {random_num[2], random_num[1], random_num[0],
random_num[3]^random_num[2]};
end
 
 
// Write the read fetch data in this cycle
CS_FILL_COMPLETE :
begin
// Back to normal cache operations, but
// use physical address for first read as
// address moved before the stall was asserted for the read_miss
// However don't use it if its a non-cached address!
source_sel <= 1'd1 << C_CORE;
c_state <= CS_TURN_AROUND;
end
 
// Ignore the tag read data in this cycle
// Wait 1 cycle to pre-read the cache and return to normal operation
CS_TURN_AROUND :
begin
c_state <= CS_IDLE;
end
 
// Flush the entry matching an exclusive access
CS_EX_DELETE:
begin
`ifdef A25_CACHE_DEBUG
`TB_DEBUG_MESSAGE
$display("Cache deleted Locked entry");
`endif
c_state <= CS_TURN_AROUND;
source_sel <= 1'd1 << C_CORE;
end
CS_WRITE_HIT1:
if ( !consecutive_write )
c_state <= CS_IDLE;
 
 
CS_WRITE_HIT_WAIT_WB:
// wait for an ack on the wb bus to complete the write
if ( i_wb_ready )
c_state <= CS_IDLE;
 
 
CS_WRITE_MISS_WAIT_WB:
// wait for an ack on the wb bus to complete the write
if ( i_wb_ready )
c_state <= CS_IDLE;
endcase
 
 
// ======================================
// Capture WB Block Read - burst of 4 words
// ======================================
always @ ( posedge i_clk )
if ( i_wb_ready )
wb_rdata_burst <= {i_wb_read_data, wb_rdata_burst[127:32]};
 
 
// ======================================
// Miss Address
// ======================================
always @ ( posedge i_clk )
if ( o_wb_req || write_hit )
miss_address <= i_address;
 
always @ ( posedge i_clk )
if ( write_hit )
begin
data_hit_way_r <= data_hit_way;
end
 
always @ ( posedge i_clk )
if ( write_hit || consecutive_write )
begin
data_wdata_r <= data_wdata;
end
 
assign consecutive_write = miss_address[31:4] == i_address[31:4] &&
i_write_enable &&
c_state == CS_WRITE_HIT1 &&
request_pulse;
 
 
always @(posedge i_clk)
if ( o_wb_req )
wb_address <= i_address;
else if ( i_wb_ready && fill_state )
wb_address <= {wb_address[31:4], wb_address[3:2] + 1'd1, 2'd0};
assign fill_state = c_state == CS_FILL0 || c_state == CS_FILL1 || c_state == CS_FILL2 || c_state == CS_FILL3 ;
assign wb_hit = i_address == wb_address && i_wb_ready && fill_state;
 
 
// ======================================
// Hold Requests
// ======================================
always @(posedge i_clk)
request_r <= (request_pulse || request_r) && o_stall;
 
assign request_hold = request_pulse || request_r;
 
 
// ======================================
// Remember Read-Modify-Write Hit
// ======================================
assign ex_read_hit_clear = c_state == CS_EX_DELETE;
 
always @ ( posedge i_clk )
if ( ex_read_hit_clear )
begin
ex_read_hit_r <= 1'd0;
ex_read_hit_way <= 'd0;
end
else if ( ex_read_hit )
begin
`ifdef A25_CACHE_DEBUG
`TB_DEBUG_MESSAGE
$display ("Exclusive access cache hit address 0x%08h", i_address);
`endif
ex_read_hit_r <= 1'd1;
ex_read_hit_way <= data_hit_way;
end
else if ( c_state == CS_FILL_COMPLETE && ex_read_hit_r )
ex_read_hit_way <= select_way;
 
always @ (posedge i_clk)
if ( ex_read_hit )
ex_read_address <= i_address[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB];
 
 
assign tag_address = source_sel[C_FILL] ? miss_address [CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
source_sel[C_INVA] ? ex_read_address :
source_sel[C_INIT] ? init_count[CACHE_ADDR_WIDTH-1:0] :
source_sel[C_CORE] ? address :
{CACHE_ADDR_WIDTH{1'd0}} ;
 
 
assign data_address = consecutive_write ? miss_address[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
write_hit ? i_address [CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
source_sel[C_FILL] ? miss_address[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
source_sel[C_CORE] ? address :
{CACHE_ADDR_WIDTH{1'd0}} ;
 
assign tag_wdata = source_sel[C_FILL] ? {1'd1, miss_address[31:TAG_ADDR32_LSB]} :
{TAG_WIDTH{1'd0}} ;
 
 
// Data comes in off the WB bus in wrap4 with the missed data word first
assign data_wdata = write_hit && c_state == CS_IDLE ? write_hit_wdata :
consecutive_write ? consecutive_write_wdata :
read_miss_wdata ;
 
assign read_miss_wdata = miss_address[3:2] == 2'd0 ? wb_rdata_burst :
miss_address[3:2] == 2'd1 ? { wb_rdata_burst[95:0], wb_rdata_burst[127:96] }:
miss_address[3:2] == 2'd2 ? { wb_rdata_burst[63:0], wb_rdata_burst[127:64] }:
{ wb_rdata_burst[31:0], wb_rdata_burst[127:32] };
 
 
assign write_hit_wdata = i_address[3:2] == 2'd0 ? {hit_rdata[127:32], write_data_word } :
i_address[3:2] == 2'd1 ? {hit_rdata[127:64], write_data_word, hit_rdata[31:0] } :
i_address[3:2] == 2'd2 ? {hit_rdata[127:96], write_data_word, hit_rdata[63:0] } :
{ write_data_word, hit_rdata[95:0] } ;
wire [31:0] con_read_data_word;
wire [31:0] con_write_data_word;
 
assign consecutive_write_wdata =
i_address[3:2] == 2'd0 ? {data_wdata_r[127:32], con_write_data_word } :
i_address[3:2] == 2'd1 ? {data_wdata_r[127:64], con_write_data_word, data_wdata_r[31:0] } :
i_address[3:2] == 2'd2 ? {data_wdata_r[127:96], con_write_data_word, data_wdata_r[63:0] } :
{ con_write_data_word, data_wdata_r[95:0] } ;
assign con_read_data_word =
i_address[3:2] == 2'd0 ? data_wdata_r[ 31: 0] :
i_address[3:2] == 2'd1 ? data_wdata_r[ 63: 32] :
i_address[3:2] == 2'd2 ? data_wdata_r[ 95: 64] :
data_wdata_r[127: 96] ;
 
 
assign con_write_data_word = i_byte_enable == 4'b0001 ? { con_read_data_word[31: 8], i_write_data[ 7: 0] } :
i_byte_enable == 4'b0010 ? { con_read_data_word[31:16], i_write_data[15: 8], con_read_data_word[ 7:0]} :
i_byte_enable == 4'b0100 ? { con_read_data_word[31:24], i_write_data[23:16], con_read_data_word[15:0]} :
i_byte_enable == 4'b1000 ? { i_write_data[31:24], con_read_data_word[23:0]} :
i_byte_enable == 4'b0011 ? { con_read_data_word[31:16], i_write_data[15: 0] } :
i_byte_enable == 4'b1100 ? { i_write_data[31:16], con_read_data_word[15:0]} :
i_write_data ;
 
 
 
// Use Byte Enables
assign write_data_word = i_byte_enable == 4'b0001 ? { o_read_data[31: 8], i_write_data[ 7: 0] } :
i_byte_enable == 4'b0010 ? { o_read_data[31:16], i_write_data[15: 8], o_read_data[ 7:0]} :
i_byte_enable == 4'b0100 ? { o_read_data[31:24], i_write_data[23:16], o_read_data[15:0]} :
i_byte_enable == 4'b1000 ? { i_write_data[31:24], o_read_data[23:0]} :
i_byte_enable == 4'b0011 ? { o_read_data[31:16], i_write_data[15: 0] } :
i_byte_enable == 4'b1100 ? { i_write_data[31:16], o_read_data[15:0]} :
i_write_data ;
 
assign tag_wenable = source_sel[C_INVA] ? 1'd1 :
source_sel[C_FILL] ? 1'd1 :
source_sel[C_INIT] ? 1'd1 :
source_sel[C_CORE] ? 1'd0 :
1'd0 ;
 
assign request_pulse = i_request && i_cache_enable;
 
assign exclusive_access = i_exclusive && i_cache_enable;
 
 
assign idle_hit = |data_hit_way;
 
assign write_hit = request_hold && i_write_enable && idle_hit;
assign write_miss = request_hold && i_write_enable && !idle_hit && !consecutive_write;
assign read_miss = request_hold && !idle_hit && !i_write_enable;
 
// Exclusive read idle_hit
assign ex_read_hit = exclusive_access && !i_write_enable && idle_hit;
 
// Added to fix rare swap bug which occurs when the cache starts
// a fill just as the swap instruction starts to execute. The cache
// fails to check for a read idle_hit on the swap read cycle.
// This signal stalls the core in that case until after the
// fill has completed.
assign ex_read_cache_busy = exclusive_access && !i_write_enable && c_state != CS_IDLE;
 
// Need to stall for a write miss to wait for the current wb
// read miss access to complete. Also for a write idle_hit, need
// to stall for 1 cycle while the data cache is being written to
assign write_state = c_state == CS_IDLE || c_state == CS_WRITE_HIT1 ||
c_state == CS_WRITE_HIT_WAIT_WB || c_state == CS_WRITE_MISS_WAIT_WB;
assign write_stall = (write_miss && !(i_wb_ready && write_state)) || (write_hit && !i_wb_ready);
 
assign read_stall = request_hold && !idle_hit && !rbuf_hit && !wb_hit && !i_write_enable;
 
assign cache_busy_stall = c_state == CS_FILL_COMPLETE || c_state == CS_TURN_AROUND || c_state == CS_INIT ||
(fill_state && !rbuf_hit && !wb_hit);
 
 
// ======================================
// Instantiate RAMS
// ======================================
 
generate
for ( i=0; i<WAYS;i=i+1 ) begin : rams
 
// Tag RAMs
`ifdef XILINX_SPARTAN6_FPGA
xs6_sram_256x21_line_en
`endif
 
`ifdef XILINX_VIRTEX6_FPGA
xv6_sram_256x21_line_en
`endif
 
`ifndef XILINX_FPGA
generic_sram_line_en
`endif
 
#(
.DATA_WIDTH ( TAG_WIDTH ),
.INITIALIZE_TO_ZERO ( 1 ),
.ADDRESS_WIDTH ( CACHE_ADDR_WIDTH ))
u_tag (
.i_clk ( i_clk ),
.i_write_data ( tag_wdata ),
.i_write_enable ( tag_wenable_way[i] ),
.i_address ( tag_address ),
 
.o_read_data ( tag_rdata_way[i] )
);
// Data RAMs
`ifdef XILINX_SPARTAN6_FPGA
xs6_sram_256x128_byte_en
`endif
 
`ifdef XILINX_VIRTEX6_FPGA
xv6_sram_256x128_byte_en
`endif
 
`ifndef XILINX_FPGA
generic_sram_byte_en
`endif
 
#(
.DATA_WIDTH ( CACHE_LINE_WIDTH) ,
.ADDRESS_WIDTH ( CACHE_ADDR_WIDTH) )
u_data (
.i_clk ( i_clk ),
.i_write_data ( data_wdata ),
.i_write_enable ( data_wenable_way[i] ),
.i_address ( data_address ),
.i_byte_enable ( {CACHE_LINE_WIDTH/8{1'd1}} ),
.o_read_data ( data_rdata_way[i] )
);
 
 
// Per tag-ram write-enable
assign tag_wenable_way[i] = tag_wenable && ( select_way[i] || source_sel[C_INIT] );
 
// Per data-ram write-enable
assign data_wenable_way[i] = (source_sel[C_FILL] && select_way[i]) ||
(write_hit && data_hit_way[i] && c_state == CS_IDLE) ||
(consecutive_write && data_hit_way_r[i]);
// Per data-ram idle_hit flag
assign data_hit_way[i] = tag_rdata_way[i][TAG_WIDTH-1] &&
tag_rdata_way[i][TAG_ADDR_WIDTH-1:0] == i_address[31:TAG_ADDR32_LSB] &&
c_state == CS_IDLE;
end
endgenerate
 
 
// ======================================
// Register Valid Bits
// ======================================
generate
if ( WAYS == 2 ) begin : valid_bits_2ways
 
always @ ( posedge i_clk )
if ( c_state == CS_IDLE )
valid_bits_r <= {tag_rdata_way[1][TAG_WIDTH-1],
tag_rdata_way[0][TAG_WIDTH-1]};
end
else if ( WAYS == 3 ) begin : valid_bits_3ways
 
always @ ( posedge i_clk )
if ( c_state == CS_IDLE )
valid_bits_r <= {tag_rdata_way[2][TAG_WIDTH-1],
tag_rdata_way[1][TAG_WIDTH-1],
tag_rdata_way[0][TAG_WIDTH-1]};
end
else if ( WAYS == 4 ) begin : valid_bits_4ways
 
always @ ( posedge i_clk )
if ( c_state == CS_IDLE )
valid_bits_r <= {tag_rdata_way[3][TAG_WIDTH-1],
tag_rdata_way[2][TAG_WIDTH-1],
tag_rdata_way[1][TAG_WIDTH-1],
tag_rdata_way[0][TAG_WIDTH-1]};
end
else begin : valid_bits_8ways
 
always @ ( posedge i_clk )
if ( c_state == CS_IDLE )
valid_bits_r <= {tag_rdata_way[7][TAG_WIDTH-1],
tag_rdata_way[6][TAG_WIDTH-1],
tag_rdata_way[5][TAG_WIDTH-1],
tag_rdata_way[4][TAG_WIDTH-1],
tag_rdata_way[3][TAG_WIDTH-1],
tag_rdata_way[2][TAG_WIDTH-1],
tag_rdata_way[1][TAG_WIDTH-1],
tag_rdata_way[0][TAG_WIDTH-1]};
end
endgenerate
 
 
// ======================================
// Select read idle_hit data
// ======================================
 
// hit_rdata carries the complete cache line of the way that reported a hit.
// Exactly one branch is elaborated; any WAYS other than 2, 3 or 4 selects the
// 8-way version. The chain is a priority select with way 0 first, and when
// no way hits the line is driven to all ones.
generate
if ( WAYS == 2 ) begin : read_data_2ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
else if ( WAYS == 3 ) begin : read_data_3ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
else if ( WAYS == 4 ) begin : read_data_4ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
data_hit_way[3] ? data_rdata_way[3] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
// Fall-through branch: indexes ways 0..7, so WAYS must be 8 to reach here safely
else begin : read_data_8ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
data_hit_way[3] ? data_rdata_way[3] :
data_hit_way[4] ? data_rdata_way[4] :
data_hit_way[5] ? data_rdata_way[5] :
data_hit_way[6] ? data_rdata_way[6] :
data_hit_way[7] ? data_rdata_way[7] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
endgenerate
 
 
// ======================================
// Function to select the way to use
// for fills
// ======================================
// next_way is the one-hot select of the way that the next fill will use.
// Each branch defines its own pick_way() sized for that number of ways:
//   valid_bits - per-way valid flags (the caller passes valid_bits_r)
//   random_num - pseudo-random value, only bits [3:1] are examined
// The lowest-numbered way whose valid flag is 0 is chosen; when every way
// is valid the victim is selected from random_num[3:1].
generate
if ( WAYS == 2 ) begin : pick_way_2ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 2'b01;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 2'b10;
        else
            begin
            // All ways occupied so pick one randomly
            // (four of the eight values go to each way)
            case (random_num[3:1])
                3'd0, 3'd3,
                3'd5, 3'd6: pick_way = 2'b10;
                default:    pick_way = 2'b01;
            endcase
            end
    end
    endfunction
end
else if ( WAYS == 3 ) begin : pick_way_3ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 3'b001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 3'b010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 3'b100;
        else
            begin
            // All ways occupied so pick one randomly.
            // 0,1,2 -> way 1;  3,4 -> way 2;  5,6,7 -> way 0.
            // Each value is listed once: a value repeated in a later arm
            // can never match because the first matching arm wins.
            case (random_num[3:1])
                3'd0, 3'd1, 3'd2: pick_way = 3'b010;
                3'd3, 3'd4:       pick_way = 3'b100;
                default:          pick_way = 3'b001;
            endcase
            end
    end
    endfunction
end
else if ( WAYS == 4 ) begin : pick_way_4ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 4'b0001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 4'b0010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 4'b0100;
        else if ( valid_bits[3] == 1'd0 )
            // way 3 not occupied so use it
            pick_way = 4'b1000;
        else
            begin
            // All ways occupied so pick one randomly
            // (two of the eight values go to each way)
            case (random_num[3:1])
                3'd0, 3'd1: pick_way = 4'b0100;
                3'd2, 3'd3: pick_way = 4'b1000;
                3'd4, 3'd5: pick_way = 4'b0001;
                default:    pick_way = 4'b0010;
            endcase
            end
    end
    endfunction
end
// Fall-through branch: the literals below are 8 bits wide, so WAYS must be 8
else begin : pick_way_8ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 8'b00000001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 8'b00000010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 8'b00000100;
        else if ( valid_bits[3] == 1'd0 )
            // way 3 not occupied so use it
            pick_way = 8'b00001000;
        else if ( valid_bits[4] == 1'd0 )
            // way 4 not occupied so use it
            pick_way = 8'b00010000;
        else if ( valid_bits[5] == 1'd0 )
            // way 5 not occupied so use it
            pick_way = 8'b00100000;
        else if ( valid_bits[6] == 1'd0 )
            // way 6 not occupied so use it
            pick_way = 8'b01000000;
        else if ( valid_bits[7] == 1'd0 )
            // way 7 not occupied so use it
            pick_way = 8'b10000000;
        else
            begin
            // All ways occupied so pick one randomly
            // (one of the eight values per way)
            case (random_num[3:1])
                3'd0:    pick_way = 8'b00010000;
                3'd1:    pick_way = 8'b00100000;
                3'd2:    pick_way = 8'b01000000;
                3'd3:    pick_way = 8'b10000000;
                3'd4:    pick_way = 8'b00000001;
                3'd5:    pick_way = 8'b00000010;
                3'd6:    pick_way = 8'b00000100;
                default: pick_way = 8'b00001000;
            endcase
            end
    end
    endfunction
end
endgenerate
 
 
// ========================================================
// Debug WB bus - not synthesizable
// ========================================================
//synopsys translate_off
// Simulation-only helpers (this region sits between synopsys translate_off/on).
// xSOURCE_SEL and xC_STATE hold ASCII names of source_sel and c_state so they
// can be displayed as text; nothing in the visible code reads them.
wire [(6*8)-1:0] xSOURCE_SEL;
wire [(22*8)-1:0] xC_STATE;
 
// source_sel is tested bit by bit; the fallback text is spelled with six
// characters so that it fits the 6-character wire.
assign xSOURCE_SEL = source_sel[C_CORE] ? "C_CORE" :
source_sel[C_INIT] ? "C_INIT" :
source_sel[C_FILL] ? "C_FILL" :
source_sel[C_INVA] ? "C_INVA" :
"UNKNON" ;
assign xC_STATE = c_state == CS_INIT ? "CS_INIT" :
c_state == CS_IDLE ? "CS_IDLE" :
c_state == CS_FILL0 ? "CS_FILL0" :
c_state == CS_FILL1 ? "CS_FILL1" :
c_state == CS_FILL2 ? "CS_FILL2" :
c_state == CS_FILL3 ? "CS_FILL3" :
c_state == CS_FILL_COMPLETE ? "CS_FILL_COMPLETE" :
c_state == CS_EX_DELETE ? "CS_EX_DELETE" :
c_state == CS_TURN_AROUND ? "CS_TURN_AROUND" :
c_state == CS_WRITE_HIT1 ? "CS_WRITE_HIT1" :
c_state == CS_WRITE_HIT_WAIT_WB ? "CS_WRITE_HIT_WAIT_WB" :
c_state == CS_WRITE_MISS_WAIT_WB ? "CS_WRITE_MISS_WAIT_WB" :
"UNKNOWN" ;
 
 
// Consistency checker: on every clock edge, count the ways that report a hit
// and print an error if more than one does. Comparing against a 4-bit
// constant makes the sum 4 bits wide, so up to eight 1-bit hits are counted
// without overflow. TB_ERROR_MESSAGE is a macro defined outside this file;
// presumably it prints a common error prefix - confirm in the testbench.
generate
if ( WAYS == 2 ) begin : check_hit_2ways
 
always @( posedge i_clk )
if ( (data_hit_way[0] + data_hit_way[1] ) > 4'd1 )
begin
`TB_ERROR_MESSAGE
$display("Hit in more than one cache ways!");
end
end
else if ( WAYS == 3 ) begin : check_hit_3ways
 
always @( posedge i_clk )
if ( (data_hit_way[0] + data_hit_way[1] + data_hit_way[2] ) > 4'd1 )
begin
`TB_ERROR_MESSAGE
$display("Hit in more than one cache ways!");
end
end
else if ( WAYS == 4 ) begin : check_hit_4ways
 
always @( posedge i_clk )
if ( (data_hit_way[0] + data_hit_way[1] +
data_hit_way[2] + data_hit_way[3] ) > 4'd1 )
begin
`TB_ERROR_MESSAGE
$display("Hit in more than one cache ways!");
end
end
else if ( WAYS == 8 ) begin : check_hit_8ways
 
always @( posedge i_clk )
if ( (data_hit_way[0] + data_hit_way[1] +
data_hit_way[2] + data_hit_way[3] +
data_hit_way[4] + data_hit_way[5] +
data_hit_way[6] + data_hit_way[7] ) > 4'd1 )
begin
`TB_ERROR_MESSAGE
$display("Hit in more than one cache ways!");
end
end
// Any other WAYS value: report the configuration error once at time zero
else begin : check_hit_nways
 
initial
begin
`TB_ERROR_MESSAGE
$display("Unsupported number of ways %0d", WAYS);
$display("Set A25_DCACHE_WAYS in a25_config_defines.v to either 2,3,4 or 8");
end
 
end
endgenerate
 
 
//synopsys translate_on
endmodule
 
/amber25/a25_functions.v
0,0 → 1,173
//////////////////////////////////////////////////////////////////
// //
// Functions for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Functions used in more than one module //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// ========================================================
// PC Filter - Remove the status bits
// ========================================================
// Returns pc_reg with everything except bits [25:2] forced to zero.
function [31:0] pcf;
input [31:0] pc_reg;
begin
    // The mask has ones in bit positions 25 down to 2 only
    pcf = pc_reg & 32'h03ff_fffc;
end
endfunction
 
 
// ========================================================
// 4-bit to 15-bit 1-hot decode (select value 15 decodes to all zeros)
// ========================================================
// One-hot decode of a 4-bit select into 15 bits.
//   reg_sel 0..14 -> only bit reg_sel set
//   reg_sel 15    -> all zeros
function [14:0] decode;
input [3:0] reg_sel;
begin
    decode = 15'h0000;
    // A relational compare is unknown whenever reg_sel holds x/z bits, so
    // such a select leaves the result at zero, as does the value 15.
    if ( reg_sel <= 4'he )
        decode = 15'h0001 << reg_sel;
end
endfunction
 
 
// ========================================================
// Convert Status Bits Mode to one-hot encoded version
// ========================================================
// Maps the 2-bit mode field onto a 4-bit vector with a single bit set.
// The bit position for each mode comes from the OH_* constants; any mode
// that is not SVC, IRQ or FIRQ yields the OH_USR bit.
function [3:0] oh_status_bits_mode;
input [1:0] fn_status_bits_mode;
reg   [3:0] hot;
begin
    // Built from the last alternative backwards so that SVC, tested last
    // here, keeps the highest priority.
    hot = 4'd1 << OH_USR;
    hot = ( fn_status_bits_mode == FIRQ ) ? ( 4'd1 << OH_FIRQ ) : hot;
    hot = ( fn_status_bits_mode == IRQ  ) ? ( 4'd1 << OH_IRQ  ) : hot;
    hot = ( fn_status_bits_mode == SVC  ) ? ( 4'd1 << OH_SVC  ) : hot;
    oh_status_bits_mode = hot;
end
endfunction
 
// ========================================================
// Convert mode into ascii name
// ========================================================
// Returns the name of a processor mode as ASCII text, up to 14 characters,
// for use in messages. Modes other than USR, SVC, IRQ and FIRQ give the
// "UNKNOWN" text.
function [(14*8)-1:0] mode_name;
input [4:0] mode;
reg   [(14*8)-1:0] text;
begin
    // Built from the last alternative backwards so that USR, tested last
    // here, keeps the highest priority.
    text = "UNKNOWN ";
    text = ( mode == FIRQ ) ? "Fast Interrupt" : text;
    text = ( mode == IRQ  ) ? "Interrupt "     : text;
    text = ( mode == SVC  ) ? "Supervisor "    : text;
    text = ( mode == USR  ) ? "User "          : text;
    mode_name = text;
end
endfunction
 
 
// ========================================================
// Conditional Execution Function
// ========================================================
// EQ Z set
// NE Z clear
// CS C set
// CC C clear
// MI N set
// PL N clear
// VS V set
// VC V clear
// HI C set and Z clear
// LS C clear or Z set
// GE N == V
// LT N != V
// GT Z == 0,N == V
// LE Z == 1 or N != V
// AL Always (unconditional)
// NV Never
 
// Returns 1 when an instruction with the given condition code should
// execute under the given flags. flags is {N, Z, C, V} from bit 3 to bit 0.
// A condition that matches none of the terms below (NV in the table above)
// returns 0.
function conditional_execute;
input [3:0] condition;
input [3:0] flags;
reg         n_flag;
reg         z_flag;
reg         c_flag;
reg         v_flag;
reg         n_eq_v;
begin
    n_flag = flags[3];
    z_flag = flags[2];
    c_flag = flags[1];
    v_flag = flags[0];
    n_eq_v = ( n_flag == v_flag );

    conditional_execute
        = ( condition == AL                         ) ||
          // single-flag conditions
          ( condition == EQ &&  z_flag              ) ||
          ( condition == NE && !z_flag              ) ||
          ( condition == CS &&  c_flag              ) ||
          ( condition == CC && !c_flag              ) ||
          ( condition == MI &&  n_flag              ) ||
          ( condition == PL && !n_flag              ) ||
          ( condition == VS &&  v_flag              ) ||
          ( condition == VC && !v_flag              ) ||
          // unsigned comparisons
          ( condition == HI &&  c_flag && !z_flag   ) ||
          ( condition == LS && (!c_flag || z_flag)  ) ||
          // signed comparisons
          ( condition == GE &&  n_eq_v              ) ||
          ( condition == LT && !n_eq_v              ) ||
          ( condition == GT && !z_flag && n_eq_v    ) ||
          ( condition == LE && (z_flag || !n_eq_v)  ) ;
end
endfunction
 
 
// ========================================================
// Log 2
// ========================================================
 
// Integer base-2 logarithm, rounded down: returns the index of the most
// significant set bit of num (log2(256) = 8, log2(4) = 2, log2(1) = 0).
// log2(0) is undefined mathematically and returns 0 here.
//
// The result no longer doubles as the "not found yet" marker. With a
// marker of 0, an answer of 0 re-armed the search, so log2(1) came out
// as 1. The full 32-bit input range is now handled as well.
// Usable as a constant function in parameter expressions.
function [31:0] log2;
input [31:0] num;
integer i;

begin
    log2 = 32'd0;
    // Scan upwards; the last (highest) set bit found determines the result.
    // Bit 0 needs no test because it maps to the initial value 0.
    for (i = 1; i < 32; i = i + 1)
        if ( num[i] )
            log2 = i;
end
endfunction
/amber25/a25_write_back.v
0,0 → 1,80
//////////////////////////////////////////////////////////////////
// //
// Write Back - Instantiates the write back stage //
// sub-modules of the Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Write-back stage: one rank of registers between the memory stage and the
// consumers of load data. The registers take new values on a clock edge
// only while i_mem_stall is low.
module a25_write_back
(
input                       i_clk,
input                       i_mem_stall,                // Mem stage asserting stall

input       [31:0]          i_mem_read_data,            // data reads
input                       i_mem_read_data_valid,      // read data is valid
input       [9:0]           i_mem_load_rd,              // Rd for data reads

output      [31:0]          o_wb_read_data,             // data reads
output                      o_wb_read_data_valid,       // read data is valid
output      [9:0]           o_wb_load_rd,               // Rd for data reads

input       [31:0]          i_daddress,
input                       i_daddress_valid
);

// Stage registers, all starting at zero
reg  [31:0] mem_read_data_r       = 'd0;    // registered i_mem_read_data
reg         mem_read_data_valid_r = 'd0;    // registered i_mem_read_data_valid
reg  [9:0]  mem_load_rd_r         = 'd0;    // registered i_mem_load_rd
reg  [31:0] daddress_r            = 'd0;    // registered i_daddress; not read inside this module


// Hold every register while the memory stage stalls
always @( posedge i_clk )
    begin : capture_mem_stage
    if ( i_mem_stall == 1'd0 )
        begin
        daddress_r              <= i_daddress;
        mem_load_rd_r           <= i_mem_load_rd;
        mem_read_data_valid_r   <= i_mem_read_data_valid;
        mem_read_data_r         <= i_mem_read_data;
        end
    end


// The outputs are the registers themselves
assign o_wb_load_rd         = mem_load_rd_r;
assign o_wb_read_data_valid = mem_read_data_valid_r;
assign o_wb_read_data       = mem_read_data_r;

endmodule
 
/amber25/a25_icache.v
0,0 → 1,782
//////////////////////////////////////////////////////////////////
// //
// L1 Instruction Cache for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Synthesizable L1 Unified Data and Instruction Cache //
// Cache is 4-way, 256 line and 16 bytes per line for //
// a total of 16KB. The cache policy is write-through and //
// read allocate. For swap instructions (SWP and SWPB) the //
// location is evicted from the cache and read from main //
// memory. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
`include "a25_config_defines.v"
 
module a25_icache
#(
 
// ---------------------------------------------------------
// Cache Configuration
 
// Limited to Linux 4k page sizes -> 256 lines
parameter CACHE_LINES = 256,
 
// This cannot be changed without some major surgery on
// this module
parameter CACHE_WORDS_PER_LINE = 4,
 
// Changing this parameter is the recommended
// way to change the overall cache size; 2, 4 and 8 ways are supported.
// 2 ways -> 8KB cache
// 4 ways -> 16KB cache
// 8 ways -> 32KB cache
parameter WAYS = `A25_ICACHE_WAYS ,
 
// derived configuration parameters
parameter CACHE_ADDR_WIDTH = log2 ( CACHE_LINES ), // = 8
parameter WORD_SEL_WIDTH = log2 ( CACHE_WORDS_PER_LINE ), // = 2
parameter TAG_ADDR_WIDTH = 32 - CACHE_ADDR_WIDTH - WORD_SEL_WIDTH - 2, // = 20
parameter TAG_WIDTH = TAG_ADDR_WIDTH + 1, // = 21, including Valid flag
parameter CACHE_LINE_WIDTH = CACHE_WORDS_PER_LINE * 32, // = 128
parameter TAG_ADDR32_LSB = CACHE_ADDR_WIDTH + WORD_SEL_WIDTH + 2, // = 12
parameter CACHE_ADDR32_MSB = CACHE_ADDR_WIDTH + WORD_SEL_WIDTH + 2 - 1, // = 11
parameter CACHE_ADDR32_LSB = WORD_SEL_WIDTH + 2 , // = 4
parameter WORD_SEL_MSB = WORD_SEL_WIDTH + 2 - 1, // = 3
parameter WORD_SEL_LSB = 2 // = 2
// ---------------------------------------------------------
)
 
 
(
input i_clk,
input i_core_stall,
output o_stall,
 
// Read / Write requests from core
input i_select,
input [31:0] i_address, // registered address from execute
input [31:0] i_address_nxt, // un-registered version of address from execute stage
input i_cache_enable, // from co-processor 15 configuration register
input i_cache_flush, // from co-processor 15 register
 
output [31:0] o_read_data,
 
// WB Read Request
output o_wb_req, // Read Request
input [31:0] i_wb_read_data,
input i_wb_ready
);
 
`include "a25_localparams.v"
`include "a25_functions.v"
 
// One-hot encoded
localparam C_INIT = 0,
C_CORE = 1,
C_FILL = 2,
C_INVA = 3,
C_STATES = 4;
localparam [3:0] CS_INIT = 4'd0,
CS_IDLE = 4'd1,
CS_FILL0 = 4'd2,
CS_FILL1 = 4'd3,
CS_FILL2 = 4'd4,
CS_FILL3 = 4'd5,
CS_FILL4 = 4'd6,
CS_FILL_COMPLETE = 4'd7,
CS_TURN_AROUND = 4'd8,
CS_WRITE_HIT1 = 4'd9,
CS_EX_DELETE = 4'd10;
 
reg [3:0] c_state = CS_IDLE;
reg [C_STATES-1:0] source_sel = 1'd1 << C_CORE;
reg [CACHE_ADDR_WIDTH:0] init_count = 'd0;
wire [TAG_WIDTH-1:0] tag_rdata_way [WAYS-1:0];
wire [CACHE_LINE_WIDTH-1:0] data_rdata_way[WAYS-1:0];
wire [WAYS-1:0] data_wenable_way;
wire [WAYS-1:0] data_hit_way;
wire [WAYS-1:0] tag_wenable_way;
reg [WAYS-1:0] select_way = 'd0;
wire [WAYS-1:0] next_way;
reg [WAYS-1:0] valid_bits_r = 'd0;
 
reg [3:0] random_num = 4'hf;
 
wire [CACHE_ADDR_WIDTH-1:0] tag_address;
wire [TAG_WIDTH-1:0] tag_wdata;
wire tag_wenable;
 
wire [CACHE_LINE_WIDTH-1:0] data_wdata;
wire [CACHE_ADDR_WIDTH-1:0] data_address;
wire [31:0] write_data_word;
 
wire idle_hit;
wire read_miss;
wire read_miss_fill;
wire invalid_read;
wire fill_state;
 
reg [31:0] miss_address = 'd0;
wire [CACHE_LINE_WIDTH-1:0] hit_rdata;
 
wire cache_busy_stall;
wire read_stall;
 
wire enable;
wire [CACHE_ADDR_WIDTH-1:0] address;
wire [31:0] address_c;
reg [31:0] address_r = 'd0;
 
reg [CACHE_LINE_WIDTH-1:0] wb_rdata_burst_r = 'd0;
wire [CACHE_LINE_WIDTH-1:0] wb_rdata_burst;
 
reg [31:0] wb_address = 'd0;
wire rbuf_hit = 'd0;
wire wb_hit;
genvar i;
 
// ======================================
// Address to use for cache access
// ======================================
// If currently stalled then the address for the next
// cycle will be the same as it is in the current cycle
//
// Full address used for the next RAM access: the registered address while
// the core is stalled, otherwise the un-registered next address.
assign address_c = i_core_stall ? i_address : //[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
i_address_nxt; //[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] ;
 
// Line-index field of that address
assign address = address_c[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB];
 
// ======================================
// Outputs
// ======================================
// Read data is the Wishbone word when the requested word is arriving on the
// bus during a fill (wb_hit), otherwise the word of the hit line selected
// by address bits [WORD_SEL_MSB:WORD_SEL_LSB].
assign o_read_data = wb_hit ? i_wb_read_data :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd0 ? hit_rdata [31:0] :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd1 ? hit_rdata [63:32] :
i_address[WORD_SEL_MSB:WORD_SEL_LSB] == 2'd2 ? hit_rdata [95:64] :
hit_rdata [127:96] ;
 
// Don't allow the cache to stall the wb i/f for an exclusive access
// The cache needs a couple of cycles to flush a potential copy of the exclusive
// address, but the wb can do the access in parallel. So there is no
// stall in the state CS_EX_DELETE, even though the cache is out of action.
// This works fine as long as the wb is stalling the core
// NOTE(review): the state machine in this module never enters CS_EX_DELETE,
// so the paragraph above may not apply to this cache - confirm.
assign o_stall = read_stall || cache_busy_stall;
 
// A Wishbone read is requested only in the idle state, on a read miss
assign o_wb_req = read_miss && c_state == CS_IDLE;
 
// ======================================
// Cache State Machine
// ======================================
 
// Little State Machine to Flush Tag RAMS
// Cache control state machine.
//   i_cache_flush overrides everything: go to CS_INIT and restart init_count.
//   CS_INIT            steps init_count through CACHE_LINES values while the
//                      tag RAMs are written with zeros (see tag_wenable and
//                      tag_wdata), then leaves via CS_TURN_AROUND.
//   CS_IDLE            normal lookups; a read miss starts a fill.
//   CS_FILL0..CS_FILL3 each state waits for one i_wb_ready.
//   CS_FILL_COMPLETE   one cycle, then CS_TURN_AROUND.
//   CS_TURN_AROUND     one cycle, then back to CS_IDLE.
// There is no default arm: states not listed leave all registers unchanged.
always @ ( posedge i_clk )
if ( i_cache_flush )
begin
c_state <= CS_INIT;
source_sel <= 1'd1 << C_INIT;
init_count <= 'd0;
`ifdef A25_CACHE_DEBUG
`TB_DEBUG_MESSAGE
$display("Cache Flush");
`endif
end
else
case ( c_state )
CS_INIT :
// init_count doubles as the tag RAM address while source_sel is C_INIT
if ( init_count < CACHE_LINES [CACHE_ADDR_WIDTH:0] )
begin
init_count <= init_count + 1'd1;
source_sel <= 1'd1 << C_INIT;
end
else
begin
source_sel <= 1'd1 << C_CORE;
c_state <= CS_TURN_AROUND;
end
CS_IDLE :
begin
source_sel <= 1'd1 << C_CORE;
// o_wb_req is asserted combinationally in this same cycle
if ( read_miss )
c_state <= CS_FILL0;
end
CS_FILL0 :
begin
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL1;
end
CS_FILL1 :
begin
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL2;
end
CS_FILL2 :
// NOTE(review): the original comment here called this the first read of
// the burst, although two i_wb_ready beats have already been waited for.
// wb read request asserted, wait for ack
if ( i_wb_ready )
c_state <= CS_FILL3;
 
CS_FILL3 :
begin
// Pick a way to write the cache update into
// Either pick one of the invalid caches, or if all are valid, then pick
// one randomly. Both registers update on every cycle spent in this state,
// not only on the cycle in which i_wb_ready arrives.
select_way <= next_way;
random_num <= {random_num[2], random_num[1], random_num[0],
random_num[3]^random_num[2]};
// Last state of the fill: when i_wb_ready arrives here, read_miss_fill
// is asserted and the line and its tag are written in this same cycle.
// wb read request asserted, wait for ack
if ( i_wb_ready )
begin
c_state <= CS_FILL_COMPLETE;
end
end
 
// The fill write was issued in the last CS_FILL3 cycle (read_miss_fill)
CS_FILL_COMPLETE :
begin
// Back to normal cache operations, but
// use physical address for first read as
// address moved before the stall was asserted for the read_miss
// However don't use it if its a non-cached address!
source_sel <= 1'd1 << C_CORE;
c_state <= CS_TURN_AROUND;
end
 
// Ignore the tag read data in this cycle
// Wait 1 cycle to pre-read the cache and return to normal operation
CS_TURN_AROUND :
begin
c_state <= CS_IDLE;
end
endcase
 
 
// ======================================
// Capture WB Block Read - burst of 4 words
// ======================================
// Each accepted beat shifts the line right by one word and inserts the new
// word at the top, so after four beats the first word received sits in
// bits [31:0]. wb_rdata_burst already includes the beat currently on the bus.
assign wb_rdata_burst = {i_wb_read_data, wb_rdata_burst_r[127:32]};
always @ ( posedge i_clk )
if ( i_wb_ready )
wb_rdata_burst_r <= wb_rdata_burst;
 
 
// ======================================
// Miss Address
// ======================================
// Tracks i_address while idle and freezes once a fill leaves CS_IDLE
always @ ( posedge i_clk )
if ( c_state == CS_IDLE )
miss_address <= i_address;
 
 
// address_r is the address that was presented to the RAMs for this cycle
always @ ( posedge i_clk )
address_r <= address_c;
 
// The RAM outputs belong to address_r; if that differs from i_address the
// lookup result is not for the requested address and no miss is declared.
assign invalid_read = address_r != i_address;
 
 
// wb_address starts at the requested address and steps one word per beat;
// the 2-bit add wraps inside the 16-byte line.
always @(posedge i_clk)
if ( o_wb_req )
wb_address <= i_address;
else if ( i_wb_ready && fill_state )
wb_address <= {wb_address[31:4], wb_address[3:2] + 1'd1, 2'd0};
assign fill_state = c_state == CS_FILL0 || c_state == CS_FILL1 || c_state == CS_FILL2 || c_state == CS_FILL3 ;
// The word the core is asking for is on the bus right now
assign wb_hit = i_address == wb_address && i_wb_ready && fill_state;
 
// Tag RAM address priority: fill write, then init counter, then core lookup
assign tag_address = read_miss_fill ? miss_address [CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
source_sel[C_INIT] ? init_count[CACHE_ADDR_WIDTH-1:0] :
address ;
 
assign data_address = read_miss_fill ? miss_address[CACHE_ADDR32_MSB:CACHE_ADDR32_LSB] :
address ;
 
// A fill writes {valid = 1, tag}; every other tag write stores all zeros
assign tag_wdata = read_miss_fill ? {1'd1, miss_address[31:TAG_ADDR32_LSB]} :
{TAG_WIDTH{1'd0}} ;
 
 
// Data comes in off the WB bus in wrap4 with the missed data word first
// Rotate the captured words so that word 0 of the line ends up in bits [31:0]
assign data_wdata = miss_address[3:2] == 2'd0 ? { wb_rdata_burst[127:0] }:
miss_address[3:2] == 2'd1 ? { wb_rdata_burst[95:0], wb_rdata_burst[127:96] }:
miss_address[3:2] == 2'd2 ? { wb_rdata_burst[63:0], wb_rdata_burst[127:64] }:
{ wb_rdata_burst[31:0], wb_rdata_burst[127:32] };
 
 
// Single-cycle strobe: the beat accepted in CS_FILL3 completes the line
assign read_miss_fill = c_state == CS_FILL3 && i_wb_ready;
 
 
// Tag writes are enabled for everything except plain core accesses
assign tag_wenable = read_miss_fill ? 1'd1 :
source_sel[C_INVA] ? 1'd1 :
source_sel[C_FILL] ? 1'd1 :
source_sel[C_INIT] ? 1'd1 :
source_sel[C_CORE] ? 1'd0 :
1'd0 ;
 
assign enable = i_select && i_cache_enable;
 
// data_hit_way bits are only set in CS_IDLE, hence the name
assign idle_hit = |data_hit_way;
 
assign read_miss = enable && !idle_hit && !invalid_read;
 
// Stall the core until the word is available from the cache or the bus
assign read_stall = enable && !idle_hit && !rbuf_hit && !wb_hit;
 
assign cache_busy_stall = (c_state == CS_TURN_AROUND && enable) || c_state == CS_INIT;
 
 
// ======================================
// Instantiate RAMS
// ======================================
 
// One tag RAM and one data RAM per way. The module type of each instance is
// chosen by the preprocessor: exactly one of the three names below must
// survive for the instantiation to parse.
// NOTE(review): this assumes XILINX_FPGA is defined whenever one of the
// XILINX_*_FPGA macros is - confirm in the configuration defines.
generate
for ( i=0; i<WAYS;i=i+1 ) begin : rams
 
// Tag RAMs
`ifdef XILINX_SPARTAN6_FPGA
xs6_sram_256x21_line_en
`endif
 
`ifdef XILINX_VIRTEX6_FPGA
xv6_sram_256x21_line_en
`endif
 
`ifndef XILINX_FPGA
generic_sram_line_en
`endif
 
#(
.DATA_WIDTH ( TAG_WIDTH ),
.INITIALIZE_TO_ZERO ( 1 ),
.ADDRESS_WIDTH ( CACHE_ADDR_WIDTH ))
u_tag (
.i_clk ( i_clk ),
.i_write_data ( tag_wdata ),
.i_write_enable ( tag_wenable_way[i] ),
.i_address ( tag_address ),
 
.o_read_data ( tag_rdata_way[i] )
);
// Data RAMs
`ifdef XILINX_SPARTAN6_FPGA
xs6_sram_256x128_byte_en
`endif
 
`ifdef XILINX_VIRTEX6_FPGA
xv6_sram_256x128_byte_en
`endif
 
`ifndef XILINX_FPGA
generic_sram_byte_en
`endif
 
#(
.DATA_WIDTH ( CACHE_LINE_WIDTH) ,
.ADDRESS_WIDTH ( CACHE_ADDR_WIDTH) )
u_data (
.i_clk ( i_clk ),
.i_write_data ( data_wdata ),
.i_write_enable ( data_wenable_way[i] ),
.i_address ( data_address ),
// All byte enables set: data writes always replace the whole line
.i_byte_enable ( {CACHE_LINE_WIDTH/8{1'd1}} ),
.o_read_data ( data_rdata_way[i] )
);
 
 
// Per tag-ram write-enable
// During init every way is written; otherwise only the selected way
assign tag_wenable_way[i] = tag_wenable && ( select_way[i] || source_sel[C_INIT] );
 
// Per data-ram write-enable
assign data_wenable_way[i] = ( source_sel[C_FILL] || read_miss_fill ) && select_way[i];
 
// Per data-ram idle_hit flag
// Hit = valid bit set, stored tag equals the address tag, and cache idle
assign data_hit_way[i] = tag_rdata_way[i][TAG_WIDTH-1] &&
tag_rdata_way[i][TAG_ADDR_WIDTH-1:0] == i_address[31:TAG_ADDR32_LSB] &&
c_state == CS_IDLE;
end
endgenerate
 
 
// ======================================
// Register Valid Bits
// ======================================
// Capture the valid flag (top bit of the tag read data) of every way while
// the cache is idle. valid_bits_r feeds pick_way() when a fill needs a way.
// One flop is generated per way, so the logic follows WAYS directly instead
// of relying on a hand-unrolled concatenation for each supported size.
generate
    genvar vb_way;
    for ( vb_way = 0; vb_way < WAYS; vb_way = vb_way + 1 ) begin : valid_bits

        always @ ( posedge i_clk )
            if ( c_state == CS_IDLE )
                valid_bits_r[vb_way] <= tag_rdata_way[vb_way][TAG_WIDTH-1];
    end
endgenerate
 
 
// ======================================
// Select read hit data
// ======================================
// hit_rdata carries the complete cache line of the way that reported a hit.
// Exactly one branch is elaborated; any WAYS other than 2, 3 or 4 selects the
// 8-way version. The chain is a priority select with way 0 first, and when
// no way hits the line is driven to all ones.
generate
if ( WAYS == 2 ) begin : read_data_2ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
else if ( WAYS == 3 ) begin : read_data_3ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
else if ( WAYS == 4 ) begin : read_data_4ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
data_hit_way[3] ? data_rdata_way[3] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
// Fall-through branch: indexes ways 0..7, so WAYS must be 8 to reach here safely
else begin : read_data_8ways
 
assign hit_rdata = data_hit_way[0] ? data_rdata_way[0] :
data_hit_way[1] ? data_rdata_way[1] :
data_hit_way[2] ? data_rdata_way[2] :
data_hit_way[3] ? data_rdata_way[3] :
data_hit_way[4] ? data_rdata_way[4] :
data_hit_way[5] ? data_rdata_way[5] :
data_hit_way[6] ? data_rdata_way[6] :
data_hit_way[7] ? data_rdata_way[7] :
{CACHE_LINE_WIDTH{1'd1}} ; // all 1's for debug
end
endgenerate
 
 
// ======================================
// Function to select the way to use
// for fills
// ======================================
// next_way is the one-hot select of the way that the next fill will use.
// Each branch defines its own pick_way() sized for that number of ways:
//   valid_bits - per-way valid flags (the caller passes valid_bits_r)
//   random_num - pseudo-random value, only bits [3:1] are examined
// The lowest-numbered way whose valid flag is 0 is chosen; when every way
// is valid the victim is selected from random_num[3:1].
generate
if ( WAYS == 2 ) begin : pick_way_2ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 2'b01;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 2'b10;
        else
            begin
            // All ways occupied so pick one randomly
            // (four of the eight values go to each way)
            case (random_num[3:1])
                3'd0, 3'd3,
                3'd5, 3'd6: pick_way = 2'b10;
                default:    pick_way = 2'b01;
            endcase
            end
    end
    endfunction
end
else if ( WAYS == 3 ) begin : pick_way_3ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 3'b001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 3'b010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 3'b100;
        else
            begin
            // All ways occupied so pick one randomly.
            // 0,1,2 -> way 1;  3,4 -> way 2;  5,6,7 -> way 0.
            // Each value is listed once: a value repeated in a later arm
            // can never match because the first matching arm wins.
            case (random_num[3:1])
                3'd0, 3'd1, 3'd2: pick_way = 3'b010;
                3'd3, 3'd4:       pick_way = 3'b100;
                default:          pick_way = 3'b001;
            endcase
            end
    end
    endfunction
end
else if ( WAYS == 4 ) begin : pick_way_4ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 4'b0001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 4'b0010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 4'b0100;
        else if ( valid_bits[3] == 1'd0 )
            // way 3 not occupied so use it
            pick_way = 4'b1000;
        else
            begin
            // All ways occupied so pick one randomly
            // (two of the eight values go to each way)
            case (random_num[3:1])
                3'd0, 3'd1: pick_way = 4'b0100;
                3'd2, 3'd3: pick_way = 4'b1000;
                3'd4, 3'd5: pick_way = 4'b0001;
                default:    pick_way = 4'b0010;
            endcase
            end
    end
    endfunction
end
// Fall-through branch: the literals below are 8 bits wide, so WAYS must be 8
else begin : pick_way_8ways

    assign next_way = pick_way ( valid_bits_r, random_num );

    function [WAYS-1:0] pick_way;
    input [WAYS-1:0] valid_bits;
    input [3:0]      random_num;
    begin
        if ( valid_bits[0] == 1'd0 )
            // way 0 not occupied so use it
            pick_way = 8'b00000001;
        else if ( valid_bits[1] == 1'd0 )
            // way 1 not occupied so use it
            pick_way = 8'b00000010;
        else if ( valid_bits[2] == 1'd0 )
            // way 2 not occupied so use it
            pick_way = 8'b00000100;
        else if ( valid_bits[3] == 1'd0 )
            // way 3 not occupied so use it
            pick_way = 8'b00001000;
        else if ( valid_bits[4] == 1'd0 )
            // way 4 not occupied so use it
            pick_way = 8'b00010000;
        else if ( valid_bits[5] == 1'd0 )
            // way 5 not occupied so use it
            pick_way = 8'b00100000;
        else if ( valid_bits[6] == 1'd0 )
            // way 6 not occupied so use it
            pick_way = 8'b01000000;
        else if ( valid_bits[7] == 1'd0 )
            // way 7 not occupied so use it
            pick_way = 8'b10000000;
        else
            begin
            // All ways occupied so pick one randomly
            // (one of the eight values per way)
            case (random_num[3:1])
                3'd0:    pick_way = 8'b00010000;
                3'd1:    pick_way = 8'b00100000;
                3'd2:    pick_way = 8'b01000000;
                3'd3:    pick_way = 8'b10000000;
                3'd4:    pick_way = 8'b00000001;
                3'd5:    pick_way = 8'b00000010;
                3'd6:    pick_way = 8'b00000100;
                default: pick_way = 8'b00001000;
            endcase
            end
    end
    endfunction
end
endgenerate
 
 
// ========================================================
// Debug WB bus - not synthesizable
// ========================================================
//synopsys translate_off
// ASCII-string views of source_sel and c_state. They sit inside the
// synopsys translate_off region, so they exist for simulation only.
// xSOURCE_SEL holds 6 characters, xC_STATE holds 20 characters.
wire [(6*8)-1:0] xSOURCE_SEL;
wire [(20*8)-1:0] xC_STATE;
 
// Name of the first set bit of source_sel, tested in the order
// C_CORE, C_INIT, C_FILL, C_INVA. The fallback literal "UNKNON" is
// exactly 6 characters long, which matches the width of xSOURCE_SEL.
assign xSOURCE_SEL = source_sel[C_CORE] ? "C_CORE" :
source_sel[C_INIT] ? "C_INIT" :
source_sel[C_FILL] ? "C_FILL" :
source_sel[C_INVA] ? "C_INVA" :
"UNKNON" ;
// Name of the current c_state value; "UNKNOWN" when c_state matches
// none of the CS_* constants listed here.
assign xC_STATE = c_state == CS_INIT ? "CS_INIT" :
c_state == CS_IDLE ? "CS_IDLE" :
c_state == CS_FILL0 ? "CS_FILL0" :
c_state == CS_FILL1 ? "CS_FILL1" :
c_state == CS_FILL2 ? "CS_FILL2" :
c_state == CS_FILL3 ? "CS_FILL3" :
c_state == CS_FILL4 ? "CS_FILL4" :
c_state == CS_FILL_COMPLETE ? "CS_FILL_COMPLETE" :
c_state == CS_EX_DELETE ? "CS_EX_DELETE" :
c_state == CS_TURN_AROUND ? "CS_TURN_AROUND" :
c_state == CS_WRITE_HIT1 ? "CS_WRITE_HIT1" :
"UNKNOWN" ;
 
 
// Simulation-only configuration checks.
// For each supported value of WAYS (2, 3, 4 or 8) an error is reported
// on every rising edge of i_clk at which more than one bit of
// data_hit_way is set. For any other value of WAYS the unsupported
// configuration is reported from an initial block.
generate
    if ( WAYS == 2 ) begin : check_hit_2ways

        always @( posedge i_clk )
            begin
            if ( 4'd1 < ( data_hit_way[0] + data_hit_way[1] ) )
                begin
                `TB_ERROR_MESSAGE
                $display("Hit in more than one cache ways!");
                end
            end

    end
    else if ( WAYS == 3 ) begin : check_hit_3ways

        always @( posedge i_clk )
            begin
            if ( 4'd1 < ( data_hit_way[0] + data_hit_way[1] + data_hit_way[2] ) )
                begin
                `TB_ERROR_MESSAGE
                $display("Hit in more than one cache ways!");
                end
            end

    end
    else if ( WAYS == 4 ) begin : check_hit_4ways

        always @( posedge i_clk )
            begin
            if ( 4'd1 < ( data_hit_way[0] + data_hit_way[1] +
                          data_hit_way[2] + data_hit_way[3] ) )
                begin
                `TB_ERROR_MESSAGE
                $display("Hit in more than one cache ways!");
                end
            end

    end
    else if ( WAYS == 8 ) begin : check_hit_8ways

        always @( posedge i_clk )
            begin
            if ( 4'd1 < ( data_hit_way[0] + data_hit_way[1] +
                          data_hit_way[2] + data_hit_way[3] +
                          data_hit_way[4] + data_hit_way[5] +
                          data_hit_way[6] + data_hit_way[7] ) )
                begin
                `TB_ERROR_MESSAGE
                $display("Hit in more than one cache ways!");
                end
            end

    end
    else begin : check_hit_nways

        // Any other way count is not supported by this cache
        initial
            begin
            `TB_ERROR_MESSAGE
            $display("Unsupported number of ways %0d", WAYS);
            $display("Set A25_ICACHE_WAYS in a25_config_defines.v to either 2,3,4 or 8");
            end

    end
endgenerate
//synopsys translate_on
endmodule
 
/amber25/a25_mem.v
0,0 → 1,210
//////////////////////////////////////////////////////////////////
// //
// Memory Access - Instantiates the memory access stage //
// sub-modules of the Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Instantiates the Data Cache //
// Also contains a little bit of logic to decode memory //
// accesses to decide if they are cached or not //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Memory access stage wrapper.
// Decides per data access whether it goes through the data cache
// (u_dcache) or straight to the wishbone interface as an uncached
// access, produces the stage stall output and returns read data to
// the rest of the core. Read data, its valid flag and the load
// destination are also captured in registers so that they can be
// presented from the registers when use_mem_reg is set.
module a25_mem
(
input                       i_clk,
input                       i_fetch_stall,          // Fetch stage asserting stall
output                      o_mem_stall,            // Mem stage asserting stall

input       [31:0]          i_daddress,
input                       i_daddress_valid,
input       [31:0]          i_daddress_nxt,         // un-registered version of address to the cache rams
input       [31:0]          i_write_data,
input                       i_write_enable,
input                       i_exclusive,            // high for read part of swap access
input       [3:0]           i_byte_enable,
input       [7:0]           i_exec_load_rd,         // The destination register for a load instruction
input                       i_cache_enable,         // cache enable
input                       i_cache_flush,          // cache flush
input       [31:0]          i_cacheable_area,       // each bit corresponds to 2MB address space

output      [31:0]          o_mem_read_data,
output                      o_mem_read_data_valid,
output      [9:0]           o_mem_load_rd,          // The destination register for a load instruction

// Wishbone accesses
output                      o_wb_cached_req,        // Cached Request
output                      o_wb_uncached_req,      // Uncached Request
output                      o_wb_qword,             // High for a quad-word read request
output                      o_wb_write,             // Read=0, Write=1
output      [3:0]           o_wb_byte_enable,       // byte enable
output      [31:0]          o_wb_write_data,
output      [31:0]          o_wb_address,           // wb bus
input       [31:0]          i_wb_read_data,         // wb bus
input                       i_wb_cached_ready,      // wishbone access complete and read data valid
input                       i_wb_uncached_ready     // wishbone access complete and read data valid
);

`include "memory_configuration.v"

// --------------------------------------
// Combinational signals
// --------------------------------------
wire    [31:0]  cache_read_data;
wire            address_cachable;
wire            cache_access_ok;            // address, enable and access type all allow caching
wire            sel_cache_p;
wire            sel_cache;
wire            cached_wb_req;
wire            uncached_data_access;
wire            uncached_data_access_p;
wire            cache_stall;
wire            uncached_wb_wait;
wire            daddress_valid_p;           // pulse
wire    [9:0]   mem_load_rd_c;
wire    [31:0]  mem_read_data_c;
wire            mem_read_data_valid_c;
wire            use_mem_reg;
wire            fetch_only_stall;
wire            void_output;
wire            wb_stop;
wire            any_stall;                  // fetch stage or mem stage is stalling

// --------------------------------------
// Registers
// --------------------------------------
reg             uncached_wb_req_r       = 'd0;
reg             uncached_wb_stop_r      = 'd0;
reg             cached_wb_stop_r        = 'd0;
reg     [31:0]  mem_read_data_r         = 'd0;
reg             mem_read_data_valid_r   = 'd0;
reg     [9:0]   mem_load_rd_r           = 'd0;
reg             mem_stall_r             = 'd0;
reg             fetch_only_stall_r      = 'd0;
reg             daddress_valid_stop_r   = 'd0;


// ======================================
// Memory Decode
// ======================================
// An address is cachable when in_cachable_mem() accepts it and the
// i_cacheable_area bit indexed by address bits [25:21] is set
assign address_cachable       = in_cachable_mem( i_daddress ) && i_cacheable_area[i_daddress[25:21]];
assign cache_access_ok        = address_cachable && i_cache_enable && !i_exclusive;

assign sel_cache_p            = daddress_valid_p && cache_access_ok;
assign sel_cache              = i_daddress_valid && cache_access_ok;

assign uncached_data_access   = i_daddress_valid && !( sel_cache || cache_stall );
assign uncached_data_access_p = daddress_valid_p && !( sel_cache || cache_stall );


// ======================================
// Outputs back to the core
// ======================================
assign o_mem_stall            = uncached_wb_wait || cache_stall;
assign any_stall              = i_fetch_stall || o_mem_stall;

// Choose between the registered copies and the live values
assign use_mem_reg            = wb_stop && !mem_stall_r;

assign o_mem_read_data        = use_mem_reg ? mem_read_data_r : mem_read_data_c;
assign o_mem_load_rd          = use_mem_reg ? mem_load_rd_r   : mem_load_rd_c;
assign o_mem_read_data_valid  = !void_output && (use_mem_reg ? mem_read_data_valid_r : mem_read_data_valid_c);

// Live read data comes from the cache or from the wishbone bus;
// a fixed pattern is returned when neither source is selected
assign mem_read_data_c        = sel_cache            ? cache_read_data :
                                uncached_data_access ? i_wb_read_data  :
                                                       32'h76543210    ;

// The two low address bits travel with the load destination
assign mem_load_rd_c          = {i_daddress[1:0], i_exec_load_rd};
assign mem_read_data_valid_c  = i_daddress_valid && !i_write_enable && !o_mem_stall;


// ======================================
// Wishbone requests
// ======================================
assign o_wb_address           = {i_daddress[31:2], 2'd0};
assign o_wb_write_data        = i_write_data;
assign o_wb_write             = i_write_enable;
assign o_wb_byte_enable       = i_byte_enable;

assign o_wb_cached_req        = !cached_wb_stop_r   && cached_wb_req;
assign o_wb_uncached_req      = !uncached_wb_stop_r && uncached_data_access_p;
assign o_wb_qword             = o_wb_cached_req && !i_write_enable;

// Stall while an uncached request, new or still pending, has not been answered
assign uncached_wb_wait       = (o_wb_uncached_req || uncached_wb_req_r) && !i_wb_uncached_ready;


// ======================================
// Stall bookkeeping
// ======================================
assign fetch_only_stall       = i_fetch_stall && !o_mem_stall;

assign void_output            = fetch_only_stall_r && (fetch_only_stall || mem_read_data_valid_r);

// pulse this signal
assign daddress_valid_p       = i_daddress_valid && !daddress_valid_stop_r;

assign wb_stop                = uncached_wb_stop_r || cached_wb_stop_r;

always @( posedge i_clk )
    begin
    // remember an uncached request until its ready arrives
    uncached_wb_req_r       <= uncached_wb_wait;

    fetch_only_stall_r      <= fetch_only_stall;

    // each stop flag is set by its event and held for as long as either stage stalls
    uncached_wb_stop_r      <= (uncached_wb_stop_r    || uncached_data_access_p) && any_stall;
    cached_wb_stop_r        <= (cached_wb_stop_r      || cached_wb_req)          && any_stall;
    daddress_valid_stop_r   <= (daddress_valid_stop_r || daddress_valid_p)       && any_stall;

    // hold this until the mem access completes
    mem_stall_r             <= o_mem_stall;
    end

// Capture the live read results unless a stop flag is set while the
// mem stage itself is not stalling
always @( posedge i_clk )
    if ( !wb_stop || o_mem_stall )
        begin
        mem_read_data_r         <= mem_read_data_c;
        mem_load_rd_r           <= mem_load_rd_c;
        mem_read_data_valid_r   <= mem_read_data_valid_c;
        end


// ======================================
// L1 Data Cache
// ======================================
a25_dcache u_dcache (
    .i_clk                      ( i_clk                 ),
    .i_fetch_stall              ( i_fetch_stall         ),
    .o_stall                    ( cache_stall           ),
    .i_request                  ( sel_cache_p           ),
    .i_exclusive                ( i_exclusive           ),
    .i_write_data               ( i_write_data          ),
    .i_write_enable             ( i_write_enable        ),
    .i_address                  ( i_daddress            ),
    .i_address_nxt              ( i_daddress_nxt        ),
    .i_byte_enable              ( i_byte_enable         ),

    .i_cache_enable             ( i_cache_enable        ),
    .i_cache_flush              ( i_cache_flush         ),
    .o_read_data                ( cache_read_data       ),
    .o_wb_req                   ( cached_wb_req         ),
    .i_wb_read_data             ( i_wb_read_data        ),
    .i_wb_ready                 ( i_wb_cached_ready     )
);



endmodule
 
/amber25/a25_wishbone.v
0,0 → 1,416
//////////////////////////////////////////////////////////////////
// //
// Wishbone master interface for the Amber 25 core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Turns memory access requests from the execute stage and //
// cache into wishbone bus cycles. For 4-word read requests //
// from the cache and swap accesses ( read followed by write //
// to the same address) from the execute stage, //
// a block transfer is done. All other requests result in //
// single word transfers. //
// //
// Write accesses can be done in a single clock cycle on //
// the wishbone bus, if the destination allows it. The //
// next transfer will begin immediately on the //
// next cycle on the bus. This looks like a block transfer //
// and does hold ownership of the wishbone bus, preventing //
// the other master ( the ethernet MAC) from gaining //
// ownership between those two cycles. But otherwise it would //
// be necessary to insert a wait cycle after every write, //
// slowing down the performance of the core by around 5 to //
// 10%. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Wishbone master for the Amber 25 core.
// Accepts read requests from the instruction cache and read/write
// requests (cached and uncached) from the data side, and drives them
// onto a single wishbone master port. Data-side requests take priority
// over instruction-cache requests. Quad-word reads are performed as a
// 4-beat burst whose address bits [3:2] wrap. One additional write can
// be buffered while a previous write is still waiting for its ack.
module a25_wishbone
(
input                       i_clk,

// Instruction Cache Accesses
input                       i_icache_req,
input                       i_icache_qword,
input       [31:0]          i_icache_address,
output      [31:0]          o_icache_read_data,
output                      o_icache_ready,

// Data Cache Accesses
input                       i_exclusive,            // high for read part of swap access
input                       i_dcache_cached_req,
input                       i_dcache_uncached_req,
input                       i_dcache_qword,
input                       i_dcache_write,
input       [31:0]          i_dcache_write_data,
input       [3:0]           i_dcache_byte_enable,   // valid for writes only
input       [31:0]          i_dcache_address,
output      [31:0]          o_dcache_read_data,
output                      o_dcache_cached_ready,
output                      o_dcache_uncached_ready,

// Wishbone Bus
output reg  [31:0]          o_wb_adr = 'd0,
output reg  [3:0]           o_wb_sel = 'd0,
output reg                  o_wb_we  = 'd0,
input       [31:0]          i_wb_dat,
output reg  [31:0]          o_wb_dat = 'd0,
output reg                  o_wb_cyc = 'd0,
output reg                  o_wb_stb = 'd0,
input                       i_wb_ack,
input                       i_wb_err

);


// State encoding; the width matches the 3-bit state register below
localparam [2:0] WB_IDLE            = 3'd0,
                 WB_BURST1          = 3'd1,
                 WB_BURST2          = 3'd2,
                 WB_BURST3          = 3'd3,
                 WB_WAIT_ACK        = 3'd4;

reg     [2:0]               wishbone_st = WB_IDLE;

wire                        icache_read_req_c;
wire                        icache_read_qword_c;
wire    [31:0]              icache_read_addr_c;
wire                        dcache_read_qword_c;

wire                        dcache_req_c;
wire                        write_req_c;
wire                        dcache_cached_rreq_c;
wire                        dcache_cached_wreq_c;
wire                        dcache_uncached_rreq_c;
wire                        dcache_uncached_wreq_c;

wire                        dcache_cached_rreq_in;
wire                        dcache_cached_wreq_in;
wire                        dcache_uncached_rreq_in;
wire                        dcache_uncached_wreq_in;

reg                         dcache_cached_rreq_r   = 'd0;
reg                         dcache_cached_wreq_r   = 'd0;
reg                         dcache_uncached_rreq_r = 'd0;
reg                         dcache_uncached_wreq_r = 'd0;

wire                        dcache_cached_wready;
wire                        dcache_uncached_wready;
wire                        dcache_cached_rready;
wire                        dcache_uncached_rready;

wire                        start_access;
wire    [3:0]               byte_enable;
reg                         exclusive_access = 'd0;
wire                        read_ack;
wire                        wait_write_ack;
reg                         icache_read_req_r   = 'd0;
reg                         icache_read_qword_r = 'd0;
reg     [31:0]              icache_read_addr_r  = 'd0;
reg                         dcache_read_qword_r = 'd0;
wire                        icache_read_req_in;
wire                        icache_read_ready;
reg                         servicing_dcache_cached_read_r   = 'd0;
reg                         servicing_dcache_uncached_read_r = 'd0;
reg                         servicing_icache_r  = 'd0;
wire                        extra_write;
reg                         extra_write_r = 'd0;
reg     [31:0]              extra_write_data_r;
reg     [31:0]              extra_write_address_r;
reg     [3:0]               extra_write_be_r;

// An ack received while the bus is not driving a write
assign read_ack                 = !o_wb_we && i_wb_ack;

// A read is ready when the ack belongs to the read being serviced
assign dcache_cached_rready     = dcache_cached_rreq_r   && servicing_dcache_cached_read_r   && read_ack;
assign dcache_uncached_rready   = dcache_uncached_rreq_r && servicing_dcache_uncached_read_r && read_ack;


assign o_dcache_cached_ready    = dcache_cached_rready   || dcache_cached_wready;
assign o_dcache_uncached_ready  = dcache_uncached_rready || dcache_uncached_wready;
assign o_dcache_read_data       = i_wb_dat;
assign icache_read_ready        = servicing_icache_r && read_ack;
assign o_icache_ready           = icache_read_ready;
assign o_icache_read_data       = i_wb_dat;


// Split the incoming data-side requests into read and write flavours
assign dcache_cached_rreq_in    = i_dcache_cached_req   && !i_dcache_write;
assign dcache_cached_wreq_in    = i_dcache_cached_req   &&  i_dcache_write;
assign dcache_uncached_rreq_in  = i_dcache_uncached_req && !i_dcache_write;
assign dcache_uncached_wreq_in  = i_dcache_uncached_req &&  i_dcache_write;
assign icache_read_req_in       = i_icache_req && !o_icache_ready;

// Combined (new or remembered) requests, cleared by the ack of the access being serviced
assign dcache_cached_rreq_c     = ( dcache_cached_rreq_in   || dcache_cached_rreq_r   ) && !(servicing_dcache_cached_read_r   && read_ack);
assign dcache_uncached_rreq_c   = ( dcache_uncached_rreq_in || dcache_uncached_rreq_r ) && !(servicing_dcache_uncached_read_r && read_ack);

assign dcache_read_qword_c      = ( i_dcache_qword          || dcache_read_qword_r    ) && !(servicing_dcache_cached_read_r   && read_ack);

assign icache_read_req_c        = ( icache_read_req_in      || icache_read_req_r      ) && !(servicing_icache_r && read_ack);
assign icache_read_qword_c      = ( i_icache_qword          || icache_read_qword_r    ) && !(servicing_icache_r && read_ack);
assign icache_read_addr_c       = i_icache_req ? i_icache_address : icache_read_addr_r;

assign dcache_req_c             = dcache_cached_rreq_c || dcache_cached_wreq_c || dcache_uncached_rreq_c || dcache_uncached_wreq_c;
assign write_req_c              = dcache_cached_wreq_c || dcache_uncached_wreq_c;

// A new access may start unless a write is still waiting for its ack
assign start_access             = !wait_write_ack && (dcache_req_c || icache_read_req_c);

// Writes use the byte enables supplied with the request;
// for reads the byte enable is always 4'hf
assign byte_enable              = write_req_c ? i_dcache_byte_enable : 4'hf;

// Writes are reported ready whenever the state machine is in WB_IDLE
assign dcache_cached_wready     = (dcache_cached_wreq_c   && wishbone_st == WB_IDLE);
assign dcache_uncached_wready   = (dcache_uncached_wreq_c && wishbone_st == WB_IDLE);
assign dcache_cached_wreq_c     = dcache_cached_wreq_in   || dcache_cached_wreq_r;
assign dcache_uncached_wreq_c   = dcache_uncached_wreq_in || dcache_uncached_wreq_r;


// ======================================
// Register Accesses
// ======================================

// A write reported ready in WB_IDLE while no ack is present is
// captured in the extra_write_* buffer registers
assign extra_write = wishbone_st == WB_IDLE && !i_wb_ack && ((dcache_cached_wreq_c   && dcache_cached_wready  )||
                                                             (dcache_uncached_wreq_c && dcache_uncached_wready));

// Write data register: the buffered write's data is loaded when that
// write is launched, otherwise the live write data is taken at the
// start of an access
always @( posedge i_clk )
    if ( wishbone_st == WB_WAIT_ACK && i_wb_ack && extra_write_r )
        o_wb_dat <= extra_write_data_r;
    else if ( start_access )
        o_wb_dat <= i_dcache_write_data;


always @( posedge i_clk )
    begin
    icache_read_req_r       <= icache_read_req_in || icache_read_req_c;
    icache_read_qword_r     <= i_icache_qword     || icache_read_qword_c;
    if ( i_icache_req ) icache_read_addr_r <= i_icache_address;
    dcache_read_qword_r     <= i_dcache_qword     || dcache_read_qword_c;
    dcache_cached_wreq_r    <= dcache_cached_wreq_c   && (wishbone_st != WB_IDLE || (o_wb_stb && !i_wb_ack));
    dcache_uncached_wreq_r  <= dcache_uncached_wreq_c && (wishbone_st != WB_IDLE || (o_wb_stb && !i_wb_ack));
    // A buffer to hold a second write while one is in progress
    if ( extra_write )
        begin
        extra_write_data_r      <= i_dcache_write_data;
        extra_write_address_r   <= i_dcache_address;
        extra_write_be_r        <= i_dcache_byte_enable;
        end


    // The flag can be set during any state but only cleared during WB_IDLE or WB_WAIT_ACK
    if ( dcache_cached_rreq_r )
        begin
        if ( wishbone_st == WB_IDLE || wishbone_st == WB_WAIT_ACK )
            dcache_cached_rreq_r <= dcache_cached_rreq_c && !o_dcache_cached_ready;
        end
    else
        dcache_cached_rreq_r <= dcache_cached_rreq_c && !o_dcache_cached_ready;
    if ( dcache_uncached_rreq_r )
        begin
        if ( wishbone_st == WB_IDLE || wishbone_st == WB_WAIT_ACK )
            dcache_uncached_rreq_r <= dcache_uncached_rreq_c && !o_dcache_uncached_ready;
        end
    else
        dcache_uncached_rreq_r <= dcache_uncached_rreq_c && !o_dcache_uncached_ready;
    end

// A write strobe is on the bus and has not been acknowledged yet
assign wait_write_ack = o_wb_stb && o_wb_we && !i_wb_ack;


// ======================================
// Bus state machine
// ======================================
always @( posedge i_clk )
    case ( wishbone_st )
        WB_IDLE :
            begin
            extra_write_r <= extra_write;
            if ( start_access )
                begin
                o_wb_stb            <= 1'd1;
                o_wb_cyc            <= 1'd1;
                o_wb_sel            <= byte_enable;
                end
            else if ( !wait_write_ack )
                begin
                o_wb_stb            <= 1'd0;
                // Hold cyc high after an exclusive access
                // to hold ownership of the wishbone bus
                o_wb_cyc            <= exclusive_access;
                end

            if ( wait_write_ack )
                begin
                // still waiting for last (write) access to complete
                wishbone_st                      <= WB_WAIT_ACK;
                servicing_dcache_cached_read_r   <= dcache_cached_rreq_c;
                servicing_dcache_uncached_read_r <= dcache_uncached_rreq_c;
                end
            // dcache accesses have priority over icache
            else if ( dcache_cached_rreq_c || dcache_uncached_rreq_c )
                begin
                if ( dcache_cached_rreq_c )
                    servicing_dcache_cached_read_r   <= 1'd1;
                else if ( dcache_uncached_rreq_c )
                    servicing_dcache_uncached_read_r <= 1'd1;
                if ( dcache_read_qword_c )
                    wishbone_st         <= WB_BURST1;
                else
                    wishbone_st         <= WB_WAIT_ACK;
                exclusive_access    <= i_exclusive;
                end
            // The core does not currently issue exclusive write requests
            // but there's no reason why this might not be added some
            // time in the future so allow for it here
            else if ( write_req_c )
                begin
                exclusive_access    <= i_exclusive;
                end
            // do a burst of 4 read to fill a cache line
            else if ( icache_read_req_c && icache_read_qword_c )
                begin
                wishbone_st         <= WB_BURST1;
                exclusive_access    <= 1'd0;
                servicing_icache_r  <= 1'd1;
                end
            // single word read request from fetch stage
            else if ( icache_read_req_c )
                begin
                wishbone_st         <= WB_WAIT_ACK;
                exclusive_access    <= 1'd0;
                servicing_icache_r  <= 1'd1;
                end

            if ( start_access )
                begin
                if ( dcache_req_c )
                    begin
                    o_wb_we              <= write_req_c;
                    // only update these on new wb access to make debug easier
                    o_wb_adr[31:2]       <= i_dcache_address[31:2];
                    // low address bits are derived from the byte enables
                    o_wb_adr[1:0]        <= byte_enable == 4'b0001 ? 2'd0 :
                                            byte_enable == 4'b0010 ? 2'd1 :
                                            byte_enable == 4'b0100 ? 2'd2 :
                                            byte_enable == 4'b1000 ? 2'd3 :
                                            byte_enable == 4'b0011 ? 2'd0 :
                                            byte_enable == 4'b1100 ? 2'd2 :
                                                                     2'd0 ;
                    end
                else
                    begin
                    o_wb_we              <= 1'd0;
                    o_wb_adr[31:0]       <= {icache_read_addr_c[31:2], 2'd0};
                    end
                end
            end

        // Read burst, wait for first ack
        WB_BURST1:
            if ( i_wb_ack )
                begin
                // burst of 4 that wraps
                o_wb_adr[3:2]   <= o_wb_adr[3:2] + 1'd1;
                wishbone_st     <= WB_BURST2;
                end

        // Read burst, wait for second ack
        WB_BURST2:
            if ( i_wb_ack )
                begin
                // burst of 4 that wraps
                o_wb_adr[3:2]   <= o_wb_adr[3:2] + 1'd1;
                wishbone_st     <= WB_BURST3;
                end

        // Read burst, wait for third ack
        WB_BURST3:
            if ( i_wb_ack )
                begin
                // burst of 4 that wraps
                o_wb_adr[3:2]   <= o_wb_adr[3:2] + 1'd1;
                wishbone_st     <= WB_WAIT_ACK;
                end


        // Wait for the wishbone ack to be asserted
        WB_WAIT_ACK:
            if ( i_wb_ack )
                begin
                // Another write that was acked and needs to be sent before returning to IDLE ?
                if ( extra_write_r )
                    begin
                    extra_write_r       <= 'd0;
                    o_wb_stb            <= 1'd1;
                    // cyc must accompany stb: the Wishbone specification
                    // requires CYC_O to be asserted whenever STB_O is, so it
                    // cannot follow exclusive_access here (normally low)
                    o_wb_cyc            <= 1'd1;
                    o_wb_sel            <= extra_write_be_r;
                    o_wb_we             <= 1'd1;
                    o_wb_adr[31:0]      <= extra_write_address_r;
                    end
                else
                    begin
                    wishbone_st         <= WB_IDLE;
                    o_wb_stb            <= 1'd0;
                    // cyc stays high only to keep the bus after an exclusive access
                    o_wb_cyc            <= exclusive_access;
                    o_wb_we             <= 1'd0;
                    servicing_dcache_cached_read_r   <= 1'd0;
                    servicing_dcache_uncached_read_r <= 1'd0;
                    servicing_icache_r  <= 1'd0;
                    end
                end
    endcase

// ========================================================
// Debug Wishbone bus - not synthesizable
// ========================================================
//synopsys translate_off
wire    [(14*8)-1:0]   xWB_STATE;


// ASCII name of the current state, for viewing in simulation
assign xWB_STATE  = wishbone_st == WB_IDLE       ? "WB_IDLE"       :
                    wishbone_st == WB_BURST1     ? "WB_BURST1"     :
                    wishbone_st == WB_BURST2     ? "WB_BURST2"     :
                    wishbone_st == WB_BURST3     ? "WB_BURST3"     :
                    wishbone_st == WB_WAIT_ACK   ? "WB_WAIT_ACK"   :
                                                   "UNKNOWN"       ;

//synopsys translate_on
endmodule
 
/amber25/a25_register_bank.v
0,0 → 1,389
//////////////////////////////////////////////////////////////////
// //
// Register Bank for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Contains 37 32-bit registers, 16 of which are visible //
// in any one operating mode. Registers use real flipflops, //
// rather than SRAM. This makes sense for an FPGA //
// implementation, where flipflops are plentiful. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
module a25_register_bank (
 
input i_clk,
input i_access_stall,
input i_mem_stall,
 
input [1:0] i_mode_idec, // user, supervisor, irq_idec, firq_idec etc.
// Used for register writes
input [1:0] i_mode_exec, // 1 periods delayed from i_mode_idec
// Used for register reads
input [3:0] i_mode_rds_exec, // Use one-hot version specifically for rds,
// includes i_user_mode_regs_store
input i_firq_not_user_mode,
input [3:0] i_rm_sel,
input [3:0] i_rs_sel,
input [3:0] i_rn_sel,
 
input i_pc_wen,
input [14:0] i_reg_bank_wen,
 
input [23:0] i_pc, // program counter [25:2]
input [31:0] i_reg,
 
input [31:0] i_wb_read_data,
input i_wb_read_data_valid,
input [3:0] i_wb_read_data_rd,
input i_wb_user_mode,
 
input [3:0] i_status_bits_flags,
input i_status_bits_irq_mask,
input i_status_bits_firq_mask,
 
output [31:0] o_rm,
output reg [31:0] o_rs,
output reg [31:0] o_rd,
output [31:0] o_rn,
output [31:0] o_pc
 
);
 
`include "a25_localparams.v"
`include "a25_functions.v"
 
 
// User Mode Registers
reg [31:0] r0 = 32'hdead_beef;
reg [31:0] r1 = 32'hdead_beef;
reg [31:0] r2 = 32'hdead_beef;
reg [31:0] r3 = 32'hdead_beef;
reg [31:0] r4 = 32'hdead_beef;
reg [31:0] r5 = 32'hdead_beef;
reg [31:0] r6 = 32'hdead_beef;
reg [31:0] r7 = 32'hdead_beef;
reg [31:0] r8 = 32'hdead_beef;
reg [31:0] r9 = 32'hdead_beef;
reg [31:0] r10 = 32'hdead_beef;
reg [31:0] r11 = 32'hdead_beef;
reg [31:0] r12 = 32'hdead_beef;
reg [31:0] r13 = 32'hdead_beef;
reg [31:0] r14 = 32'hdead_beef;
reg [23:0] r15 = 24'hc0_ffee;
 
wire [31:0] r0_out;
wire [31:0] r1_out;
wire [31:0] r2_out;
wire [31:0] r3_out;
wire [31:0] r4_out;
wire [31:0] r5_out;
wire [31:0] r6_out;
wire [31:0] r7_out;
wire [31:0] r8_out;
wire [31:0] r9_out;
wire [31:0] r10_out;
wire [31:0] r11_out;
wire [31:0] r12_out;
wire [31:0] r13_out;
wire [31:0] r14_out;
wire [31:0] r15_out_rm;
wire [31:0] r15_out_rm_nxt;
wire [31:0] r15_out_rn;
 
wire [31:0] r8_rds;
wire [31:0] r9_rds;
wire [31:0] r10_rds;
wire [31:0] r11_rds;
wire [31:0] r12_rds;
wire [31:0] r13_rds;
wire [31:0] r14_rds;
 
// Supervisor Mode Registers
reg [31:0] r13_svc = 32'hdead_beef;
reg [31:0] r14_svc = 32'hdead_beef;
 
// Interrupt Mode Registers
reg [31:0] r13_irq = 32'hdead_beef;
reg [31:0] r14_irq = 32'hdead_beef;
 
// Fast Interrupt Mode Registers
reg [31:0] r8_firq = 32'hdead_beef;
reg [31:0] r9_firq = 32'hdead_beef;
reg [31:0] r10_firq = 32'hdead_beef;
reg [31:0] r11_firq = 32'hdead_beef;
reg [31:0] r12_firq = 32'hdead_beef;
reg [31:0] r13_firq = 32'hdead_beef;
reg [31:0] r14_firq = 32'hdead_beef;
 
wire usr_exec;
wire svc_exec;
wire irq_exec;
wire firq_exec;
 
wire usr_idec;
wire svc_idec;
wire irq_idec;
wire firq_idec;
wire [14:0] read_data_wen;
wire [14:0] reg_bank_wen_c;
wire pc_wen_c;
wire pc_dmem_wen;
 
 
// Write Enables from execute stage
assign usr_idec = i_mode_idec == USR;
assign svc_idec = i_mode_idec == SVC;
assign irq_idec = i_mode_idec == IRQ;
 
// pre-encoded in decode stage to speed up long path
assign firq_idec = i_firq_not_user_mode;
 
// Read Enables from stage 1 (fetch)
assign usr_exec = i_mode_exec == USR;
assign svc_exec = i_mode_exec == SVC;
assign irq_exec = i_mode_exec == IRQ;
assign firq_exec = i_mode_exec == FIRQ;
 
assign read_data_wen = {15{i_wb_read_data_valid & ~i_mem_stall}} & decode (i_wb_read_data_rd);
 
assign reg_bank_wen_c = {15{~i_access_stall}} & i_reg_bank_wen;
assign pc_wen_c = ~i_access_stall & i_pc_wen;
assign pc_dmem_wen = i_wb_read_data_valid & ~i_mem_stall & i_wb_read_data_rd == 4'd15;
 
 
// ========================================================
// Register Update
// ========================================================
always @ ( posedge i_clk )
begin
r0 <= reg_bank_wen_c[0 ] ? i_reg : read_data_wen[0 ] ? i_wb_read_data : r0;
r1 <= reg_bank_wen_c[1 ] ? i_reg : read_data_wen[1 ] ? i_wb_read_data : r1;
r2 <= reg_bank_wen_c[2 ] ? i_reg : read_data_wen[2 ] ? i_wb_read_data : r2;
r3 <= reg_bank_wen_c[3 ] ? i_reg : read_data_wen[3 ] ? i_wb_read_data : r3;
r4 <= reg_bank_wen_c[4 ] ? i_reg : read_data_wen[4 ] ? i_wb_read_data : r4;
r5 <= reg_bank_wen_c[5 ] ? i_reg : read_data_wen[5 ] ? i_wb_read_data : r5;
r6 <= reg_bank_wen_c[6 ] ? i_reg : read_data_wen[6 ] ? i_wb_read_data : r6;
r7 <= reg_bank_wen_c[7 ] ? i_reg : read_data_wen[7 ] ? i_wb_read_data : r7;
r8 <= reg_bank_wen_c[8 ] && !firq_idec ? i_reg : read_data_wen[8 ] && ( !firq_idec || i_wb_user_mode ) ? i_wb_read_data : r8;
r9 <= reg_bank_wen_c[9 ] && !firq_idec ? i_reg : read_data_wen[9 ] && ( !firq_idec || i_wb_user_mode ) ? i_wb_read_data : r9;
r10 <= reg_bank_wen_c[10] && !firq_idec ? i_reg : read_data_wen[10] && ( !firq_idec || i_wb_user_mode ) ? i_wb_read_data : r10;
r11 <= reg_bank_wen_c[11] && !firq_idec ? i_reg : read_data_wen[11] && ( !firq_idec || i_wb_user_mode ) ? i_wb_read_data : r11;
r12 <= reg_bank_wen_c[12] && !firq_idec ? i_reg : read_data_wen[12] && ( !firq_idec || i_wb_user_mode ) ? i_wb_read_data : r12;
r8_firq <= reg_bank_wen_c[8 ] && firq_idec ? i_reg : read_data_wen[8 ] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r8_firq;
r9_firq <= reg_bank_wen_c[9 ] && firq_idec ? i_reg : read_data_wen[9 ] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r9_firq;
r10_firq <= reg_bank_wen_c[10] && firq_idec ? i_reg : read_data_wen[10] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r10_firq;
r11_firq <= reg_bank_wen_c[11] && firq_idec ? i_reg : read_data_wen[11] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r11_firq;
r12_firq <= reg_bank_wen_c[12] && firq_idec ? i_reg : read_data_wen[12] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r12_firq;
 
r13 <= reg_bank_wen_c[13] && usr_idec ? i_reg : read_data_wen[13] && ( usr_idec || i_wb_user_mode ) ? i_wb_read_data : r13;
r14 <= reg_bank_wen_c[14] && usr_idec ? i_reg : read_data_wen[14] && ( usr_idec || i_wb_user_mode ) ? i_wb_read_data : r14;
r13_svc <= reg_bank_wen_c[13] && svc_idec ? i_reg : read_data_wen[13] && ( svc_idec && !i_wb_user_mode ) ? i_wb_read_data : r13_svc;
r14_svc <= reg_bank_wen_c[14] && svc_idec ? i_reg : read_data_wen[14] && ( svc_idec && !i_wb_user_mode ) ? i_wb_read_data : r14_svc;
r13_irq <= reg_bank_wen_c[13] && irq_idec ? i_reg : read_data_wen[13] && ( irq_idec && !i_wb_user_mode ) ? i_wb_read_data : r13_irq;
r14_irq <= reg_bank_wen_c[14] && irq_idec ? i_reg : read_data_wen[14] && ( irq_idec && !i_wb_user_mode ) ? i_wb_read_data : r14_irq;
r13_firq <= reg_bank_wen_c[13] && firq_idec ? i_reg : read_data_wen[13] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r13_firq;
r14_firq <= reg_bank_wen_c[14] && firq_idec ? i_reg : read_data_wen[14] && ( firq_idec && !i_wb_user_mode ) ? i_wb_read_data : r14_firq;
r15 <= pc_wen_c ? i_pc : pc_dmem_wen ? i_wb_read_data[25:2] : r15;
end
// ========================================================
// Register Read based on Mode
// ========================================================
assign r0_out = r0;
assign r1_out = r1;
assign r2_out = r2;
assign r3_out = r3;
assign r4_out = r4;
assign r5_out = r5;
assign r6_out = r6;
assign r7_out = r7;
 
assign r8_out = firq_exec ? r8_firq : r8;
assign r9_out = firq_exec ? r9_firq : r9;
assign r10_out = firq_exec ? r10_firq : r10;
assign r11_out = firq_exec ? r11_firq : r11;
assign r12_out = firq_exec ? r12_firq : r12;
 
assign r13_out = usr_exec ? r13 :
svc_exec ? r13_svc :
irq_exec ? r13_irq :
r13_firq ;
assign r14_out = usr_exec ? r14 :
svc_exec ? r14_svc :
irq_exec ? r14_irq :
r14_firq ;
 
assign r15_out_rm = { i_status_bits_flags,
i_status_bits_irq_mask,
i_status_bits_firq_mask,
r15,
i_mode_exec};
 
assign r15_out_rm_nxt = { i_status_bits_flags,
i_status_bits_irq_mask,
i_status_bits_firq_mask,
i_pc,
i_mode_exec};
assign r15_out_rn = {6'd0, r15, 2'd0};
 
 
// rds outputs
assign r8_rds = i_mode_rds_exec[OH_FIRQ] ? r8_firq : r8;
assign r9_rds = i_mode_rds_exec[OH_FIRQ] ? r9_firq : r9;
assign r10_rds = i_mode_rds_exec[OH_FIRQ] ? r10_firq : r10;
assign r11_rds = i_mode_rds_exec[OH_FIRQ] ? r11_firq : r11;
assign r12_rds = i_mode_rds_exec[OH_FIRQ] ? r12_firq : r12;
 
assign r13_rds = i_mode_rds_exec[OH_USR] ? r13 :
i_mode_rds_exec[OH_SVC] ? r13_svc :
i_mode_rds_exec[OH_IRQ] ? r13_irq :
r13_firq ;
assign r14_rds = i_mode_rds_exec[OH_USR] ? r14 :
i_mode_rds_exec[OH_SVC] ? r14_svc :
i_mode_rds_exec[OH_IRQ] ? r14_irq :
r14_firq ;
 
 
// ========================================================
// Program Counter out
// ========================================================
assign o_pc = r15_out_rn;
 
// ========================================================
// Rm Selector
// ========================================================
assign o_rm = i_rm_sel == 4'd0 ? r0_out :
i_rm_sel == 4'd1 ? r1_out :
i_rm_sel == 4'd2 ? r2_out :
i_rm_sel == 4'd3 ? r3_out :
i_rm_sel == 4'd4 ? r4_out :
i_rm_sel == 4'd5 ? r5_out :
i_rm_sel == 4'd6 ? r6_out :
i_rm_sel == 4'd7 ? r7_out :
i_rm_sel == 4'd8 ? r8_out :
i_rm_sel == 4'd9 ? r9_out :
i_rm_sel == 4'd10 ? r10_out :
i_rm_sel == 4'd11 ? r11_out :
i_rm_sel == 4'd12 ? r12_out :
i_rm_sel == 4'd13 ? r13_out :
i_rm_sel == 4'd14 ? r14_out :
r15_out_rm ;
 
 
// ========================================================
// Rds Selector
// ========================================================
always @*
case ( i_rs_sel )
4'd0 : o_rs = r0_out ;
4'd1 : o_rs = r1_out ;
4'd2 : o_rs = r2_out ;
4'd3 : o_rs = r3_out ;
4'd4 : o_rs = r4_out ;
4'd5 : o_rs = r5_out ;
4'd6 : o_rs = r6_out ;
4'd7 : o_rs = r7_out ;
4'd8 : o_rs = r8_rds ;
4'd9 : o_rs = r9_rds ;
4'd10 : o_rs = r10_rds ;
4'd11 : o_rs = r11_rds ;
4'd12 : o_rs = r12_rds ;
4'd13 : o_rs = r13_rds ;
4'd14 : o_rs = r14_rds ;
4'd15 : o_rs = r15_out_rn ;
default: o_rs = r15_out_rn ;
endcase
 
// ========================================================
// Rd Selector
// ========================================================
always @*
case ( i_rs_sel )
4'd0 : o_rd = r0_out ;
4'd1 : o_rd = r1_out ;
4'd2 : o_rd = r2_out ;
4'd3 : o_rd = r3_out ;
4'd4 : o_rd = r4_out ;
4'd5 : o_rd = r5_out ;
4'd6 : o_rd = r6_out ;
4'd7 : o_rd = r7_out ;
4'd8 : o_rd = r8_rds ;
4'd9 : o_rd = r9_rds ;
4'd10 : o_rd = r10_rds ;
4'd11 : o_rd = r11_rds ;
4'd12 : o_rd = r12_rds ;
4'd13 : o_rd = r13_rds ;
4'd14 : o_rd = r14_rds ;
4'd15 : o_rd = r15_out_rm_nxt ;
default: o_rd = r15_out_rm_nxt ;
endcase
 
// ========================================================
// Rn Selector
// ========================================================
assign o_rn = i_rn_sel == 4'd0 ? r0_out :
i_rn_sel == 4'd1 ? r1_out :
i_rn_sel == 4'd2 ? r2_out :
i_rn_sel == 4'd3 ? r3_out :
i_rn_sel == 4'd4 ? r4_out :
i_rn_sel == 4'd5 ? r5_out :
i_rn_sel == 4'd6 ? r6_out :
i_rn_sel == 4'd7 ? r7_out :
i_rn_sel == 4'd8 ? r8_out :
i_rn_sel == 4'd9 ? r9_out :
i_rn_sel == 4'd10 ? r10_out :
i_rn_sel == 4'd11 ? r11_out :
i_rn_sel == 4'd12 ? r12_out :
i_rn_sel == 4'd13 ? r13_out :
i_rn_sel == 4'd14 ? r14_out :
r15_out_rn ;
 
 
endmodule
 
 
/amber25/a25_multiply.v
0,0 → 1,205
//////////////////////////////////////////////////////////////////
// //
// Multiplication Module for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// 64-bit Booth signed or unsigned multiply and //
// multiply-accumulate supported. It takes about 38 clock //
// cycles to complete an operation. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
 
// i_function: bit 0 = go, bit 1 = accumulate
// Command:
// 2'b01 : MUL - 32 bit multiplication
// 2'b11 : MLA - 32 bit multiply and accumulate
//
// 34-bit Booth adder
// The adder needs to be 34 bit to deal with signed and unsigned 32-bit
// multiplication inputs. This adds 1 extra bit. Then to deal with the
// case of two max negative numbers another bit is required.
//
 
module a25_multiply (
input                       i_clk,
input                       i_access_stall,

input       [31:0]          i_a_in,         // Rds
input       [31:0]          i_b_in,         // Rm
input       [1:0]           i_function,
input                       i_execute,

output      [31:0]          o_out,
output      [1:0]           o_flags,        // [1] = N, [0] = Z
output reg                  o_done = 'd0    // goes high 2 cycles before completion
);

// --------------------------------------------------------
// Sequencer positions held in 'count'
// --------------------------------------------------------
localparam [5:0] STEP_IDLE      = 6'd0,     // waiting for a start request
                 STEP_DONE_PRE  = 6'd31,    // o_done is registered high from this step
                 STEP_BOOTH_END = 6'd33,    // last Booth add-and-shift step
                 STEP_MUL_END   = 6'd34,    // final step of MUL, accumulate step of MLA
                 STEP_MLA_END   = 6'd35;    // final step of MLA

// --------------------------------------------------------
// Booth operand selection
// Picks +multiplicand, -multiplicand or zero from the two
// least significant bits of the product register.
// --------------------------------------------------------
function [33:0] booth_operand;
    input [1:0]  recode;
    input [33:0] plus_m;
    input [33:0] minus_m;
    begin
        case ( recode )
            2'b01   : booth_operand = plus_m;
            2'b10   : booth_operand = minus_m;
            default : booth_operand = 34'd0;
        endcase
    end
endfunction

// --------------------------------------------------------
// State
// --------------------------------------------------------
reg  [5:0]      count           = 'd0;
reg  [67:0]     product         = 'd0;
reg             sum_acc1_carry  = 'd0;

reg  [5:0]      count_nxt;
reg  [67:0]     product_nxt;
reg             sum_acc1_carry_nxt;

// --------------------------------------------------------
// Command decode and Booth operands
// --------------------------------------------------------
wire            start_req    = i_function[0];
wire            mla_op       = i_function[1];

wire [33:0]     addend_pos   = { 2'd0, i_a_in };
wire [33:0]     addend_neg   = ~addend_pos + 34'd1;
wire [33:0]     booth_addend = booth_operand( product[1:0], addend_pos, addend_neg );

wire [33:0]     booth_sum;
wire [32:0]     acc_sum;     // the MSB is the carry out of the 32-bit accumulate addition

// --------------------------------------------------------
// Position of the sequencer
// --------------------------------------------------------
wire            at_idle      = ( count == STEP_IDLE );
wire            in_booth     = !at_idle && ( count <= STEP_BOOTH_END );
wire            acc_step     = mla_op && ( count == STEP_MUL_END );
wire            last_step    = mla_op ? ( count == STEP_MLA_END )
                                      : ( count == STEP_MUL_END );


// Use DSP modules from Xilinx Spartan6 / Virtex6 FPGA devices
`ifdef XILINX_FPGA
    // 34-bit adder - booth multiplication
    `ifdef XILINX_SPARTAN6_FPGA
        xs6_addsub_n #(.WIDTH(34))
    `endif
    `ifdef XILINX_VIRTEX6_FPGA
        xv6_addsub_n #(.WIDTH(34))
    `endif
        u_xx_addsub_34_sum (
            .i_a    ( product[67:34]    ),
            .i_b    ( booth_addend      ),
            .i_cin  ( 1'd0              ),
            .i_sub  ( 1'd0              ),
            .o_sum  ( booth_sum         ),
            .o_co   (                   )
        );

    // 33-bit adder - accumulate operations
    `ifdef XILINX_SPARTAN6_FPGA
        xs6_addsub_n #(.WIDTH(33))
    `endif
    `ifdef XILINX_VIRTEX6_FPGA
        xv6_addsub_n #(.WIDTH(33))
    `endif
        u_xx_addsub_33_acc1 (
            .i_a    ( {1'd0, product[32:1]} ),
            .i_b    ( {1'd0, i_a_in}        ),
            .i_cin  ( 1'd0                  ),
            .i_sub  ( 1'd0                  ),
            .o_sum  ( acc_sum               ),
            .o_co   (                       )
        );
`else
    // Generic adders: 34-bit for the Booth steps, 33-bit for the accumulate
    assign booth_sum = product[67:34] + booth_addend;
    assign acc_sum   = {1'd0, product[32:1]} + {1'd0, i_a_in};
`endif


// --------------------------------------------------------
// Next value of the product register
// --------------------------------------------------------
always @*
    begin
    // hold by default
    product_nxt        = product;
    sum_acc1_carry_nxt = sum_acc1_carry;

    if ( at_idle )
        // load Rm into the multiplier field, bit 0 is the Booth helper bit
        product_nxt = { 35'd0, i_b_in, 1'd0 };
    else if ( in_booth )
        // add, then arithmetic shift right by one
        product_nxt = { booth_sum[33], booth_sum, product[33:1] };
    else if ( acc_step )
        begin
        // Bit 0 is not part of the product, it only serves the Booth recoding
        product_nxt        = { 3'd0, product[64:33], acc_sum[31:0], 1'd0 };
        sum_acc1_carry_nxt = acc_sum[32];
        end
    end


// --------------------------------------------------------
// Next value of the step counter
// --------------------------------------------------------
always @*
    if ( at_idle )
        count_nxt = { 5'd0, start_req };    // leave idle only on a start request
    else if ( last_step )
        count_nxt = STEP_IDLE;
    else
        count_nxt = count + 6'd1;


// --------------------------------------------------------
// Registers, frozen while stalled or while not executing
// --------------------------------------------------------
always @( posedge i_clk )
    if ( !i_access_stall && i_execute )
        begin
        count           <= count_nxt;
        product         <= product_nxt;
        sum_acc1_carry  <= sum_acc1_carry_nxt;
        o_done          <= ( count == STEP_DONE_PRE );
        end


// --------------------------------------------------------
// Outputs
// The flags are taken from the registered product so the
// zero comparator stays off the main adder path.
// --------------------------------------------------------
assign o_out   = product[32:1];
assign o_flags = { product[32], ~|product[32:1] };

endmodule
 
 
/amber25/a25_alu.v
0,0 → 1,174
//////////////////////////////////////////////////////////////////
// //
// Arithmetic Logic Unit (ALU) for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Supported functions: 32-bit add and subtract, AND, OR, //
// XOR, NOT, Zero extend 8-bit numbers //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
module a25_alu (

input       [31:0]          i_a_in,
input       [31:0]          i_b_in,
input                       i_barrel_shift_carry,
input                       i_status_bits_carry,
input       [8:0]           i_function,

output      [31:0]          o_out,
output      [3:0]           o_flags
);

// --------------------------------------------------------
// Result selection
// Chooses the ALU result from the two routed operands and
// the adder sum according to the 4-bit result select field.
// --------------------------------------------------------
function [31:0] alu_result;
    input [3:0]  sel;
    input [31:0] lhs_v;
    input [31:0] rhs_v;
    input [31:0] add_sum;
    begin
        case ( sel )
            4'd0    : alu_result = rhs_v;                               // pass B
            4'd1    : alu_result = add_sum;                             // adder
            4'd2    : alu_result = { 16'd0, rhs_v[15:0] };              // zero extend 16
            4'd3    : alu_result = { 24'd0, rhs_v[7:0]  };              // zero extend 8
            4'd4    : alu_result = { {16{rhs_v[15]}}, rhs_v[15:0] };    // sign extend 16
            4'd5    : alu_result = { {24{rhs_v[7]}},  rhs_v[7:0]  };    // sign extend 8
            4'd6    : alu_result = lhs_v ^ rhs_v;
            4'd7    : alu_result = lhs_v | rhs_v;
            default : alu_result = lhs_v & rhs_v;
        endcase
    end
endfunction

// --------------------------------------------------------
// i_function fields, MSB first
// --------------------------------------------------------
wire            swap_sel = i_function[8];       // exchange the A and B operands
wire            not_sel  = i_function[7];       // invert the B operand
wire [1:0]      cin_sel  = i_function[6:5];     // adder carry-in source
wire            cout_sel = i_function[4];       // carry flag source
wire [3:0]      out_sel  = i_function[3:0];     // result source

// --------------------------------------------------------
// Operand routing
// --------------------------------------------------------
wire [31:0]     lhs;
wire [31:0]     rhs_raw;
wire [31:0]     rhs;

assign { lhs, rhs_raw } = swap_sel ? { i_b_in, i_a_in }
                                   : { i_a_in, i_b_in };

// XOR with a replicated select bit inverts B only when not_sel is set
assign rhs = rhs_raw ^ {32{not_sel}};

// --------------------------------------------------------
// Adder carry in
// 0 -> constant 0, 1 -> constant 1, 2 and 3 -> status carry
// --------------------------------------------------------
wire            carry_in = cin_sel[1] ? i_status_bits_carry : cin_sel[0];

// --------------------------------------------------------
// 33-bit adder, bit 32 is the carry out
// --------------------------------------------------------
wire [32:0]     fadder_out;

`ifdef XILINX_FPGA
    // Xilinx DSP based adder
    `ifdef XILINX_SPARTAN6_FPGA
        xs6_addsub_n #(.WIDTH(33))
    `endif
    `ifdef XILINX_VIRTEX6_FPGA
        xv6_addsub_n #(.WIDTH(33))
    `endif
        u_xx_addsub_33(
            .i_a    ( {1'd0, lhs}   ),
            .i_b    ( {1'd0, rhs}   ),
            .i_cin  ( carry_in      ),
            .i_sub  ( 1'd0          ),
            .o_sum  ( fadder_out    ),
            .o_co   (               )
        );
`else
    assign fadder_out = {1'd0, lhs} + {1'd0, rhs} + {32'd0, carry_in};
`endif

// --------------------------------------------------------
// Result
// --------------------------------------------------------
assign o_out = alu_result( out_sel, lhs, rhs, fadder_out[31:0] );

// --------------------------------------------------------
// Flags
// --------------------------------------------------------
// Carry comes from the adder, or from the barrel shifter carry input
// when cout_sel is set
wire            carry_out    = cout_sel ? i_barrel_shift_carry : fadder_out[32];

// Overflow is only reported for the adder result: both adder inputs
// share a sign and the sum has the opposite sign
wire            same_sign_in = ( lhs[31] == rhs[31] );
wire            sign_flipped = ( fadder_out[31] != lhs[31] );
wire            overflow_out = ( out_sel == 4'd1 ) && same_sign_in && sign_flipped;

assign o_flags = { o_out[31],       // negative
                   ~|o_out,         // zero
                   carry_out,       // carry
                   overflow_out     // overflow
                 };

endmodule
 
 
/amber25/a25_decode.v
0,0 → 1,1756
//////////////////////////////////////////////////////////////////
// //
// Decode stage of Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// This module is the most complex part of the Amber core //
// It decodes and sequences all instructions and handles all //
// interrupts //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
module a25_decode
(
input i_clk,
input [31:0] i_fetch_instruction,
input i_access_stall, // stall all stages of the cpu at the same time
input i_irq, // interrupt request
input i_firq, // Fast interrupt request
input i_dabt, // data abort interrupt request
input i_iabt, // instruction pre-fetch abort flag
input i_adex, // Address Exception
input [31:0] i_execute_iaddress, // Registered instruction address output by execute stage
input [31:0] i_execute_daddress, // Registered data address output by execute stage
input [7:0] i_abt_status, // Abort status
input [31:0] i_execute_status_bits, // current status bits values in execute stage
input i_multiply_done, // multiply unit is nearly done
 
 
// --------------------------------------------------
// Control signals to execute stage
// --------------------------------------------------
// output reg [4:0] o_read_data_alignment = 1'd0, // 2 LSBs of read address used for calculating shift in ldrb ops
 
output reg [31:0] o_imm32 = 'd0,
output reg [4:0] o_imm_shift_amount = 'd0,
output reg o_shift_imm_zero = 'd0,
output reg [3:0] o_condition = 4'he, // 4'he = al
output reg o_decode_exclusive = 'd0, // exclusive access request ( swap instruction )
output reg o_decode_iaccess = 1'd1, // Indicates an instruction access
output reg o_decode_daccess = 'd0, // Indicates a data access
output reg [1:0] o_status_bits_mode = 2'b11, // SVC
output reg o_status_bits_irq_mask = 1'd1,
output reg o_status_bits_firq_mask = 1'd1,
 
output reg [3:0] o_rm_sel = 'd0,
output reg [3:0] o_rs_sel = 'd0,
output reg [7:0] o_load_rd = 'd0, // [7] load flags with PC
// [6] load status bits with PC
// [5] Write into User Mode register
// [4] zero-extend load
// [3:0] destination register, Rd
output reg [3:0] o_rn_sel = 'd0,
output reg [1:0] o_barrel_shift_amount_sel = 'd0,
output reg [1:0] o_barrel_shift_data_sel = 'd0,
output reg [1:0] o_barrel_shift_function = 'd0,
output reg [8:0] o_alu_function = 'd0,
output reg [1:0] o_multiply_function = 'd0,
output reg [2:0] o_interrupt_vector_sel = 'd0,
output reg [3:0] o_iaddress_sel = 4'd2,
output reg [3:0] o_daddress_sel = 4'd2,
output reg [2:0] o_pc_sel = 3'd2,
output reg [1:0] o_byte_enable_sel = 'd0, // byte, halfword or word write
output reg [2:0] o_status_bits_sel = 'd0,
output reg [2:0] o_reg_write_sel,
output reg o_user_mode_regs_store_nxt,
output reg o_firq_not_user_mode,
 
output reg o_write_data_wen = 'd0,
output reg o_base_address_wen = 'd0, // save ldm base address register
// in case of data abort
output reg o_pc_wen = 1'd1,
output reg [14:0] o_reg_bank_wen = 'd0,
output reg o_status_bits_flags_wen = 'd0,
output reg o_status_bits_mode_wen = 'd0,
output reg o_status_bits_irq_mask_wen = 'd0,
output reg o_status_bits_firq_mask_wen = 'd0,
 
// --------------------------------------------------
// Co-Processor interface
// --------------------------------------------------
output reg [2:0] o_copro_opcode1 = 'd0,
output reg [2:0] o_copro_opcode2 = 'd0,
output reg [3:0] o_copro_crn = 'd0,
output reg [3:0] o_copro_crm = 'd0,
output reg [3:0] o_copro_num = 'd0,
output reg [1:0] o_copro_operation = 'd0, // 0 = no operation,
// 1 = Move to Amber Core Register from Coprocessor
// 2 = Move to Coprocessor from Amber Core Register
output reg o_copro_write_data_wen = 'd0,
output o_iabt_trigger,
output [31:0] o_iabt_address,
output [7:0] o_iabt_status,
output o_dabt_trigger,
output [31:0] o_dabt_address,
output [7:0] o_dabt_status,
output o_conflict
 
 
);
 
`include "a25_localparams.v"
`include "a25_functions.v"
 
localparam [4:0] RST_WAIT1 = 5'd0,
RST_WAIT2 = 5'd1,
INT_WAIT1 = 5'd2,
INT_WAIT2 = 5'd3,
EXECUTE = 5'd4,
PRE_FETCH_EXEC = 5'd5, // Execute the Pre-Fetched Instruction
MEM_WAIT1 = 5'd6, // conditionally decode current instruction, in case
// previous instruction does not execute in S2
MEM_WAIT2 = 5'd7,
PC_STALL1 = 5'd8, // Program Counter altered
// conditionally decode current instruction, in case
// previous instruction does not execute in S2
PC_STALL2 = 5'd9,
MTRANS_EXEC1 = 5'd10,
MTRANS_EXEC2 = 5'd11,
MTRANS_ABORT = 5'd12,
MULT_PROC1 = 5'd13, // first cycle, save pre fetch instruction
MULT_PROC2 = 5'd14, // do multiplication
MULT_STORE = 5'd15, // save RdLo
MULT_ACCUMU = 5'd16, // Accumulate add lower 32 bits
SWAP_WRITE = 5'd17,
SWAP_WAIT1 = 5'd18,
SWAP_WAIT2 = 5'd19,
COPRO_WAIT = 5'd20;
// ========================================================
// Internal signals
// ========================================================
wire [31:0] instruction;
wire instruction_iabt; // abort flag, follows the instruction
wire instruction_adex; // address exception flag, follows the instruction
wire [31:0] instruction_address; // instruction virtual address, follows
// the instruction
wire [7:0] instruction_iabt_status; // abort status, follows the instruction
wire [1:0] instruction_sel;
reg [3:0] type;
wire [3:0] opcode;
wire [7:0] imm8;
wire [31:0] offset12;
wire [31:0] offset24;
wire [4:0] shift_imm;
 
wire opcode_compare;
wire mem_op;
wire load_op;
wire store_op;
wire write_pc;
wire current_write_pc;
reg load_pc_nxt;
reg load_pc_r = 'd0;
wire immediate_shift_op;
wire rds_use_rs;
wire branch;
wire mem_op_pre_indexed;
wire mem_op_post_indexed;
 
// Flop inputs
wire [31:0] imm32_nxt;
wire [4:0] imm_shift_amount_nxt;
wire shift_imm_zero_nxt;
wire [3:0] condition_nxt;
reg decode_exclusive_nxt;
reg decode_iaccess_nxt;
reg decode_daccess_nxt;
 
reg [1:0] barrel_shift_function_nxt;
wire [8:0] alu_function_nxt;
reg [1:0] multiply_function_nxt;
reg [1:0] status_bits_mode_nxt;
reg status_bits_irq_mask_nxt;
reg status_bits_firq_mask_nxt;
 
wire [3:0] rm_sel_nxt;
wire [3:0] rs_sel_nxt;
 
wire [3:0] rn_sel_nxt;
reg [1:0] barrel_shift_amount_sel_nxt;
reg [1:0] barrel_shift_data_sel_nxt;
reg [3:0] iaddress_sel_nxt;
reg [3:0] daddress_sel_nxt;
reg [2:0] pc_sel_nxt;
reg [1:0] byte_enable_sel_nxt;
reg [2:0] status_bits_sel_nxt;
reg [2:0] reg_write_sel_nxt;
wire firq_not_user_mode_nxt;
 
// ALU Function signals
reg alu_swap_sel_nxt;
reg alu_not_sel_nxt;
reg [1:0] alu_cin_sel_nxt;
reg alu_cout_sel_nxt;
reg [3:0] alu_out_sel_nxt;
 
reg write_data_wen_nxt;
reg copro_write_data_wen_nxt;
reg base_address_wen_nxt;
reg pc_wen_nxt;
reg [14:0] reg_bank_wen_nxt;
reg status_bits_flags_wen_nxt;
reg status_bits_mode_wen_nxt;
reg status_bits_irq_mask_wen_nxt;
reg status_bits_firq_mask_wen_nxt;
 
reg saved_current_instruction_wen; // saved load instruction
reg pre_fetch_instruction_wen; // pre-fetch instruction
 
reg [4:0] control_state = RST_WAIT1;
reg [4:0] control_state_nxt;
 
 
wire dabt;
reg dabt_reg = 'd0;
reg dabt_reg_d1;
reg iabt_reg = 'd0;
reg adex_reg = 'd0;
reg [31:0] fetch_address_r = 'd0;
reg [7:0] abt_status_reg = 'd0;
reg [31:0] fetch_instruction_r = 'd0;
reg [31:0] saved_current_instruction = 'd0;
reg saved_current_instruction_iabt = 'd0; // access abort flag
reg saved_current_instruction_adex = 'd0; // address exception
reg [31:0] saved_current_instruction_address = 'd0; // virtual address of abort instruction
reg [7:0] saved_current_instruction_iabt_status = 'd0; // status of abort instruction
reg [31:0] pre_fetch_instruction = 'd0;
reg pre_fetch_instruction_iabt = 'd0; // access abort flag
reg pre_fetch_instruction_adex = 'd0; // address exception
reg [31:0] pre_fetch_instruction_address = 'd0; // virtual address of abort instruction
reg [7:0] pre_fetch_instruction_iabt_status = 'd0; // status of abort instruction
reg [31:0] hold_instruction = 'd0;
reg hold_instruction_iabt = 'd0; // access abort flag
reg hold_instruction_adex = 'd0; // address exception
reg [31:0] hold_instruction_address = 'd0; // virtual address of abort instruction
reg [7:0] hold_instruction_iabt_status = 'd0; // status of abort instruction
 
wire instruction_valid;
wire instruction_execute;
 
reg [3:0] mtrans_reg1; // the current register being accessed as part of stm/ldm
reg [3:0] mtrans_reg2; // the next register being accessed as part of stm/ldm
reg [31:0] mtrans_instruction_nxt;
wire [15:0] mtrans_reg2_mask;
 
wire [31:0] mtrans_base_reg_change;
wire [4:0] mtrans_num_registers;
wire use_saved_current_instruction;
wire use_hold_instruction;
wire use_pre_fetch_instruction;
wire interrupt;
wire [1:0] interrupt_mode;
wire [2:0] next_interrupt;
reg irq = 'd0;
reg firq = 'd0;
wire firq_request;
wire irq_request;
wire swi_request;
wire und_request;
wire dabt_request;
reg [1:0] copro_operation_nxt;
reg mtrans_r15 = 'd0;
reg mtrans_r15_nxt;
reg restore_base_address = 'd0;
reg restore_base_address_nxt;
 
wire regop_set_flags;
 
wire [7:0] load_rd_nxt;
wire load_rd_byte;
wire ldm_user_mode;
wire ldm_status_bits;
wire ldm_flags;
wire [6:0] load_rd_d1_nxt;
reg [6:0] load_rd_d1 = 'd0; // MSB is the valid bit
wire rn_valid;
wire rm_valid;
wire rs_valid;
wire rd_valid;
wire stm_valid;
wire rn_conflict1;
wire rn_conflict2;
wire rm_conflict1;
wire rm_conflict2;
wire rs_conflict1;
wire rs_conflict2;
wire rd_conflict1;
wire rd_conflict2;
wire stm_conflict1a;
wire stm_conflict1b;
wire stm_conflict2a;
wire stm_conflict2b;
wire conflict1; // Register conflict with the load tracked in load_rd_d1_nxt
wire conflict2; // Register conflict with the load tracked in load_rd_d1
wire conflict; // Either conflict1 or conflict2
reg conflict_r = 'd0;
 
 
// ========================================================
// Instruction Abort and Data Abort outputs
// ========================================================
 
assign o_iabt_trigger = instruction_iabt && o_status_bits_mode == SVC && control_state == INT_WAIT1;
assign o_iabt_address = instruction_address;
assign o_iabt_status = instruction_iabt_status;
 
assign o_dabt_trigger = dabt_reg && !dabt_reg_d1;
assign o_dabt_address = fetch_address_r;
assign o_dabt_status = abt_status_reg;
 
 
// ========================================================
// Instruction Decode
// ========================================================
 
// for instructions that take more than one cycle
// the instruction is saved in the 'saved_current_instruction'
// register and then that register is used for the rest of
// the execution of the instruction.
// But if the instruction does not execute because of the
// condition, then need to select the next instruction to
// decode
assign use_saved_current_instruction = instruction_execute &&
( control_state == MEM_WAIT1 ||
control_state == MEM_WAIT2 ||
control_state == MTRANS_EXEC1 ||
control_state == MTRANS_EXEC2 ||
control_state == MTRANS_ABORT ||
control_state == MULT_PROC1 ||
control_state == MULT_PROC2 ||
control_state == MULT_ACCUMU ||
control_state == MULT_STORE ||
control_state == INT_WAIT1 ||
control_state == INT_WAIT2 ||
control_state == SWAP_WRITE ||
control_state == SWAP_WAIT1 ||
control_state == SWAP_WAIT2 ||
control_state == COPRO_WAIT );
 
assign use_hold_instruction = conflict_r;
 
assign use_pre_fetch_instruction = control_state == PRE_FETCH_EXEC;
 
 
assign instruction_sel = use_hold_instruction ? 2'd3 : // hold_instruction
use_saved_current_instruction ? 2'd1 : // saved_current_instruction
use_pre_fetch_instruction ? 2'd2 : // pre_fetch_instruction
2'd0 ; // fetch_instruction_r
 
assign instruction = instruction_sel == 2'd0 ? fetch_instruction_r :
instruction_sel == 2'd1 ? saved_current_instruction :
instruction_sel == 2'd3 ? hold_instruction :
pre_fetch_instruction ;
 
// abort flag
assign instruction_iabt = instruction_sel == 2'd0 ? iabt_reg :
instruction_sel == 2'd1 ? saved_current_instruction_iabt :
instruction_sel == 2'd3 ? hold_instruction_iabt :
pre_fetch_instruction_iabt ;
assign instruction_address = instruction_sel == 2'd0 ? fetch_address_r :
instruction_sel == 2'd1 ? saved_current_instruction_address :
instruction_sel == 2'd3 ? hold_instruction_address :
pre_fetch_instruction_address ;
 
assign instruction_iabt_status = instruction_sel == 2'd0 ? abt_status_reg :
instruction_sel == 2'd1 ? saved_current_instruction_iabt_status :
instruction_sel == 2'd3 ? hold_instruction_iabt_status :
pre_fetch_instruction_iabt_status ;
 
// instruction address exception
assign instruction_adex = instruction_sel == 2'd0 ? adex_reg :
instruction_sel == 2'd1 ? saved_current_instruction_adex :
instruction_sel == 2'd3 ? hold_instruction_adex :
pre_fetch_instruction_adex ;
 
// Instruction Decode - Order is important!
always @*
casez ({instruction[27:20], instruction[7:4]})
12'b00010?001001 : type = SWAP;
12'b000000??1001 : type = MULT;
12'b00?????????? : type = REGOP;
12'b01?????????? : type = TRANS;
12'b100????????? : type = MTRANS;
12'b101????????? : type = BRANCH;
12'b110????????? : type = CODTRANS;
12'b1110???????0 : type = COREGOP;
12'b1110???????1 : type = CORTRANS;
default: type = SWI;
endcase
 
// ========================================================
// Fixed fields within the instruction
// ========================================================
assign opcode = instruction[24:21];
assign condition_nxt = instruction[31:28];
 
assign rm_sel_nxt = instruction[3:0];
assign rn_sel_nxt = branch ? 4'd15 : instruction[19:16]; // Use PC to calculate branch destination
assign rs_sel_nxt = control_state == SWAP_WRITE ? instruction[3:0] : // Rm gets written out to memory
type == MTRANS ? mtrans_reg1 :
branch ? 4'd15 : // Update the PC
rds_use_rs ? instruction[11:8] :
instruction[15:12] ;
 
// Load from memory into registers
assign ldm_user_mode = type == MTRANS && {instruction[22:20],instruction[15]} == 4'b1010;
assign ldm_flags = type == MTRANS && rs_sel_nxt == 4'd15 && instruction[20] && instruction[22];
assign ldm_status_bits = type == MTRANS && rs_sel_nxt == 4'd15 && instruction[20] && instruction[22] && i_execute_status_bits[1:0] != USR;
assign load_rd_byte = (type == TRANS || type == SWAP) && instruction[22];
assign load_rd_nxt = {ldm_flags, ldm_status_bits, ldm_user_mode, load_rd_byte, rs_sel_nxt};
 
 
// MSB indicates valid dirty target register
assign load_rd_d1_nxt = {o_decode_daccess && !o_write_data_wen, o_load_rd[3:0]};
assign shift_imm = instruction[11:7];
assign offset12 = { 20'h0, instruction[11:0]};
assign offset24 = {{6{instruction[23]}}, instruction[23:0], 2'd0 }; // sign extend
assign imm8 = instruction[7:0];
 
assign immediate_shift_op = instruction[25];
assign rds_use_rs = (type == REGOP && !instruction[25] && instruction[4]) ||
(type == MULT &&
(control_state == MULT_PROC1 ||
control_state == MULT_PROC2 ||
instruction_valid && !interrupt )) ;
assign branch = type == BRANCH;
assign opcode_compare = opcode == CMP || opcode == CMN || opcode == TEQ || opcode == TST ;
assign mem_op = type == TRANS;
assign load_op = mem_op && instruction[20];
assign store_op = mem_op && !instruction[20];
assign write_pc = (pc_wen_nxt && pc_sel_nxt != 3'd0) || load_pc_r || load_pc_nxt;
assign current_write_pc = (pc_wen_nxt && pc_sel_nxt != 3'd0) || load_pc_nxt;
assign regop_set_flags = type == REGOP && instruction[20];
 
assign mem_op_pre_indexed = instruction[24] && instruction[21];
assign mem_op_post_indexed = !instruction[24];
 
assign imm32_nxt = // add 0 to Rm
type == MULT ? { 32'd0 } :
// 4 x number of registers
type == MTRANS ? { mtrans_base_reg_change } :
type == BRANCH ? { offset24 } :
type == TRANS ? { offset12 } :
instruction[11:8] == 4'h0 ? { 24'h0, imm8[7:0] } :
instruction[11:8] == 4'h1 ? { imm8[1:0], 24'h0, imm8[7:2] } :
instruction[11:8] == 4'h2 ? { imm8[3:0], 24'h0, imm8[7:4] } :
instruction[11:8] == 4'h3 ? { imm8[5:0], 24'h0, imm8[7:6] } :
instruction[11:8] == 4'h4 ? { imm8[7:0], 24'h0 } :
instruction[11:8] == 4'h5 ? { 2'h0, imm8[7:0], 22'h0 } :
instruction[11:8] == 4'h6 ? { 4'h0, imm8[7:0], 20'h0 } :
instruction[11:8] == 4'h7 ? { 6'h0, imm8[7:0], 18'h0 } :
instruction[11:8] == 4'h8 ? { 8'h0, imm8[7:0], 16'h0 } :
instruction[11:8] == 4'h9 ? { 10'h0, imm8[7:0], 14'h0 } :
instruction[11:8] == 4'ha ? { 12'h0, imm8[7:0], 12'h0 } :
instruction[11:8] == 4'hb ? { 14'h0, imm8[7:0], 10'h0 } :
instruction[11:8] == 4'hc ? { 16'h0, imm8[7:0], 8'h0 } :
instruction[11:8] == 4'hd ? { 18'h0, imm8[7:0], 6'h0 } :
instruction[11:8] == 4'he ? { 20'h0, imm8[7:0], 4'h0 } :
{ 22'h0, imm8[7:0], 2'h0 } ;
 
 
assign imm_shift_amount_nxt = shift_imm ;
 
// This signal is encoded in the decode stage because
// it is on the critical path in the execute stage
assign shift_imm_zero_nxt = imm_shift_amount_nxt == 5'd0 && // immediate amount = 0
barrel_shift_amount_sel_nxt == 2'd2; // shift immediate amount
 
assign alu_function_nxt = { alu_swap_sel_nxt,
alu_not_sel_nxt,
alu_cin_sel_nxt,
alu_cout_sel_nxt,
alu_out_sel_nxt };
// ========================================================
// Register Conflict Detection
// ========================================================
// Which register fields does the instruction being decoded use?
assign rn_valid       = ( type == REGOP ) || ( type == MULT   ) || ( type == SWAP     ) ||
                        ( type == TRANS ) || ( type == MTRANS ) || ( type == CODTRANS );
assign rm_valid       = ( type == REGOP ) || ( type == MULT   ) || ( type == SWAP     ) ||
                        ( immediate_shift_op && type == TRANS );
assign rs_valid       = rds_use_rs;
assign rd_valid       = store_op && ( type == TRANS );           // str instruction
assign stm_valid      = !instruction[20] && ( type == MTRANS );  // stm instruction

// Bit 4 of load_rd_d1_nxt / load_rd_d1 qualifies the 4-bit register number
// held in bits 3:0. The *1 signals compare against load_rd_d1_nxt and the
// *2 signals against load_rd_d1.
assign rn_conflict1   = load_rd_d1_nxt[4] && rn_valid  && ( load_rd_d1_nxt[3:0] == rn_sel_nxt         );
assign rm_conflict1   = load_rd_d1_nxt[4] && rm_valid  && ( load_rd_d1_nxt[3:0] == rm_sel_nxt         );
assign rs_conflict1   = load_rd_d1_nxt[4] && rs_valid  && ( load_rd_d1_nxt[3:0] == rs_sel_nxt         );
assign rd_conflict1   = load_rd_d1_nxt[4] && rd_valid  && ( load_rd_d1_nxt[3:0] == instruction[15:12] );
assign stm_conflict1a = load_rd_d1_nxt[4] && stm_valid && ( load_rd_d1_nxt[3:0] == mtrans_reg1        );
assign stm_conflict1b = load_rd_d1_nxt[4] && stm_valid && ( load_rd_d1_nxt[3:0] == mtrans_reg2        );

assign rn_conflict2   = load_rd_d1[4]     && rn_valid  && ( load_rd_d1[3:0]     == rn_sel_nxt         );
assign rm_conflict2   = load_rd_d1[4]     && rm_valid  && ( load_rd_d1[3:0]     == rm_sel_nxt         );
assign rs_conflict2   = load_rd_d1[4]     && rs_valid  && ( load_rd_d1[3:0]     == rs_sel_nxt         );
assign rd_conflict2   = load_rd_d1[4]     && rd_valid  && ( load_rd_d1[3:0]     == instruction[15:12] );
assign stm_conflict2a = load_rd_d1[4]     && stm_valid && ( load_rd_d1[3:0]     == mtrans_reg1        );
assign stm_conflict2b = load_rd_d1[4]     && stm_valid && ( load_rd_d1[3:0]     == mtrans_reg2        );

// A conflict only counts while a new instruction is being decoded
assign conflict1      = ( rn_conflict1   || rm_conflict1   ||
                          rs_conflict1   || rd_conflict1   ||
                          stm_conflict1a || stm_conflict1b ) && instruction_valid;

assign conflict2      = ( rn_conflict2   || rm_conflict2   ||
                          rs_conflict2   || rd_conflict2   ||
                          stm_conflict2a || stm_conflict2b ) && instruction_valid;

assign conflict       = conflict2 || conflict1;
 
 
// Registered copy of the conflict flag; holds its value during an access stall
always @( posedge i_clk )
    begin
    if ( !i_access_stall )
        conflict_r <= conflict;
    end

// The conflict output is driven by the combinational value, not the registered copy
assign o_conflict = conflict;
 
 
// ========================================================
// MTRANS Operations
// ========================================================
// instruction[15:0] is the register list, bit 0 = r0 .. bit 15 = r15.
// In ldm and stm instructions r0 is loaded or stored first, so
// mtrans_reg1 is the number of the lowest set bit in the list.
// When none of bits 14:0 is set the result is 15, which covers both
// a list holding only r15 and an empty list.
always @*
    begin
    if      ( instruction[ 0] ) mtrans_reg1 = 4'd0;
    else if ( instruction[ 1] ) mtrans_reg1 = 4'd1;
    else if ( instruction[ 2] ) mtrans_reg1 = 4'd2;
    else if ( instruction[ 3] ) mtrans_reg1 = 4'd3;
    else if ( instruction[ 4] ) mtrans_reg1 = 4'd4;
    else if ( instruction[ 5] ) mtrans_reg1 = 4'd5;
    else if ( instruction[ 6] ) mtrans_reg1 = 4'd6;
    else if ( instruction[ 7] ) mtrans_reg1 = 4'd7;
    else if ( instruction[ 8] ) mtrans_reg1 = 4'd8;
    else if ( instruction[ 9] ) mtrans_reg1 = 4'd9;
    else if ( instruction[10] ) mtrans_reg1 = 4'd10;
    else if ( instruction[11] ) mtrans_reg1 = 4'd11;
    else if ( instruction[12] ) mtrans_reg1 = 4'd12;
    else if ( instruction[13] ) mtrans_reg1 = 4'd13;
    else if ( instruction[14] ) mtrans_reg1 = 4'd14;
    else                        mtrans_reg1 = 4'd15;
    end
 
 
// One-hot mask of the register selected as mtrans_reg1.
// The 1-bit constant is widened to the width of the assignment target
// before the shift, because shift operands are context-sized.
assign mtrans_reg2_mask = 1'd1 << mtrans_reg1;

// mtrans_reg2 is the number of the lowest set bit of the register list
// once the mtrans_reg1 bit has been masked out, i.e. the second register
// in transfer order. It is 15 when no bit below bit 15 remains.
always @*
    begin : mtrans_reg2_select
    reg [15:0] remaining_list;

    remaining_list = instruction[15:0] & ~mtrans_reg2_mask;

    if      ( remaining_list[ 0] ) mtrans_reg2 = 4'd0;
    else if ( remaining_list[ 1] ) mtrans_reg2 = 4'd1;
    else if ( remaining_list[ 2] ) mtrans_reg2 = 4'd2;
    else if ( remaining_list[ 3] ) mtrans_reg2 = 4'd3;
    else if ( remaining_list[ 4] ) mtrans_reg2 = 4'd4;
    else if ( remaining_list[ 5] ) mtrans_reg2 = 4'd5;
    else if ( remaining_list[ 6] ) mtrans_reg2 = 4'd6;
    else if ( remaining_list[ 7] ) mtrans_reg2 = 4'd7;
    else if ( remaining_list[ 8] ) mtrans_reg2 = 4'd8;
    else if ( remaining_list[ 9] ) mtrans_reg2 = 4'd9;
    else if ( remaining_list[10] ) mtrans_reg2 = 4'd10;
    else if ( remaining_list[11] ) mtrans_reg2 = 4'd11;
    else if ( remaining_list[12] ) mtrans_reg2 = 4'd12;
    else if ( remaining_list[13] ) mtrans_reg2 = 4'd13;
    else if ( remaining_list[14] ) mtrans_reg2 = 4'd14;
    else                           mtrans_reg2 = 4'd15;
    end
 
// Copy of the instruction with the lowest set bit of the register list
// (bits 15:0) cleared; bits 31:16 pass through unchanged.
// x & (x - 1) clears the least significant set bit of x, and gives zero
// both for an empty list and for a list holding only bit 15.
always @*
    mtrans_instruction_nxt = { instruction[31:16],
                               instruction[15:0] & ( instruction[15:0] - 16'd1 ) };
 
 
// Number of registers in the transfer list: a population count of
// instruction[15:0], built from 5-bit terms so that a full list (16) fits.
assign mtrans_num_registers =
        ( ( {4'd0, instruction[ 0]} + {4'd0, instruction[ 1]} )   +
          ( {4'd0, instruction[ 2]} + {4'd0, instruction[ 3]} ) ) +
        ( ( {4'd0, instruction[ 4]} + {4'd0, instruction[ 5]} )   +
          ( {4'd0, instruction[ 6]} + {4'd0, instruction[ 7]} ) ) +
        ( ( {4'd0, instruction[ 8]} + {4'd0, instruction[ 9]} )   +
          ( {4'd0, instruction[10]} + {4'd0, instruction[11]} ) ) +
        ( ( {4'd0, instruction[12]} + {4'd0, instruction[13]} )   +
          ( {4'd0, instruction[14]} + {4'd0, instruction[15]} ) ) ;

// 4 x number of registers: the count shifted left by two, zero-extended
assign mtrans_base_reg_change = { 25'd0, mtrans_num_registers, 2'd0 };
 
// ========================================================
// Interrupts
// ========================================================

// Interrupt requests; firq and irq are gated by execute status bits 26 and 27
assign dabt_request = dabt_reg;
assign firq_request = !i_execute_status_bits[26] && firq;
assign irq_request  = !i_execute_status_bits[27] && irq;
assign swi_request  = ( type == SWI );

// copro15 and copro13 only support reg trans opcodes;
// all other opcodes involving co-processors cause an
// undefined instruction interrupt
assign und_request  = ( type == COREGOP  ) ||
                      ( type == CODTRANS ) ||
                      ( instruction[11:8] != 4'd15 && type == CORTRANS );


// Encoded in order of priority, highest first:
//   1 Data Abort
//   2 FIRQ
//   3 IRQ
//   4 Address Exception
//   5 Prefetch Abort
//   6 Undefined Instruction
//   7 SWI
//   0 none
assign next_interrupt = ( dabt_request     ) ? 3'd1 :   // Data Abort
                        ( firq_request     ) ? 3'd2 :   // FIRQ
                        ( irq_request      ) ? 3'd3 :   // IRQ
                        ( instruction_adex ) ? 3'd4 :   // Address Exception
                        ( instruction_iabt ) ? 3'd5 :   // PreFetch Abort, only triggered
                                                        // if the instruction is used
                        ( und_request      ) ? 3'd6 :   // Undefined Instruction
                        ( swi_request      ) ? 3'd7 :   // SWI
                                               3'd0 ;   // none

// SWI and undefined instructions do not cause an interrupt in the decode
// stage. They only trigger interrupts if they are executed, so the
// interrupt is triggered if the execute condition is met in the execute stage.
// That leaves codes 1 to 5 as the ones that raise 'interrupt' here.
assign interrupt      = ( next_interrupt != 3'd0 ) && ( next_interrupt < 3'd6 );


// Processor mode entered for each interrupt code: FIRQ and IRQ have their
// own modes, no interrupt selects USR, every other code selects SVC
assign interrupt_mode = ( next_interrupt == 3'd0 ) ? USR  :
                        ( next_interrupt == 3'd2 ) ? FIRQ :
                        ( next_interrupt == 3'd3 ) ? IRQ  :
                                                     SVC  ;
 
 
 
 
// ========================================================
// Generate control signals
// ========================================================
// Combinational block that produces every *_nxt control value.
// Structure:
//   1. Default values are assigned to all outputs of the block.
//   2. A new instruction is decoded when instruction_valid is set and there
//      is neither an interrupt nor a register conflict.
//   3. Interrupt entry is handled.
//   4. Follow-on cycles of multi-cycle operations are handled per control_state.
// Blocking assignments are used throughout and later sections override values
// set by earlier ones, so the order of the sections is significant.
always @*
begin
// default mode
status_bits_mode_nxt = i_execute_status_bits[1:0]; // changes to the mode in the execute stage get reflected
// back to this stage automatically
status_bits_irq_mask_nxt = o_status_bits_irq_mask;
status_bits_firq_mask_nxt = o_status_bits_firq_mask;
decode_exclusive_nxt = 1'd0;
decode_daccess_nxt = 1'd0;
decode_iaccess_nxt = 1'd1;
copro_operation_nxt = 'd0;
// Save an instruction to use later
saved_current_instruction_wen = 1'd0;
pre_fetch_instruction_wen = 1'd0;
// These two hold their registered values by default
mtrans_r15_nxt = mtrans_r15;
restore_base_address_nxt = restore_base_address;
// default Mux Select values
barrel_shift_amount_sel_nxt = 'd0; // don't shift the input
barrel_shift_data_sel_nxt = 'd0; // immediate value
barrel_shift_function_nxt = 'd0;
multiply_function_nxt = 'd0;
iaddress_sel_nxt = 'd0;
daddress_sel_nxt = 'd0;
pc_sel_nxt = 'd0;
load_pc_nxt = 'd0;
byte_enable_sel_nxt = 'd0;
status_bits_sel_nxt = 'd0;
reg_write_sel_nxt = 'd0;
o_user_mode_regs_store_nxt = 'd0;
// ALU Muxes
alu_swap_sel_nxt = 'd0;
alu_not_sel_nxt = 'd0;
alu_cin_sel_nxt = 'd0;
alu_cout_sel_nxt = 'd0;
alu_out_sel_nxt = 'd0;
// default Flop Write Enable values
write_data_wen_nxt = 'd0;
copro_write_data_wen_nxt = 'd0;
base_address_wen_nxt = 'd0;
pc_wen_nxt = 'd1; // the PC write enable is the only enable that defaults to set
reg_bank_wen_nxt = 'd0; // Don't select any
status_bits_flags_wen_nxt = 'd0;
status_bits_mode_wen_nxt = 'd0;
status_bits_irq_mask_wen_nxt = 'd0;
status_bits_firq_mask_wen_nxt = 'd0;

// --------------------------------------------------------
// First cycle of a new instruction
// --------------------------------------------------------
if ( instruction_valid && !interrupt && !conflict )
begin
if ( type == REGOP )
begin
// Compare opcodes do not write a destination register
if ( !opcode_compare )
begin
// Check if the destination register is the PC
if (instruction[15:12] == 4'd15)
begin
pc_sel_nxt = 3'd1; // alu_out
iaddress_sel_nxt = 4'd1; // alu_out
end
else
reg_bank_wen_nxt = decode (instruction[15:12]);
end
// The next four tests all apply only when immediate_shift_op is clear
if ( !immediate_shift_op )
barrel_shift_function_nxt = instruction[6:5];
if ( !immediate_shift_op )
barrel_shift_data_sel_nxt = 2'd2; // Shift value from Rm register
if ( !immediate_shift_op && instruction[4] )
barrel_shift_amount_sel_nxt = 2'd1; // Shift amount from Rs register
if ( !immediate_shift_op && !instruction[4] )
barrel_shift_amount_sel_nxt = 2'd2; // Shift immediate amount
if ( opcode == ADD || opcode == CMN ) // CMN is just like an ADD
begin
alu_out_sel_nxt = 4'd1; // Add
end
if ( opcode == ADC ) // Add with Carry
begin
alu_out_sel_nxt = 4'd1; // Add
alu_cin_sel_nxt = 2'd2; // carry in from status_bits
end
if ( opcode == SUB || opcode == CMP ) // Subtract
begin
alu_out_sel_nxt = 4'd1; // Add
alu_cin_sel_nxt = 2'd1; // cin = 1
alu_not_sel_nxt = 1'd1; // invert B
end
// SBC (Subtract with Carry) subtracts the value of its
// second operand and the value of NOT(Carry flag) from
// the value of its first operand.
// Rd = Rn - shifter_operand - NOT(C Flag)
if ( opcode == SBC ) // Subtract with Carry
begin
alu_out_sel_nxt = 4'd1; // Add
alu_cin_sel_nxt = 2'd2; // carry in from status_bits
alu_not_sel_nxt = 1'd1; // invert B
end
if ( opcode == RSB ) // Reverse Subtract
begin
alu_out_sel_nxt = 4'd1; // Add
alu_cin_sel_nxt = 2'd1; // cin = 1
alu_not_sel_nxt = 1'd1; // invert B
alu_swap_sel_nxt = 1'd1; // swap A and B
end
if ( opcode == RSC ) // Reverse Subtract with carry
begin
alu_out_sel_nxt = 4'd1; // Add
alu_cin_sel_nxt = 2'd2; // carry in from status_bits
alu_not_sel_nxt = 1'd1; // invert B
alu_swap_sel_nxt = 1'd1; // swap A and B
end
if ( opcode == AND || opcode == TST ) // Logical AND, Test (using AND operator)
begin
alu_out_sel_nxt = 4'd8; // AND
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end
if ( opcode == EOR || opcode == TEQ ) // Logical Exclusive OR, Test Equivalence (using EOR operator)
begin
alu_out_sel_nxt = 4'd6; // XOR
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end

if ( opcode == ORR )
begin
alu_out_sel_nxt = 4'd7; // OR
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end
if ( opcode == BIC ) // Bit Clear (using AND & NOT operators)
begin
alu_out_sel_nxt = 4'd8; // AND
alu_not_sel_nxt = 1'd1; // invert B
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end
if ( opcode == MOV ) // Move
begin
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end
if ( opcode == MVN ) // Move NOT
begin
alu_not_sel_nxt = 1'd1; // invert B
alu_cout_sel_nxt = 1'd1; // i_barrel_shift_carry
end
end
// Load & Store instructions
if ( mem_op )
begin
if ( load_op && instruction[15:12] == 4'd15 ) // Write to PC
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value rather than an instruction fetch
load_pc_nxt = 1'd1;
end

decode_daccess_nxt = 1'd1; // indicate a valid data access
alu_out_sel_nxt = 4'd1; // Add
if ( !instruction[23] ) // U: Subtract offset
begin
alu_cin_sel_nxt = 2'd1; // cin = 1
alu_not_sel_nxt = 1'd1; // invert B
end
if ( store_op )
begin
write_data_wen_nxt = 1'd1;
if ( type == TRANS && instruction[22] )
byte_enable_sel_nxt = 2'd1; // Save byte
end
// need to update the register holding the address ?
// This is Rn bits [19:16]
if ( mem_op_pre_indexed || mem_op_post_indexed )
begin
// Check if the register being updated is the PC
if ( rn_sel_nxt == 4'd15 )
pc_sel_nxt = 3'd1;
else
reg_bank_wen_nxt = decode ( rn_sel_nxt );
end
// if post-indexed, then use Rn rather than ALU output, as address
if ( mem_op_post_indexed )
daddress_sel_nxt = 4'd4; // Rn
else
daddress_sel_nxt = 4'd1; // alu out
if ( instruction[25] && type == TRANS )
barrel_shift_data_sel_nxt = 2'd2; // Shift value from Rm register
if ( type == TRANS && instruction[25] && shift_imm != 5'd0 )
begin
barrel_shift_function_nxt = instruction[6:5];
barrel_shift_amount_sel_nxt = 2'd2; // imm_shift_amount
end
end

if ( type == BRANCH )
begin
pc_sel_nxt = 3'd1; // alu_out
iaddress_sel_nxt = 4'd1; // alu_out
alu_out_sel_nxt = 4'd1; // Add
if ( instruction[24] ) // Link
begin
reg_bank_wen_nxt = decode (4'd14); // Save PC to LR
reg_write_sel_nxt = 3'd1; // pc - 32'd4
end
end

if ( type == MTRANS )
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
decode_daccess_nxt = 1'd1; // valid data access
alu_out_sel_nxt = 4'd1; // Add
mtrans_r15_nxt = instruction[15]; // load or save r15 ?
base_address_wen_nxt = 1'd1; // Save the value of the register used for the base address,
// in case of a data abort, and need to restore the value

if ( mtrans_num_registers > 4'd1 )
begin
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
pc_wen_nxt = 1'd0; // hold current PC value rather than an instruction fetch
end


// The spec says -
// If the instruction would have overwritten the base with data
// (that is, it has the base in the transfer list), the overwriting is prevented.
// This is true even when the abort occurs after the base word gets loaded
// Set for a load (bit 20) whose register list includes the base register, bits [19:16]
restore_base_address_nxt = instruction[20] &&
(instruction[15:0] & (1'd1 << instruction[19:16]));

// Increment
if ( instruction[23] )
begin
if ( instruction[24] ) // increment before
daddress_sel_nxt = 4'd7; // Rn + 4
else
daddress_sel_nxt = 4'd4; // Rn
end
else
// Decrement
begin
alu_cin_sel_nxt = 2'd1; // cin = 1
alu_not_sel_nxt = 1'd1; // invert B
if ( !instruction[24] ) // decrement after
daddress_sel_nxt = 4'd6; // alu out + 4
else
daddress_sel_nxt = 4'd1; // alu out
end
// Load or store ?
if ( !instruction[20] ) // Store
write_data_wen_nxt = 1'd1;
// stm: store the user mode registers, when in privileged mode
if ( {instruction[22:20]} == 3'b100 )
o_user_mode_regs_store_nxt = 1'd1;
// update the base register ?
if ( instruction[21] ) // the W bit
reg_bank_wen_nxt = decode (rn_sel_nxt);

// write to the pc ?
if ( instruction[20] && mtrans_reg1 == 4'd15 ) // Write to PC
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value rather than an instruction fetch
load_pc_nxt = 1'd1;
end
end
if ( type == MULT )
begin
multiply_function_nxt[0] = 1'd1; // set enable
// some bits can be changed just below
saved_current_instruction_wen = 1'd1; // Save the Multiply instruction to
// refer back to later
pc_wen_nxt = 1'd0; // hold current PC value
if ( instruction[21] )
multiply_function_nxt[1] = 1'd1; // accumulate
end
// swp - do read part first
if ( type == SWAP )
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value
decode_iaccess_nxt = 1'd0; // skip the instruction fetch
decode_daccess_nxt = 1'd1; // data access
barrel_shift_data_sel_nxt = 2'd2; // Shift value from Rm register
daddress_sel_nxt = 4'd4; // Rn
decode_exclusive_nxt = 1'd1; // signal an exclusive access
end


// mcr & mrc - takes two cycles
if ( type == CORTRANS && !und_request )
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
if ( instruction[20] ) // MRC
copro_operation_nxt = 2'd1; // Register transfer from Co-Processor
else // MCR
begin
// Don't enable operation to Co-Processor until next period
// So it gets the Rd value from the execution stage at the same time
copro_operation_nxt = 2'd0; // same value as the default assigned at the top of the block
copro_write_data_wen_nxt = 1'd1; // Rd register value to co-processor
end
end

if ( type == SWI || und_request )
begin
// save address of next instruction to Supervisor Mode LR
reg_write_sel_nxt = 3'd1; // pc -4
reg_bank_wen_nxt = decode (4'd14); // LR
iaddress_sel_nxt = 4'd2; // interrupt_vector
pc_sel_nxt = 3'd2; // interrupt_vector
status_bits_mode_nxt = interrupt_mode; // e.g. Supervisor mode
status_bits_mode_wen_nxt = 1'd1;
// disable normal interrupts
status_bits_irq_mask_nxt = 1'd1;
status_bits_irq_mask_wen_nxt = 1'd1;
end

if ( regop_set_flags )
begin
status_bits_flags_wen_nxt = 1'd1;
// If <Rd> is r15, the ALU output is copied to the Status Bits.
// Not allowed to use r15 for mul or mla instructions
if ( instruction[15:12] == 4'd15 )
begin
status_bits_sel_nxt = 3'd1; // alu out
// Privileged mode? Then also update the other status bits
if ( i_execute_status_bits[1:0] != USR )
begin
status_bits_mode_wen_nxt = 1'd1;
status_bits_irq_mask_wen_nxt = 1'd1;
status_bits_firq_mask_wen_nxt = 1'd1;
end
end
end
end

// --------------------------------------------------------
// Interrupt entry
// --------------------------------------------------------
// Handle asynchronous interrupts.
// interrupts are processed only during execution states
// multicycle instructions must complete before the interrupt starts
// SWI and Undefined Instruction interrupts are only executed if the
// instruction that causes the interrupt is conditionally executed, so
// they are not handled here
// NOTE(review): 'interrupt' already excludes next_interrupt values 6 and 7,
// so the test against 3'd6 below looks redundant - confirm before removing
if ( instruction_valid && interrupt && next_interrupt != 3'd6 )
begin
// Save the interrupt causing instruction to refer back to later
// This also saves the instruction abort vma and status, in the case of an
// instruction abort interrupt
saved_current_instruction_wen = 1'd1;
// save address of next instruction to Supervisor Mode LR
// Address Exception ?
if ( next_interrupt == 3'd4 )
reg_write_sel_nxt = 3'd7; // pc
else
reg_write_sel_nxt = 3'd1; // pc -4
reg_bank_wen_nxt = decode (4'd14); // LR
iaddress_sel_nxt = 4'd2; // interrupt_vector
pc_sel_nxt = 3'd2; // interrupt_vector
status_bits_mode_nxt = interrupt_mode; // e.g. Supervisor mode
status_bits_mode_wen_nxt = 1'd1;
// disable normal interrupts
status_bits_irq_mask_nxt = 1'd1;
status_bits_irq_mask_wen_nxt = 1'd1;

// disable fast interrupts
if ( next_interrupt == 3'd2 ) // FIRQ
begin
status_bits_firq_mask_nxt = 1'd1;
status_bits_firq_mask_wen_nxt = 1'd1;
end
end

// --------------------------------------------------------
// Follow-on cycles, selected by control_state
// --------------------------------------------------------
// previous instruction was ldr
// if it is currently executing in the execute stage do the following
if ( control_state == MEM_WAIT1 && !conflict )
begin
// Save the next instruction to execute later
// Do this even if the ldr instruction does not execute because of Condition
pre_fetch_instruction_wen = 1'd1;
if ( instruction_execute ) // conditional execution state
begin
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
pc_wen_nxt = 1'd0; // hold current PC value
load_pc_nxt = load_pc_r;
end
end
// completion of ldr instruction
if ( control_state == MEM_WAIT2 )
begin
if ( !dabt ) // don't load data if there is an abort on the data read
begin
pc_wen_nxt = 1'd0; // hold current PC value
// Check if the load destination is the PC
if (( type == TRANS && instruction[15:12] == 4'd15 ) ||
( type == MTRANS && instruction[20] && mtrans_reg1 == 4'd15 ))
begin
pc_sel_nxt = 3'd3; // read_data_filtered
iaddress_sel_nxt = 4'd3; // hold value after reading in from mem
load_pc_nxt = load_pc_r;
end
end
end
// second cycle of multiple load or store
if ( control_state == MTRANS_EXEC1 && !conflict )
begin
// Save the next instruction to execute later
pre_fetch_instruction_wen = 1'd1;
if ( instruction_execute ) // conditional execution state
begin
daddress_sel_nxt = 4'd5; // o_address
decode_daccess_nxt = 1'd1; // data access
if ( mtrans_num_registers > 4'd2 )
decode_iaccess_nxt = 1'd0; // skip the instruction fetch


// NOTE(review): the other MTRANS sections test '> 4'd1' here; the two
// differ only when mtrans_num_registers is zero - confirm that is intended
if ( mtrans_num_registers != 4'd1 )
begin
pc_wen_nxt = 1'd0; // hold current PC value
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
end

if ( !instruction[20] ) // Store
write_data_wen_nxt = 1'd1;
// stm: store the user mode registers, when in privileged mode
if ( {instruction[22:20]} == 3'b100 )
o_user_mode_regs_store_nxt = 1'd1;
// write to the pc ?
if ( instruction[20] && mtrans_reg1 == 4'd15 ) // Write to PC
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value rather than an instruction fetch
load_pc_nxt = 1'd1;
end
end
end
// third cycle of multiple load or store
if ( control_state == MTRANS_EXEC2 )
begin
daddress_sel_nxt = 4'd5; // o_address
decode_daccess_nxt = 1'd1; // data access

if ( mtrans_num_registers > 4'd2 )
begin
decode_iaccess_nxt = 1'd0; // skip the instruction fetch
end

if ( mtrans_num_registers > 4'd1 )
begin
pc_wen_nxt = 1'd0; // hold current PC value
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
end
// Store
if ( !instruction[20] )
write_data_wen_nxt = 1'd1;
// stm: store the user mode registers, when in privileged mode
if ( {instruction[22:20]} == 3'b100 )
o_user_mode_regs_store_nxt = 1'd1;

// write to the pc ?
if ( instruction[20] && mtrans_reg1 == 4'd15 ) // Write to PC
begin
saved_current_instruction_wen = 1'd1; // Save the memory access instruction to refer back to later
pc_wen_nxt = 1'd0; // hold current PC value rather than an instruction fetch
load_pc_nxt = 1'd1;
end
end
// state is for when a data abort interrupt is triggered during an ldm
if ( control_state == MTRANS_ABORT )
begin
// Restore the Base Address, if the base register is included in the
// list of registers being loaded
if (restore_base_address) // ldm with base address in register list
begin
reg_write_sel_nxt = 3'd6; // write base_register
reg_bank_wen_nxt = decode ( instruction[19:16] ); // to Rn
end
end
// Multiply or Multiply-Accumulate
if ( control_state == MULT_PROC1 && instruction_execute && !conflict )
begin
// Save the next instruction to execute later
// NOTE(review): this section only runs when instruction_execute is set,
// so the pre-fetch save is not done for a multiply that fails its condition
pre_fetch_instruction_wen = 1'd1;
pc_wen_nxt = 1'd0; // hold current PC value
multiply_function_nxt = o_multiply_function;
end

// Multiply or Multiply-Accumulate
// Do multiplication
// Wait for done or accumulate signal
if ( control_state == MULT_PROC2 )
begin
// Hold the PC and keep the registered multiply function while the multiply runs
pc_wen_nxt = 1'd0; // hold current PC value
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
multiply_function_nxt = o_multiply_function;
end

// Save RdLo
// always last cycle of all multiply or multiply accumulate operations
if ( control_state == MULT_STORE )
begin
reg_write_sel_nxt = 3'd2; // multiply_out
multiply_function_nxt = o_multiply_function;
if ( type == MULT ) // 32-bit
reg_bank_wen_nxt = decode (instruction[19:16]); // Rd
else // 64-bit / Long
reg_bank_wen_nxt = decode (instruction[15:12]); // RdLo
if ( instruction[20] ) // the 'S' bit
begin
status_bits_sel_nxt = 3'd4; // { multiply_flags, status_bits_flags[1:0] }
status_bits_flags_wen_nxt = 1'd1;
end
end

// Add lower 32 bits to multiplication product
if ( control_state == MULT_ACCUMU )
begin
multiply_function_nxt = o_multiply_function;
pc_wen_nxt = 1'd0; // hold current PC value
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
end

// swp - do write request in 2nd cycle
if ( control_state == SWAP_WRITE && instruction_execute && !conflict )
begin
barrel_shift_data_sel_nxt = 2'd2; // Shift value from Rm register
daddress_sel_nxt = 4'd4; // Rn
write_data_wen_nxt = 1'd1;
decode_iaccess_nxt = 1'd0; // skip the instruction fetch
decode_daccess_nxt = 1'd1; // data access
if ( instruction[22] )
byte_enable_sel_nxt = 2'd1; // Save byte
// instruction_execute is always set here, the enclosing condition requires it
if ( instruction_execute ) // conditional execution state
pc_wen_nxt = 1'd0; // hold current PC value
// Save the next instruction to execute later
pre_fetch_instruction_wen = 1'd1;
load_pc_nxt = load_pc_r;
end

// swp - receive read response in 3rd cycle
if ( control_state == SWAP_WAIT1 )
begin

if ( instruction_execute ) // conditional execution state
begin
iaddress_sel_nxt = 4'd3; // pc (not pc + 4)
pc_wen_nxt = 1'd0; // hold current PC value
end
if ( !dabt )
begin
// Check if the load destination is the PC
if ( instruction[15:12] == 4'd15 )
begin
pc_sel_nxt = 3'd3; // read_data_filtered
iaddress_sel_nxt = 4'd3; // hold value after reading in from mem
load_pc_nxt = load_pc_r;
end
end
end

// 1 cycle delay for Co-Processor Register access
if ( control_state == COPRO_WAIT && instruction_execute && !conflict )
begin
pre_fetch_instruction_wen = 1'd1;
if ( instruction[20] ) // mrc instruction
begin
// Check if the destination register is the PC
if ( instruction[15:12] == 4'd15 )
begin
// If r15 is specified for <Rd>, the condition code flags are
// updated instead of a general-purpose register.
status_bits_sel_nxt = 3'd3; // i_copro_data
status_bits_flags_wen_nxt = 1'd1;
// Can't change these in USR mode
if ( i_execute_status_bits[1:0] != USR )
begin
status_bits_mode_wen_nxt = 1'd1;
status_bits_irq_mask_wen_nxt = 1'd1;
status_bits_firq_mask_wen_nxt = 1'd1;
end
end
else
reg_bank_wen_nxt = decode (instruction[15:12]);
// The write select is set for every mrc, whichever branch above was taken
reg_write_sel_nxt = 3'd5; // i_copro_data
end
else // mcr instruction
begin
copro_operation_nxt = 2'd2; // Register transfer to Co-Processor
end
end

// Have just changed the status_bits mode but this
// creates a 1 cycle gap with the old mode
// coming back from execute into instruction_decode
// So squash that old mode value during this
// cycle of the interrupt transition
if ( control_state == INT_WAIT1 )
status_bits_mode_nxt = o_status_bits_mode; // Supervisor mode

end
 
 
// Speed up the long path from u_decode/fetch_instruction_r to u_register_bank/r8_firq
// by pre-encoding, in this stage, the firq_s3 signal that is used in u_register_bank.
// An earlier form also required !user_mode_regs_load_nxt; that term is no longer used.
assign firq_not_user_mode_nxt = ( status_bits_mode_nxt == FIRQ );


// ========================================================
// Next State Logic
// ========================================================

// Replicates the current value of the execute signal in the execute stage:
// the registered condition evaluated against execute status bits [31:28]
assign instruction_execute = conditional_execute ( o_condition, i_execute_status_bits[31:28] );


// First state of executing a new instruction.
// It is complex because of conditional execution of multi-cycle instructions:
// when the last instruction was a multi-cycle instruction that did not execute
// because its condition was false, the first follow-on state is treated
// like the execute state.
assign instruction_valid =
        ( control_state == PRE_FETCH_EXEC ) ||
        ( control_state == EXECUTE        ) ||
        ( ( control_state == MTRANS_EXEC1 ||
            control_state == MULT_PROC1   ||
            control_state == SWAP_WRITE   ||
            control_state == COPRO_WAIT   ||
            control_state == MEM_WAIT1    ||
            control_state == PC_STALL1    ) && !instruction_execute );
 
 
// Combinational next-state function for control_state.
// The state is held by default. Each 'if' below may overwrite
// control_state_nxt with a blocking assignment, so when more than one
// condition is true the last one in the block wins. The instruction_valid
// section is placed last for that reason.
always @*
begin
// default is to hold the current state
control_state_nxt = control_state;
// Note: The order is important here
// Unconditional single-step transitions
if ( control_state == RST_WAIT1 ) control_state_nxt = RST_WAIT2;
if ( control_state == RST_WAIT2 ) control_state_nxt = EXECUTE;
if ( control_state == INT_WAIT1 ) control_state_nxt = INT_WAIT2;
if ( control_state == INT_WAIT2 ) control_state_nxt = EXECUTE;
if ( control_state == COPRO_WAIT ) control_state_nxt = PRE_FETCH_EXEC;
if ( control_state == PC_STALL1 ) control_state_nxt = PC_STALL2;
if ( control_state == PC_STALL2 ) control_state_nxt = EXECUTE;
if ( control_state == SWAP_WRITE ) control_state_nxt = SWAP_WAIT1;
if ( control_state == SWAP_WAIT1 ) control_state_nxt = SWAP_WAIT2;
if ( control_state == MULT_STORE ) control_state_nxt = PRE_FETCH_EXEC;
if ( control_state == MTRANS_ABORT ) control_state_nxt = PRE_FETCH_EXEC;

if ( control_state == MEM_WAIT1 )
control_state_nxt = MEM_WAIT2;

// End of a memory read or swap: stall if the PC is being written
if ( control_state == MEM_WAIT2 ||
control_state == SWAP_WAIT2 )
begin
if ( write_pc ) // writing to the PC!!
control_state_nxt = PC_STALL1;
else
control_state_nxt = PRE_FETCH_EXEC;
end
if ( control_state == MTRANS_EXEC1 )
begin
// registers remain in the list after the lowest set bit is cleared
if ( mtrans_instruction_nxt[15:0] != 16'd0 )
control_state_nxt = MTRANS_EXEC2;
else // if the register list holds a single register
begin
if ( dabt ) // data abort
control_state_nxt = MTRANS_ABORT;
else if ( write_pc ) // writing to the PC!!
control_state_nxt = MEM_WAIT1;
else
control_state_nxt = PRE_FETCH_EXEC;
end
end
// Stay in State MTRANS_EXEC2 until the full list of registers to
// load or store has been processed
if ( control_state == MTRANS_EXEC2 && mtrans_num_registers == 5'd1 )
begin
if ( dabt ) // data abort
control_state_nxt = MTRANS_ABORT;
else if ( write_pc ) // writing to the PC!!
control_state_nxt = MEM_WAIT1;
else
control_state_nxt = PRE_FETCH_EXEC;
end
if ( control_state == MULT_PROC1 )
begin
if (!instruction_execute)
control_state_nxt = PRE_FETCH_EXEC;
else
control_state_nxt = MULT_PROC2;
end
if ( control_state == MULT_PROC2 )
begin
// The 'else' below pairs with the inner 'if', so the state is
// held in MULT_PROC2 until i_multiply_done is set
if ( i_multiply_done )
if ( o_multiply_function[1] ) // Accumulate ?
control_state_nxt = MULT_ACCUMU;
else
control_state_nxt = MULT_STORE;
end
if ( control_state == MULT_ACCUMU )
begin
control_state_nxt = MULT_STORE;
end
// This should come at the end, so that conditional execution works
// correctly
if ( instruction_valid )
begin
// default is to stay in execute state, or to move into this
// state from a conditional execute state
control_state_nxt = EXECUTE;
if ( current_write_pc )
control_state_nxt = PC_STALL1;

if ( load_op && instruction[15:12] == 4'd15 ) // load new PC value
control_state_nxt = MEM_WAIT1;

// ldm rx, {pc}
if ( type == MTRANS && instruction[20] && mtrans_reg1 == 4'd15 ) // Write to PC
control_state_nxt = MEM_WAIT1;

// more than one register in the list
if ( type == MTRANS && !conflict && mtrans_num_registers != 5'd0 && mtrans_num_registers != 5'd1 )
control_state_nxt = MTRANS_EXEC1;

if ( type == MULT && !conflict )
control_state_nxt = MULT_PROC1;

if ( type == SWAP && !conflict )
control_state_nxt = SWAP_WRITE;

if ( type == CORTRANS && !und_request && !conflict )
control_state_nxt = COPRO_WAIT;
// interrupt overrides everything else so it is last
if ( interrupt && !conflict )
control_state_nxt = INT_WAIT1;
end
end
 
 
// ========================================================
// Register Update
// ========================================================
// All registers in this block hold their values while i_access_stall is set.
always @ ( posedge i_clk )
    begin
    if ( !i_access_stall )
        begin
        // ---- Values captured from the fetch side ----
        // These are additionally held while a register conflict is flagged
        if ( !conflict )
            begin
            fetch_instruction_r         <= i_fetch_instruction;
            fetch_address_r             <= i_execute_iaddress;
            abt_status_reg              <= i_abt_status;
            iabt_reg                    <= i_iabt;
            adex_reg                    <= i_adex;
            end

        // ---- Decoder state ----
        control_state               <= control_state_nxt;
        mtrans_r15                  <= mtrans_r15_nxt;
        restore_base_address        <= restore_base_address_nxt;
        load_pc_r                   <= load_pc_nxt;
        load_rd_d1                  <= load_rd_d1_nxt;

        // ---- Condition ----
        // when have an interrupt, execute the interrupt operation
        // unconditionally in the execute stage
        // ensures that status_bits register gets updated correctly
        // Likewise when in middle of multi-cycle instructions
        // execute them unconditionally
        o_condition                 <= ( instruction_valid && !interrupt ) ? condition_nxt : AL;

        // ---- Status bits ----
        o_status_bits_mode          <= status_bits_mode_nxt;
        o_status_bits_irq_mask      <= status_bits_irq_mask_nxt;
        o_status_bits_firq_mask     <= status_bits_firq_mask_nxt;
        o_status_bits_sel           <= status_bits_sel_nxt;
        o_status_bits_flags_wen     <= status_bits_flags_wen_nxt;
        o_status_bits_mode_wen      <= status_bits_mode_wen_nxt;
        o_status_bits_irq_mask_wen  <= status_bits_irq_mask_wen_nxt;
        o_status_bits_firq_mask_wen <= status_bits_firq_mask_wen_nxt;
        o_firq_not_user_mode        <= firq_not_user_mode_nxt;
        o_interrupt_vector_sel      <= next_interrupt;

        // ---- Immediate operands ----
        o_imm32                     <= imm32_nxt;
        o_imm_shift_amount          <= imm_shift_amount_nxt;
        o_shift_imm_zero            <= shift_imm_zero_nxt;

        // ---- Register selects ----
        o_rn_sel                    <= rn_sel_nxt;
        o_rm_sel                    <= rm_sel_nxt;
        o_rs_sel                    <= rs_sel_nxt;
        o_load_rd                   <= load_rd_nxt;
        o_reg_write_sel             <= reg_write_sel_nxt;
        o_reg_bank_wen              <= reg_bank_wen_nxt;

        // ---- Barrel shifter, ALU and multiplier ----
        o_barrel_shift_amount_sel   <= barrel_shift_amount_sel_nxt;
        o_barrel_shift_data_sel     <= barrel_shift_data_sel_nxt;
        o_barrel_shift_function     <= barrel_shift_function_nxt;
        o_alu_function              <= alu_function_nxt;
        o_multiply_function         <= multiply_function_nxt;

        // ---- Memory access, addresses and PC ----
        o_decode_exclusive          <= decode_exclusive_nxt;
        o_decode_iaccess            <= decode_iaccess_nxt;
        o_decode_daccess            <= decode_daccess_nxt;
        o_iaddress_sel              <= iaddress_sel_nxt;
        o_daddress_sel              <= daddress_sel_nxt;
        o_pc_sel                    <= pc_sel_nxt;
        o_pc_wen                    <= pc_wen_nxt;
        o_byte_enable_sel           <= byte_enable_sel_nxt;
        o_write_data_wen            <= write_data_wen_nxt;
        o_base_address_wen          <= base_address_wen_nxt;

        // ---- Co-processor fields, taken straight from the instruction ----
        o_copro_opcode1             <= instruction[23:21];
        o_copro_opcode2             <= instruction[7:5];
        o_copro_crn                 <= instruction[19:16];
        o_copro_crm                 <= instruction[3:0];
        o_copro_num                 <= instruction[11:8];
        o_copro_operation           <= copro_operation_nxt;
        o_copro_write_data_wen      <= copro_write_data_wen_nxt;
        end
    end
 
 
 
// Instruction holding registers: saved_current_instruction*,
// pre_fetch_instruction* and hold_instruction*. All of them keep their
// value while i_access_stall is asserted.
always @ ( posedge i_clk )
    begin
    if ( !i_access_stall )
        begin
        // Sometimes the current instruction is a pre-fetch instruction,
        // e.g. with two ldr instructions in a row the second ldr is saved
        // to the pre-fetch instruction register; when it is decoded a copy
        // is saved to the saved_current_instruction register.
        // A multi-transfer instruction takes priority and stores
        // mtrans_instruction_nxt instead of the raw instruction word.
        if ( type == MTRANS )
            begin
            saved_current_instruction               <= mtrans_instruction_nxt;
            saved_current_instruction_iabt          <= instruction_iabt;
            saved_current_instruction_adex          <= instruction_adex;
            saved_current_instruction_address       <= instruction_address;
            saved_current_instruction_iabt_status   <= instruction_iabt_status;
            end
        else if ( saved_current_instruction_wen )
            begin
            saved_current_instruction               <= instruction;
            saved_current_instruction_iabt          <= instruction_iabt;
            saved_current_instruction_adex          <= instruction_adex;
            saved_current_instruction_address       <= instruction_address;
            saved_current_instruction_iabt_status   <= instruction_iabt_status;
            end

        // Park the registered fetch word and its side-band flags
        if ( pre_fetch_instruction_wen )
            begin
            pre_fetch_instruction                   <= fetch_instruction_r;
            pre_fetch_instruction_iabt              <= iabt_reg;
            pre_fetch_instruction_adex              <= adex_reg;
            pre_fetch_instruction_address           <= fetch_address_r;
            pre_fetch_instruction_iabt_status       <= abt_status_reg;
            end

        // The hold registers follow the current instruction on every
        // non-stalled clock
        hold_instruction                            <= instruction;
        hold_instruction_iabt                       <= instruction_iabt;
        hold_instruction_adex                       <= instruction_adex;
        hold_instruction_address                    <= instruction_address;
        hold_instruction_iabt_status                <= instruction_iabt_status;
        end
    end
 
 
// Samples the interrupt request inputs and latches a data abort.
// dabt_reg is sticky: once set it stays set until the control state is
// INT_WAIT1 while the mode output is SVC, at which point it is cleared.
// dabt_reg_d1 is dabt_reg delayed by one non-stalled clock.
always @ ( posedge i_clk )
    begin
    if ( !i_access_stall )
        begin
        irq         <= i_irq;
        firq        <= i_firq;

        if ( ( control_state == INT_WAIT1 ) && ( o_status_bits_mode == SVC ) )
            begin
            dabt_reg    <= 1'd0;
            end
        else
            begin
            dabt_reg    <= dabt_reg || i_dabt;
            end

        dabt_reg_d1 <= dabt_reg;
        end
    end

// Combinational data abort flag: the latched abort or one arriving this cycle
assign dabt = dabt_reg || i_dabt;
 
 
// ========================================================
// Decompiler for debugging core - not synthesizable
// ========================================================
//synopsys translate_off
 
`include "debug_functions.v"
 
// Simulation-only decompiler instance (inside the translate_off region).
// The instruction is only reported as valid when no conflict is flagged,
// and the interrupt code is zeroed unless an interrupt is pending.
a25_decompile u_decompile
(
    .i_clk                      ( i_clk                             ),
    .i_access_stall             ( i_access_stall                    ),
    .i_instruction              ( instruction                       ),
    .i_instruction_valid        ( instruction_valid &&!conflict     ),
    .i_instruction_execute      ( instruction_execute               ),
    .i_instruction_address      ( instruction_address               ),
    .i_interrupt                ( {3{interrupt}} & next_interrupt   ),
    .i_interrupt_state          ( control_state == INT_WAIT2        ),
    .i_instruction_undefined    ( und_request                       ),
    .i_pc_sel                   ( o_pc_sel                          ),
    .i_pc_wen                   ( o_pc_wen                          )
);
 
 
// ASCII renderings of the control state, processor mode and instruction
// type, so they can be read as text in a waveform viewer.
// Simulation only (inside the translate_off region).
wire    [(15*8)-1:0]    xCONTROL_STATE;
wire    [(15*8)-1:0]    xMODE;
wire    [( 8*8)-1:0]    xTYPE;

// Name of the current control state
assign xCONTROL_STATE =
    control_state == RST_WAIT1      ? "RST_WAIT1"       :
    control_state == RST_WAIT2      ? "RST_WAIT2"       :
    control_state == INT_WAIT1      ? "INT_WAIT1"       :
    control_state == INT_WAIT2      ? "INT_WAIT2"       :
    control_state == EXECUTE        ? "EXECUTE"         :
    control_state == PRE_FETCH_EXEC ? "PRE_FETCH_EXEC"  :
    control_state == MEM_WAIT1      ? "MEM_WAIT1"       :
    control_state == MEM_WAIT2      ? "MEM_WAIT2"       :
    control_state == PC_STALL1      ? "PC_STALL1"       :
    control_state == PC_STALL2      ? "PC_STALL2"       :
    control_state == MTRANS_EXEC1   ? "MTRANS_EXEC1"    :
    control_state == MTRANS_EXEC2   ? "MTRANS_EXEC2"    :
    control_state == MTRANS_ABORT   ? "MTRANS_ABORT"    :
    control_state == MULT_PROC1     ? "MULT_PROC1"      :
    control_state == MULT_PROC2     ? "MULT_PROC2"      :
    control_state == MULT_STORE     ? "MULT_STORE"      :
    control_state == MULT_ACCUMU    ? "MULT_ACCUMU"     :
    control_state == SWAP_WRITE     ? "SWAP_WRITE"      :
    control_state == SWAP_WAIT1     ? "SWAP_WAIT1"      :
    control_state == SWAP_WAIT2     ? "SWAP_WAIT2"      :
    control_state == COPRO_WAIT     ? "COPRO_WAIT"      :
                                      "UNKNOWN "        ;

// Mode name comes from the mode_name function (pulled in by the
// debug_functions.v include)
assign xMODE = mode_name ( o_status_bits_mode );

// Name of the decoded instruction type
assign xTYPE =
    type == REGOP       ? "REGOP"       :
    type == MULT        ? "MULT"        :
    type == SWAP        ? "SWAP"        :
    type == TRANS       ? "TRANS"       :
    type == MTRANS      ? "MTRANS"      :
    type == BRANCH      ? "BRANCH"      :
    type == CODTRANS    ? "CODTRANS"    :
    type == COREGOP     ? "COREGOP"     :
    type == CORTRANS    ? "CORTRANS"    :
    type == SWI         ? "SWI"         :
                          "UNKNOWN"     ;
 
 
// Simulation-only sanity check: while in the EXECUTE state, report an
// error if bit 0 or bit 31 of the instruction is unknown (x).
// Only these two bits are sampled; x's confined to other bits are not
// detected by this check.
always @( posedge i_clk )
if (control_state == EXECUTE && ((instruction[0] === 1'bx) || (instruction[31] === 1'bx)))
begin
// NOTE(review): TB_ERROR_MESSAGE is a macro defined outside this file
// section - presumably it prints a testbench error banner; confirm.
`TB_ERROR_MESSAGE
$display("Instruction with x's =%08h", instruction);
end
//synopsys translate_on
 
endmodule
 
 
/amber25/a25_core.v
0,0 → 1,495
//////////////////////////////////////////////////////////////////
// //
// Amber 25 Core top-Level module //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Instantiates the core consisting of fetch, instruction //
// decode, execute, and co-processor. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Amber 25 core top level.
//
// Purely structural: instantiates and wires together the fetch, decode,
// execute, memory, write-back, wishbone master and co-processor modules.
// The only logic at this level is
//   - the abort fault muxing (data abort has priority over instruction abort)
//   - access_stall, the OR of the fetch stall and the memory stall
//
// Ports
//   i_clk          clock for every sub-module
//   i_irq          interrupt request, active high
//   i_firq         fast interrupt request, active high
//   i_system_rdy   the core is stalled while this is low
//   *_wb_*         single Wishbone master interface shared by instruction
//                  and data accesses
module a25_core
(
input                       i_clk,

input                       i_irq,              // Interrupt request, active high
input                       i_firq,             // Fast Interrupt request, active high

input                       i_system_rdy,       // Amber is stalled when this is low

// Wishbone Master I/F
output      [31:0]          o_wb_adr,
output      [3:0]           o_wb_sel,
output                      o_wb_we,
input       [31:0]          i_wb_dat,
output      [31:0]          o_wb_dat,
output                      o_wb_cyc,
output                      o_wb_stb,
input                       i_wb_ack,
input                       i_wb_err

);

// --------------------------------------
// Addresses and data between the stages
// --------------------------------------
wire      [31:0]          execute_iaddress;
wire                      execute_iaddress_valid;
wire      [31:0]          execute_iaddress_nxt;  // un-registered version of execute_iaddress
                                                 // to the instruction cache rams
wire      [31:0]          execute_daddress;
wire                      execute_daddress_valid;
wire      [31:0]          execute_daddress_nxt;  // un-registered version of execute_daddress
                                                 // to the data cache rams
wire      [31:0]          write_data;
wire                      write_enable;
wire      [31:0]          fetch_instruction;
// wire                   priviledged;
wire                      decode_exclusive;
wire                      decode_iaccess;
wire                      decode_daccess;
wire      [3:0]           byte_enable;
wire                      exclusive;             // swap access
wire                      cache_enable;          // Enable the cache
wire                      cache_flush;           // Flush the cache
wire      [31:0]          cacheable_area;

// --------------------------------------
// Stall signals
// --------------------------------------
wire                      fetch_stall;
wire                      mem_stall;
wire                      access_stall;

// --------------------------------------
// Decode -> Execute control
// --------------------------------------
wire     [1:0]            status_bits_mode;
wire                      status_bits_irq_mask;
wire                      status_bits_firq_mask;
wire                      status_bits_flags_wen;
wire                      status_bits_mode_wen;
wire                      status_bits_irq_mask_wen;
wire                      status_bits_firq_mask_wen;
wire     [31:0]           execute_status_bits;
wire     [31:0]           imm32;
wire     [4:0]            imm_shift_amount;
wire                      shift_imm_zero;
wire     [3:0]            condition;

wire     [3:0]            rm_sel;
wire     [3:0]            rs_sel;
wire     [7:0]            decode_load_rd;
wire     [7:0]            exec_load_rd;
wire     [3:0]            rn_sel;
wire     [1:0]            barrel_shift_amount_sel;
wire     [1:0]            barrel_shift_data_sel;
wire     [1:0]            barrel_shift_function;
wire     [8:0]            alu_function;
wire     [1:0]            multiply_function;
wire     [2:0]            interrupt_vector_sel;
wire     [3:0]            iaddress_sel;
wire     [3:0]            daddress_sel;
wire     [2:0]            pc_sel;
wire     [1:0]            byte_enable_sel;
wire     [2:0]            status_bits_sel;
wire     [2:0]            reg_write_sel;
// wire                   user_mode_regs_load;
wire                      user_mode_regs_store_nxt;
wire                      firq_not_user_mode;

wire                      write_data_wen;
wire                      copro_write_data_wen;
wire                      base_address_wen;
wire                      pc_wen;
wire     [14:0]           reg_bank_wen;

// --------------------------------------
// Co-processor interface
// --------------------------------------
wire     [2:0]            copro_opcode1;
wire     [2:0]            copro_opcode2;
wire     [3:0]            copro_crn;
wire     [3:0]            copro_crm;
wire     [3:0]            copro_num;
wire     [1:0]            copro_operation;
wire     [31:0]           copro_read_data;
wire     [31:0]           copro_write_data;
wire                      multiply_done;

// --------------------------------------
// Abort / fault reporting
// --------------------------------------
wire                      decode_fault;
wire                      iabt_trigger;
wire                      dabt_trigger;

wire     [7:0]            decode_fault_status;
wire     [7:0]            iabt_fault_status;
wire     [7:0]            dabt_fault_status;

wire     [31:0]           decode_fault_address;
wire     [31:0]           iabt_fault_address;
wire     [31:0]           dabt_fault_address;

wire                      adex;

// --------------------------------------
// Memory and write-back stage read data
// --------------------------------------
wire     [31:0]           mem_read_data;
wire                      mem_read_data_valid;
wire     [9:0]            mem_load_rd;

wire     [31:0]           wb_read_data;
wire                      wb_read_data_valid;
wire     [9:0]            wb_load_rd;

// --------------------------------------
// Cache <-> Wishbone master
// --------------------------------------
wire                      dcache_wb_cached_req;
wire                      dcache_wb_uncached_req;
wire                      dcache_wb_qword;
wire                      dcache_wb_write;
wire     [3:0]            dcache_wb_byte_enable;
wire     [31:0]           dcache_wb_address;
wire     [31:0]           dcache_wb_read_data;
wire     [31:0]           dcache_wb_write_data;
wire                      dcache_wb_cached_ready;
wire                      dcache_wb_uncached_ready;
wire     [31:0]           icache_wb_address;
wire                      icache_wb_req;
wire                      icache_wb_qword;
wire     [31:0]           icache_wb_read_data;
wire                      icache_wb_ready;

wire                      conflict;


// data abort has priority
assign decode_fault_status  = dabt_trigger ? dabt_fault_status  : iabt_fault_status;
assign decode_fault_address = dabt_trigger ? dabt_fault_address : iabt_fault_address;
assign decode_fault         = dabt_trigger | iabt_trigger;

// Decode, execute and co-processor are stalled when either the fetch
// stage or the memory stage stalls
assign access_stall         = fetch_stall || mem_stall;

// ======================================
//  Fetch Stage
// ======================================
a25_fetch u_fetch (
    .i_clk                              ( i_clk                             ),
    .i_mem_stall                        ( mem_stall                         ),
    .i_conflict                         ( conflict                          ),
    .i_system_rdy                       ( i_system_rdy                      ),
    .o_fetch_stall                      ( fetch_stall                       ),

    // The bottom two address bits are forced to zero for instruction fetches
    .i_iaddress                         ( {execute_iaddress[31:2], 2'd0}    ),
    .i_iaddress_valid                   ( execute_iaddress_valid            ),
    .i_iaddress_nxt                     ( execute_iaddress_nxt              ),
    .o_fetch_instruction                ( fetch_instruction                 ),
    .i_cache_enable                     ( cache_enable                      ),
    .i_cache_flush                      ( cache_flush                       ),
    .i_cacheable_area                   ( cacheable_area                    ),


    .o_wb_req                           ( icache_wb_req                     ),
    .o_wb_qword                         ( icache_wb_qword                   ),
    .o_wb_address                       ( icache_wb_address                 ),
    .i_wb_read_data                     ( icache_wb_read_data               ),
    .i_wb_ready                         ( icache_wb_ready                   )
);


// ======================================
//  Decode Stage
// ======================================
a25_decode u_decode (
    .i_clk                              ( i_clk                             ),
    .i_access_stall                     ( access_stall                      ),
    // Instruction fetch or data read signals
    .i_fetch_instruction                ( fetch_instruction                 ),
    .i_execute_iaddress                 ( execute_iaddress                  ),
    .i_execute_daddress                 ( execute_daddress                  ),
    .i_adex                             ( adex                              ),
    // The abort inputs are tied off at this level
    .i_iabt                             ( 1'd0                              ),
    .i_dabt                             ( 1'd0                              ),
    .i_abt_status                       ( 8'd0                              ),
    .i_irq                              ( i_irq                             ),
    .i_firq                             ( i_firq                            ),
    .i_execute_status_bits              ( execute_status_bits               ),
    .i_multiply_done                    ( multiply_done                     ),
    .o_status_bits_mode                 ( status_bits_mode                  ),
    .o_status_bits_irq_mask             ( status_bits_irq_mask              ),
    .o_status_bits_firq_mask            ( status_bits_firq_mask             ),
    .o_imm32                            ( imm32                             ),
    .o_imm_shift_amount                 ( imm_shift_amount                  ),
    .o_shift_imm_zero                   ( shift_imm_zero                    ),
    .o_condition                        ( condition                         ),
    .o_decode_exclusive                 ( decode_exclusive                  ),
    .o_decode_iaccess                   ( decode_iaccess                    ),
    .o_decode_daccess                   ( decode_daccess                    ),
    .o_rm_sel                           ( rm_sel                            ),
    .o_rs_sel                           ( rs_sel                            ),
    .o_load_rd                          ( decode_load_rd                    ),
    .o_rn_sel                           ( rn_sel                            ),
    .o_barrel_shift_amount_sel          ( barrel_shift_amount_sel           ),
    .o_barrel_shift_data_sel            ( barrel_shift_data_sel             ),
    .o_barrel_shift_function            ( barrel_shift_function             ),
    .o_alu_function                     ( alu_function                      ),
    .o_multiply_function                ( multiply_function                 ),
    .o_interrupt_vector_sel             ( interrupt_vector_sel              ),
    .o_iaddress_sel                     ( iaddress_sel                      ),
    .o_daddress_sel                     ( daddress_sel                      ),
    .o_pc_sel                           ( pc_sel                            ),
    .o_byte_enable_sel                  ( byte_enable_sel                   ),
    .o_status_bits_sel                  ( status_bits_sel                   ),
    .o_reg_write_sel                    ( reg_write_sel                     ),
//  .o_user_mode_regs_load              ( user_mode_regs_load               ),
    .o_user_mode_regs_store_nxt         ( user_mode_regs_store_nxt          ),
    .o_firq_not_user_mode               ( firq_not_user_mode                ),
    .o_write_data_wen                   ( write_data_wen                    ),
    .o_base_address_wen                 ( base_address_wen                  ),
    .o_pc_wen                           ( pc_wen                            ),
    .o_reg_bank_wen                     ( reg_bank_wen                      ),
    .o_status_bits_flags_wen            ( status_bits_flags_wen             ),
    .o_status_bits_mode_wen             ( status_bits_mode_wen              ),
    .o_status_bits_irq_mask_wen         ( status_bits_irq_mask_wen          ),
    .o_status_bits_firq_mask_wen        ( status_bits_firq_mask_wen         ),
    .o_copro_opcode1                    ( copro_opcode1                     ),
    .o_copro_opcode2                    ( copro_opcode2                     ),
    .o_copro_crn                        ( copro_crn                         ),
    .o_copro_crm                        ( copro_crm                         ),
    .o_copro_num                        ( copro_num                         ),
    .o_copro_operation                  ( copro_operation                   ),
    .o_copro_write_data_wen             ( copro_write_data_wen              ),
    .o_iabt_trigger                     ( iabt_trigger                      ),
    .o_iabt_address                     ( iabt_fault_address                ),
    .o_iabt_status                      ( iabt_fault_status                 ),
    .o_dabt_trigger                     ( dabt_trigger                      ),
    .o_dabt_address                     ( dabt_fault_address                ),
    .o_dabt_status                      ( dabt_fault_status                 ),
    .o_conflict                         ( conflict                          )
);


// ======================================
//  Execute Stage
// ======================================
a25_execute u_execute (
    .i_clk                              ( i_clk                             ),
    .i_access_stall                     ( access_stall                      ),
    .i_mem_stall                        ( mem_stall                         ),
    .i_wb_read_data                     ( wb_read_data                      ),
    .i_wb_read_data_valid               ( wb_read_data_valid                ),
    .i_wb_load_rd                       ( wb_load_rd                        ),

    .i_copro_read_data                  ( copro_read_data                   ),
    .o_write_data                       ( write_data                        ),
    .o_copro_write_data                 ( copro_write_data                  ),
    .o_iaddress                         ( execute_iaddress                  ),
    .o_iaddress_valid                   ( execute_iaddress_valid            ),
    .o_iaddress_nxt                     ( execute_iaddress_nxt              ),
    .o_daddress                         ( execute_daddress                  ),
    .o_daddress_nxt                     ( execute_daddress_nxt              ),
    .o_daddress_valid                   ( execute_daddress_valid            ),
    .o_byte_enable                      ( byte_enable                       ),
    .o_write_enable                     ( write_enable                      ),
    .o_exclusive                        ( exclusive                         ),
    // intentionally left unconnected at this level
    .o_priviledged                      (                                   ),
    .o_exec_load_rd                     ( exec_load_rd                      ),

    .o_adex                             ( adex                              ),
    .o_status_bits                      ( execute_status_bits               ),
    .o_multiply_done                    ( multiply_done                     ),

    .i_status_bits_mode                 ( status_bits_mode                  ),
    .i_status_bits_irq_mask             ( status_bits_irq_mask              ),
    .i_status_bits_firq_mask            ( status_bits_firq_mask             ),
    .i_imm32                            ( imm32                             ),
    .i_imm_shift_amount                 ( imm_shift_amount                  ),
    .i_shift_imm_zero                   ( shift_imm_zero                    ),
    .i_condition                        ( condition                         ),
    .i_decode_exclusive                 ( decode_exclusive                  ),
    .i_decode_iaccess                   ( decode_iaccess                    ),
    .i_decode_daccess                   ( decode_daccess                    ),
    .i_rm_sel                           ( rm_sel                            ),
    .i_rs_sel                           ( rs_sel                            ),
    .i_decode_load_rd                   ( decode_load_rd                    ),
    .i_rn_sel                           ( rn_sel                            ),
    .i_barrel_shift_amount_sel          ( barrel_shift_amount_sel           ),
    .i_barrel_shift_data_sel            ( barrel_shift_data_sel             ),
    .i_barrel_shift_function            ( barrel_shift_function             ),
    .i_alu_function                     ( alu_function                      ),
    .i_multiply_function                ( multiply_function                 ),
    .i_interrupt_vector_sel             ( interrupt_vector_sel              ),
    .i_iaddress_sel                     ( iaddress_sel                      ),
    .i_daddress_sel                     ( daddress_sel                      ),
    .i_pc_sel                           ( pc_sel                            ),
    .i_byte_enable_sel                  ( byte_enable_sel                   ),
    .i_status_bits_sel                  ( status_bits_sel                   ),
    .i_reg_write_sel                    ( reg_write_sel                     ),
//  .i_user_mode_regs_load              ( user_mode_regs_load               ),
    .i_user_mode_regs_store_nxt         ( user_mode_regs_store_nxt          ),
    .i_firq_not_user_mode               ( firq_not_user_mode                ),
    .i_write_data_wen                   ( write_data_wen                    ),
    .i_base_address_wen                 ( base_address_wen                  ),
    .i_pc_wen                           ( pc_wen                            ),
    .i_reg_bank_wen                     ( reg_bank_wen                      ),
    .i_status_bits_flags_wen            ( status_bits_flags_wen             ),
    .i_status_bits_mode_wen             ( status_bits_mode_wen              ),
    .i_status_bits_irq_mask_wen         ( status_bits_irq_mask_wen          ),
    .i_status_bits_firq_mask_wen        ( status_bits_firq_mask_wen         ),
    .i_copro_write_data_wen             ( copro_write_data_wen              ),
    .i_conflict                         ( conflict                          )
);


// ======================================
//  Memory access stage with data cache
// ======================================
a25_mem u_mem (
    .i_clk                              ( i_clk                             ),
    .i_fetch_stall                      ( fetch_stall                       ),
    .o_mem_stall                        ( mem_stall                         ),
    .i_daddress                         ( execute_daddress                  ),
    .i_daddress_valid                   ( execute_daddress_valid            ),
    .i_daddress_nxt                     ( execute_daddress_nxt              ),
    .i_write_data                       ( write_data                        ),
    .i_write_enable                     ( write_enable                      ),
    .i_byte_enable                      ( byte_enable                       ),
    .i_exclusive                        ( exclusive                         ),
    .i_exec_load_rd                     ( exec_load_rd                      ),

    .o_mem_read_data                    ( mem_read_data                     ),
    .o_mem_read_data_valid              ( mem_read_data_valid               ),
    .o_mem_load_rd                      ( mem_load_rd                       ),

    .i_cache_enable                     ( cache_enable                      ),
    .i_cache_flush                      ( cache_flush                       ),
    .i_cacheable_area                   ( cacheable_area                    ),
    .o_wb_cached_req                    ( dcache_wb_cached_req              ),
    .o_wb_uncached_req                  ( dcache_wb_uncached_req            ),
    .o_wb_qword                         ( dcache_wb_qword                   ),
    .o_wb_write                         ( dcache_wb_write                   ),
    .o_wb_write_data                    ( dcache_wb_write_data              ),
    .o_wb_byte_enable                   ( dcache_wb_byte_enable             ),
    .o_wb_address                       ( dcache_wb_address                 ),
    .i_wb_read_data                     ( dcache_wb_read_data               ),
    .i_wb_cached_ready                  ( dcache_wb_cached_ready            ),
    .i_wb_uncached_ready                ( dcache_wb_uncached_ready          )
);


// ======================================
//  Write back stage with data cache
// ======================================
a25_write_back u_write_back (
    .i_clk                              ( i_clk                             ),
    .i_mem_stall                        ( mem_stall                         ),

    .i_daddress                         ( execute_daddress                  ),
    .i_daddress_valid                   ( execute_daddress_valid            ),

    .i_mem_read_data                    ( mem_read_data                     ),
    .i_mem_read_data_valid              ( mem_read_data_valid               ),
    .i_mem_load_rd                      ( mem_load_rd                       ),

    .o_wb_read_data                     ( wb_read_data                      ),
    .o_wb_read_data_valid               ( wb_read_data_valid                ),
    .o_wb_load_rd                       ( wb_load_rd                        )
);



// ======================================
//  Wishbone Master I/F
// ======================================
a25_wishbone u_wishbone (
    // CPU Side
    .i_clk                              ( i_clk                             ),

    // Instruction Cache Accesses
    .i_icache_req                       ( icache_wb_req                     ),
    .i_icache_qword                     ( icache_wb_qword                   ),
    .i_icache_address                   ( icache_wb_address                 ),
    .o_icache_read_data                 ( icache_wb_read_data               ),
    .o_icache_ready                     ( icache_wb_ready                   ),

    // Data Cache Accesses
    .i_exclusive                        ( exclusive                         ),
    .i_dcache_cached_req                ( dcache_wb_cached_req              ),
    .i_dcache_uncached_req              ( dcache_wb_uncached_req            ),
    .i_dcache_qword                     ( dcache_wb_qword                   ),
    .i_dcache_write                     ( dcache_wb_write                   ),
    .i_dcache_write_data                ( dcache_wb_write_data              ),
    .i_dcache_byte_enable               ( dcache_wb_byte_enable             ),
    .i_dcache_address                   ( dcache_wb_address                 ),
    .o_dcache_read_data                 ( dcache_wb_read_data               ),
    .o_dcache_cached_ready              ( dcache_wb_cached_ready            ),
    .o_dcache_uncached_ready            ( dcache_wb_uncached_ready          ),

    // Wishbone bus side
    .o_wb_adr                           ( o_wb_adr                          ),
    .o_wb_sel                           ( o_wb_sel                          ),
    .o_wb_we                            ( o_wb_we                           ),
    .i_wb_dat                           ( i_wb_dat                          ),
    .o_wb_dat                           ( o_wb_dat                          ),
    .o_wb_cyc                           ( o_wb_cyc                          ),
    .o_wb_stb                           ( o_wb_stb                          ),
    .i_wb_ack                           ( i_wb_ack                          ),
    .i_wb_err                           ( i_wb_err                          )
);



// ======================================
//  Co-Processor #15
// ======================================
a25_coprocessor u_coprocessor (
    .i_clk                              ( i_clk                             ),
    .i_access_stall                     ( access_stall                      ),
    .i_copro_opcode1                    ( copro_opcode1                     ),
    .i_copro_opcode2                    ( copro_opcode2                     ),
    .i_copro_crn                        ( copro_crn                         ),
    .i_copro_crm                        ( copro_crm                         ),
    .i_copro_num                        ( copro_num                         ),
    .i_copro_operation                  ( copro_operation                   ),
    .i_copro_write_data                 ( copro_write_data                  ),
    .i_fault                            ( decode_fault                      ),
    .i_fault_status                     ( decode_fault_status               ),
    .i_fault_address                    ( decode_fault_address              ),
    .o_copro_read_data                  ( copro_read_data                   ),
    .o_cache_enable                     ( cache_enable                      ),
    .o_cache_flush                      ( cache_flush                       ),
    .o_cacheable_area                   ( cacheable_area                    )
);


endmodule
 
/amber25/a25_fetch.v
0,0 → 1,137
//////////////////////////////////////////////////////////////////
// //
// Fetch - Instantiates the fetch stage sub-modules of //
// the Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Instantiates the Cache and Wishbone I/F //
// Also contains a little bit of logic to decode memory //
// accesses to decide if they are cached or not //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Fetch stage of the Amber 25 core.
//
// Decides for each instruction address whether it is served by the L1
// instruction cache or read directly over the wishbone interface, returns
// the selected read data and generates the fetch stall.
module a25_fetch
(
input                       i_clk,
input                       i_mem_stall,
input                       i_conflict,         // Decode stage stalls the pipeline because of
                                                // an instruction conflict
output                      o_fetch_stall,      // While asserted, all registers in the decode
                                                // and exec stages are frozen
input                       i_system_rdy,       // External system can stall the core with this signal

input       [31:0]          i_iaddress,
input                       i_iaddress_valid,
input       [31:0]          i_iaddress_nxt,     // un-registered version of the address to the cache rams
output      [31:0]          o_fetch_instruction,

input                       i_cache_enable,     // cache enable
input                       i_cache_flush,      // cache flush
input       [31:0]          i_cacheable_area,   // each bit corresponds to 2MB address space

output                      o_wb_req,
output                      o_wb_qword,         // High for a quad-word fetch request
output      [31:0]          o_wb_address,
input       [31:0]          i_wb_read_data,
input                       i_wb_ready

);

`include "memory_configuration.v"

wire                        core_stall;
wire                        cache_stall;
wire    [31:0]              cache_read_data;
wire                        sel_cache;
wire                        uncached_instruction_read;
wire                        address_cachable;
wire                        icache_wb_req;
wire                        wait_wb;
reg                         wb_req_r = 'd0;

// ======================================
// Memory Decode
// ======================================
// Cachable when the address lies in cachable memory and the matching
// bit of i_cacheable_area, indexed by address bits [25:21], is set
assign address_cachable          = in_cachable_mem( i_iaddress ) && i_cacheable_area[i_iaddress[25:21]];

// Served by the cache only for a valid, cachable address with the cache enabled
assign sel_cache                 = address_cachable && i_iaddress_valid && i_cache_enable;

// Wishbone transfers are not started while the cache is stalling the
// core; the cache stalls the core during its initialization sequence
assign uncached_instruction_read = !sel_cache && i_iaddress_valid && !cache_stall;

// Read data comes from the cache or from the wishbone bus; a fixed
// filler pattern is returned when neither source is selected
assign o_fetch_instruction       = sel_cache                 ? cache_read_data :
                                   uncached_instruction_read ? i_wb_read_data  :
                                                               32'hffeeddcc    ;

// The instruction decode and execute stages of the core are stalled
// whenever the fetch stage needs more than 1 cycle to return the
// requested read data
assign o_fetch_stall             = !i_system_rdy || wait_wb || cache_stall;

// Wishbone request side: a cache request is flagged as a quad-word fetch
assign o_wb_address              = i_iaddress;
assign o_wb_req                  = icache_wb_req || uncached_instruction_read;
assign o_wb_qword                = icache_wb_req;

// Waiting on the bus while a current or remembered request has no ready yet
assign wait_wb                   = (o_wb_req || wb_req_r) && !i_wb_ready;

// Remembers a request that has not been answered yet
always @( posedge i_clk )
    begin
    wb_req_r <= o_wb_req && !i_wb_ready;
    end

// The cache is told about every source of stall, including a decode conflict
assign core_stall                = o_fetch_stall || i_mem_stall || i_conflict;

// ======================================
// L1 Instruction Cache
// ======================================
a25_icache u_cache
(
    .i_clk                      ( i_clk                 ),
    .i_core_stall               ( core_stall            ),
    .o_stall                    ( cache_stall           ),
    .i_select                   ( sel_cache             ),
    .i_address                  ( i_iaddress            ),
    .i_address_nxt              ( i_iaddress_nxt        ),
    .i_cache_enable             ( i_cache_enable        ),
    .i_cache_flush              ( i_cache_flush         ),
    .o_read_data                ( cache_read_data       ),
    .o_wb_req                   ( icache_wb_req         ),
    .i_wb_read_data             ( i_wb_read_data        ),
    .i_wb_ready                 ( i_wb_ready            )
);


endmodule
 
/amber25/a25_localparams.v
0,0 → 1,117
//////////////////////////////////////////////////////////////////
// //
// Parameters file for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Holds general parameters that are used in several core //
// modules //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// --------------------------------------------------------------
// Instruction Types
// --------------------------------------------------------------
localparam [3:0] REGOP    = 4'h0;   // Data processing
localparam [3:0] MULT     = 4'h1;   // Multiply
localparam [3:0] SWAP     = 4'h2;   // Single Data Swap
localparam [3:0] TRANS    = 4'h3;   // Single data transfer
localparam [3:0] MTRANS   = 4'h4;   // Multi-word data transfer
localparam [3:0] BRANCH   = 4'h5;   // Branch
localparam [3:0] CODTRANS = 4'h6;   // Co-processor data transfer
localparam [3:0] COREGOP  = 4'h7;   // Co-processor data operation
localparam [3:0] CORTRANS = 4'h8;   // Co-processor register transfer
localparam [3:0] SWI      = 4'h9;   // software interrupt


// --------------------------------------------------------------
// Opcodes
// --------------------------------------------------------------
localparam [3:0] AND = 4'h0;        // Logical AND
localparam [3:0] EOR = 4'h1;        // Logical Exclusive OR
localparam [3:0] SUB = 4'h2;        // Subtract
localparam [3:0] RSB = 4'h3;        // Reverse Subtract
localparam [3:0] ADD = 4'h4;        // Add
localparam [3:0] ADC = 4'h5;        // Add with Carry
localparam [3:0] SBC = 4'h6;        // Subtract with Carry
localparam [3:0] RSC = 4'h7;        // Reverse Subtract with Carry
localparam [3:0] TST = 4'h8;        // Test (using AND operator)
localparam [3:0] TEQ = 4'h9;        // Test Equivalence (using EOR operator)
localparam [3:0] CMP = 4'ha;        // Compare (using Subtract operator)
localparam [3:0] CMN = 4'hb;        // Compare Negated
localparam [3:0] ORR = 4'hc;        // Logical OR
localparam [3:0] MOV = 4'hd;        // Move
localparam [3:0] BIC = 4'he;        // Bit Clear (using AND & NOT operators)
localparam [3:0] MVN = 4'hf;        // Move NOT

// --------------------------------------------------------------
// Condition Encoding
// --------------------------------------------------------------
localparam [3:0] EQ = 4'h0;         // Equal                        / Z set
localparam [3:0] NE = 4'h1;         // Not equal                    / Z clear
localparam [3:0] CS = 4'h2;         // Carry set                    / C set
localparam [3:0] CC = 4'h3;         // Carry clear                  / C clear
localparam [3:0] MI = 4'h4;         // Minus                        / N set
localparam [3:0] PL = 4'h5;         // Plus                         / N clear
localparam [3:0] VS = 4'h6;         // Overflow                     / V set
localparam [3:0] VC = 4'h7;         // No overflow                  / V clear
localparam [3:0] HI = 4'h8;         // Unsigned higher              / C set and Z clear
localparam [3:0] LS = 4'h9;         // Unsigned lower or same       / C clear or Z set
localparam [3:0] GE = 4'ha;         // Signed greater than or equal / N == V
localparam [3:0] LT = 4'hb;         // Signed less than             / N != V
localparam [3:0] GT = 4'hc;         // Signed greater than          / Z == 0, N == V
localparam [3:0] LE = 4'hd;         // Signed less than or equal    / Z == 1, N != V
localparam [3:0] AL = 4'he;         // Always
localparam [3:0] NV = 4'hf;         // Never

// Any instruction with a condition field of 0b1111 is UNPREDICTABLE.

// --------------------------------------------------------------
// Shift Types
// RRX and ROR deliberately share the same encoding
// --------------------------------------------------------------
localparam [1:0] LSL = 2'h0;
localparam [1:0] LSR = 2'h1;
localparam [1:0] ASR = 2'h2;
localparam [1:0] RRX = 2'h3;
localparam [1:0] ROR = 2'h3;

// --------------------------------------------------------------
// Modes
// --------------------------------------------------------------
localparam [1:0] SVC  = 2'b11;      // Supervisor
localparam [1:0] IRQ  = 2'b10;      // Interrupt
localparam [1:0] FIRQ = 2'b01;      // Fast Interrupt
localparam [1:0] USR  = 2'b00;      // User

// --------------------------------------------------------------
// One-Hot Mode encodings
// --------------------------------------------------------------
localparam [5:0] OH_USR  = 0;
localparam [5:0] OH_IRQ  = 1;
localparam [5:0] OH_FIRQ = 2;
localparam [5:0] OH_SVC  = 3;
 
 
/amber25/a25_decompile.v
0,0 → 1,909
//////////////////////////////////////////////////////////////////
// //
// Decompiler for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Decompiler for debugging core - not synthesizable //
// Shows instruction in Execute Stage at last clock of //
// the instruction //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
`include "a25_config_defines.v"
 
module a25_decompile
(
input i_clk,
input i_access_stall,
input [31:0] i_instruction,
input i_instruction_valid,
input i_instruction_undefined,
input i_instruction_execute,
input [2:0] i_interrupt, // non-zero value means interrupt triggered
input i_interrupt_state,
input [31:0] i_instruction_address,
input [2:0] i_pc_sel,
input i_pc_wen
 
);
 
`include "a25_localparams.v"
`ifdef A25_DECOMPILE
 
integer i;
 
wire [31:0] imm32;
wire [7:0] imm8;
wire [11:0] offset12;
wire [7:0] offset8;
wire [3:0] reg_n, reg_d, reg_m, reg_s;
wire [4:0] shift_imm;
wire [3:0] opcode;
wire [3:0] condition;
wire [3:0] type;
wire opcode_compare;
wire opcode_move;
wire no_shift;
wire shift_op_imm;
wire [1:0] mtrans_type;
wire s_bit;
 
reg [(5*8)-1:0] xINSTRUCTION_EXECUTE;
reg [(5*8)-1:0] xINSTRUCTION_EXECUTE_R = "--- ";
wire [(8*8)-1:0] TYPE_NAME;
reg [3:0] fchars;
reg [31:0] execute_address = 'd0;
reg [2:0] interrupt_d1;
reg [31:0] clk_count = 'd0;
reg [31:0] execute_instruction = 'd0;
reg execute_now = 'd0;
reg execute_valid = 'd0;
reg execute_undefined = 'd0;
 
 
// ========================================================
// Delay instruction to Execute stage
// ========================================================
// Capture the instruction, its address and its undefined flag on every
// non-stalled clock with a valid instruction. execute_now is high for
// the clock following each capture and low otherwise.
always @( posedge i_clk )
    begin
    if ( !i_access_stall && i_instruction_valid )
        begin
        execute_instruction <= i_instruction;
        execute_address     <= i_instruction_address;
        execute_undefined   <= i_instruction_undefined;
        execute_now         <= 1'd1;
        end
    else
        begin
        execute_now         <= 1'd0;
        end
    end


// execute_valid follows i_instruction_valid, held during an access stall
always @ ( posedge i_clk )
    begin
    if ( !i_access_stall )
        begin
        execute_valid <= i_instruction_valid;
        end
    end
// ========================================================
// Open File
// ========================================================
// Handle of the decompile output file
integer decompile_file;

// Open the file named by the A25_DECOMPILE_FILE macro for writing,
// one time unit after the start of simulation
initial
#1 decompile_file = $fopen(`A25_DECOMPILE_FILE, "w");
 
 
// ========================================================
// Fields within the instruction
// ========================================================
// Register numbers
assign reg_n       = execute_instruction[19:16];
assign reg_d       = execute_instruction[15:12];
assign reg_s       = execute_instruction[11:8];
assign reg_m       = execute_instruction[3:0];

// Control fields
assign condition   = execute_instruction[31:28];
assign opcode      = execute_instruction[24:21];
assign mtrans_type = execute_instruction[24:23];
assign s_bit       = execute_instruction[20];

// Shift amounts, offsets and immediates
assign shift_imm   = execute_instruction[11:7];
assign offset12    = execute_instruction[11:0];
assign offset8     = {execute_instruction[11:8], execute_instruction[3:0]};
assign imm8        = execute_instruction[7:0];

// High when the whole shift field, bits [11:4], is zero
assign no_shift    = ~|execute_instruction[11:4];


// Compare class opcodes
assign opcode_compare =
            opcode == TST ||
            opcode == TEQ ||
            opcode == CMN ||
            opcode == CMP ;

// Move class opcodes
assign opcode_move =
            opcode == MVN ||
            opcode == MOV ;

// Register operation whose second operand is an immediate (bit 25 set)
assign shift_op_imm = (type == REGOP) && execute_instruction[25];

// imm32 is imm8 rotated right by twice the 4-bit value in bits [11:8].
// Two copies of the zero-extended byte are placed side by side and shifted
// right; the low 32 bits of that result are the rotated value.
wire [4:0]  imm_ror_amount = { execute_instruction[11:8], 1'b0 };
wire [63:0] imm8_doubled   = { 24'h0, imm8[7:0], 24'h0, imm8[7:0] };

assign imm32 = imm8_doubled >> imm_ror_amount;
 
 
// ========================================================
// Instruction decode
// ========================================================
// Classifies execute_instruction into one of the instruction types.
// The chain is a priority decode: the first matching pattern wins.
// SWAP and MULT have bits [27:26] == 2'b00 and would otherwise be
// classified as REGOP, so they are tested first.
// the order of these matters
assign type =
{execute_instruction[27:23], execute_instruction[21:20], execute_instruction[11:4] } == { 5'b00010, 2'b00, 8'b00001001 } ? SWAP : // Before REGOP
{execute_instruction[27:22], execute_instruction[7:4] } == { 6'b000000, 4'b1001 } ? MULT : // Before REGOP
{execute_instruction[27:26] } == { 2'b00 } ? REGOP :
{execute_instruction[27:26] } == { 2'b01 } ? TRANS :
{execute_instruction[27:25] } == { 3'b100 } ? MTRANS :
{execute_instruction[27:25] } == { 3'b101 } ? BRANCH :
{execute_instruction[27:25] } == { 3'b110 } ? CODTRANS :
{execute_instruction[27:24], execute_instruction[4] } == { 4'b1110, 1'b0 } ? COREGOP :
{execute_instruction[27:24], execute_instruction[4] } == { 4'b1110, 1'b1 } ? CORTRANS :
SWI ; // everything left over

//
// Convert some important signals to ASCII
// so their values can easily be displayed on a waveform viewer
//
// TYPE_NAME is not written to the trace file by any code in this module.
assign TYPE_NAME = type == REGOP ? "REGOP " :
type == MULT ? "MULT " :
type == SWAP ? "SWAP " :
type == TRANS ? "TRANS " :
type == MTRANS ? "MTRANS " :
type == BRANCH ? "BRANCH " :
type == CODTRANS ? "CODTRANS" :
type == COREGOP ? "COREGOP " :
type == CORTRANS ? "CORTRANS" :
type == SWI ? "SWI " :
"UNKNOWN " ;
 
// Select the mnemonic of the instruction currently in the execute stage.
// xINSTRUCTION_EXECUTE is a 5 character (40 bit) string. Every literal is
// padded with trailing spaces to exactly 5 characters: a shorter literal
// would be zero extended on the left, which puts NUL bytes in front of the
// name and defeats the trailing-space count done by numchars.
always @*
    begin
    if ( !execute_now )
        begin
        // stalled: hold the previously displayed mnemonic
        xINSTRUCTION_EXECUTE = xINSTRUCTION_EXECUTE_R;
        end

    else if ( type == REGOP    && opcode == ADC                                                        ) xINSTRUCTION_EXECUTE = "adc  ";
    else if ( type == REGOP    && opcode == ADD                                                        ) xINSTRUCTION_EXECUTE = "add  ";
    else if ( type == REGOP    && opcode == AND                                                        ) xINSTRUCTION_EXECUTE = "and  ";
    else if ( type == BRANCH   && execute_instruction[24] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "b    ";
    else if ( type == REGOP    && opcode == BIC                                                        ) xINSTRUCTION_EXECUTE = "bic  ";
    else if ( type == BRANCH   && execute_instruction[24] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "bl   ";
    else if ( type == COREGOP                                                                          ) xINSTRUCTION_EXECUTE = "cdp  ";
    else if ( type == REGOP    && opcode == CMN                                                        ) xINSTRUCTION_EXECUTE = "cmn  ";
    else if ( type == REGOP    && opcode == CMP                                                        ) xINSTRUCTION_EXECUTE = "cmp  ";
    else if ( type == REGOP    && opcode == EOR                                                        ) xINSTRUCTION_EXECUTE = "eor  ";
    else if ( type == CODTRANS && execute_instruction[20] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "ldc  ";
    else if ( type == MTRANS   && execute_instruction[20] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "ldm  ";
    else if ( type == TRANS    && {execute_instruction[22],execute_instruction[20]} == {1'b0, 1'b1}    ) xINSTRUCTION_EXECUTE = "ldr  ";
    else if ( type == TRANS    && {execute_instruction[22],execute_instruction[20]} == {1'b1, 1'b1}    ) xINSTRUCTION_EXECUTE = "ldrb ";
    else if ( type == CORTRANS && execute_instruction[20] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "mcr  ";
    else if ( type == MULT     && execute_instruction[21] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "mla  ";
    else if ( type == REGOP    && opcode == MOV                                                        ) xINSTRUCTION_EXECUTE = "mov  ";
    else if ( type == CORTRANS && execute_instruction[20] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "mrc  ";
    else if ( type == MULT     && execute_instruction[21] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "mul  ";
    else if ( type == REGOP    && opcode == MVN                                                        ) xINSTRUCTION_EXECUTE = "mvn  ";
    else if ( type == REGOP    && opcode == ORR                                                        ) xINSTRUCTION_EXECUTE = "orr  ";
    else if ( type == REGOP    && opcode == RSB                                                        ) xINSTRUCTION_EXECUTE = "rsb  ";
    else if ( type == REGOP    && opcode == RSC                                                        ) xINSTRUCTION_EXECUTE = "rsc  ";
    else if ( type == REGOP    && opcode == SBC                                                        ) xINSTRUCTION_EXECUTE = "sbc  ";
    else if ( type == CODTRANS && execute_instruction[20] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "stc  ";
    else if ( type == MTRANS   && execute_instruction[20] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "stm  ";
    else if ( type == TRANS    && {execute_instruction[22],execute_instruction[20]} == {1'b0, 1'b0}    ) xINSTRUCTION_EXECUTE = "str  ";
    else if ( type == TRANS    && {execute_instruction[22],execute_instruction[20]} == {1'b1, 1'b0}    ) xINSTRUCTION_EXECUTE = "strb ";
    else if ( type == REGOP    && opcode == SUB                                                        ) xINSTRUCTION_EXECUTE = "sub  ";
    else if ( type == SWI                                                                              ) xINSTRUCTION_EXECUTE = "swi  ";
    else if ( type == SWAP     && execute_instruction[22] == 1'b0                                      ) xINSTRUCTION_EXECUTE = "swp  ";
    else if ( type == SWAP     && execute_instruction[22] == 1'b1                                      ) xINSTRUCTION_EXECUTE = "swpb ";
    else if ( type == REGOP    && opcode == TEQ                                                        ) xINSTRUCTION_EXECUTE = "teq  ";
    else if ( type == REGOP    && opcode == TST                                                        ) xINSTRUCTION_EXECUTE = "tst  ";
    else                                                                                                 xINSTRUCTION_EXECUTE = "unkow";
    end

// Registered copy of the mnemonic, used to hold it while execute_now is low
always @ ( posedge i_clk )
    xINSTRUCTION_EXECUTE_R <= xINSTRUCTION_EXECUTE;

// Free running cycle counter used to time stamp every trace line
always @( posedge i_clk )
    clk_count <= clk_count + 1'd1;
// Write one trace line for every instruction that reaches the execute stage:
//   <cycle> <address>: <-| ><mnemonic><suffixes><padding><operands>
// followed by an extra line when the instruction raises an undefined
// instruction or software interrupt.
always @( posedge i_clk )
    if ( execute_now )
        begin
        // Interrupts override instructions that are just starting
        if ( interrupt_d1 == 3'd0 || interrupt_d1 == 3'd7 )
            begin
            $fwrite(decompile_file,"%09d ", clk_count);

            // Right justify the address in an 8 character column: the leading
            // spaces plus the printed hex digits always add up to 8. Bit 0 of
            // the address is forced to zero.
            if      ( execute_address < 32'h10)        $fwrite(decompile_file,"       %01x: ", {execute_address[ 3:1], 1'd0});
            else if ( execute_address < 32'h100)       $fwrite(decompile_file,"      %02x: ",  {execute_address[ 7:1], 1'd0});
            else if ( execute_address < 32'h1000)      $fwrite(decompile_file,"     %03x: ",   {execute_address[11:1], 1'd0});
            else if ( execute_address < 32'h10000)     $fwrite(decompile_file,"    %04x: ",    {execute_address[15:1], 1'd0});
            else if ( execute_address < 32'h100000)    $fwrite(decompile_file,"   %05x: ",     {execute_address[19:1], 1'd0});
            else if ( execute_address < 32'h1000000)   $fwrite(decompile_file,"  %06x: ",      {execute_address[23:1], 1'd0});
            else if ( execute_address < 32'h10000000)  $fwrite(decompile_file," %07x: ",       {execute_address[27:1], 1'd0});
            else                                       $fwrite(decompile_file,"%8x: ",         {execute_address[31:1], 1'd0});

            // Mark that the instruction is not being executed
            // condition field in execute stage allows instruction to execute ?
            if (!i_instruction_execute)
                begin
                $fwrite(decompile_file,"-");
                if ( type == SWI )
                    $display ("Cycle %09d SWI not taken *************", clk_count);
                end
            else
                $fwrite(decompile_file," ");

            // ========================================
            // print the instruction name
            // ========================================
            // Only the leading, non-space characters of the mnemonic are printed
            case (numchars( xINSTRUCTION_EXECUTE ))
                4'd1:    $fwrite(decompile_file,"%s", xINSTRUCTION_EXECUTE[39:32] );
                4'd2:    $fwrite(decompile_file,"%s", xINSTRUCTION_EXECUTE[39:24] );
                4'd3:    $fwrite(decompile_file,"%s", xINSTRUCTION_EXECUTE[39:16] );
                4'd4:    $fwrite(decompile_file,"%s", xINSTRUCTION_EXECUTE[39: 8] );
                default: $fwrite(decompile_file,"%s", xINSTRUCTION_EXECUTE[39: 0] );
            endcase

            // fchars counts the columns left in the 8 character mnemonic field;
            // each suffix printed below reduces it by the suffix length
            fchars = 8 - numchars(xINSTRUCTION_EXECUTE);

            // Print the Multiple transfer type
            if (type == MTRANS )
                begin
                w_mtrans_type;
                fchars = fchars - 2;
                end

            // Print the s bit
            if ( ((type == REGOP && !opcode_compare) || type == MULT ) && s_bit == 1'b1 )
                begin
                $fwrite(decompile_file,"s");
                fchars = fchars - 1;
                end

            // Print the p bit
            if ( type == REGOP && opcode_compare && s_bit == 1'b1 && reg_d == 4'd15 )
                begin
                $fwrite(decompile_file,"p");
                fchars = fchars - 1;
                end

            // Print the condition code
            if ( condition != AL )
                begin
                wcond;
                fchars = fchars - 2;
                end

            // Align spaces after instruction: emit exactly fchars spaces so the
            // operands always start in the same column. A value above 8 can
            // only come from the 4-bit counter wrapping below zero; a single
            // separating space is written in that case.
            if ( fchars <= 4'd8 )
                repeat ( fchars ) $fwrite(decompile_file," ");
            else
                $fwrite(decompile_file," ");

            // ========================================
            // print the arguments for the instruction
            // ========================================
            case ( type )
                REGOP:     regop_args;
                TRANS:     trans_args;
                MTRANS:    mtrans_args;
                BRANCH:    branch_args;
                MULT:      mult_args;
                SWAP:      swap_args;
                CODTRANS:  codtrans_args;
                COREGOP:   begin
                           // `TB_ERROR_MESSAGE
                           $write("Coregop not implemented in decompiler yet\n");
                           end
                CORTRANS:  cortrans_args;
                SWI:       $fwrite(decompile_file,"#0x%06h", execute_instruction[23:0]);
                default:   begin
                           `TB_ERROR_MESSAGE
                           $write("Unknown Instruction Type ERROR\n");
                           end
            endcase

            $fwrite( decompile_file,"\n" );
            end

        // Undefined Instruction Interrupts
        if ( i_instruction_execute && execute_undefined )
            begin
            $fwrite( decompile_file,"%09d interrupt undefined instruction", clk_count );
            $fwrite( decompile_file,", return addr " );
            $fwrite( decompile_file,"%08x\n", pcf(get_reg_val(5'd21)-4'd4) );
            end

        // Software Interrupt
        if ( i_instruction_execute && type == SWI )
            begin
            $fwrite( decompile_file,"%09d interrupt swi", clk_count );
            $fwrite( decompile_file,", return addr " );
            $fwrite( decompile_file,"%08x\n", pcf(get_reg_val(5'd21)-4'd4) );
            end
        end
 
 
// Delay i_interrupt by one unstalled clock and log asynchronous interrupts.
// Each logged interrupt produces "<cycle> interrupt <name>, return addr <pc>".
// Type 7 is logged with the name "unknown type" but does get a return
// address; any other unlisted type gets no address and no newline.
always @( posedge i_clk )
    if ( !i_access_stall )
        begin
        interrupt_d1 <= i_interrupt;

        // Asynchronous Interrupts
        if ( interrupt_d1 != 3'd0 && i_interrupt_state )
            begin
            case ( interrupt_d1 )
                3'd1:    $fwrite( decompile_file,"%09d interrupt data abort, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd16)) );
                3'd2:    $fwrite( decompile_file,"%09d interrupt firq, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd17)) );
                3'd3:    $fwrite( decompile_file,"%09d interrupt irq, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd18)) );
                3'd4:    $fwrite( decompile_file,"%09d interrupt address exception, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd19)) );
                3'd5:    $fwrite( decompile_file,"%09d interrupt instruction abort, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd19)) );
                3'd7:    $fwrite( decompile_file,"%09d interrupt unknown type, return addr %08h\n",
                                  clk_count, pcf(get_reg_val(5'd20)) );
                default: $fwrite( decompile_file,"%09d interrupt unknown type, return addr ",
                                  clk_count );
            endcase
            end
        end
 
 
// jump
// Logs a change of program flow: source address, destination address and
// the current values of r0 and r1.
// Dont print a jump message for interrupts
always @( posedge i_clk )
    begin : log_jump
    reg [31:0] jump_target;

    jump_target = get_32bit_signal(0);   // u_execute.pc_nxt

    if (
         i_pc_sel != 3'd0             &&
         i_pc_wen                     &&
         !i_access_stall              &&
         i_instruction_execute        &&
         i_interrupt == 3'd0          &&
         !execute_undefined           &&
         type != SWI                  &&
         execute_address != jump_target   // Don't print jump to same address
       )
        begin
        $fwrite(decompile_file,"%09d jump from ", clk_count);
        fwrite_hex_drop_zeros(decompile_file, pcf(execute_address));
        $fwrite(decompile_file," to ");
        fwrite_hex_drop_zeros(decompile_file, pcf(jump_target) );
        $fwrite(decompile_file,", r0 %08h, r1 %08h\n", get_reg_val ( 5'd0 ), get_reg_val ( 5'd1 ));
        end
    end
 
// =================================================================================
// Memory Reads and Writes
// =================================================================================

// Scratch copy of the access address, so its low two bits can be cleared
reg [31:0] tmp_address;

// Data access
// A write is logged when the execute stage asserts its write enable and the
// core is not access-stalled. A read is logged when the write-back stage
// flags valid read data and the core is not memory-stalled.
always @( posedge i_clk )
    begin : log_memory_access
    // Data Write
    if ( get_1bit_signal(0) && !get_1bit_signal(3) )
        begin
        tmp_address = get_32bit_signal(2);
        $fwrite(decompile_file, "%09d write addr ", clk_count);
        fwrite_hex_drop_zeros(decompile_file, {tmp_address[31:2], 2'd0} );
        $fwrite(decompile_file, ", data %08h, be %h\n",
                get_32bit_signal(3),    // write data
                get_4bit_signal (0));   // byte enables
        end

    // Data Read
    if ( get_1bit_signal(4) && !get_1bit_signal(1) )
        begin
        tmp_address = get_32bit_signal(5);
        $fwrite(decompile_file, "%09d read addr ", clk_count);
        fwrite_hex_drop_zeros(decompile_file, {tmp_address[31:2], 2'd0} );
        $fwrite(decompile_file, ", data %08h to ", get_32bit_signal(4));
        warmreg(get_4bit_signal(1));    // destination register of the load
        $fwrite(decompile_file, "\n");
        end
    end
 
 
// =================================================================================
// Tasks
// =================================================================================
 
// Write Condition field
// Writes the two character condition suffix selected by 'condition' to the
// trace file. Every branch writes exactly two characters, because the caller
// deducts two columns from the mnemonic padding after calling this task.
task wcond;
    begin
    case( condition)
        4'h0:    $fwrite(decompile_file,"eq");
        4'h1:    $fwrite(decompile_file,"ne");
        4'h2:    $fwrite(decompile_file,"cs");
        4'h3:    $fwrite(decompile_file,"cc");
        4'h4:    $fwrite(decompile_file,"mi");
        4'h5:    $fwrite(decompile_file,"pl");
        4'h6:    $fwrite(decompile_file,"vs");
        4'h7:    $fwrite(decompile_file,"vc");
        4'h8:    $fwrite(decompile_file,"hi");
        4'h9:    $fwrite(decompile_file,"ls");
        4'ha:    $fwrite(decompile_file,"ge");
        4'hb:    $fwrite(decompile_file,"lt");
        4'hc:    $fwrite(decompile_file,"gt");
        4'hd:    $fwrite(decompile_file,"le");
        4'he:    $fwrite(decompile_file,"  ");  // Always: blank, but still two columns wide
        default: $fwrite(decompile_file,"nv");  // Never
    endcase
    end
endtask
 
// ldm and stm types
// Writes the two letter addressing mode suffix chosen by mtrans_type, which
// is instruction bits [24:23]. Anything that matches none of the four
// encodings (for example an unknown value) is written as "xx".
task w_mtrans_type;
    begin
    case ( mtrans_type )
        2'b00:   $fwrite(decompile_file,"da");
        2'b01:   $fwrite(decompile_file,"ia");
        2'b10:   $fwrite(decompile_file,"db");
        2'b11:   $fwrite(decompile_file,"ib");
        default: $fwrite(decompile_file,"xx");
    endcase
    end
endtask
 
// Operands of a co-processor register transfer
// e.g. mrc 15, 0, r9, cr0, cr0, {0}
task cortrans_args;
    begin
    // Co-Processor Number [11:8], then opcode1 [23:21]
    $fwrite(decompile_file,"%1d, %1d, ",
            execute_instruction[11:8],
            execute_instruction[23:21]);

    // Rd [15:12]
    warmreg(reg_d);

    // CRn [19:16], CRm [3:0], Opcode2 [7:5]
    $fwrite(decompile_file,", cr%1d, cr%1d, {%1d}",
            execute_instruction[19:16],
            execute_instruction[3:0],
            execute_instruction[7:5]);
    end
endtask
 
 
// Operands of a co-processor data transfer
// ldc 15, 0, r9, cr0, cr0, {0}
task codtrans_args;
    begin
    // Co-Processor Number [11:8], then CRd [15:12]
    $fwrite(decompile_file,"%1d, cr%1d, ",
            execute_instruction[11:8],
            execute_instruction[15:12]);

    // Register held in bits [19:16]
    warmreg(reg_n);
    end
endtask
 
 
// Writes the branch target address: the current pc value (get_reg_val 21)
// plus the 24-bit offset from the instruction, shifted left by two and sign
// extended to 32 bits. Adding the sign extended value modulo 2^32 gives the
// same result as subtracting the magnitude of a negative offset.
task branch_args;
    reg [31:0] signed_offset;
    begin
    signed_offset = { {6{execute_instruction[23]}}, execute_instruction[23:0], 2'd0 };

    fwrite_hex_drop_zeros ( decompile_file, get_reg_val( 5'd21 ) + signed_offset );
    end
endtask
 
 
// Writes the operands of mul / mla as "Rd, Rm, Rs" and, for mla (bit 21
// set), a fourth accumulate register.
task mult_args;
    begin
    // Rd is in the Rn position for MULT instructions
    warmreg(reg_n);

    $fwrite(decompile_file,", ");
    warmreg(reg_m);

    $fwrite(decompile_file,", ");
    warmreg(reg_s);

    // MLA only: the accumulate register sits in the Rd position
    if (execute_instruction[21])
        begin
        $fwrite(decompile_file,", ");
        warmreg(reg_d);
        end
    end
endtask
 
 
// Writes the operands of swp / swpb in the form "Rd, Rm, [Rn]"
task swap_args;
    begin
    // destination register
    warmreg(reg_d);

    // source register
    $fwrite(decompile_file,", ");
    warmreg(reg_m);

    // base register, in square brackets
    $fwrite(decompile_file,", [");
    warmreg(reg_n);
    $fwrite(decompile_file,"]");
    end
endtask
 
 
// Writes the operands of a data processing (REGOP) instruction.
//   - compare opcodes have no destination:  Rn, <operand2>
//   - move opcodes have no first operand:   Rd, <operand2>
//   - everything else:                      Rd, Rn, <operand2>
// <operand2> is either the rotated immediate imm32 or Rm with an optional
// shift. After each register an extra space is written when its name is two
// characters long so that the columns line up with the three character
// names. Only r10 and r11 print as three characters; register 12 is printed
// by warmreg as "ip", so it needs the extra space like every other two
// character name.
task regop_args;
    begin
    if (!opcode_compare)
        warmreg(reg_d);

    if (!opcode_move )
        begin
        if (!opcode_compare)
            begin
            $fwrite(decompile_file,", ");
            if (reg_d < 4'd10 || reg_d > 4'd11)
                $fwrite(decompile_file," ");
            end

        warmreg(reg_n);
        $fwrite(decompile_file,", ");
        if (reg_n < 4'd10 || reg_n > 4'd11)
            $fwrite(decompile_file," ");
        end
    else
        begin
        $fwrite(decompile_file,", ");
        if (reg_d < 4'd10 || reg_d > 4'd11)
            $fwrite(decompile_file," ");
        end

    if (shift_op_imm)
        begin
        // Large immediates are easier to read in hex, small ones in decimal
        if (|imm32[31:15])
            $fwrite(decompile_file,"#0x%08h", imm32);
        else
            $fwrite(decompile_file,"#%1d", imm32);
        end
    else // Rm
        begin
        warmreg(reg_m);
        if (execute_instruction[4])
            // Register Shifts
            wshiftreg;
        else
            // Immediate shifts
            wshift;
        end
    end
endtask
 
 
// Writes the operands of a single data transfer (ldr/ldrb/str/strb):
// the data register followed by the address expression.
//
// The casez key is six bits, from left to right:
//   execute_instruction[25]  0 = 12-bit immediate offset, 1 = register offset
//   execute_instruction[24]  1 = offset inside the brackets, 0 = offset after them
//   execute_instruction[23]  1 = offset added, 0 = offset subtracted ("-")
//   execute_instruction[21]  1 = "!" suffix in the bracketed (bit 24 = 1) forms
//   no_shift                 register offset carries no shift
//   offset12 == 12'd0        a zero immediate offset is omitted in most forms
// There is no default item: a key that matches no row prints only the data
// register.
task trans_args;
begin
warmreg(reg_d); // Destination register

casez ({execute_instruction[25:23], execute_instruction[21], no_shift, offset12==12'd0})
// Immediate offset inside the brackets
6'b0100?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", #-%1d]" , offset12); end
6'b0110?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", #%1d]" , offset12); end
6'b0100?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end
6'b0110?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end
6'b0101?? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", #-%1d]!", offset12); end
6'b0111?? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", #%1d]!" , offset12); end

// Immediate offset after the brackets
6'b0000?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], #-%1d", offset12); end
6'b0010?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], #%1d" , offset12); end
6'b0001?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], #-%1d", offset12); end
6'b0011?0 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], #%1d" , offset12); end
6'b0000?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end
6'b0010?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end
6'b0001?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end
6'b0011?1 : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"]"); end

// Unshifted register offset inside the brackets
6'b11001? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", -"); warmreg(reg_m); $fwrite(decompile_file,"]"); end
6'b11101? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", "); warmreg(reg_m); $fwrite(decompile_file,"]"); end
6'b11011? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", -"); warmreg(reg_m); $fwrite(decompile_file,"]!"); end
6'b11111? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", "); warmreg(reg_m); $fwrite(decompile_file,"]!"); end

// Unshifted register offset after the brackets
6'b10001? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], -"); warmreg(reg_m); end
6'b10101? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], "); warmreg(reg_m); end
6'b10011? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], -"); warmreg(reg_m); end
6'b10111? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], "); warmreg(reg_m); end

// Shifted register offset inside the brackets
6'b11000? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", -"); warmreg(reg_m); wshift; $fwrite(decompile_file,"]"); end
6'b11100? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", "); warmreg(reg_m); wshift; $fwrite(decompile_file,"]"); end
6'b11010? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", -"); warmreg(reg_m); wshift; $fwrite(decompile_file,"]!");end
6'b11110? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,", "); warmreg(reg_m); wshift; $fwrite(decompile_file,"]!");end

// Shifted register offset after the brackets
6'b10000? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], -"); warmreg(reg_m); wshift; end
6'b10100? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], "); warmreg(reg_m); wshift; end
6'b10010? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], -"); warmreg(reg_m); wshift; end
6'b10110? : begin $fwrite(decompile_file,", ["); warmreg(reg_n); $fwrite(decompile_file,"], "); warmreg(reg_m); wshift; end

endcase
end
endtask
 
 
// Writes the operands of ldm / stm: the base register, "!" when bit 21 is
// set, the register list in braces, and a trailing "^" when bits [22:20]
// are 3'b100.
task mtrans_args;
    begin
    // Base register followed by the opening of the register list
    warmreg(reg_n);
    if (execute_instruction[21])
        $fwrite(decompile_file,"!, {");
    else
        $fwrite(decompile_file,", {");

    // One entry per set bit in [15:0], comma separated
    for (i = 0; i < 16; i = i + 1)
        begin
        if (execute_instruction[i])
            begin
            warmreg(i);
            if (more_to_come(execute_instruction[15:0], i))
                $fwrite(decompile_file,", ");
            end
        end

    // SDM: store the user mode registers, when in privileged mode
    if (execute_instruction[22:20] == 3'b100)
        $fwrite(decompile_file,"}^");
    else
        $fwrite(decompile_file,"}");
    end
endtask
 
 
// Writes an immediate shift applied to Rm, e.g. ", lsl #3".
// Bits [6:5] select the shift type and shift_imm (bits [11:7]) the amount.
// Special encodings of a zero amount:
//   lsl #0      no shift at all, nothing is written
//   lsr / asr   a zero field encodes a shift by 32, so "#32" is written
//   ror #0      rotate right extended, written as "rrx" without an amount
task wshift;
    begin
    // Check that its a valid shift operation. LSL by #0 is the null operator
    if (execute_instruction[6:5] != LSL || shift_imm != 5'd0)
        begin
        case(execute_instruction[6:5])
            2'd0: $fwrite(decompile_file,", lsl");
            2'd1: $fwrite(decompile_file,", lsr");
            2'd2: $fwrite(decompile_file,", asr");
            2'd3: if (shift_imm == 5'd0) $fwrite(decompile_file,", rrx"); else $fwrite(decompile_file,", ror");
        endcase

        if (execute_instruction[6:5] != 2'd3 || shift_imm != 5'd0)
            begin
            if ( shift_imm == 5'd0 &&
                 (execute_instruction[6:5] == 2'd1 || execute_instruction[6:5] == 2'd2) )
                // lsr and asr encode a shift by 32 as an amount of 0
                $fwrite(decompile_file," #32");
            else
                $fwrite(decompile_file," #%1d", shift_imm);
            end
        end
    end
endtask
 
 
// Writes a register-specified shift applied to Rm, e.g. ", lsl r3".
// Bits [6:5] select the shift type; the amount comes from register reg_s.
task wshiftreg;
    begin
    if      (execute_instruction[6:5] == 2'd0) $fwrite(decompile_file,", lsl ");
    else if (execute_instruction[6:5] == 2'd1) $fwrite(decompile_file,", lsr ");
    else if (execute_instruction[6:5] == 2'd2) $fwrite(decompile_file,", asr ");
    else if (execute_instruction[6:5] == 2'd3) $fwrite(decompile_file,", ror ");

    warmreg(reg_s);
    end
endtask
 
 
// Writes the assembler name of a register to the trace file:
// r0 .. r11 by number, then ip, sp, lr and pc for registers 12 to 15.
task warmreg;
    input [3:0] regnum;
    begin
    if      (regnum <  4'd12) $fwrite(decompile_file,"r%1d", regnum);
    else if (regnum == 4'd12) $fwrite(decompile_file,"ip");
    else if (regnum == 4'd13) $fwrite(decompile_file,"sp");
    else if (regnum == 4'd14) $fwrite(decompile_file,"lr");
    else if (regnum == 4'd15) $fwrite(decompile_file,"pc");
    end
endtask
 
 
// Writes 'num' to 'file' in hex without leading zero digits. The value is
// scanned one hex digit at a time from the most significant end, and
// everything from the first non-zero digit down is printed. A value of zero
// prints as a single digit.
task fwrite_hex_drop_zeros;
    input [31:0] file;
    input [31:0] num;
    begin
    if      (|num[31:28]) $fwrite(file, "%x", num);
    else if (|num[27:24]) $fwrite(file, "%x", num[27:0]);
    else if (|num[23:20]) $fwrite(file, "%x", num[23:0]);
    else if (|num[19:16]) $fwrite(file, "%x", num[19:0]);
    else if (|num[15:12]) $fwrite(file, "%x", num[15:0]);
    else if (|num[11:8])  $fwrite(file, "%x", num[11:0]);
    else if (|num[7:4])   $fwrite(file, "%x", num[7:0]);
    else                  $fwrite(file, "%x", num[3:0]);
    end
endtask
 
 
 
// =================================================================================
// Functions
// =================================================================================
 
// Get current value of register
// Selectors 0-15 read the register bank outputs; 16-20 read the banked link
// registers used for interrupt return addresses; 21 reads the pc without
// status bits. Any other selector returns all-x: without a default item the
// function would silently return whatever the previous call produced.
function [31:0] get_reg_val;
    input [4:0] regnum;
    begin
    case (regnum)
        5'd0    : get_reg_val = `U_REGISTER_BANK.r0_out;
        5'd1    : get_reg_val = `U_REGISTER_BANK.r1_out;
        5'd2    : get_reg_val = `U_REGISTER_BANK.r2_out;
        5'd3    : get_reg_val = `U_REGISTER_BANK.r3_out;
        5'd4    : get_reg_val = `U_REGISTER_BANK.r4_out;
        5'd5    : get_reg_val = `U_REGISTER_BANK.r5_out;
        5'd6    : get_reg_val = `U_REGISTER_BANK.r6_out;
        5'd7    : get_reg_val = `U_REGISTER_BANK.r7_out;
        5'd8    : get_reg_val = `U_REGISTER_BANK.r8_out;
        5'd9    : get_reg_val = `U_REGISTER_BANK.r9_out;
        5'd10   : get_reg_val = `U_REGISTER_BANK.r10_out;
        5'd11   : get_reg_val = `U_REGISTER_BANK.r11_out;
        5'd12   : get_reg_val = `U_REGISTER_BANK.r12_out;
        5'd13   : get_reg_val = `U_REGISTER_BANK.r13_out;
        5'd14   : get_reg_val = `U_REGISTER_BANK.r14_out;
        5'd15   : get_reg_val = `U_REGISTER_BANK.r15_out_rm; // the version of pc with status bits
        5'd16   : get_reg_val = `U_REGISTER_BANK.r14_svc;
        5'd17   : get_reg_val = `U_REGISTER_BANK.r14_firq;
        5'd18   : get_reg_val = `U_REGISTER_BANK.r14_irq;
        5'd19   : get_reg_val = `U_REGISTER_BANK.r14_svc;
        5'd20   : get_reg_val = `U_REGISTER_BANK.r14_svc;
        5'd21   : get_reg_val = `U_REGISTER_BANK.r15_out_rn; // the version of pc without status bits
        default : get_reg_val = {32{1'bx}};                  // unknown selector
    endcase
    end
endfunction
 
 
// Read one of the 32-bit signals probed inside the core, selected by 'num'.
// Selectors 6 and 7 are unused and return all-x: without a default item the
// function would silently return whatever the previous call produced.
function [31:0] get_32bit_signal;
    input [2:0] num;
    begin
    case (num)
        3'd0:    get_32bit_signal = `U_EXECUTE.pc_nxt;
        3'd1:    get_32bit_signal = `U_EXECUTE.o_iaddress;
        3'd2:    get_32bit_signal = `U_EXECUTE.o_daddress;
        3'd3:    get_32bit_signal = `U_EXECUTE.o_write_data;
        // 3'd4: get_32bit_signal = `U_EXECUTE.read_data_filtered;
        3'd4:    get_32bit_signal = `U_EXECUTE.i_wb_read_data;
        3'd5:    get_32bit_signal = `U_WB.daddress_r;
        default: get_32bit_signal = {32{1'bx}};   // unknown selector
    endcase
    end
endfunction
 
 
// Read one of the single-bit signals probed inside the core, selected by
// 'num'. Selectors 5 to 7 are unused and return x: without a default item
// the function would silently return whatever the previous call produced.
function get_1bit_signal;
    input [2:0] num;
    begin
    case (num)
        3'd0:    get_1bit_signal = `U_EXECUTE.o_write_enable;
        3'd1:    get_1bit_signal = `U_AMBER.mem_stall;
        3'd2:    get_1bit_signal = `U_EXECUTE.o_daddress_valid;
        3'd3:    get_1bit_signal = `U_AMBER.access_stall;
        3'd4:    get_1bit_signal = `U_WB.mem_read_data_valid_r;
        default: get_1bit_signal = 1'bx;   // unknown selector
    endcase
    end
endfunction
 
 
// Read one of the 4-bit signals probed inside the core, selected by 'num'.
// Selectors 2 to 7 are unused and return all-x: without a default item the
// function would silently return whatever the previous call produced.
function [3:0] get_4bit_signal;
    input [2:0] num;
    begin
    case (num)
        3'd0:    get_4bit_signal = `U_EXECUTE.o_byte_enable;
        3'd1:    get_4bit_signal = `U_WB.mem_load_rd_r;
        default: get_4bit_signal = 4'bxxxx;   // unknown selector
    endcase
    end
endfunction
 
 
// Returns the number of leading characters (1 to 5) of a five character,
// space padded mnemonic that come before its trailing spaces. Each test
// compares a slice against a run of spaces of the SAME width, built from the
// ASCII code 8'h20 so the width is explicit. Comparing a multi-byte slice
// against a single " " would zero extend the literal and never match.
function [3:0] numchars;
    input [(5*8)-1:0] xINSTRUCTION_EXECUTE;
    begin
    if      (xINSTRUCTION_EXECUTE[31:0] == {4{8'h20}})
        numchars = 4'd1;
    else if (xINSTRUCTION_EXECUTE[23:0] == {3{8'h20}})
        numchars = 4'd2;
    else if (xINSTRUCTION_EXECUTE[15:0] == {2{8'h20}})
        numchars = 4'd3;
    else if (xINSTRUCTION_EXECUTE[7:0]  == 8'h20)
        numchars = 4'd4;
    else
        numchars = 4'd5;
    end
endfunction
 
 
// Returns 1 when 'regs' has at least one bit set above bit position 'i',
// i.e. when another register still follows in an ldm/stm register list.
// For i of 15 or more nothing can follow, so the result is 0. This also
// gives out-of-range values of i a defined result instead of the value left
// over from the previous call.
function more_to_come;
    input [15:0] regs;
    input [31:0] i;
    begin
    if (i < 32'd15)
        // discard bits [i:0], then OR together whatever is left
        more_to_come = |(regs >> (i + 32'd1));
    else
        more_to_come = 1'd0;
    end
endfunction
 
`endif
 
endmodule
 
/amber25/a25_coprocessor.v
0,0 → 1,197
//////////////////////////////////////////////////////////////////
// //
// Co-processor module for Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Co_processor 15 registers and control signals //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Co-processor 15 register file.
// Holds the cache control and memory area registers, latches the status and
// address of an access fault, and returns register contents on reads. A write
// to register 1 pulses the cache flush output. The register number
// i_copro_crn alone selects the register; i_copro_opcode1, i_copro_opcode2,
// i_copro_crm and i_copro_num are not used by the logic in this module.
module a25_coprocessor
(
input                       i_clk,
input                       i_access_stall,     // stall all stages of the cpu at the same time
input       [2:0]           i_copro_opcode1,
input       [2:0]           i_copro_opcode2,
input       [3:0]           i_copro_crn,        // Register Number
input       [3:0]           i_copro_crm,
input       [3:0]           i_copro_num,
input       [1:0]           i_copro_operation,  // 2 = register write (mcr), 1 = register read (mrc)
input       [31:0]          i_copro_write_data,

input                       i_fault,            // high to latch the fault address and status
input       [7:0]           i_fault_status,
input       [31:0]          i_fault_address,    // the address that caused the fault

output reg  [31:0]          o_copro_read_data,
output                      o_cache_enable,
output                      o_cache_flush,
output      [31:0]          o_cacheable_area
);

// Bit 0 - Cache on(1)/off
// Bit 1 - Shared (1) or separate User/Supervisor address space
// Bit 2 - address monitor mode(1)
reg [2:0]  cache_control = 3'b000;

// Bit 0 - 2MB memory from 0 to 0x01fffff cacheable(1)/not cachable
// Bit 1 - next 2MB region etc.
reg [31:0] cacheable_area = 32'h0;

// Marks memory regions as read only so writes are ignored by the cache
// Bit 0 - 2MB memory from 0 to 0x01fffff updateable(1)/not updateable
// Bit 1 - next 2MB region etc.
reg [31:0] updateable_area = 32'h0;

// Accesses to a region with a flag set in this register cause the
// cache to flush
// Bit 0 - 2MB memory from 0 to 0x01fffff
// Bit 1 - next 2MB region etc.
reg [31:0] disruptive_area = 32'h0;


reg [7:0]  fault_status  = 'd0;
reg [31:0] fault_address = 'd0;  // the address that caused the fault

wire       copro15_reg1_write;


// ---------------------------
// Outputs
// ---------------------------
assign o_cache_enable   = cache_control[0];
assign o_cache_flush    = copro15_reg1_write;
assign o_cacheable_area = cacheable_area;

// ---------------------------
// Capture an access fault address and status
// ---------------------------
always @ ( posedge i_clk )
    if ( !i_access_stall )
        begin
        if ( i_fault )
            begin
            `ifdef A25_COPRO15_DEBUG
            $display ("Fault status set to 0x%08x", i_fault_status);
            $display ("Fault address set to 0x%08x", i_fault_address);
            `endif
            fault_status    <= i_fault_status;
            fault_address   <= i_fault_address;
            end
        end


// ---------------------------
// Register Writes
// ---------------------------
// Registers 2 to 5 are writable. Writes to any other register number
// change no state here; register 1 is decoded separately as the flush.
always @ ( posedge i_clk )
    if ( !i_access_stall )
        begin
        if ( i_copro_operation == 2'd2 )
            case ( i_copro_crn )
                4'd2: cache_control   <= i_copro_write_data[2:0];
                4'd3: cacheable_area  <= i_copro_write_data[31:0];
                4'd4: updateable_area <= i_copro_write_data[31:0];
                4'd5: disruptive_area <= i_copro_write_data[31:0];
            endcase
        end

// Flush the cache
// Combinational: asserted during the cycle in which register 1 is written
assign copro15_reg1_write = !i_access_stall && i_copro_operation == 2'd2 && i_copro_crn == 4'd1;


// ---------------------------
// Register Reads
// ---------------------------
// The read data register is updated on every unstalled clock from the
// register selected by i_copro_crn, whatever the current operation is.
always @ ( posedge i_clk )
    if ( !i_access_stall )
        case ( i_copro_crn )
            // ID Register - [31:24] Company id, [23:16] Manuf id, [15:8] Part type, [7:0] revision
            4'd0:    o_copro_read_data <= 32'h4156_0300;
            4'd2:    o_copro_read_data <= {29'd0, cache_control};
            4'd3:    o_copro_read_data <= cacheable_area;
            4'd4:    o_copro_read_data <= updateable_area;
            4'd5:    o_copro_read_data <= disruptive_area;
            4'd6:    o_copro_read_data <= {24'd0, fault_status };
            4'd7:    o_copro_read_data <= fault_address;
            default: o_copro_read_data <= 32'd0;
        endcase



// ========================================================
// Debug code - not synthesizable
// ========================================================

`ifdef A25_COPRO15_DEBUG
//synopsys translate_off
// The read request is delayed by one unstalled clock so that it lines up
// with o_copro_read_data, which is registered.
reg [1:0] copro_operation_d1;
reg [3:0] copro_crn_d1;

always @( posedge i_clk )
    if ( !i_access_stall )
        begin
        copro_operation_d1 <= i_copro_operation;
        copro_crn_d1       <= i_copro_crn;
        end

always @( posedge i_clk )
    if ( !i_access_stall )
        begin
        if ( i_copro_operation == 2'd2 ) // mcr
            case ( i_copro_crn )
                4'd 1: begin `TB_DEBUG_MESSAGE $display ("Write 0x%08h to Co-Pro 15 #1, Flush Cache", i_copro_write_data); end
                4'd 2: begin `TB_DEBUG_MESSAGE $display ("Write 0x%08h to Co-Pro 15 #2, Cache Control", i_copro_write_data); end
                4'd 3: begin `TB_DEBUG_MESSAGE $display ("Write 0x%08h to Co-Pro 15 #3, Cacheable area", i_copro_write_data); end
                4'd 4: begin `TB_DEBUG_MESSAGE $display ("Write 0x%08h to Co-Pro 15 #4, Updateable area", i_copro_write_data); end
                4'd 5: begin `TB_DEBUG_MESSAGE $display ("Write 0x%08h to Co-Pro 15 #5, Disruptive area", i_copro_write_data); end
            endcase

        if ( copro_operation_d1 == 2'd1 ) // mrc
            case ( copro_crn_d1 )
                4'd 0: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #0, ID Register", o_copro_read_data); end
                4'd 2: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #2, Cache control", o_copro_read_data); end
                4'd 3: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #3, Cacheable area", o_copro_read_data); end
                4'd 4: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #4, Updateable area", o_copro_read_data); end
                // Register 5 is the disruptive area register; the message used to report it as #4
                4'd 5: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #5, Disruptive area", o_copro_read_data); end
                4'd 6: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #6, Fault Status Register", o_copro_read_data); end
                4'd 7: begin `TB_DEBUG_MESSAGE $display ("Read 0x%08h from Co-Pro 15 #7, Fault Address Register", o_copro_read_data); end
            endcase
        end
//synopsys translate_on
`endif

endmodule
 
/amber25/a25_execute.v
0,0 → 1,623
//////////////////////////////////////////////////////////////////
// //
// Execute stage of Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Executes instructions. Instantiates the register file, ALU //
// multiplication unit and barrel shifter. This stage is //
// relatively simple. All the complex stuff is done in the //
// decode stage. //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
 
// Execute stage of the Amber 25 core.
//
// Selects the barrel shifter / ALU operands, computes the next instruction
// and data addresses, the next PC, the register write-back value and the
// next status bits, and registers the memory-interface outputs.
// Instantiates the barrel shifter, ALU, multiplier and register bank.
// All registered outputs hold their value unless their individual
// "*_update" enable (see the Register Update section) is high.
module a25_execute (

input                       i_clk,
input                       i_access_stall,             // stall all stages of the cpu at the same time
input                       i_mem_stall,                // data memory access stalls

input       [31:0]          i_wb_read_data,             // data reads
input                       i_wb_read_data_valid,       // read data is valid
input       [9:0]           i_wb_load_rd,               // Rd for data reads

input       [31:0]          i_copro_read_data,          // From Co-Processor, to either Register
                                                        // or Memory
input                       i_decode_iaccess,           // Indicates an instruction access
input                       i_decode_daccess,           // Indicates a data access
input       [7:0]           i_decode_load_rd,           // The destination register for a load instruction

output reg  [31:0]          o_copro_write_data = 'd0,
output reg  [31:0]          o_write_data = 'd0,
output reg  [31:0]          o_iaddress = 32'hdead_dead,
output      [31:0]          o_iaddress_nxt,             // un-registered version of address to the
                                                        // cache rams address ports
output reg                  o_iaddress_valid = 'd0,     // High when instruction address is valid
output reg  [31:0]          o_daddress = 32'h0,         // Address to data cache
output      [31:0]          o_daddress_nxt,             // un-registered version of address to the
                                                        // cache rams address ports
output reg                  o_daddress_valid = 'd0,     // High when data address is valid
output reg                  o_adex = 'd0,               // Address Exception
output reg                  o_priviledged = 'd0,        // Privileged access
output reg                  o_exclusive = 'd0,          // swap access
output reg                  o_write_enable = 'd0,
output reg  [3:0]           o_byte_enable = 'd0,
output reg  [7:0]           o_exec_load_rd = 'd0,       // The destination register for a load instruction
output      [31:0]          o_status_bits,              // Full PC with all status bits, but PC part zero'ed out
output                      o_multiply_done,


// --------------------------------------------------
// Control signals from Instruction Decode stage
// --------------------------------------------------
input       [1:0]           i_status_bits_mode,
input                       i_status_bits_irq_mask,
input                       i_status_bits_firq_mask,
input       [31:0]          i_imm32,
input       [4:0]           i_imm_shift_amount,
input                       i_shift_imm_zero,
input       [3:0]           i_condition,
input                       i_decode_exclusive,         // swap access

input       [3:0]           i_rm_sel,
input       [3:0]           i_rs_sel,
input       [3:0]           i_rn_sel,
input       [1:0]           i_barrel_shift_amount_sel,
input       [1:0]           i_barrel_shift_data_sel,
input       [1:0]           i_barrel_shift_function,
input       [8:0]           i_alu_function,
input       [1:0]           i_multiply_function,
input       [2:0]           i_interrupt_vector_sel,
input       [3:0]           i_iaddress_sel,
input       [3:0]           i_daddress_sel,
input       [2:0]           i_pc_sel,
input       [1:0]           i_byte_enable_sel,
input       [2:0]           i_status_bits_sel,
input       [2:0]           i_reg_write_sel,
// input                       i_user_mode_regs_load,
input                       i_user_mode_regs_store_nxt,
input                       i_firq_not_user_mode,

input                       i_write_data_wen,
input                       i_base_address_wen,         // save LDM base address register,
                                                        // in case of data abort
input                       i_pc_wen,
input       [14:0]          i_reg_bank_wen,
input                       i_status_bits_flags_wen,
input                       i_status_bits_mode_wen,
input                       i_status_bits_irq_mask_wen,
input                       i_status_bits_firq_mask_wen,
input                       i_copro_write_data_wen,
input                       i_conflict

);

`include "a25_localparams.v"
`include "a25_functions.v"

// ========================================================
// Internal signals
// ========================================================
wire [31:0]         write_data_nxt;
wire [3:0]          byte_enable_nxt;
wire [31:0]         pc_plus4;
wire [31:0]         pc_minus4;
wire [31:0]         daddress_plus4;
wire [31:0]         alu_plus4;
wire [31:0]         rn_plus4;
wire [31:0]         alu_out;
wire [3:0]          alu_flags;
wire [31:0]         rm;
wire [31:0]         rs;
wire [31:0]         rd;
wire [31:0]         rn;
wire [31:0]         pc;
wire [31:0]         pc_nxt;
wire [31:0]         interrupt_vector;
wire [7:0]          shift_amount;
wire [31:0]         barrel_shift_in;
wire [31:0]         barrel_shift_out;
wire                barrel_shift_carry;

wire [3:0]          status_bits_flags_nxt;
reg  [3:0]          status_bits_flags = 'd0;
wire [1:0]          status_bits_mode_nxt;
reg  [1:0]          status_bits_mode = SVC;
                    // one-hot encoded rs select
wire [3:0]          status_bits_mode_rds_oh_nxt;
reg  [3:0]          status_bits_mode_rds_oh = 1'd1 << OH_SVC;
wire                status_bits_mode_rds_oh_update;
wire                status_bits_irq_mask_nxt;
reg                 status_bits_irq_mask = 1'd1;
wire                status_bits_firq_mask_nxt;
reg                 status_bits_firq_mask = 1'd1;

wire                execute;                    // high when condition execution is true
wire [31:0]         reg_write_nxt;
wire                pc_wen;
wire                pc_dmem_wen;                // load data is being written to the PC (Rd == 15)
wire [14:0]         reg_bank_wen;
wire [31:0]         multiply_out;
wire [1:0]          multiply_flags;
reg  [31:0]         base_address = 'd0;         // Saves base address during LDM instruction in
                                                // case of data abort
wire [31:0]         read_data_filtered1;
wire [31:0]         read_data_filtered;

wire                write_enable_nxt;
wire                daddress_valid_nxt;
wire                iaddress_valid_nxt;
wire                priviledged_nxt;
wire                priviledged_update;
wire                iaddress_update;
wire                daddress_update;
wire                base_address_update;
wire                write_data_update;
wire                copro_write_data_update;
wire                byte_enable_update;
wire                exec_load_rd_update;
wire                write_enable_update;
wire                exclusive_update;
wire                status_bits_flags_update;
wire                status_bits_mode_update;
wire                status_bits_irq_mask_update;
wire                status_bits_firq_mask_update;

wire [31:0]         alu_out_pc_filtered;
wire                adex_nxt;
wire [31:0]         save_int_pc;
wire [31:0]         save_int_pc_m4;
wire                ldm_flags;
wire                ldm_status_bits;

// ========================================================
// Status Bits in PC register
// ========================================================
// Same layout as save_int_pc below, with the PC field forced to zero.
assign o_status_bits = {   status_bits_flags,           // 31:28
                           status_bits_irq_mask,        // 27
                           status_bits_firq_mask,       // 26
                           24'd0,                       // 25:2 = PC field, zeroed
                           status_bits_mode };          // 1:0 = mode


// ========================================================
// Status Bits Select
// ========================================================
// Valid, un-stalled load data whose load_rd tag requests that the flags
// ( bit 7 ) or the mode / mask bits ( bit 6 ) be taken from the read data.
assign ldm_flags                 = i_wb_read_data_valid & ~i_mem_stall & i_wb_load_rd[7];
assign ldm_status_bits           = i_wb_read_data_valid & ~i_mem_stall & i_wb_load_rd[6];


assign status_bits_flags_nxt     = ldm_flags                 ? read_data_filtered[31:28] :
                                   i_status_bits_sel == 3'd0 ? alu_flags                 :
                                   i_status_bits_sel == 3'd1 ? alu_out          [31:28]  :
                                   i_status_bits_sel == 3'd3 ? i_copro_read_data[31:28]  :
                                   // 4 = update flags after a multiply operation
                                                               { multiply_flags, status_bits_flags[1:0] } ;

assign status_bits_mode_nxt      = ldm_status_bits           ? read_data_filtered [1:0] :
                                   i_status_bits_sel == 3'd0 ? i_status_bits_mode       :
                                   i_status_bits_sel == 3'd1 ? alu_out            [1:0] :
                                                               i_copro_read_data  [1:0] ;


// Used for the Rds output of register_bank - this special version of
// status_bits_mode speeds up the critical path from status_bits_mode through the
// register_bank, barrel_shifter and alu. It moves a mux needed for the
// i_user_mode_regs_store_nxt signal back into the previous stage -
// so it's really part of the decode stage even though the logic is right here.
// In addition the signal is one-hot encoded to further speed up the logic

assign status_bits_mode_rds_oh_nxt    = i_user_mode_regs_store_nxt ? 1'd1 << OH_USR                            :
                                        status_bits_mode_update    ? oh_status_bits_mode(status_bits_mode_nxt) :
                                                                     oh_status_bits_mode(status_bits_mode)     ;

assign status_bits_irq_mask_nxt  = ldm_status_bits           ? read_data_filtered     [27] :
                                   i_status_bits_sel == 3'd0 ? i_status_bits_irq_mask      :
                                   i_status_bits_sel == 3'd1 ? alu_out                [27] :
                                                               i_copro_read_data      [27] ;

assign status_bits_firq_mask_nxt = ldm_status_bits           ? read_data_filtered     [26] :
                                   i_status_bits_sel == 3'd0 ? i_status_bits_firq_mask     :
                                   i_status_bits_sel == 3'd1 ? alu_out                [26] :
                                                               i_copro_read_data      [26] ;



// ========================================================
// Adders
// ========================================================
assign pc_plus4        = pc         + 32'd4;
assign pc_minus4       = pc         - 32'd4;
assign daddress_plus4  = o_daddress + 32'd4;
assign alu_plus4       = alu_out    + 32'd4;
assign rn_plus4        = rn         + 32'd4;

// ========================================================
// Barrel Shift Amount Select
// ========================================================
// 0 -> no shift, 1 -> bottom byte of rs, otherwise the 5-bit immediate
// zero-extended to 8 bits.
// NOTE(review): a zero immediate shift amount is not translated to 32 in this
// mux; i_shift_imm_zero is passed to the barrel shifter, which presumably
// handles that case - confirm against a25_barrel_shift.
assign shift_amount = i_barrel_shift_amount_sel == 2'd0 ? 8'd0                         :
                      i_barrel_shift_amount_sel == 2'd1 ? rs[7:0]                      :
                                                          {3'd0, i_imm_shift_amount  } ;


// ========================================================
// Barrel Shift Data Select
// ========================================================
assign barrel_shift_in = i_barrel_shift_data_sel == 2'd0 ? i_imm32 : rm ;


// ========================================================
// Interrupt vector Select
// ========================================================

assign interrupt_vector = // Reset vector
                          (i_interrupt_vector_sel == 3'd0) ? 32'h00000000 :
                          // Data abort interrupt vector
                          (i_interrupt_vector_sel == 3'd1) ? 32'h00000010 :
                          // Fast interrupt vector
                          (i_interrupt_vector_sel == 3'd2) ? 32'h0000001c :
                          // Regular interrupt vector
                          (i_interrupt_vector_sel == 3'd3) ? 32'h00000018 :
                          // Prefetch abort interrupt vector
                          (i_interrupt_vector_sel == 3'd5) ? 32'h0000000c :
                          // Undefined instruction interrupt vector
                          (i_interrupt_vector_sel == 3'd6) ? 32'h00000004 :
                          // Software (SWI) interrupt vector
                          (i_interrupt_vector_sel == 3'd7) ? 32'h00000008 :
                          // Default is the address exception interrupt
                                                             32'h00000014 ;


// ========================================================
// Address Select
// ========================================================
// High when valid, un-stalled load data is destined for register 15 ( the PC ).
// The comparison binds tighter than '&'; the parentheses only make that explicit.
assign pc_dmem_wen       = i_wb_read_data_valid & ~i_mem_stall & (i_wb_load_rd[3:0] == 4'd15);

// If rd is the pc, then separate the address bits from the status bits for
// generating the next address to fetch
assign alu_out_pc_filtered = pc_wen && i_pc_sel == 3'd1 ? pcf(alu_out) : alu_out;

// A load into the PC takes priority over everything else.
// If current instruction does not execute because it does not meet the condition
// then address advances to next instruction
assign o_iaddress_nxt = (pc_dmem_wen)            ? pcf(read_data_filtered) :
                        (!execute)               ? pc_plus4                :
                        (i_iaddress_sel == 4'd0) ? pc_plus4                :
                        (i_iaddress_sel == 4'd1) ? alu_out_pc_filtered     :
                        (i_iaddress_sel == 4'd2) ? interrupt_vector        :
                                                   pc                      ;



// Data address select. Unlike o_iaddress_nxt, this mux does not depend on
// 'execute'; daddress_valid_nxt is gated with 'execute' instead.
assign o_daddress_nxt = (i_daddress_sel == 4'd1) ? alu_out_pc_filtered   :
                        (i_daddress_sel == 4'd2) ? interrupt_vector      :
                        (i_daddress_sel == 4'd4) ? rn                    :
                        (i_daddress_sel == 4'd5) ? daddress_plus4        : // MTRANS address incrementer
                        (i_daddress_sel == 4'd6) ? alu_plus4             : // MTRANS decrement after
                                                   rn_plus4              ; // MTRANS increment before

// Data accesses use 32-bit address space, but instruction
// accesses are restricted to 26 bit space
assign adex_nxt      = |o_iaddress_nxt[31:26] && i_decode_iaccess;


// ========================================================
// Filter Read Data
// ========================================================
// mem_load_rd[9:8] -> shift ROR bytes
// mem_load_rd[7]   -> load flags with PC
// mem_load_rd[6]   -> load status bits with PC
// mem_load_rd[5]   -> Write into User Mode register
// mem_load_rd[4]   -> zero_extend byte
// mem_load_rd[3:0] -> Destination Register
//
// NOTE(review): the case-equality operator ( === ) makes an X/Z select fall
// through to the final branch in simulation instead of propagating X.
// Left unchanged; confirm the synthesis tool accepts it.
assign read_data_filtered1 = i_wb_load_rd[9:8] === 2'd0 ? i_wb_read_data                                :
                             i_wb_load_rd[9:8] === 2'd1 ? {i_wb_read_data[7:0],  i_wb_read_data[31:8]}  :
                             i_wb_load_rd[9:8] === 2'd2 ? {i_wb_read_data[15:0], i_wb_read_data[31:16]} :
                                                          {i_wb_read_data[23:0], i_wb_read_data[31:24]} ;

// Optionally keep only the bottom byte, zero extended
assign read_data_filtered  = i_wb_load_rd[4] ? {24'd0, read_data_filtered1[7:0]} : read_data_filtered1 ;


// ========================================================
// Program Counter Select
// ========================================================
// If current instruction does not execute because it does not meet the condition
// then PC advances to next instruction
assign pc_nxt = (!execute)       ? pc_plus4                :
                i_pc_sel == 3'd0 ? pc_plus4                :
                i_pc_sel == 3'd1 ? alu_out                 :
                i_pc_sel == 3'd2 ? interrupt_vector        :
                i_pc_sel == 3'd3 ? pcf(read_data_filtered) :
                                   pc_minus4               ;


// ========================================================
// Register Write Select
// ========================================================

// Current PC combined with the status bits
assign save_int_pc    = { status_bits_flags,            // 31:28
                          status_bits_irq_mask,         // 27
                          status_bits_firq_mask,        // 26
                          pc[25:2],                     // 25:2
                          status_bits_mode      };      // 1:0


// Same as save_int_pc but using PC - 4
assign save_int_pc_m4 = { status_bits_flags,            // 31:28
                          status_bits_irq_mask,         // 27
                          status_bits_firq_mask,        // 26
                          pc_minus4[25:2],              // 25:2
                          status_bits_mode      };      // 1:0


assign reg_write_nxt = i_reg_write_sel == 3'd0 ? alu_out               :
                       // save pc to lr on an interrupt
                       i_reg_write_sel == 3'd1 ? save_int_pc_m4        :
                       // to update Rd at the end of Multiplication
                       i_reg_write_sel == 3'd2 ? multiply_out          :
                       i_reg_write_sel == 3'd3 ? o_status_bits         :
                       i_reg_write_sel == 3'd5 ? i_copro_read_data     :  // mrc
                       i_reg_write_sel == 3'd6 ? base_address          :
                                                 save_int_pc           ;


// ========================================================
// Byte Enable Select
// ========================================================
assign byte_enable_nxt = i_byte_enable_sel == 2'd0   ? 4'b1111 :  // word write
                         i_byte_enable_sel == 2'd2   ?            // halfword write
                         ( o_daddress_nxt[1] == 1'd0 ? 4'b0011 :
                                                       4'b1100  ) :

                         o_daddress_nxt[1:0] == 2'd0 ? 4'b0001 :  // byte write
                         o_daddress_nxt[1:0] == 2'd1 ? 4'b0010 :
                         o_daddress_nxt[1:0] == 2'd2 ? 4'b0100 :
                                                       4'b1000 ;


// ========================================================
// Write Data Select
// ========================================================
// Word writes pass rd through; every other byte_enable_sel value replicates
// the bottom byte of rd onto all four byte lanes.
assign write_data_nxt = i_byte_enable_sel == 2'd0 ? rd            :
                                                    {4{rd[ 7:0]}} ;


// ========================================================
// Conditional Execution
// ========================================================
assign execute = conditional_execute ( i_condition, status_bits_flags );

// allow the PC to increment to the next instruction when current
// instruction does not execute
assign pc_wen       = (i_pc_wen || !execute) && !i_conflict;

// only update register bank if current instruction executes
assign reg_bank_wen = {{15{execute}} & i_reg_bank_wen};


// ========================================================
// Privileged output flag
// ========================================================
// Need to look at status_bits_mode_nxt so switch to privileged mode
// at the same time as assert interrupt vector address
assign priviledged_nxt  = ( i_status_bits_mode_wen ? status_bits_mode_nxt : status_bits_mode ) != USR ;


// ========================================================
// Write Enable
// ========================================================
// This must be de-asserted when execute is false
assign write_enable_nxt = execute && i_write_data_wen;


// ========================================================
// Address Valid
// ========================================================
assign daddress_valid_nxt = execute && i_decode_daccess && !i_access_stall;
assign iaddress_valid_nxt = i_decode_iaccess;


// ========================================================
// Register Update
// ========================================================
// One enable per registered signal. Nothing updates during an access stall,
// except the paths driven by returning load data ( pc_dmem_wen, ldm_flags,
// ldm_status_bits ).

assign daddress_update                 = !i_access_stall;
assign exec_load_rd_update             = !i_access_stall && execute;
assign priviledged_update              = !i_access_stall;
assign exclusive_update                = !i_access_stall && execute;
assign write_enable_update             = !i_access_stall;
assign write_data_update               = !i_access_stall && execute && i_write_data_wen;
assign byte_enable_update              = !i_access_stall && execute && i_write_data_wen;

assign iaddress_update                 = pc_dmem_wen || (!i_access_stall && !i_conflict);
assign copro_write_data_update         = !i_access_stall && execute && i_copro_write_data_wen;

assign base_address_update             = !i_access_stall && execute && i_base_address_wen;
// assign dcache_read_data_update      = !i_mem_stall;
assign status_bits_flags_update        = ldm_flags       || (!i_access_stall && execute && i_status_bits_flags_wen);
assign status_bits_mode_update         = ldm_status_bits || (!i_access_stall && execute && i_status_bits_mode_wen);
assign status_bits_mode_rds_oh_update  = !i_access_stall;
assign status_bits_irq_mask_update     = ldm_status_bits || (!i_access_stall && execute && i_status_bits_irq_mask_wen);
assign status_bits_firq_mask_update    = ldm_status_bits || (!i_access_stall && execute && i_status_bits_firq_mask_wen);


// Every register either loads its "_nxt" value or holds its current value
always @( posedge i_clk )
    begin
    o_daddress              <= daddress_update                ? o_daddress_nxt               : o_daddress;
    o_daddress_valid        <= daddress_update                ? daddress_valid_nxt           : o_daddress_valid;
    o_exec_load_rd          <= exec_load_rd_update            ? i_decode_load_rd             : o_exec_load_rd;
    o_priviledged           <= priviledged_update             ? priviledged_nxt              : o_priviledged;
    o_exclusive             <= exclusive_update               ? i_decode_exclusive           : o_exclusive;
    o_write_enable          <= write_enable_update            ? write_enable_nxt             : o_write_enable;
    o_write_data            <= write_data_update              ? write_data_nxt               : o_write_data;
    o_byte_enable           <= byte_enable_update             ? byte_enable_nxt              : o_byte_enable;
    o_iaddress              <= iaddress_update                ? o_iaddress_nxt               : o_iaddress;
    o_iaddress_valid        <= iaddress_update                ? iaddress_valid_nxt           : o_iaddress_valid;
    o_adex                  <= iaddress_update                ? adex_nxt                     : o_adex;
    o_copro_write_data      <= copro_write_data_update        ? write_data_nxt               : o_copro_write_data;

    base_address            <= base_address_update            ? rn                           : base_address;

    status_bits_flags       <= status_bits_flags_update       ? status_bits_flags_nxt        : status_bits_flags;
    status_bits_mode        <= status_bits_mode_update        ? status_bits_mode_nxt         : status_bits_mode;
    status_bits_mode_rds_oh <= status_bits_mode_rds_oh_update ? status_bits_mode_rds_oh_nxt  : status_bits_mode_rds_oh;
    status_bits_irq_mask    <= status_bits_irq_mask_update    ? status_bits_irq_mask_nxt     : status_bits_irq_mask;
    status_bits_firq_mask   <= status_bits_firq_mask_update   ? status_bits_firq_mask_nxt    : status_bits_firq_mask;
    end


// ========================================================
// Instantiate Barrel Shift
// ========================================================
a25_barrel_shift u_barrel_shift  (
    .i_in                       ( barrel_shift_in           ),
    .i_carry_in                 ( status_bits_flags[1]      ),
    .i_shift_amount             ( shift_amount              ),
    .i_shift_imm_zero           ( i_shift_imm_zero          ),
    .i_function                 ( i_barrel_shift_function   ),

    .o_out                      ( barrel_shift_out          ),
    .o_carry_out                ( barrel_shift_carry        )
);


// ========================================================
// Instantiate ALU
// ========================================================
a25_alu u_alu (
    .i_a_in                     ( rn                        ),
    .i_b_in                     ( barrel_shift_out          ),
    .i_barrel_shift_carry       ( barrel_shift_carry        ),
    .i_status_bits_carry        ( status_bits_flags[1]      ),
    .i_function                 ( i_alu_function            ),

    .o_out                      ( alu_out                   ),
    .o_flags                    ( alu_flags                 )
);


// ========================================================
// Instantiate Booth 64-bit Multiplier-Accumulator
// ========================================================
a25_multiply u_multiply (
    .i_clk                      ( i_clk                     ),
    .i_access_stall             ( i_access_stall            ),
    .i_a_in                     ( rs                        ),
    .i_b_in                     ( rm                        ),
    .i_function                 ( i_multiply_function       ),
    .i_execute                  ( execute                   ),
    .o_out                      ( multiply_out              ),
    .o_flags                    ( multiply_flags            ),  // [1] = N, [0] = Z
    .o_done                     ( o_multiply_done           )
);


// ========================================================
// Instantiate Register Bank
// ========================================================
a25_register_bank u_register_bank(
    .i_clk                      ( i_clk                     ),
    .i_access_stall             ( i_access_stall            ),
    .i_mem_stall                ( i_mem_stall               ),
    .i_rm_sel                   ( i_rm_sel                  ),
    .i_rs_sel                   ( i_rs_sel                  ),
    .i_rn_sel                   ( i_rn_sel                  ),
    .i_pc_wen                   ( pc_wen                    ),
    .i_reg_bank_wen             ( reg_bank_wen              ),
    .i_pc                       ( pc_nxt[25:2]              ),
    .i_reg                      ( reg_write_nxt             ),
    .i_mode_idec                ( i_status_bits_mode        ),
    .i_mode_exec                ( status_bits_mode          ),

    .i_wb_read_data             ( read_data_filtered        ),
    .i_wb_read_data_valid       ( i_wb_read_data_valid      ),
    .i_wb_read_data_rd          ( i_wb_load_rd[3:0]         ),
    .i_wb_user_mode             ( i_wb_load_rd[5]           ),

    .i_status_bits_flags        ( status_bits_flags         ),
    .i_status_bits_irq_mask     ( status_bits_irq_mask      ),
    .i_status_bits_firq_mask    ( status_bits_firq_mask     ),

    // pre-encoded in decode stage to speed up long path
    .i_firq_not_user_mode       ( i_firq_not_user_mode      ),
    // use one-hot version for speed, combine with i_user_mode_regs_store
    .i_mode_rds_exec            ( status_bits_mode_rds_oh   ),
    .o_rm                       ( rm                        ),
    .o_rs                       ( rs                        ),
    .o_rd                       ( rd                        ),
    .o_rn                       ( rn                        ),
    .o_pc                       ( pc                        )
);


// ========================================================
// Debug - non-synthesizable code
// ========================================================
//synopsys translate_off

// ASCII names of the current condition code and processor mode, intended
// for display in a waveform viewer. They drive no logic.
wire    [(2*8)-1:0]    xCONDITION;
wire    [(4*8)-1:0]    xMODE;

// Every string must be exactly two characters: a longer literal is
// truncated from the left when assigned to the 16-bit wire
// ( "NV " would be shown as "V " ).
assign  xCONDITION           = i_condition == EQ ? "EQ"  :
                               i_condition == NE ? "NE"  :
                               i_condition == CS ? "CS"  :
                               i_condition == CC ? "CC"  :
                               i_condition == MI ? "MI"  :
                               i_condition == PL ? "PL"  :
                               i_condition == VS ? "VS"  :
                               i_condition == VC ? "VC"  :
                               i_condition == HI ? "HI"  :
                               i_condition == LS ? "LS"  :
                               i_condition == GE ? "GE"  :
                               i_condition == LT ? "LT"  :
                               i_condition == GT ? "GT"  :
                               i_condition == LE ? "LE"  :
                               i_condition == AL ? "AL"  :
                                                   "NV"  ;

assign  xMODE  =  status_bits_mode == SVC  ? "SVC"  :
                  status_bits_mode == IRQ  ? "IRQ"  :
                  status_bits_mode == FIRQ ? "FIRQ" :
                  status_bits_mode == USR  ? "USR"  :
                                             "XXX"  ;

//synopsys translate_on

endmodule
 
 
/amber25/a25_config_defines.v
0,0 → 1,79
//////////////////////////////////////////////////////////////////
// //
// Amber Configuration and Debug for the Amber 25 Core //
// //
// This file is part of the Amber project //
// http://www.opencores.org/project,amber //
// //
// Description //
// Contains a set of defines used to configure and debug //
// the Amber core //
// //
// Author(s): //
// - Conor Santifort, csantifort.amber@gmail.com //
// //
//////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2011 Authors and OPENCORES.ORG //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// This source file is free software; you can redistribute it //
// and/or modify it under the terms of the GNU Lesser General //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any //
// later version. //
// //
// This source is distributed in the hope that it will be //
// useful, but WITHOUT ANY WARRANTY; without even the implied //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU Lesser General Public License for more //
// details. //
// //
// You should have received a copy of the GNU Lesser General //
// Public License along with this source; if not, download it //
// from http://www.opencores.org/lgpl.shtml //
// //
//////////////////////////////////////////////////////////////////
 
// Include guard: the body below is processed only once even if this file
// is included several times.
`ifndef _A25_CONFIG_DEFINES
`define _A25_CONFIG_DEFINES

// Cache Ways
// Changing these defines is the recommended way to change the Amber
// cache size; 2, 3, 4 and 8 ways are supported.
// 2 ways -> 8KB cache
// 3 ways -> 12KB cache
// 4 ways -> 16KB cache
// 8 ways -> 32KB cache
// The instruction cache and the data cache are configured separately.
`define A25_ICACHE_WAYS 4
`define A25_DCACHE_WAYS 4

// --------------------------------------------------------------------
// Debug switches
// --------------------------------------------------------------------
// A switch is active when its `define line is not commented out.

// Enable the decompiler. The default output file is amber.dis
// ( see A25_DECOMPILE_FILE below ). Enabled by default in this file.
`define A25_DECOMPILE

// Co-processor 15 debug. Registers in here control the cache
// Disabled by default.
//`define A25_COPRO15_DEBUG

// Cache debug
// Disabled by default.
//`define A25_CACHE_DEBUG

// --------------------------------------------------------------------


// --------------------------------------------------------------------
// File Names
// --------------------------------------------------------------------
// Decompiler output file name. Only defined here if it has not already
// been defined, so a definition made earlier takes precedence.
`ifndef A25_DECOMPILE_FILE
`define A25_DECOMPILE_FILE "amber.dis"
`endif

`endif
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.