OpenCores
URL https://opencores.org/ocsvn/sparc64soc/sparc64soc/trunk

Subversion Repositories sparc64soc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /sparc64soc/trunk
    from Rev 4 to Rev 5
    Reverse comparison

Rev 4 → Rev 5

/os2wb/l1dir.v
0,0 → 1,249
// l1dir - L1 cache directory for a two-CPU system.
//
// Holds one data-cache directory (l1ddir) and one instruction-cache
// directory (l1idir) per CPU. Each request presented with `strobe` is
// decoded into query / allocate / deallocate / invalidate controls for the
// four directories; their hit outputs are then merged into the two
// invalidation vectors and the way-valid / hit flags.
module l1dir(
    input clk,
    input reset,
    input cpu, // Issuing CPU number
    input strobe, // Start transaction
    input [ 1:0] way, // Way to allocate for allocating loads
    input [39:0] address,
    input load,
    input ifill,
    input store,
    input cas,
    input swap,
    input strload,
    input strstore,
    input cacheable,
    input prefetch,
    input invalidate,
    input blockstore,
    output [111:0] inval_vect0, // Invalidation vector
    output [111:0] inval_vect1,
    output [ 1:0] othercachehit, // Other cache hit in the same CPU, wayval0/wayval1
    output [ 1:0] othercpuhit, // Any cache hit in the other CPU, wayval0/wayval1
    output [ 1:0] wayval0, // Way valid
    output [ 1:0] wayval1, // Second way valid for ifill
    output ready // Directory init done
);

    // Per-directory "init done" flags: {cpu1 I, cpu0 I, cpu1 D, cpu0 D}
    wire [3:0] rdy;

    // ---------------------------------------------------------------------
    // D-cache directory controls, one set per CPU.
    //   query   : non-block store issued by this CPU
    //   alloc   : cacheable, non-prefetch, non-invalidate load by this CPU
    //   dealloc : see the expressions below; the first term covers requests
    //             from this CPU, the second term requests from the other CPU
    // ---------------------------------------------------------------------
    wire dquery0=(!cpu) && store && (!blockstore);
    wire dquery1= cpu && store && (!blockstore);
    wire dalloc0=(!cpu) && cacheable && (!invalidate) && load && (!prefetch);
    wire dalloc1= cpu && cacheable && (!invalidate) && load && (!prefetch);
    wire ddealloc0=((!cpu) && (ifill || cas || swap || strstore || (store && blockstore))) ||
                   ( cpu && ((load && cacheable) || ifill || store || cas || swap || strload || strstore));
    wire ddealloc1=( cpu && (ifill || cas || swap || strstore || (store && blockstore))) ||
                   ((!cpu) && ((load && cacheable) || ifill || store || cas || swap || strload || strstore));

    // ---------------------------------------------------------------------
    // I-cache directory controls. The I directories are never queried
    // (iquery is tied low); they are only allocated on a cacheable ifill and
    // deallocated by the request mix below.
    // ---------------------------------------------------------------------
    wire iquery0=0;
    wire iquery1=0;
    wire ialloc0=(!cpu) && cacheable && (!invalidate) && ifill;
    wire ialloc1= cpu && cacheable && (!invalidate) && ifill;
    wire idealloc0=((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || store || cas || swap || strstore)) ||
                   ( cpu && ((load && cacheable && (!prefetch) && (!invalidate)) || ifill || store || cas || swap || strload || strstore));
    wire idealloc1=( cpu && ((load && cacheable && (!prefetch) && (!invalidate)) || store || cas || swap || strstore )) ||
                   ((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || ifill || store || cas || swap || strload || strstore));

    // Directory hit outputs: bit [2] is the hit flag, bits [1:0] the hit way.
    wire [2:0] cpu0_dhit0;
    wire [2:0] cpu0_dhit1;
    wire [2:0] cpu1_dhit0;
    wire [2:0] cpu1_dhit1;
    wire [2:0] cpu0_ihit;
    wire [2:0] cpu1_ihit;

    // "invalidate" applies to the D directory on a load and to the
    // I directory on an ifill.
    wire invalidate_d=invalidate && load;
    wire invalidate_i=invalidate && ifill;

    // Request attributes captured at strobe time; the hit results arrive
    // later, so the output decode below uses these copies.
    reg ifill_d;
    reg load_d;
    reg cacheable_d;
    reg cpu_d;
    reg [39:0] address_d;

    // Delayed strobes, used only to extend the ILA storage window.
    reg strobe_d;
    reg strobe_d1;
    reg strobe_d2;

    always @(posedge clk)
    begin
        strobe_d<=strobe;
        strobe_d1<=strobe_d;
        strobe_d2<=strobe_d1;
    end

    always @(posedge clk)
        if(strobe)
        begin
            ifill_d<=ifill;
            load_d<=load;
            cacheable_d<=cacheable;
            cpu_d<=cpu;
            address_d<=address;
        end

    // D directories: index = address[10:4], tag = address[39:11].
    l1ddir cpu0_ddir(
        .clk(clk),
        .reset(reset),
        .index(address[10:4]),
        .way(way),
        .tag(address[39:11]),
        .strobe(strobe),
        .query(dquery0),
        .allocate(dalloc0),
        .deallocate(ddealloc0),
        .dualdealloc(ifill),
        .invalidate(invalidate_d && !cpu),
        .hit0(cpu0_dhit0),
        .hit1(cpu0_dhit1),
        .ready(rdy[0])
    );

    l1ddir cpu1_ddir(
        .clk(clk),
        .reset(reset),
        .index(address[10:4]),
        .way(way),
        .tag(address[39:11]),
        .strobe(strobe),
        .query(dquery1),
        .allocate(dalloc1),
        .deallocate(ddealloc1),
        .dualdealloc(ifill),
        .invalidate(invalidate_d && cpu),
        .hit0(cpu1_dhit0),
        .hit1(cpu1_dhit1),
        .ready(rdy[1])
    );

    // I directories: index = address[11:5], tag = address[39:12].
    l1idir cpu0_idir(
        .clk(clk),
        .reset(reset),
        .index(address[11:5]),
        .way(way),
        .tag(address[39:12]),
        .strobe(strobe),
        .query(iquery0),
        .allocate(ialloc0),
        .deallocate(idealloc0),
        .invalidate(invalidate_i && !cpu),
        .hit(cpu0_ihit),
        .ready(rdy[2])
    );

    l1idir cpu1_idir(
        .clk(clk),
        .reset(reset),
        .index(address[11:5]),
        .way(way),
        .tag(address[39:12]),
        .strobe(strobe),
        .query(iquery1),
        .allocate(ialloc1),
        .deallocate(idealloc1),
        .invalidate(invalidate_i && cpu),
        .hit(cpu1_ihit),
        .ready(rdy[3])
    );

    // Ready only once every directory has finished its init.
    assign ready=&rdy;

    // ---------------------------------------------------------------------
    // Invalidation vector 0.
    // Four groups selected by address_d[5:4]:
    //   00 -> [31:0]   4 bits per CPU: {way[1:0], I hit, D hit}
    //   01 -> [55:32]  3 bits per CPU: {way[1:0], D hit}
    //   10 -> [87:56]  4 bits per CPU: {way[1:0], I hit, D hit}
    //   11 -> [111:88] 3 bits per CPU: {way[1:0], D hit}
    // Only the CPU0 and CPU1 fields are driven; the rest are zero.
    // ---------------------------------------------------------------------
    assign inval_vect0[3:0]={wayval0,cpu0_ihit[2] && (!address_d[5]),cpu0_dhit0[2] && (address_d[5:4]==2'b00)};
    assign inval_vect0[7:4]={wayval0,cpu1_ihit[2] && (!address_d[5]),cpu1_dhit0[2] && (address_d[5:4]==2'b00)};
    assign inval_vect0[31:8]=0;
    assign inval_vect0[34:32]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b01)};
    assign inval_vect0[37:35]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b01)};
    assign inval_vect0[55:38]=0;
    assign inval_vect0[59:56]={wayval0,cpu0_ihit[2] && address_d[5],cpu0_dhit0[2] && (address_d[5:4]==2'b10)};
    assign inval_vect0[63:60]={wayval0,cpu1_ihit[2] && address_d[5],cpu1_dhit0[2] && (address_d[5:4]==2'b10)};
    assign inval_vect0[87:64]=0;
    assign inval_vect0[90:88]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b11)};
    assign inval_vect0[93:91]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b11)};
    assign inval_vect0[111:94]=0;

    // ---------------------------------------------------------------------
    // Invalidation vector 1 (second D line, from the hit1 outputs).
    // Same field layout as vector 0. There is no I-cache hit for the second
    // line, so the I-hit position of the 4-bit fields is an explicit 1'b0;
    // without it the 3-bit concatenation would be zero-extended on the left
    // and the way bits would land one position too low.
    // ---------------------------------------------------------------------
    assign inval_vect1[3:0]={wayval1,1'b0,cpu0_dhit1[2] && (address_d[5:4]==2'b00)};
    assign inval_vect1[7:4]={wayval1,1'b0,cpu1_dhit1[2] && (address_d[5:4]==2'b00)};
    assign inval_vect1[31:8]=0;
    assign inval_vect1[34:32]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b01)};
    assign inval_vect1[37:35]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b01)};
    assign inval_vect1[55:38]=0;
    assign inval_vect1[59:56]={wayval1,1'b0,cpu0_dhit1[2] && (address_d[5:4]==2'b10)};
    assign inval_vect1[63:60]={wayval1,1'b0,cpu1_dhit1[2] && (address_d[5:4]==2'b10)};
    assign inval_vect1[87:64]=0;
    assign inval_vect1[90:88]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b11)};
    assign inval_vect1[93:91]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b11)};
    assign inval_vect1[111:94]=0;

    // Way numbers of all directories are ORed together (a directory that
    // did not hit reports way bits of zero).
    assign wayval0=cpu0_dhit0[1:0] | cpu1_dhit0[1:0] | cpu0_ihit[1:0] | cpu1_ihit[1:0];
    assign wayval1=cpu0_dhit1[1:0] | cpu1_dhit1[1:0];

    // Hit in the *other* L1 (I vs D) of the issuing CPU.
    assign othercachehit[0]=((!cpu_d) && ifill_d && cpu0_dhit0[2]) ||
                            ( cpu_d && ifill_d && cpu1_dhit0[2]) ||
                            ((!cpu_d) && load_d && cacheable_d && cpu0_ihit[2]) ||
                            ( cpu_d && load_d && cacheable_d && cpu1_ihit[2]);
    assign othercachehit[1]=((!cpu_d) && ifill_d && cpu0_dhit1[2]) ||
                            ( cpu_d && ifill_d && cpu1_dhit1[2]);

    // Hit in any L1 of the CPU that did not issue the request.
    assign othercpuhit[0]=((!cpu_d) && (cpu1_dhit0[2] || cpu1_ihit[2])) ||
                          ( cpu_d && (cpu0_dhit0[2] || cpu0_ihit[2]));
    assign othercpuhit[1]=((!cpu_d) && ifill_d && cpu1_dhit1[2]) ||
                          ( cpu_d && ifill_d && cpu0_dhit1[2]);

    // ---------------------------------------------------------------------
    // Debug capture: the same bus is used as data and trigger of the st2
    // instance; storage is enabled on a strobe and the three cycles after it.
    // ---------------------------------------------------------------------
    wire [149:0] ILA_DATA;

    st2 st2_inst(
        .acq_clk(clk),
        .acq_data_in(ILA_DATA),
        .acq_trigger_in(ILA_DATA),
        .storage_enable(strobe || strobe_d || strobe_d1 || strobe_d2)
    );

    assign ILA_DATA[39:0]=address;
    assign ILA_DATA[41:40]=way;
    assign ILA_DATA[42]=strobe;
    assign ILA_DATA[43]=load;
    assign ILA_DATA[44]=ifill;
    assign ILA_DATA[45]=store;
    assign ILA_DATA[46]=cas;
    assign ILA_DATA[47]=swap;
    assign ILA_DATA[48]=strload;
    assign ILA_DATA[49]=strstore;
    assign ILA_DATA[50]=cacheable;
    assign ILA_DATA[51]=prefetch;
    assign ILA_DATA[52]=invalidate;
    assign ILA_DATA[53]=blockstore;
    assign ILA_DATA[55:54]=othercachehit;
    assign ILA_DATA[57:56]=othercpuhit;
    assign ILA_DATA[59:58]=wayval0;
    assign ILA_DATA[61:60]=wayval1;
    assign ILA_DATA[69:62]=inval_vect0[7:0];
    assign ILA_DATA[75:70]=inval_vect0[37:32];
    assign ILA_DATA[83:76]=inval_vect0[63:56];
    assign ILA_DATA[89:84]=inval_vect0[93:88];
    assign ILA_DATA[97:90]=inval_vect1[7:0];
    assign ILA_DATA[103:98]=inval_vect1[37:32];
    assign ILA_DATA[111:104]=inval_vect1[63:56];
    assign ILA_DATA[117:112]=inval_vect1[93:88];
    assign ILA_DATA[118]=dquery0;
    assign ILA_DATA[119]=dquery1;
    assign ILA_DATA[120]=dalloc0;
    assign ILA_DATA[121]=dalloc1;
    assign ILA_DATA[122]=ddealloc0;
    assign ILA_DATA[123]=ddealloc1;
    assign ILA_DATA[124]=iquery0;
    assign ILA_DATA[125]=iquery1;
    assign ILA_DATA[126]=ialloc0;
    assign ILA_DATA[127]=ialloc1;
    assign ILA_DATA[128]=idealloc0;
    assign ILA_DATA[129]=idealloc1;
    // Unused probe bits are tied off so the capture/trigger inputs are
    // never left undriven.
    assign ILA_DATA[149:130]=20'b0;

endmodule
/os2wb/l1ddir.v
0,0 → 1,250
// l1ddir - directory for one 4-way L1 data cache.
//
// Four dual-port tag RAMs (one per way) hold a 29-bit tag per index.
// Port A is the general access port (addr0); port B (addr1) always
// addresses the odd entry {addr1,1'b1} and can only write INVAL_TAG - it is
// used during init and for the second line of a "dual" deallocate.
//
// Operations, accepted in IDLE when `strobe` is high (priority order):
//   invalidate : write INVAL_TAG to all four ways at `index`
//   allocate   : write `tag` to way `way` at `index`
//   deallocate : read all ways, then write INVAL_TAG to the ways whose
//                stored tag equals the request tag (both lines of the
//                even/odd pair when `dualdealloc` is set)
//   query      : read all ways and report the hit
//
// hit0/hit1 encoding: bit [2] = hit, bits [1:0] = number of the hit way.
module l1ddir(
    input clk,
    input reset,
    input [ 6:0] index,
    input [ 1:0] way,
    input [28:0] tag,
    input strobe,
    input query,
    input allocate, //tag->{way,index}
    input deallocate, //if({way,index}==tag) {way,index}<-FFFFFF
    input dualdealloc,
    input invalidate, //all ways
    output reg [2:0] hit0,
    output reg [2:0] hit1,
    output reg ready // directory init completed
);

// Tag value stored in an empty directory entry.
`define INVAL_TAG 29'h10000000

    reg [28:0] tag_d;          // Request tag captured for the compare
    reg [ 6:0] addr0;          // Port A entry address
    reg [ 5:0] addr1;          // Port B entry address (bit 0 is forced to 1)
    reg [ 3:0] we0;            // Port A write enable, one bit per way
    reg [ 3:0] we1;            // Port B write enable, one bit per way
    reg [ 3:0] re;             // Read request, one bit per way (gates RAM enable)
    reg [28:0] di;             // Port A write data
    reg dualdealloc_d;         // dualdealloc captured with the request
    wire [28:0] do0_0;         // doN_0: way N, port A read data
    wire [28:0] do1_0;
    wire [28:0] do2_0;
    wire [28:0] do3_0;
    wire [28:0] do0_1;         // doN_1: way N, port B read data
    wire [28:0] do1_1;
    wire [28:0] do2_1;
    wire [28:0] do3_1;
    reg query_d;               // query/deallocate delayed by one and two clocks;
    reg deallocate_d;          // the second stage qualifies the hit capture
    reg query_d1;
    reg deallocate_d1;

    // Capture the request tag for operations that compare against the RAM.
    always @(posedge clk)
        if(strobe)
            if(query || deallocate)
            begin
                tag_d<=tag;
                dualdealloc_d<=dualdealloc;
            end

    always @(posedge clk)
    begin
        query_d<=query && strobe;
        deallocate_d<=deallocate && strobe;
        query_d1<=query_d;
        deallocate_d1<=deallocate_d;
    end

    // One tag RAM per way.
    cachedir dcache0 (
        .clock(clk),
        .enable(we0[0] || we1[0] || re[0]),
        .wren_a(we0[0]),
        .address_a({1'b0,addr0}),
        .data_a(di),
        .q_a(do0_0),
        .wren_b(we1[0]),
        .address_b({1'b0,addr1,1'b1}),
        .data_b(`INVAL_TAG),
        .q_b(do0_1)
    );

    cachedir dcache1 (
        .clock(clk),
        .enable(we0[1] || we1[1] || re[1]),
        .wren_a(we0[1]),
        .address_a({1'b0,addr0}),
        .data_a(di),
        .q_a(do1_0),
        .wren_b(we1[1]),
        .address_b({1'b0,addr1,1'b1}),
        .data_b(`INVAL_TAG),
        .q_b(do1_1)
    );

    cachedir dcache2 (
        .clock(clk),
        .enable(we0[2] || we1[2] || re[2]),
        .wren_a(we0[2]),
        .address_a({1'b0,addr0}),
        .data_a(di),
        .q_a(do2_0),
        .wren_b(we1[2]),
        .address_b({1'b0,addr1,1'b1}),
        .data_b(`INVAL_TAG),
        .q_b(do2_1)
    );

    cachedir dcache3 (
        .clock(clk),
        .enable(we0[3] || we1[3] || re[3]),
        .wren_a(we0[3]),
        .address_a({1'b0,addr0}),
        .data_a(di),
        .q_a(do3_0),
        .wren_b(we1[3]),
        .address_b({1'b0,addr1,1'b1}),
        .data_b(`INVAL_TAG),
        .q_b(do3_1)
    );

    // Per-way tag match for port A and port B read data.
    wire [3:0] hitvect0={(do3_0==tag_d),(do2_0==tag_d),(do1_0==tag_d),(do0_0==tag_d)};
    wire [3:0] hitvect1={(do3_1==tag_d),(do2_1==tag_d),(do1_1==tag_d),(do0_1==tag_d)};

`define L1DDIR_RESET 3'b000
`define L1DDIR_INIT 3'b001
`define L1DDIR_IDLE 3'b010
`define L1DDIR_READ 3'b011
`define L1DDIR_DEALLOC 3'b100

    reg [2:0] state;

    always @(posedge clk or posedge reset)
        if(reset)
        begin
            state<=`L1DDIR_RESET;
            ready<=0;
            // Give the RAM controls a defined value out of reset: `re` is
            // otherwise not written until the first strobe in IDLE, which
            // leaves the RAM enable unknown once init drops we0/we1.
            we0<=4'b0;
            we1<=4'b0;
            re<=4'b0;
        end
        else
            case(state)
                `L1DDIR_RESET:
                    begin
                        addr0<=7'b0;
                        addr1<=6'b0;
                        di<=`INVAL_TAG;
                        we0<=4'b1111;
                        we1<=4'b1111;
                        state<=`L1DDIR_INIT;
                    end
                // Fill every entry with INVAL_TAG: port A walks the even
                // entries (addr0 += 2), port B the odd ones ({addr1,1'b1}).
                `L1DDIR_INIT:
                    begin
                        addr0<=addr0+2;
                        addr1<=addr1+1;
                        if(addr0==7'b1111110)
                        begin
                            we0<=4'b0;
                            we1<=4'b0;
                            ready<=1;
                            state<=`L1DDIR_IDLE;
                        end
                    end
                // Without a strobe nothing is updated here, so the controls
                // of the previous operation stay as they were.
                `L1DDIR_IDLE:
                    if(strobe)
                    begin
                        if(invalidate)
                        begin
                            we0<=4'b1111;
                            we1<=0;
                            addr0<=index;
                            di<=`INVAL_TAG;
                        end
                        else if(allocate)
                        begin
                            case(way)
                                2'b00:we0<=4'b0001;
                                2'b01:we0<=4'b0010;
                                2'b10:we0<=4'b0100;
                                2'b11:we0<=4'b1000;
                            endcase
                            we1<=0;
                            addr0<=index;
                            di<=tag;
                        end
                        else if(deallocate)
                        begin
                            re<=4'b1111;
                            we0<=0;
                            we1<=0;
                            if(dualdealloc)
                            begin
                                // Look up both entries of the even/odd pair.
                                addr0<={index[6:1],1'b0};
                                addr1<=index[6:1];
                            end
                            else
                                addr0<=index;
                            state<=`L1DDIR_READ;
                        end
                        else if(query)
                        begin
                            addr0<=index;
                            re<=4'b1111;
                            we0<=0;
                            we1<=0;
                        end
                        else
                        begin
                            we0<=0;
                            we1<=0;
                            re<=0;
                        end
                    end
                // One wait state before the match vectors are used.
                `L1DDIR_READ:
                    state<=`L1DDIR_DEALLOC;
                // Overwrite every matching way with INVAL_TAG.
                `L1DDIR_DEALLOC:
                    begin
                        re<=0;
                        di<=`INVAL_TAG;
                        we0<=hitvect0;
                        if(dualdealloc_d)
                            we1<=hitvect1;
                        else
                            we1<=0;
                        state<=`L1DDIR_IDLE;
                    end
            endcase

    // Hit capture, two clocks after the query/deallocate strobe. Only a
    // single matching way is reported as a hit; any other match pattern
    // yields 3'b000.
    always @(posedge clk)
        if(query_d1 || deallocate_d1)
        begin
            case(hitvect0)
                4'b0001:hit0<=3'b100;
                4'b0010:hit0<=3'b101;
                4'b0100:hit0<=3'b110;
                4'b1000:hit0<=3'b111;
                default:hit0<=3'b000; // Hits will be ORed then
            endcase
            if(dualdealloc_d && deallocate_d1)
                case(hitvect1)
                    4'b0001:hit1<=3'b100;
                    4'b0010:hit1<=3'b101;
                    4'b0100:hit1<=3'b110;
                    4'b1000:hit1<=3'b111;
                    default:hit1<=3'b000;
                endcase
            else
                hit1<=3'b000;
        end
        else
            // A new request clears the previous result.
            if(strobe)
            begin
                hit0<=3'b000;
                hit1<=3'b000;
            end
endmodule
/os2wb/os2wb_dual.v
0,0 → 1,1003
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company: (C) Athree, 2009
// Engineer: Dmitry Rozhdestvenskiy
// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
//
// Design Name: Bridge from SPARC Core to Wishbone Master
// Module Name: os2wb_dual
// Project Name: SPARC SoC single-core
//
// LICENSE:
// This is a Free Hardware Design; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// version 2 as published by the Free Software Foundation.
// The above named program is distributed in the hope that it will
// be useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////////
module os2wb_dual(
input clk,
input rstn,
// Core interface
input [ 4:0] pcx_req,
input pcx_atom,
input [123:0] pcx_data,
output reg [ 4:0] pcx_grant,
output reg cpx_ready,
output reg [144:0] cpx_packet,
// Wishbone master interface
input [ 63:0] wb_data_i,
input wb_ack,
output reg wb_cycle,
output reg wb_strobe,
output reg wb_we,
output reg [ 7:0] wb_sel,
output reg [ 63:0] wb_addr,
output reg [ 63:0] wb_data_o,
// FPU interface
output reg [123:0] fp_pcx,
output reg fp_req,
input [144:0] fp_cpx,
input fp_rdy,
// Ethernet interrupt, sensed on posedge, mapped to vector 'd29
input eth_int
);
 
reg [123:0] pcx_packet_d; // Latched incoming PCX packet
reg [123:0] pcx_packet_2nd; // Second packet for atomic (CAS)
reg [ 4:0] pcx_req_d; // Latched request
reg pcx_atom_d; // Latched atomic flasg
reg [ 4:0] state; // FSM state
reg [144:0] cpx_packet_1; // First CPX packet
reg [144:0] cpx_packet_2; // Second CPX packet (for atomics and cached IFILLs)
reg cpx_two_packet; // CPX answer is two-packet (!=atomic, SWAP has atomic==0 and answer is two-packet)
 
wire [111:0] inval_vect0; // Invalidate, instr/data, way
wire [111:0] inval_vect1; // IFill may cause two D lines invalidation at a time
 
wire [1:0] othercachehit;
wire [1:0] othercpuhit;
wire [1:0] wayval0;
wire [1:0] wayval1;
 
`define TEST_DRAM_1 5'b00000
`define TEST_DRAM_2 5'b00001
`define TEST_DRAM_3 5'b00010
`define TEST_DRAM_4 5'b00011
`define INIT_DRAM_1 5'b00100
`define INIT_DRAM_2 5'b00101
`define WAKEUP 5'b00110
`define PCX_IDLE 5'b00111
`define GOT_PCX_REQ 5'b01000
`define PCX_REQ_2ND 5'b01001
`define PCX_REQ_STEP1 5'b01010
`define PCX_REQ_STEP1_1 5'b01011
`define PCX_REQ_STEP2 5'b01100
`define PCX_REQ_STEP2_1 5'b01101
`define PCX_REQ_STEP3 5'b01110
`define PCX_REQ_STEP3_1 5'b01111
`define PCX_REQ_STEP4 5'b10000
`define PCX_REQ_STEP4_1 5'b10001
`define PCX_BIS 5'b10010
`define PCX_BIS_1 5'b10011
`define PCX_BIS_2 5'b10100
`define CPX_READY_1 5'b10101
`define CPX_READY_2 5'b10110
`define PCX_REQ_STEP1_2 5'b10111
`define PCX_UNKNOWN 5'b11000
`define PCX_FP_1 5'b11001
`define PCX_FP_2 5'b11010
`define FP_WAIT 5'b11011
`define CPX_FP 5'b11100
`define CPX_SEND_ETH_IRQ 5'b11101
`define CPX_INT_VEC_DIS 5'b11110
`define PCX_REQ_CAS_COMPARE 5'b11111
 
`define MEM_SIZE 64'h00000000_10000000
 
`define TEST_DRAM 1
`define DEBUGGING 1
 
reg cache_init;
wire [3:0] dcache0_hit;
wire [3:0] dcache1_hit;
wire [3:0] icache_hit;
reg multi_hit;
reg multi_hit1;
reg eth_int_d;
reg eth_int_send;
reg eth_int_sent;
reg [3:0] cnt;
 
// PCX channel FIFO
wire [129:0] pcx_data_fifo;
wire pcx_fifo_empty;
reg [ 4:0] pcx_req_1;
reg [ 4:0] pcx_req_2;
reg pcx_atom_1;
reg pcx_atom_2;
reg pcx_data_123_d;
 
always @(posedge clk)
begin
pcx_req_1<=pcx_req;
pcx_atom_1<=pcx_atom;
pcx_atom_2<=pcx_atom_1;
pcx_req_2<=pcx_atom_1 ? pcx_req_1:5'b0;
pcx_grant<=(pcx_req_1 | pcx_req_2);
pcx_data_123_d<=pcx_data[123];
end
pcx_fifo pcx_fifo_inst(
// FIFO should be first word fall-through
// It has no full flag as the core will send only limited number of requests,
// in original design we used it 32 words deep
// Just make it deeper if you experience overflow -
// you can't just send no grant on full because the core expects immediate
// grant for at least two requests for each zone
.aclr(!rstn),
.clock(clk),
.data({pcx_atom_1,pcx_req_1,pcx_data}),
.rdreq(fifo_rd),
.wrreq((pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d)),
// Second atomic packet for FPU may be invalid, but should be sent to FPU
// so if the first atomic packet is valid we latch both
.empty(pcx_fifo_empty),
.q(pcx_data_fifo)
);
// --------------------------
 
reg wb_ack_d;
 
always @(posedge clk or negedge rstn)
if(!rstn)
eth_int_send<=0;
else
begin
wb_ack_d<=wb_ack;
eth_int_d<=eth_int;
if(eth_int && !eth_int_d)
eth_int_send<=1;
else
if(eth_int_sent)
eth_int_send<=0;
end
 
reg fifo_rd;
wire [123:0] pcx_packet;
assign pcx_packet=pcx_data_fifo[123:0];
 
always @(posedge clk or negedge rstn)
if(rstn==0)
begin
if(`TEST_DRAM)
state<=`TEST_DRAM_1;
else
state<=`INIT_DRAM_1; // DRAM initialization is mandatory!
cpx_ready<=0;
fifo_rd<=0;
cpx_packet<=145'b0;
wb_cycle<=0;
wb_strobe<=0;
wb_we<=0;
wb_sel<=0;
wb_addr<=64'b0;
wb_data_o<=64'b0;
pcx_packet_d<=124'b0;
fp_pcx<=124'b0;
fp_req<=0;
end
else
case(state)
`TEST_DRAM_1:
begin
wb_cycle<=1;
wb_strobe<=1;
wb_sel<=8'hFF;
wb_we<=1;
state<=`TEST_DRAM_2;
end
`TEST_DRAM_2:
if(wb_ack)
begin
wb_strobe<=0;
if(wb_addr<`MEM_SIZE-8)
begin
wb_addr[31:0]<=wb_addr[31:0]+8;
wb_data_o<={wb_addr[31:0]+8,wb_addr[31:0]+8};
state<=`TEST_DRAM_1;
end
else
begin
state<=`TEST_DRAM_3;
wb_cycle<=0;
wb_sel<=0;
wb_we<=0;
wb_data_o<=64'b0;
wb_addr<=64'b0;
end
end
`TEST_DRAM_3:
begin
wb_cycle<=1;
wb_strobe<=1;
wb_sel<=8'hFF;
state<=`TEST_DRAM_4;
end
`TEST_DRAM_4:
if(wb_ack)
begin
wb_strobe<=0;
if(wb_addr<`MEM_SIZE-8)
begin
if(wb_data_i=={wb_addr[31:0],wb_addr[31:0]})
begin
wb_addr[31:0]<=wb_addr[31:0]+8;
state<=`TEST_DRAM_3;
end
end
else
begin
state<=`INIT_DRAM_1;
wb_cycle<=0;
wb_sel<=0;
wb_we<=0;
wb_data_o<=64'b0;
wb_addr<=64'b0;
end
end
`INIT_DRAM_1:
begin
wb_cycle<=1;
wb_strobe<=1;
wb_sel<=8'hFF;
wb_we<=1;
cache_init<=1; // We also init cache directories here
state<=`INIT_DRAM_2;
end
`INIT_DRAM_2:
if(wb_ack)
begin
wb_strobe<=0;
if(wb_addr<`MEM_SIZE-8)
begin
wb_addr[31:0]<=wb_addr[31:0]+8;
pcx_packet_d[64+11:64+4]<=pcx_packet_d[64+11:64+4]+1; // Address for cachedir init
state<=`INIT_DRAM_1;
end
else
begin
state<=`WAKEUP;
wb_cycle<=0;
wb_sel<=0;
wb_we<=0;
cache_init<=0;
wb_addr<=64'b0;
end
end
`WAKEUP:
begin
cpx_packet<=145'h1700000000000000000000000000000010001;
cpx_ready<=1;
state<=`PCX_IDLE;
end
`PCX_IDLE:
begin
cnt<=0;
cpx_packet<=145'b0;
cpx_ready<=0;
cpx_two_packet<=0;
multi_hit<=0;
multi_hit1<=0;
if(eth_int_send)
begin
state<=`CPX_SEND_ETH_IRQ;
eth_int_sent<=1;
end
else
if(!pcx_fifo_empty)
begin
pcx_req_d<=pcx_data_fifo[128:124];
pcx_atom_d<=pcx_data_fifo[129];
fifo_rd<=1;
state<=`GOT_PCX_REQ;
end
end
`GOT_PCX_REQ:
begin
pcx_packet_d<=pcx_packet;
if(`DEBUGGING)
begin
wb_sel[1:0]<=pcx_packet[113:112];
wb_sel[2]<=1;
end
if(pcx_packet[103:64]==40'h9800000800 && pcx_packet[122:118]==5'b00001)
begin
state<=`CPX_INT_VEC_DIS;
fifo_rd<=0;
end
else
if(pcx_atom_d==0)
begin
fifo_rd<=0;
if(pcx_packet[122:118]==5'b01010) // FP req
begin
state<=`PCX_FP_1;
pcx_packet_2nd[123]<=0;
end
else
state<=`PCX_REQ_STEP1;
end
else
state<=`PCX_REQ_2ND;
end
`PCX_REQ_2ND:
begin
pcx_packet_2nd<=pcx_packet; //Latch second packet for atomics
if(`DEBUGGING)
if(pcx_fifo_empty)
wb_sel<=8'h67;
fifo_rd<=0;
if(pcx_packet_d[122:118]==5'b01010) // FP req
state<=`PCX_FP_1;
else
state<=`PCX_REQ_STEP1;
end
`PCX_REQ_STEP1:
begin
if(pcx_packet_d[111]==1'b1) // Invalidate request
begin
cpx_packet_1[144]<=1; // Valid
cpx_packet_1[143:140]<=4'b0100; // Invalidate reply is Store ACK
cpx_packet_1[139]<=1; // L2 miss
cpx_packet_1[138:137]<=0; // Error
cpx_packet_1[136]<=pcx_packet_d[117]; // Non-cacheble
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
cpx_packet_1[133:131]<=0; // Way valid
cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && (pcx_req_d==5'b10000)) ? 1:0; // Four byte fill
cpx_packet_1[129]<=pcx_atom_d;
cpx_packet_1[128]<=pcx_packet_d[110]; // Prefetch
cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,pcx_packet_d[122:118]==5'b00000 ? 2'b01:2'b10,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0};
state<=`CPX_READY_1;
end
else
if(pcx_packet_d[122:118]!=5'b01001) // Not INT
begin
wb_cycle<=1'b1;
wb_strobe<=1'b1;
if((pcx_packet_d[122:118]==5'b00000 && !pcx_req_d[4]) || pcx_packet_d[122:118]==5'b00010 || pcx_packet_d[122:118]==5'b00100 || pcx_packet_d[122:118]==5'b00110)
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b0000}; //DRAM load/streamload, CAS and SWAP always use DRAM and load first
else
if(pcx_packet_d[122:118]==5'b10000 && !pcx_req_d[4])
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b00000}; //DRAM ifill
else
if(pcx_packet_d[64+39:64+28]==12'hFFF && pcx_packet_d[64+27:64+24]!=4'b0) // flash remap FFF1->FFF8
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3]+37'h0000E00000,3'b000};
else
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000};
wb_data_o<=pcx_packet_d[63:0];
state<=`PCX_REQ_STEP1_1;
end
else
if((pcx_packet_d[12:10]!=3'b000) && !pcx_packet_d[117]) // Not FLUSH int and not this core
state<=`PCX_IDLE;
else
state<=`CPX_READY_1;
case(pcx_packet_d[122:118]) // Packet type
5'b00000://Load
begin
wb_we<=0;
if(!pcx_req_d[4])
wb_sel<=8'b11111111; // DRAM requests are always 128 bit
else
case(pcx_packet_d[106:104]) //Size
3'b000://Byte
case(pcx_packet_d[64+2:64])
3'b000:wb_sel<=8'b10000000;
3'b001:wb_sel<=8'b01000000;
3'b010:wb_sel<=8'b00100000;
3'b011:wb_sel<=8'b00010000;
3'b100:wb_sel<=8'b00001000;
3'b101:wb_sel<=8'b00000100;
3'b110:wb_sel<=8'b00000010;
3'b111:wb_sel<=8'b00000001;
endcase
3'b001://Halfword
case(pcx_packet_d[64+2:64+1])
2'b00:wb_sel<=8'b11000000;
2'b01:wb_sel<=8'b00110000;
2'b10:wb_sel<=8'b00001100;
2'b11:wb_sel<=8'b00000011;
endcase
3'b010://Word
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
3'b011://Doubleword
wb_sel<=8'b11111111;
3'b100://Quadword
wb_sel<=8'b11111111;
3'b111://Cacheline
wb_sel<=8'b11111111;
default:
wb_sel<=8'b01011010; // Unreal eye-catching value for debug
endcase
end
5'b00001://Store
begin
wb_we<=1;
if(pcx_packet_d[110:109]!=2'b00) //Block (or init) store
wb_sel<=8'b11111111; // Blocks are always 64 bit
else
case(pcx_packet_d[106:104]) //Size
3'b000://Byte
case(pcx_packet_d[64+2:64])
3'b000:wb_sel<=8'b10000000;
3'b001:wb_sel<=8'b01000000;
3'b010:wb_sel<=8'b00100000;
3'b011:wb_sel<=8'b00010000;
3'b100:wb_sel<=8'b00001000;
3'b101:wb_sel<=8'b00000100;
3'b110:wb_sel<=8'b00000010;
3'b111:wb_sel<=8'b00000001;
endcase
3'b001://Halfword
case(pcx_packet_d[64+2:64+1])
2'b00:wb_sel<=8'b11000000;
2'b01:wb_sel<=8'b00110000;
2'b10:wb_sel<=8'b00001100;
2'b11:wb_sel<=8'b00000011;
endcase
3'b010://Word
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
3'b011://Doubleword
wb_sel<=8'b11111111;
default:
if(`DEBUGGING)
wb_sel<=8'b01011010; // Unreal eye-catching value for debug
endcase
end
5'b00010://CAS
begin
wb_we<=0; //Load first
wb_sel<=8'b11111111; // CAS loads are as cacheline
end
5'b00100://STRLOAD
begin
wb_we<=0;
wb_sel<=8'b11111111; // Stream loads are always 128 bit
end
5'b00101://STRSTORE
begin
wb_we<=1;
case(pcx_packet_d[106:104]) //Size
3'b000://Byte
case(pcx_packet_d[64+2:64])
3'b000:wb_sel<=8'b10000000;
3'b001:wb_sel<=8'b01000000;
3'b010:wb_sel<=8'b00100000;
3'b011:wb_sel<=8'b00010000;
3'b100:wb_sel<=8'b00001000;
3'b101:wb_sel<=8'b00000100;
3'b110:wb_sel<=8'b00000010;
3'b111:wb_sel<=8'b00000001;
endcase
3'b001://Halfword
case(pcx_packet_d[64+2:64+1])
2'b00:wb_sel<=8'b11000000;
2'b01:wb_sel<=8'b00110000;
2'b10:wb_sel<=8'b00001100;
2'b11:wb_sel<=8'b00000011;
endcase
3'b010://Word
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
3'b011://Doubleword
wb_sel<=8'b11111111;
3'b100://Quadword
wb_sel<=8'b11111111;
3'b111://Cacheline
wb_sel<=8'b11111111;
default:
wb_sel<=8'b01011010; // Unreal eye-catching value for debug
endcase
end
5'b00110://SWAP/LDSTUB
begin
wb_we<=0; // Load first, as CAS
wb_sel<=8'b11111111; // SWAP/LDSTUB loads are as cacheline
end
5'b01001://INT
if(pcx_packet_d[117]) // Flush
cpx_packet_1<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer
else // Tread-to-thread interrupt
cpx_packet_1<={9'h170,pcx_packet_d[113:112],52'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]};
//5'b01010: FP1 - processed by separate state
//5'b01011: FP2 - processed by separate state
//5'b01101: FWDREQ - not implemented
//5'b01110: FWDREPL - not implemented
5'b10000://IFILL
begin
wb_we<=0;
if(pcx_req_d[4]) // I/O access
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
else
wb_sel<=8'b11111111;
end
default:
begin
wb_we<=0;
wb_sel<=8'b10101010; // Unreal eye-catching value for debug
end
endcase
end
`PCX_REQ_STEP1_1:
state<=`PCX_REQ_STEP1_2; // Delay for L1 directory
`PCX_REQ_STEP1_2:
begin
if(wb_ack || wb_ack_d)
begin
cpx_packet_1[144]<=1; // Valid
cpx_packet_1[139]<=(pcx_packet_d[122:118]==5'b00000) || (pcx_packet_d[122:118]==5'b10000) ? 1:0; // L2 always miss on load and ifill
cpx_packet_1[138:137]<=0; // Error
cpx_packet_1[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheble is set on store too
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
if((pcx_packet_d[122:118]==5'b00000 && !pcx_packet_d[117] && !pcx_packet_d[110]) || (pcx_packet_d[122:118]==5'b10000)) // Cacheble Load or IFill
cpx_packet_1[133:131]<={othercachehit[0],wayval0};
else
cpx_packet_1[133:131]<=3'b000; // Way valid
if(pcx_packet_d[122:118]==5'b00100) // Strload
cpx_packet_1[130]<=pcx_packet_d[106]; // A
else
if(pcx_packet_d[122:118]==5'b00101) // Stream store
cpx_packet_1[130]<=pcx_packet_d[108]; // A
else
cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && pcx_req_d[4]) ? 1:0; // Four byte fill
if(pcx_packet_d[122:118]==5'b00100) // Strload
cpx_packet_1[129]<=pcx_packet_d[105]; // B
else
cpx_packet_1[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110); // SWAP is single-packet but needs atom in CPX
cpx_packet_1[128]<=pcx_packet_d[110] && pcx_packet_d[122:118]==5'b00000; // Prefetch
cpx_packet_2[144]<=1; // Valid
cpx_packet_2[139]<=0; // L2 miss
cpx_packet_2[138:137]<=0; // Error
cpx_packet_2[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheble is set on store too
cpx_packet_2[135:134]<=pcx_packet_d[113:112]; // Thread ID
if(pcx_packet_d[122:118]==5'b10000) // IFill
cpx_packet_2[133:131]<={othercachehit[1],wayval1};
else
cpx_packet_2[133:131]<=3'b000; // Way valid
cpx_packet_2[130]<=0; // Four byte fill
cpx_packet_2[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110) || ((pcx_packet_d[122:118]==5'b10000) && !pcx_req_d[4]);
cpx_packet_2[128]<=0; // Prefetch
wb_strobe<=0;
wb_sel<=8'b0;
wb_addr<=64'b0;
wb_data_o<=64'b0;
wb_we<=0;
case(pcx_packet_d[122:118]) // Packet type
5'b00000://Load
begin
cpx_packet_1[143:140]<=4'b0000; // Type
if(!pcx_req_d[4])
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
state<=`PCX_REQ_STEP2;
end
else
case(pcx_packet_d[106:104]) //Size
3'b000://Byte
begin
case(pcx_packet_d[64+2:64])
3'b000:cpx_packet_1[127:0]<={wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56]};
3'b001:cpx_packet_1[127:0]<={wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48]};
3'b010:cpx_packet_1[127:0]<={wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40]};
3'b011:cpx_packet_1[127:0]<={wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32]};
3'b100:cpx_packet_1[127:0]<={wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24]};
3'b101:cpx_packet_1[127:0]<={wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16]};
3'b110:cpx_packet_1[127:0]<={wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8]};
3'b111:cpx_packet_1[127:0]<={wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0]};
endcase
wb_cycle<=0;
state<=`CPX_READY_1;
end
3'b001://Halfword
begin
case(pcx_packet_d[64+2:64+1])
2'b00:cpx_packet_1[127:0]<={wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48]};
2'b01:cpx_packet_1[127:0]<={wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32]};
2'b10:cpx_packet_1[127:0]<={wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16]};
2'b11:cpx_packet_1[127:0]<={wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0]};
endcase
wb_cycle<=0;
state<=`CPX_READY_1;
end
3'b010://Word
begin
if(pcx_packet_d[64+2]==0)
cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]};
else
cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]};
wb_cycle<=0;
state<=`CPX_READY_1;
end
3'b011://Doubleword
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
wb_cycle<=0;
state<=`CPX_READY_1;
end
3'b100://Quadword
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
wb_cycle<=0;
state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data
end
3'b111://Cacheline
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
wb_cycle<=0;
state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data
end
default:
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
wb_cycle<=0;
state<=`PCX_UNKNOWN;
end
endcase
end
5'b00001://Store
begin
cpx_packet_1[143:140]<=4'b0100; // Type
cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,2'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0};
// if((pcx_packet_d[110:109]==2'b01) && (pcx_packet_d[64+5:64]==0) && !inval_vect0[3] && !inval_vect1[3]) // Block init store
// state<=`PCX_BIS;
// else
// begin
wb_cycle<=0;
state<=`CPX_READY_1;
// end
end
5'b00010://CAS
begin
cpx_packet_1[143:140]<=4'b0000; // Load return for first packet
cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet
cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0};
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
state<=`PCX_REQ_STEP2;
end
5'b00100://STRLOAD
begin
cpx_packet_1[143:140]<=4'b0010; // Type
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
state<=`PCX_REQ_STEP2;
end
5'b00101://STRSTORE
begin
cpx_packet_1[143:140]<=4'b0110; // Type
cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0};
wb_cycle<=0;
state<=`CPX_READY_1;
end
5'b00110://SWAP/LDSTUB
begin
cpx_packet_1[143:140]<=4'b0000; // Load return for first packet
cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet
cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0};
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
state<=`PCX_REQ_STEP2;
end
5'b10000://IFILL
begin
cpx_packet_1[143:140]<=4'b0001; // Type
cpx_packet_2[143:140]<=4'b0001; // Type
if(pcx_req_d[4]) // I/O access
begin
if(pcx_packet_d[64+2]==0)
cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]};
else
cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]};
state<=`CPX_READY_1;
wb_cycle<=0;
end
else
begin
cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
state<=`PCX_REQ_STEP2;
end
end
default:
begin
wb_cycle<=0;
state<=`PCX_UNKNOWN;
end
endcase
end
end
`PCX_REQ_STEP2: // IFill, Load/strload, CAS, SWAP, LDSTUB - alwas load
begin
wb_strobe<=1'b1;
if(pcx_packet_d[122:118]==5'b10000)
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b01000};
else
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b1000};
wb_sel<=8'b11111111; // It is always full width for subsequent IFill and load accesses
state<=`PCX_REQ_STEP2_1;
end
`PCX_REQ_STEP2_1:
if(wb_ack==1)
begin
wb_strobe<=0;
wb_sel<=8'b0;
wb_addr<=64'b0;
wb_data_o<=64'b0;
wb_we<=0;
cpx_packet_1[63:0]<=wb_data_i;
if((pcx_packet_d[122:118]!=5'b00000) && (pcx_packet_d[122:118]!=5'b00100))
if(pcx_packet_d[122:118]!=5'b00010) // IFill, SWAP
state<=`PCX_REQ_STEP3;
else
state<=`PCX_REQ_CAS_COMPARE; // CAS
else
begin
wb_cycle<=0;
state<=`CPX_READY_1;
end
end
`PCX_REQ_CAS_COMPARE:
begin
cpx_two_packet<=1;
if(pcx_packet_d[106:104]==3'b010) // 32-bit
case(pcx_packet_d[64+3:64+2])
2'b00:state<=cpx_packet_1[127:96]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1;
2'b01:state<=cpx_packet_1[95:64]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1;
2'b10:state<=cpx_packet_1[63:32]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1;
2'b11:state<=cpx_packet_1[31:0]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1;
endcase
else
if(pcx_packet_d[64+3]==0)
state<=cpx_packet_1[127:64]==pcx_packet_d[63:0] ? `PCX_REQ_STEP3:`CPX_READY_1;
else
state<=cpx_packet_1[63:0]==pcx_packet_d[63:0] ? `PCX_REQ_STEP3:`CPX_READY_1;
end
`PCX_REQ_STEP3: // 256-bit IFILL; CAS, SWAP and LDSTUB store
begin
if(pcx_packet_d[122:118]==5'b10000)
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b10000};
else
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // CAS or SWAP save
cpx_two_packet<=1;
if(pcx_packet_d[122:118]==5'b10000)
wb_we<=0;
else
wb_we<=1;
wb_strobe<=1'b1;
if(pcx_packet_d[122:118]==5'b00010) // CAS
if(pcx_packet_d[106:104]==3'b010)
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
else
wb_sel<=8'b11111111; //CASX
else
if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
if(pcx_packet_d[106:104]==3'b000) //LDSTUB
case(pcx_packet_d[64+2:64])
3'b000:wb_sel<=8'b10000000;
3'b001:wb_sel<=8'b01000000;
3'b010:wb_sel<=8'b00100000;
3'b011:wb_sel<=8'b00010000;
3'b100:wb_sel<=8'b00001000;
3'b101:wb_sel<=8'b00000100;
3'b110:wb_sel<=8'b00000010;
3'b111:wb_sel<=8'b00000001;
endcase
else
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; ///SWAP is always 32-bit
else
wb_sel<=8'b11111111; // It is always full width for subsequent IFill accesses
if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
wb_data_o<={pcx_packet_d[63:32],pcx_packet_d[63:32]};
// wb_data_o<=pcx_packet_d[63:0];
else
wb_data_o<=pcx_packet_2nd[63:0]; // CAS store second packet data
// if(pcx_packet_d[106:104]==3'b010)
// wb_data_o<={pcx_packet_2nd[63:32],pcx_packet_2nd[63:32]}; // CAS store second packet data
// else
// wb_data_o<=pcx_packet_2nd[63:0];
state<=`PCX_REQ_STEP3_1;
end
`PCX_REQ_STEP3_1:
if(wb_ack==1)
begin
wb_strobe<=0;
wb_sel<=8'b0;
wb_addr<=64'b0;
wb_we<=0;
wb_data_o<=64'b0;
if(pcx_packet_d[122:118]==5'b10000) // IFill
begin
cpx_packet_2[127:64]<=wb_data_i;
state<=`PCX_REQ_STEP4;
end
else
begin
wb_cycle<=0;
state<=`CPX_READY_1;
end
end
`PCX_REQ_STEP4: // 256-bit IFILL only
begin
wb_strobe<=1'b1;
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b11000};
wb_sel<=8'b11111111; // It is always full width for subsequent accesses
state<=`PCX_REQ_STEP4_1;
end
`PCX_REQ_STEP4_1:
if(wb_ack==1)
begin
wb_cycle<=0;
wb_strobe<=0;
wb_sel<=8'b0;
wb_addr<=64'b0;
wb_we<=0;
cpx_packet_2[63:0]<=wb_data_i;
state<=`CPX_READY_1;
end
`PCX_BIS: // Block init store
begin
wb_strobe<=1'b1;
wb_we<=1;
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+6],6'b001000};
wb_sel<=8'b11111111;
wb_data_o<=64'b0;
state<=`PCX_BIS_1;
end
`PCX_BIS_1:
if(wb_ack)
begin
wb_strobe<=0;
if(wb_addr[39:0]<(pcx_packet_d[64+39:64]+8*7))
state<=`PCX_BIS_2;
else
begin
wb_cycle<=0;
wb_sel<=0;
wb_we<=0;
wb_addr<=64'b0;
state<=`CPX_READY_1;
end
end
`PCX_BIS_2:
begin
wb_strobe<=1'b1;
wb_addr[5:0]<=wb_addr[5:0]+8;
state<=`PCX_BIS_1;
end
`PCX_FP_1:
begin
fp_pcx<=pcx_packet_d;
fp_req<=1;
state<=`PCX_FP_2;
if(`DEBUGGING)
begin
wb_addr<=pcx_packet_d[103:64];
wb_data_o<=pcx_packet_d[63:0];
wb_sel<=8'h22;
end
end
`PCX_FP_2:
begin
fp_pcx<=pcx_packet_2nd;
state<=`FP_WAIT;
if(`DEBUGGING)
begin
wb_addr<=pcx_packet_2nd[103:64];
wb_data_o<=pcx_packet_d[63:0];
wb_sel<=8'h23;
end
end
`FP_WAIT:
begin
fp_pcx<=124'b0;
fp_req<=0;
if(fp_rdy)
state<=`CPX_FP;
if(`DEBUGGING)
wb_sel<=8'h24;
end
`CPX_FP:
if(fp_cpx[144]) // Packet valid
begin
cpx_packet_1<=fp_cpx;
state<=`CPX_READY_1;
if(`DEBUGGING)
begin
wb_addr<=fp_cpx[63:0];
wb_data_o<=fp_cpx[127:64];
end
end
else
if(!fp_rdy)
state<=`FP_WAIT; // Else wait for another one if it is not here still
`CPX_SEND_ETH_IRQ:
begin
cpx_packet_1<=145'h1_7_000_000000000000001D_000000000000_001D;
eth_int_sent<=0;
state<=`CPX_READY_1;
end
`CPX_INT_VEC_DIS:
begin
if(pcx_packet_d[12:10]==3'b000)
cpx_two_packet<=1; // Send interrupt only if it is for this core
cpx_packet_1[144:140]<=5'b10100;
cpx_packet_1[139:137]<=0;
cpx_packet_1[136]<=1;
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
cpx_packet_1[133:130]<=0;
cpx_packet_1[129]<=pcx_atom_d;
cpx_packet_1[128]<=0;
cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0};
cpx_packet_2<={9'h170,54'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]};
state<=`CPX_READY_1;
end
`CPX_READY_1:
begin
cpx_ready<=1;
cpx_packet<=cpx_packet_1;
cnt<=cnt+1;
if(`DEBUGGING)
if(multi_hit || multi_hit1)
wb_sel<=8'h11;
if(!cpx_two_packet)
state<=`PCX_IDLE;
else
//if(cnt==4'b1111 || pcx_packet_d[103:64]!=40'h9800000800)
state<=`CPX_READY_2;
end
`CPX_READY_2:
begin
cpx_ready<=1;
cpx_packet<=cpx_packet_2;
state<=`PCX_IDLE;
end
`PCX_UNKNOWN:
begin
wb_sel<=8'b10100101; // Illegal eye-catching value for debugging
state<=`PCX_IDLE;
end
endcase
 
// L1 cache directory instance.
//
// The directory is strobed while the bridge state machine is in
// `GOT_PCX_REQ and decodes the request straight from the PCX packet:
//   pcx_packet[122:118]  request type (same encodings as the case labels
//                        in the state machine: 00000 load, 10000 ifill,
//                        00001 store, 00010 CAS, 00110 SWAP/LDSTUB,
//                        00100 stream load, 00101 stream store)
//   pcx_packet[108:107]  way to allocate
//   pcx_packet[103:64]   40-bit physical address
// A request is treated as cacheable only when pcx_packet[117] is clear and
// it is not an I/O access (pcx_req_d[4]).
// NOTE(review): bit 117 is presumably the PCX "non-cacheable" flag, and bits
// 111/110/109 the invalidate/prefetch/block-store flags - confirm against the
// PCX packet format.
//
// Fixes relative to the previous revision:
//   * removed the trailing comma after the last named port connection, which
//     is not legal Verilog and is rejected by strict tools;
//   * .cpu is tied to a sized 1'b0 instead of an unsized 32-bit literal so the
//     1-bit port does not produce a width-mismatch warning.
l1dir l1dir_inst(
   .clk(clk),
   .reset(!rstn),                               // l1dir reset is active high
   .cpu(1'b0),                                  // Issuing CPU number
   .strobe(state==`GOT_PCX_REQ),
   .way(pcx_packet[108:107]),                   // Way to allocate for allocating loads
   .address(pcx_packet[64+39:64]),
   .load(pcx_packet[122:118]==5'b00000),
   .ifill(pcx_packet[122:118]==5'b10000),
   .store(pcx_packet[122:118]==5'b00001),
   .cas(pcx_packet[122:118]==5'b00010),
   .swap(pcx_packet[122:118]==5'b00110),
   .strload(pcx_packet[122:118]==5'b00100),
   .strstore(pcx_packet[122:118]==5'b00101),
   .cacheable((!pcx_packet[117]) && (!pcx_req_d[4])),
   .prefetch(pcx_packet[110]),
   .invalidate(pcx_packet[111]),
   .blockstore(pcx_packet[109] | pcx_packet[110]),
   .inval_vect0(inval_vect0),                   // Invalidation vector
   .inval_vect1(inval_vect1),
   .othercachehit(othercachehit),               // Other cache hit in the same CPU, wayval0/wayval1
   .othercpuhit(othercpuhit),                   // Any cache hit in the other CPU, wayval0/wayval1
   .wayval0(wayval0),                           // Way valid
   .wayval1(wayval1),                           // Second way valid for ifill
   .ready(ready)                                // Directory init done
);
 
endmodule
/os2wb/l1idir.v
0,0 → 1,178
// l1idir - four-way tag directory (presumably the shadow copy of an L1
// instruction cache's tags, judging by the module and instance names).
//
// Storage: two dual-port `cachedir` memories. Each memory holds two ways:
// port A addresses {1'b0,index} and port B addresses {1'b1,index}, so
//   icache01 port A = way 0, port B = way 1
//   icache23 port A = way 2, port B = way 3
// Every entry is 29 bits wide: {tag[27:0], 1'b0}. The LSB is always written
// as 0 and is never compared.
//
// Operations (sampled in the IDLE state when `strobe` is high, in this
// priority order):
//   invalidate - write `INVAL_TAG into all four ways at `index`
//   allocate   - write `tag` into the way selected by `way` at `index`
//   deallocate - read all ways at `index`; every way whose stored tag equals
//                `tag` is overwritten with `INVAL_TAG
//   query      - read all ways at `index` and report a match on `hit`
//
// `hit` is {1'b1, way} when exactly one way matches, 3'b000 otherwise
// (including the multi-hit case). It is updated two clocks after the strobe
// of a query/deallocate and cleared by any other strobe.
// NOTE(review): the two-clock timing assumes `cachedir` has a registered
// (one-cycle) read - confirm against the memory macro.
//
// Fix relative to the previous revision: `re` was never assigned before the
// first strobe, so the memories' `enable` inputs were X in simulation once
// `we` dropped to 0 after initialisation. `re` is now cleared in the RESET
// state.
module l1idir(
   input             clk,
   input             reset,
   input      [ 6:0] index,
   input      [ 1:0] way,
   input      [27:0] tag,
   input             strobe,
   input             query,
   input             allocate,   // tag -> entry {way,index}
   input             deallocate, // every way at index holding tag <- `INVAL_TAG
   input             invalidate, // all ways at index <- `INVAL_TAG
   output reg [ 2:0] hit,
   output reg        ready       // directory init completed
);

// Tag value written into empty/invalidated entries.
`define INVAL_TAG 28'h8000000

reg  [27:0] tag_d;   // tag captured on a query/deallocate strobe, used for compare
reg  [ 6:0] addr;    // set index presented to both memories
reg  [ 3:0] we;      // per-way write enable
reg  [ 3:0] re;      // per-way read enable (only gates the memory clock enable)
reg  [28:0] di;      // write data shared by all ways

wire [28:0] do0;
wire [28:0] do1;
wire [28:0] do2;
wire [28:0] do3;

// Two-stage delay of the query/deallocate strobes; the second stage marks the
// cycle in which `hitvect` is sampled into `hit`.
reg         query_d;
reg         deallocate_d;
reg         query_d1;
reg         deallocate_d1;

// Hold the tag being looked up so the compare stays stable after the inputs change.
always @(posedge clk)
   if(strobe)
      if(query || deallocate)
         begin
            tag_d<=tag;
         end

always @(posedge clk)
   begin
      query_d<=query && strobe;
      deallocate_d<=deallocate && strobe;
      query_d1<=query_d;
      deallocate_d1<=deallocate_d;
   end

// Ways 0 (port A) and 1 (port B)
cachedir icache01 (
   .clock(clk),
   .enable(we[0] || re[0] || we[1] || re[1]),
   .wren_a(we[0]),
   .address_a({1'b0,addr}),
   .data_a(di),
   .q_a(do0),
   .wren_b(we[1]),
   .address_b({1'b1,addr}),
   .data_b(di),
   .q_b(do1)
);

// Ways 2 (port A) and 3 (port B)
cachedir icache23 (
   .clock(clk),
   .enable(we[2] || re[2] || we[3] || re[3]),
   .wren_a(we[2]),
   .address_a({1'b0,addr}),
   .data_a(di),
   .q_a(do2),
   .wren_b(we[3]),
   .address_b({1'b1,addr}),
   .data_b(di),
   .q_b(do3)
);

// One bit per way: stored tag (entry bits 28:1) equals the captured tag.
wire [3:0] hitvect={(do3[28:1]==tag_d),(do2[28:1]==tag_d),(do1[28:1]==tag_d),(do0[28:1]==tag_d)};

`define L1IDIR_RESET   3'b000
`define L1IDIR_INIT    3'b001
`define L1IDIR_IDLE    3'b010
`define L1IDIR_READ    3'b011
`define L1IDIR_DEALLOC 3'b100

reg [2:0] state;

always @(posedge clk or posedge reset)
   if(reset)
      begin
         state<=`L1IDIR_RESET;
         ready<=0;
      end
   else
      case(state)
         `L1IDIR_RESET:
            begin
               // Start sweeping every index, writing the invalid tag into all ways.
               addr<=7'b0;
               di<={`INVAL_TAG,1'b0};
               we<=4'b1111;
               re<=4'b0;   // fix: give `re` a defined value before the first strobe
               state<=`L1IDIR_INIT;
            end
         `L1IDIR_INIT:
            begin
               addr<=addr+1;
               if(addr==7'b1111111)   // last index is being written this cycle
                  begin
                     we<=4'b0;
                     ready<=1;
                     state<=`L1IDIR_IDLE;
                  end
            end
         `L1IDIR_IDLE:
            // Without a strobe nothing changes: we/re/addr/di keep their last values.
            if(strobe)
               begin
                  if(invalidate)
                     begin
                        we<=4'b1111;
                        addr<=index;
                        di<={`INVAL_TAG,1'b0};
                     end
                  else if(allocate)
                     begin
                        case(way)
                           2'b00:we<=4'b0001;
                           2'b01:we<=4'b0010;
                           2'b10:we<=4'b0100;
                           2'b11:we<=4'b1000;
                        endcase
                        addr<=index;
                        di<={tag,1'b0};
                     end
                  else if(deallocate)
                     begin
                        re<=4'b1111;
                        we<=0;
                        addr<=index;
                        state<=`L1IDIR_READ;
                     end
                  else if(query)
                     begin
                        addr<=index;
                        re<=4'b1111;
                        we<=0;
                     end
                  else
                     begin
                        // Strobe with no directory operation: stop any access in progress.
                        we<=0;
                        re<=0;
                     end
               end
         `L1IDIR_READ:
            // Wait one cycle for the read data to appear.
            state<=`L1IDIR_DEALLOC;
         `L1IDIR_DEALLOC:
            begin
               // Overwrite every way whose tag matched with the invalid tag.
               re<=0;
               di<={`INVAL_TAG,1'b0};
               we<=hitvect;
               state<=`L1IDIR_IDLE;
            end
      endcase

// Encode the single matching way as {1'b1, way}; no match or multiple matches give 0.
always @(posedge clk)
   if(query_d1 || deallocate_d1)
      case(hitvect)
         4'b0001:hit<=3'b100;
         4'b0010:hit<=3'b101;
         4'b0100:hit<=3'b110;
         4'b1000:hit<=3'b111;
         default:hit<=3'b000; // Hits will be ORed then
      endcase
   else
      if(strobe)
         hit<=3'b000;
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.