URL
https://opencores.org/ocsvn/sparc64soc/sparc64soc/trunk
Subversion Repositories sparc64soc
Compare Revisions
- This comparison shows the changes necessary to convert path
/sparc64soc/trunk
- from Rev 4 to Rev 5
- ↔ Reverse comparison
Rev 4 → Rev 5
/os2wb/l1dir.v
0,0 → 1,249
// L1 cache directory for a two-CPU system.
//
// Instantiates one data-cache directory (l1ddir) and one instruction-cache
// directory (l1idir) per CPU. The request type inputs (load, ifill, store,
// ...) are decoded into per-directory query / allocate / deallocate controls,
// and the directory hit results are combined into the invalidation vectors
// and hit flags returned to the caller.
//
// Addressing used by the instances below:
//   D directories: index = address[10:4], tag = address[39:11]
//   I directories: index = address[11:5], tag = address[39:12]
module l1dir(
   input clk,
   input reset,

   input        cpu,         // Issuing CPU number
   input        strobe,      // Start transaction
   input [ 1:0] way,         // Way to allocate for allocating loads
   input [39:0] address,
   input        load,
   input        ifill,
   input        store,
   input        cas,
   input        swap,
   input        strload,
   input        strstore,
   input        cacheable,
   input        prefetch,
   input        invalidate,
   input        blockstore,

   output [111:0] inval_vect0,   // Invalidation vector
   output [111:0] inval_vect1,
   output [  1:0] othercachehit, // Other cache hit in the same CPU, wayval0/wayval1
   output [  1:0] othercpuhit,   // Any cache hit in the other CPU, wayval0/wayval1
   output [  1:0] wayval0,       // Way valid
   output [  1:0] wayval1,       // Second way valid for ifill
   output         ready          // Directory init done
);

   wire [3:0] rdy; // Per-directory init-done flags: {cpu1_idir, cpu0_idir, cpu1_ddir, cpu0_ddir}

   // ---------------------------------------------------------------------
   // D-cache directory controls.
   //   query   - plain (non-block) store by the owning CPU
   //   alloc   - cacheable, non-prefetch, non-invalidate load by the owning CPU
   //   dealloc - own-CPU ifill/cas/swap/strstore/block store, or any of the
   //             listed accesses issued by the other CPU
   // ---------------------------------------------------------------------
   wire dquery0=(!cpu) && store && (!blockstore);
   wire dquery1=  cpu  && store && (!blockstore);
   wire dalloc0=(!cpu) && cacheable && (!invalidate) && load && (!prefetch);
   wire dalloc1=  cpu  && cacheable && (!invalidate) && load && (!prefetch);
   wire ddealloc0=((!cpu) && (ifill || cas || swap || strstore || (store && blockstore))) ||
                  (  cpu  && ((load && cacheable) || ifill || store || cas || swap || strload || strstore));
   wire ddealloc1=(  cpu  && (ifill || cas || swap || strstore || (store && blockstore))) ||
                  ((!cpu) && ((load && cacheable) || ifill || store || cas || swap || strload || strstore));

   // ---------------------------------------------------------------------
   // I-cache directory controls. The I directories are never queried.
   // ---------------------------------------------------------------------
   wire iquery0=0;
   wire iquery1=0;
   wire ialloc0=(!cpu) && cacheable && (!invalidate) && ifill;
   wire ialloc1=  cpu  && cacheable && (!invalidate) && ifill;
   wire idealloc0=((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || store || cas || swap || strstore)) ||
                  (  cpu  && ((load && cacheable && (!prefetch) && (!invalidate)) || ifill || store || cas || swap || strload || strstore));
   wire idealloc1=(  cpu  && ((load && cacheable && (!prefetch) && (!invalidate)) || store || cas || swap || strstore )) ||
                  ((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || ifill || store || cas || swap || strload || strstore));

   // Directory hit results; bit [2] is the hit flag, bits [1:0] the way.
   wire [2:0] cpu0_dhit0;
   wire [2:0] cpu0_dhit1;
   wire [2:0] cpu1_dhit0;
   wire [2:0] cpu1_dhit1;
   wire [2:0] cpu0_ihit;
   wire [2:0] cpu1_ihit;

   // An invalidate accompanied by load targets the D directory,
   // one accompanied by ifill targets the I directory.
   wire invalidate_d=invalidate && load;
   wire invalidate_i=invalidate && ifill;

   // Request attributes captured at strobe time; the directory results
   // arrive later, so the output decode uses these delayed copies.
   reg        ifill_d;
   reg        load_d;
   reg        cacheable_d;
   reg        cpu_d;
   reg [39:0] address_d;

   // Delayed strobes, used only to extend the debug capture window.
   reg strobe_d;
   reg strobe_d1;
   reg strobe_d2;

   always @(posedge clk)
      begin
         strobe_d<=strobe;
         strobe_d1<=strobe_d;
         strobe_d2<=strobe_d1;
      end

   always @(posedge clk)
      if(strobe)
         begin
            ifill_d<=ifill;
            load_d<=load;
            cacheable_d<=cacheable;
            cpu_d<=cpu;
            address_d<=address;
         end

   l1ddir cpu0_ddir(
      .clk(clk),
      .reset(reset),

      .index(address[10:4]),
      .way(way),
      .tag(address[39:11]),
      .strobe(strobe),
      .query(dquery0),
      .allocate(dalloc0),
      .deallocate(ddealloc0),
      .dualdealloc(ifill),
      .invalidate(invalidate_d && !cpu),

      .hit0(cpu0_dhit0),
      .hit1(cpu0_dhit1),

      .ready(rdy[0])
   );

   l1ddir cpu1_ddir(
      .clk(clk),
      .reset(reset),

      .index(address[10:4]),
      .way(way),
      .tag(address[39:11]),
      .strobe(strobe),
      .query(dquery1),
      .allocate(dalloc1),
      .deallocate(ddealloc1),
      .dualdealloc(ifill),
      .invalidate(invalidate_d && cpu),

      .hit0(cpu1_dhit0),
      .hit1(cpu1_dhit1),

      .ready(rdy[1])
   );

   l1idir cpu0_idir(
      .clk(clk),
      .reset(reset),

      .index(address[11:5]),
      .way(way),
      .tag(address[39:12]),
      .strobe(strobe),
      .query(iquery0),
      .allocate(ialloc0),
      .deallocate(idealloc0),
      .invalidate(invalidate_i && !cpu),

      .hit(cpu0_ihit),

      .ready(rdy[2])
   );

   l1idir cpu1_idir(
      .clk(clk),
      .reset(reset),

      .index(address[11:5]),
      .way(way),
      .tag(address[39:12]),
      .strobe(strobe),
      .query(iquery1),
      .allocate(ialloc1),
      .deallocate(idealloc1),
      .invalidate(invalidate_i && cpu),

      .hit(cpu1_ihit),

      .ready(rdy[3])
   );

   // Ready only when all four directories have finished initialization.
   assign ready=&rdy;

   // ---------------------------------------------------------------------
   // Invalidation vector 0. Only bits [7:0], [37:32], [63:56] and [93:88]
   // carry information; which group is flagged depends on address_d[5:4]
   // (D hits) and address_d[5] (I hits). All other bits are zero.
   // ---------------------------------------------------------------------
   assign inval_vect0[3:0]={wayval0,cpu0_ihit[2] && (!address_d[5]),cpu0_dhit0[2] && (address_d[5:4]==2'b00)};
   assign inval_vect0[7:4]={wayval0,cpu1_ihit[2] && (!address_d[5]),cpu1_dhit0[2] && (address_d[5:4]==2'b00)};
   assign inval_vect0[31:8]=0;
   assign inval_vect0[34:32]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b01)};
   assign inval_vect0[37:35]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b01)};
   assign inval_vect0[55:38]=0;
   assign inval_vect0[59:56]={wayval0,cpu0_ihit[2] && address_d[5],cpu0_dhit0[2] && (address_d[5:4]==2'b10)};
   assign inval_vect0[63:60]={wayval0,cpu1_ihit[2] && address_d[5],cpu1_dhit0[2] && (address_d[5:4]==2'b10)};
   assign inval_vect0[87:64]=0;
   assign inval_vect0[90:88]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b11)};
   assign inval_vect0[93:91]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b11)};
   assign inval_vect0[111:94]=0;

   // Invalidation vector 1: built from the second D-directory hit (hit1)
   // only. The 3-bit right-hand sides of the [3:0]/[7:4]/[59:56]/[63:60]
   // slices are zero-extended to 4 bits by the assignment.
   assign inval_vect1[3:0]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b00)};
   assign inval_vect1[7:4]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b00)};
   assign inval_vect1[31:8]=0;
   assign inval_vect1[34:32]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b01)};
   assign inval_vect1[37:35]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b01)};
   assign inval_vect1[55:38]=0;
   assign inval_vect1[59:56]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b10)};
   assign inval_vect1[63:60]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b10)};
   assign inval_vect1[87:64]=0;
   assign inval_vect1[90:88]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b11)};
   assign inval_vect1[93:91]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b11)};
   assign inval_vect1[111:94]=0;

   // Way numbers are ORed together: a directory that misses reports 3'b000.
   assign wayval0=cpu0_dhit0[1:0] | cpu1_dhit0[1:0] | cpu0_ihit[1:0] | cpu1_ihit[1:0];
   assign wayval1=cpu0_dhit1[1:0] | cpu1_dhit1[1:0];

   // Hit in the *other* cache (I vs D) of the requesting CPU.
   assign othercachehit[0]=((!cpu_d) && ifill_d && cpu0_dhit0[2]) ||
                           (  cpu_d  && ifill_d && cpu1_dhit0[2]) ||
                           ((!cpu_d) && load_d && cacheable_d && cpu0_ihit[2]) ||
                           (  cpu_d  && load_d && cacheable_d && cpu1_ihit[2]);
   assign othercachehit[1]=((!cpu_d) && ifill_d && cpu0_dhit1[2]) ||
                           (  cpu_d  && ifill_d && cpu1_dhit1[2]);

   // Hit in any cache of the CPU that did not issue the request.
   assign othercpuhit[0]=((!cpu_d) && (cpu1_dhit0[2] || cpu1_ihit[2])) ||
                         (  cpu_d  && (cpu0_dhit0[2] || cpu0_ihit[2]));
   assign othercpuhit[1]=((!cpu_d) && ifill_d && cpu1_dhit1[2]) ||
                         (  cpu_d  && ifill_d && cpu0_dhit1[2]);

   // ---------------------------------------------------------------------
   // Debug capture. NOTE(review): st2 looks like an embedded logic analyzer
   // instance (acq_clk / acq_data_in / acq_trigger_in ports) - confirm.
   // Capture is enabled for the strobe cycle and the three cycles after it.
   // ---------------------------------------------------------------------
   wire [149:0] ILA_DATA;

   st2 st2_inst(
      .acq_clk(clk),
      .acq_data_in(ILA_DATA),
      .acq_trigger_in(ILA_DATA),
      .storage_enable(strobe || strobe_d || strobe_d1 || strobe_d2)
   );

   assign ILA_DATA[39:0]=address;
   assign ILA_DATA[41:40]=way;
   assign ILA_DATA[42]=strobe;
   assign ILA_DATA[43]=load;
   assign ILA_DATA[44]=ifill;
   assign ILA_DATA[45]=store;
   assign ILA_DATA[46]=cas;
   assign ILA_DATA[47]=swap;
   assign ILA_DATA[48]=strload;
   assign ILA_DATA[49]=strstore;
   assign ILA_DATA[50]=cacheable;
   assign ILA_DATA[51]=prefetch;
   assign ILA_DATA[52]=invalidate;
   assign ILA_DATA[53]=blockstore;
   assign ILA_DATA[55:54]=othercachehit;
   assign ILA_DATA[57:56]=othercpuhit;
   assign ILA_DATA[59:58]=wayval0;
   assign ILA_DATA[61:60]=wayval1;
   assign ILA_DATA[69:62]=inval_vect0[7:0];
   assign ILA_DATA[75:70]=inval_vect0[37:32];
   assign ILA_DATA[83:76]=inval_vect0[63:56];
   assign ILA_DATA[89:84]=inval_vect0[93:88];
   assign ILA_DATA[97:90]=inval_vect1[7:0];
   assign ILA_DATA[103:98]=inval_vect1[37:32];
   assign ILA_DATA[111:104]=inval_vect1[63:56];
   assign ILA_DATA[117:112]=inval_vect1[93:88];
   assign ILA_DATA[118]=dquery0;
   assign ILA_DATA[119]=dquery1;
   assign ILA_DATA[120]=dalloc0;
   assign ILA_DATA[121]=dalloc1;
   assign ILA_DATA[122]=ddealloc0;
   assign ILA_DATA[123]=ddealloc1;
   assign ILA_DATA[124]=iquery0;
   assign ILA_DATA[125]=iquery1;
   assign ILA_DATA[126]=ialloc0;
   assign ILA_DATA[127]=ialloc1;
   assign ILA_DATA[128]=idealloc0;
   assign ILA_DATA[129]=idealloc1;
   // Remaining capture bits are unused; tie them low so the analyzer's
   // data and trigger inputs are never left undriven.
   assign ILA_DATA[149:130]=20'b0;

endmodule
/os2wb/l1ddir.v
0,0 → 1,250
// L1 data-cache directory for one CPU.
//
// Holds one 29-bit tag per {way, index} in four dual-port RAMs (one RAM per
// way, selected by the one-hot write enables). Supported operations, all
// started by `strobe` while the FSM is idle:
//   invalidate - write the invalid tag to all four ways at `index`
//   allocate   - write `tag` to the way selected by `way` at `index`
//   deallocate - read all ways, then overwrite every way whose stored tag
//                equals the requested tag with the invalid tag; with
//                `dualdealloc` both the even ({index[6:1],0}, port A) and
//                odd ({index[6:1],1}, port B) entries are checked
//   query      - read all ways and report a hit without modifying them
// Priority when several request inputs are asserted together:
// invalidate > allocate > deallocate > query.
//
// Hit outputs are encoded as {hit, way[1:0]}; a miss (or more than one
// matching way) is reported as 3'b000.
module l1ddir(
   input clk,
   input reset,

   input [ 6:0] index,
   input [ 1:0] way,
   input [28:0] tag,
   input        strobe,
   input        query,
   input        allocate,    //tag->{way,index}
   input        deallocate,  //if({way,index}==tag) {way,index}<-FFFFFF
   input        dualdealloc,
   input        invalidate,  //all ways

   output reg [2:0] hit0,
   output reg [2:0] hit1,

   output reg ready // directory init completed
);

// Tag value written to mark an entry as invalid.
`define INVAL_TAG 29'h10000000

   reg  [28:0] tag_d;         // Tag of the pending query/deallocate
   reg  [ 6:0] addr0;         // Port A entry address
   reg  [ 5:0] addr1;         // Port B entry address (odd entry: {addr1,1'b1})
   reg  [ 3:0] we0;           // Port A write enable, one bit per way
   reg  [ 3:0] we1;           // Port B write enable, one bit per way
   reg  [ 3:0] re;            // Read enable, one bit per way
   reg  [28:0] di;            // Port A write data
   reg         dualdealloc_d; // dualdealloc of the pending request
   wire [28:0] do0_0;         // doN_0: port A read data of way N
   wire [28:0] do1_0;
   wire [28:0] do2_0;
   wire [28:0] do3_0;
   wire [28:0] do0_1;         // doN_1: port B read data of way N
   wire [28:0] do1_1;
   wire [28:0] do2_1;
   wire [28:0] do3_1;
   reg         query_d;
   reg         deallocate_d;
   reg         query_d1;
   reg         deallocate_d1;

   // Remember the tag being looked up so it can be compared with the RAM
   // outputs when they become available.
   always @(posedge clk)
      if(strobe && (query || deallocate))
         begin
            tag_d<=tag;
            dualdealloc_d<=dualdealloc;
         end

   // Two-stage delay of the request so that the hit registers are loaded
   // in the cycle the comparison result is used by the FSM.
   always @(posedge clk)
      begin
         query_d<=query && strobe;
         deallocate_d<=deallocate && strobe;
         query_d1<=query_d;
         deallocate_d1<=deallocate_d;
      end

   // One tag RAM per way. Port A is the general read/write port; port B
   // always addresses the odd entry and only ever writes the invalid tag.
   cachedir dcache0 (
      .clock(clk),
      .enable(we0[0] || we1[0] || re[0]),
      .wren_a(we0[0]),
      .address_a({1'b0,addr0}),
      .data_a(di),
      .q_a(do0_0),

      .wren_b(we1[0]),
      .address_b({1'b0,addr1,1'b1}),
      .data_b(`INVAL_TAG),
      .q_b(do0_1)
   );

   cachedir dcache1 (
      .clock(clk),
      .enable(we0[1] || we1[1] || re[1]),
      .wren_a(we0[1]),
      .address_a({1'b0,addr0}),
      .data_a(di),
      .q_a(do1_0),

      .wren_b(we1[1]),
      .address_b({1'b0,addr1,1'b1}),
      .data_b(`INVAL_TAG),
      .q_b(do1_1)
   );

   cachedir dcache2 (
      .clock(clk),
      .enable(we0[2] || we1[2] || re[2]),
      .wren_a(we0[2]),
      .address_a({1'b0,addr0}),
      .data_a(di),
      .q_a(do2_0),

      .wren_b(we1[2]),
      .address_b({1'b0,addr1,1'b1}),
      .data_b(`INVAL_TAG),
      .q_b(do2_1)
   );

   cachedir dcache3 (
      .clock(clk),
      .enable(we0[3] || we1[3] || re[3]),
      .wren_a(we0[3]),
      .address_a({1'b0,addr0}),
      .data_a(di),
      .q_a(do3_0),

      .wren_b(we1[3]),
      .address_b({1'b0,addr1,1'b1}),
      .data_b(`INVAL_TAG),
      .q_b(do3_1)
   );

   // Per-way tag match against the pending request, for each port.
   wire [3:0] hitvect0={(do3_0==tag_d),(do2_0==tag_d),(do1_0==tag_d),(do0_0==tag_d)};
   wire [3:0] hitvect1={(do3_1==tag_d),(do2_1==tag_d),(do1_1==tag_d),(do0_1==tag_d)};

`define L1DDIR_RESET   3'b000
`define L1DDIR_INIT    3'b001
`define L1DDIR_IDLE    3'b010
`define L1DDIR_READ    3'b011
`define L1DDIR_DEALLOC 3'b100

   reg [2:0] state;

   always @(posedge clk or posedge reset)
      if(reset)
         begin
            state<=`L1DDIR_RESET;
            ready<=0;
            // Give every register driven by this block a defined reset
            // value: `re` in particular was otherwise unassigned until the
            // first request, leaving the RAM enables unknown after init.
            addr0<=7'b0;
            addr1<=6'b0;
            di<=`INVAL_TAG;
            we0<=4'b0;
            we1<=4'b0;
            re<=4'b0;
         end
      else
         case(state)
            `L1DDIR_RESET:
               begin
                  // Start filling the directory with the invalid tag:
                  // port A walks the even entries, port B the odd ones.
                  addr0<=7'b0;
                  addr1<=6'b0;
                  di<=`INVAL_TAG;
                  we0<=4'b1111;
                  we1<=4'b1111;
                  state<=`L1DDIR_INIT;
               end
            `L1DDIR_INIT:
               begin
                  addr0<=addr0+2;
                  addr1<=addr1+1;
                  if(addr0==7'b1111110) // Last even entry is being written
                     begin
                        we0<=4'b0;
                        we1<=4'b0;
                        ready<=1;
                        state<=`L1DDIR_IDLE;
                     end
               end
            `L1DDIR_IDLE:
               // Without strobe nothing changes: the write/read enables
               // keep their last value until the next strobe.
               if(strobe)
                  begin
                     if(invalidate)
                        begin
                           we0<=4'b1111;
                           we1<=0;
                           addr0<=index;
                           di<=`INVAL_TAG;
                        end
                     else if(allocate)
                        begin
                           case(way)
                              2'b00:we0<=4'b0001;
                              2'b01:we0<=4'b0010;
                              2'b10:we0<=4'b0100;
                              2'b11:we0<=4'b1000;
                           endcase
                           we1<=0;
                           addr0<=index;
                           di<=tag;
                        end
                     else if(deallocate)
                        begin
                           re<=4'b1111;
                           we0<=0;
                           we1<=0;
                           if(dualdealloc)
                              begin
                                 addr0<={index[6:1],1'b0};
                                 addr1<=index[6:1];
                              end
                           else
                              addr0<=index;
                           state<=`L1DDIR_READ;
                        end
                     else if(query)
                        begin
                           addr0<=index;
                           re<=4'b1111;
                           we0<=0;
                           we1<=0;
                        end
                     else
                        begin
                           we0<=0;
                           we1<=0;
                           re<=0;
                        end
                  end
            `L1DDIR_READ:
               // Wait one cycle for the RAM read data.
               state<=`L1DDIR_DEALLOC;
            `L1DDIR_DEALLOC:
               begin
                  // Overwrite every matching way with the invalid tag.
                  re<=0;
                  di<=`INVAL_TAG;
                  we0<=hitvect0;
                  if(dualdealloc_d)
                     we1<=hitvect1;
                  else
                     we1<=0;
                  state<=`L1DDIR_IDLE;
               end
         endcase

   // Hit reporting: loaded two cycles after a query/deallocate strobe and
   // cleared by any later strobe that does not coincide with a result.
   always @(posedge clk)
      if(query_d1 || deallocate_d1)
         begin
            case(hitvect0)
               4'b0001:hit0<=3'b100;
               4'b0010:hit0<=3'b101;
               4'b0100:hit0<=3'b110;
               4'b1000:hit0<=3'b111;
               default:hit0<=3'b000; // Hits will be ORed then
            endcase
            if(dualdealloc_d && deallocate_d1)
               case(hitvect1)
                  4'b0001:hit1<=3'b100;
                  4'b0010:hit1<=3'b101;
                  4'b0100:hit1<=3'b110;
                  4'b1000:hit1<=3'b111;
                  default:hit1<=3'b000;
               endcase
            else
               hit1<=3'b000;
         end
      else
         if(strobe)
            begin
               hit0<=3'b000;
               hit1<=3'b000;
            end

endmodule
/os2wb/os2wb_dual.v
0,0 → 1,1003
`timescale 1ns / 1ps |
////////////////////////////////////////////////////////////////////////////////// |
// Company: (C) Athree, 2009 |
// Engineer: Dmitry Rozhdestvenskiy |
// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru |
// |
// Design Name: Bridge from SPARC Core to Wishbone Master |
// Module Name: os2wb |
// Project Name: SPARC SoC single-core |
// |
// LICENSE: |
// This is a Free Hardware Design; you can redistribute it and/or |
// modify it under the terms of the GNU General Public License |
// version 2 as published by the Free Software Foundation. |
// The above named program is distributed in the hope that it will |
// be useful, but WITHOUT ANY WARRANTY; without even the implied |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
// See the GNU General Public License for more details. |
// |
////////////////////////////////////////////////////////////////////////////////// |
module os2wb_dual( |
input clk, |
input rstn, |
|
// Core interface |
input [ 4:0] pcx_req, |
input pcx_atom, |
input [123:0] pcx_data, |
output reg [ 4:0] pcx_grant, |
output reg cpx_ready, |
output reg [144:0] cpx_packet, |
|
// Wishbone master interface |
input [ 63:0] wb_data_i, |
input wb_ack, |
output reg wb_cycle, |
output reg wb_strobe, |
output reg wb_we, |
output reg [ 7:0] wb_sel, |
output reg [ 63:0] wb_addr, |
output reg [ 63:0] wb_data_o, |
|
// FPU interface |
output reg [123:0] fp_pcx, |
output reg fp_req, |
input [144:0] fp_cpx, |
input fp_rdy, |
|
// Ethernet interrupt, sensed on posedge, mapped to vector 'd29 |
input eth_int |
); |
|
reg [123:0] pcx_packet_d; // Latched incoming PCX packet |
reg [123:0] pcx_packet_2nd; // Second packet for atomic (CAS) |
reg [ 4:0] pcx_req_d; // Latched request |
reg pcx_atom_d; // Latched atomic flasg |
reg [ 4:0] state; // FSM state |
reg [144:0] cpx_packet_1; // First CPX packet |
reg [144:0] cpx_packet_2; // Second CPX packet (for atomics and cached IFILLs) |
reg cpx_two_packet; // CPX answer is two-packet (!=atomic, SWAP has atomic==0 and answer is two-packet) |
|
wire [111:0] inval_vect0; // Invalidate, instr/data, way |
wire [111:0] inval_vect1; // IFill may cause two D lines invalidation at a time |
|
wire [1:0] othercachehit; |
wire [1:0] othercpuhit; |
wire [1:0] wayval0; |
wire [1:0] wayval1; |
|
`define TEST_DRAM_1 5'b00000 |
`define TEST_DRAM_2 5'b00001 |
`define TEST_DRAM_3 5'b00010 |
`define TEST_DRAM_4 5'b00011 |
`define INIT_DRAM_1 5'b00100 |
`define INIT_DRAM_2 5'b00101 |
`define WAKEUP 5'b00110 |
`define PCX_IDLE 5'b00111 |
`define GOT_PCX_REQ 5'b01000 |
`define PCX_REQ_2ND 5'b01001 |
`define PCX_REQ_STEP1 5'b01010 |
`define PCX_REQ_STEP1_1 5'b01011 |
`define PCX_REQ_STEP2 5'b01100 |
`define PCX_REQ_STEP2_1 5'b01101 |
`define PCX_REQ_STEP3 5'b01110 |
`define PCX_REQ_STEP3_1 5'b01111 |
`define PCX_REQ_STEP4 5'b10000 |
`define PCX_REQ_STEP4_1 5'b10001 |
`define PCX_BIS 5'b10010 |
`define PCX_BIS_1 5'b10011 |
`define PCX_BIS_2 5'b10100 |
`define CPX_READY_1 5'b10101 |
`define CPX_READY_2 5'b10110 |
`define PCX_REQ_STEP1_2 5'b10111 |
`define PCX_UNKNOWN 5'b11000 |
`define PCX_FP_1 5'b11001 |
`define PCX_FP_2 5'b11010 |
`define FP_WAIT 5'b11011 |
`define CPX_FP 5'b11100 |
`define CPX_SEND_ETH_IRQ 5'b11101 |
`define CPX_INT_VEC_DIS 5'b11110 |
`define PCX_REQ_CAS_COMPARE 5'b11111 |
|
`define MEM_SIZE 64'h00000000_10000000 |
|
`define TEST_DRAM 1 |
`define DEBUGGING 1 |
|
reg cache_init; |
wire [3:0] dcache0_hit; |
wire [3:0] dcache1_hit; |
wire [3:0] icache_hit; |
reg multi_hit; |
reg multi_hit1; |
reg eth_int_d; |
reg eth_int_send; |
reg eth_int_sent; |
reg [3:0] cnt; |
|
// PCX channel FIFO |
wire [129:0] pcx_data_fifo; |
wire pcx_fifo_empty; |
reg [ 4:0] pcx_req_1; |
reg [ 4:0] pcx_req_2; |
reg pcx_atom_1; |
reg pcx_atom_2; |
reg pcx_data_123_d; |
|
always @(posedge clk) |
begin |
pcx_req_1<=pcx_req; |
pcx_atom_1<=pcx_atom; |
pcx_atom_2<=pcx_atom_1; |
pcx_req_2<=pcx_atom_1 ? pcx_req_1:5'b0; |
pcx_grant<=(pcx_req_1 | pcx_req_2); |
pcx_data_123_d<=pcx_data[123]; |
end |
|
pcx_fifo pcx_fifo_inst( |
// FIFO should be first word fall-through |
// It has no full flag as the core will send only limited number of requests, |
// in original design we used it 32 words deep |
// Just make it deeper if you experience overflow - |
// you can't just send no grant on full because the core expects immediate |
// grant for at least two requests for each zone |
.aclr(!rstn), |
.clock(clk), |
.data({pcx_atom_1,pcx_req_1,pcx_data}), |
.rdreq(fifo_rd), |
.wrreq((pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d)), |
// Second atomic packet for FPU may be invalid, but should be sent to FPU |
// so if the first atomic packet is valid we latch both |
.empty(pcx_fifo_empty), |
.q(pcx_data_fifo) |
); |
// -------------------------- |
|
reg wb_ack_d; |
|
always @(posedge clk or negedge rstn) |
if(!rstn) |
eth_int_send<=0; |
else |
begin |
wb_ack_d<=wb_ack; |
eth_int_d<=eth_int; |
if(eth_int && !eth_int_d) |
eth_int_send<=1; |
else |
if(eth_int_sent) |
eth_int_send<=0; |
end |
|
reg fifo_rd; |
wire [123:0] pcx_packet; |
assign pcx_packet=pcx_data_fifo[123:0]; |
|
always @(posedge clk or negedge rstn) |
if(rstn==0) |
begin |
if(`TEST_DRAM) |
state<=`TEST_DRAM_1; |
else |
state<=`INIT_DRAM_1; // DRAM initialization is mandatory! |
cpx_ready<=0; |
fifo_rd<=0; |
cpx_packet<=145'b0; |
wb_cycle<=0; |
wb_strobe<=0; |
wb_we<=0; |
wb_sel<=0; |
wb_addr<=64'b0; |
wb_data_o<=64'b0; |
pcx_packet_d<=124'b0; |
fp_pcx<=124'b0; |
fp_req<=0; |
end |
else |
case(state) |
`TEST_DRAM_1: |
begin |
wb_cycle<=1; |
wb_strobe<=1; |
wb_sel<=8'hFF; |
wb_we<=1; |
state<=`TEST_DRAM_2; |
end |
`TEST_DRAM_2: |
if(wb_ack) |
begin |
wb_strobe<=0; |
if(wb_addr<`MEM_SIZE-8) |
begin |
wb_addr[31:0]<=wb_addr[31:0]+8; |
wb_data_o<={wb_addr[31:0]+8,wb_addr[31:0]+8}; |
state<=`TEST_DRAM_1; |
end |
else |
begin |
state<=`TEST_DRAM_3; |
wb_cycle<=0; |
wb_sel<=0; |
wb_we<=0; |
wb_data_o<=64'b0; |
wb_addr<=64'b0; |
end |
end |
`TEST_DRAM_3: |
begin |
wb_cycle<=1; |
wb_strobe<=1; |
wb_sel<=8'hFF; |
state<=`TEST_DRAM_4; |
end |
`TEST_DRAM_4: |
if(wb_ack) |
begin |
wb_strobe<=0; |
if(wb_addr<`MEM_SIZE-8) |
begin |
if(wb_data_i=={wb_addr[31:0],wb_addr[31:0]}) |
begin |
wb_addr[31:0]<=wb_addr[31:0]+8; |
state<=`TEST_DRAM_3; |
end |
end |
else |
begin |
state<=`INIT_DRAM_1; |
wb_cycle<=0; |
wb_sel<=0; |
wb_we<=0; |
wb_data_o<=64'b0; |
wb_addr<=64'b0; |
end |
end |
`INIT_DRAM_1: |
begin |
wb_cycle<=1; |
wb_strobe<=1; |
wb_sel<=8'hFF; |
wb_we<=1; |
cache_init<=1; // We also init cache directories here |
state<=`INIT_DRAM_2; |
end |
`INIT_DRAM_2: |
if(wb_ack) |
begin |
wb_strobe<=0; |
if(wb_addr<`MEM_SIZE-8) |
begin |
wb_addr[31:0]<=wb_addr[31:0]+8; |
pcx_packet_d[64+11:64+4]<=pcx_packet_d[64+11:64+4]+1; // Address for cachedir init |
state<=`INIT_DRAM_1; |
end |
else |
begin |
state<=`WAKEUP; |
wb_cycle<=0; |
wb_sel<=0; |
wb_we<=0; |
cache_init<=0; |
wb_addr<=64'b0; |
end |
end |
`WAKEUP: |
begin |
cpx_packet<=145'h1700000000000000000000000000000010001; |
cpx_ready<=1; |
state<=`PCX_IDLE; |
end |
`PCX_IDLE: |
begin |
cnt<=0; |
cpx_packet<=145'b0; |
cpx_ready<=0; |
cpx_two_packet<=0; |
multi_hit<=0; |
multi_hit1<=0; |
if(eth_int_send) |
begin |
state<=`CPX_SEND_ETH_IRQ; |
eth_int_sent<=1; |
end |
else |
if(!pcx_fifo_empty) |
begin |
pcx_req_d<=pcx_data_fifo[128:124]; |
pcx_atom_d<=pcx_data_fifo[129]; |
fifo_rd<=1; |
state<=`GOT_PCX_REQ; |
end |
end |
`GOT_PCX_REQ: |
begin |
pcx_packet_d<=pcx_packet; |
if(`DEBUGGING) |
begin |
wb_sel[1:0]<=pcx_packet[113:112]; |
wb_sel[2]<=1; |
end |
if(pcx_packet[103:64]==40'h9800000800 && pcx_packet[122:118]==5'b00001) |
begin |
state<=`CPX_INT_VEC_DIS; |
fifo_rd<=0; |
end |
else |
if(pcx_atom_d==0) |
begin |
fifo_rd<=0; |
if(pcx_packet[122:118]==5'b01010) // FP req |
begin |
state<=`PCX_FP_1; |
pcx_packet_2nd[123]<=0; |
end |
else |
state<=`PCX_REQ_STEP1; |
end |
else |
state<=`PCX_REQ_2ND; |
end |
`PCX_REQ_2ND: |
begin |
pcx_packet_2nd<=pcx_packet; //Latch second packet for atomics |
if(`DEBUGGING) |
if(pcx_fifo_empty) |
wb_sel<=8'h67; |
fifo_rd<=0; |
if(pcx_packet_d[122:118]==5'b01010) // FP req |
state<=`PCX_FP_1; |
else |
state<=`PCX_REQ_STEP1; |
end |
`PCX_REQ_STEP1: |
begin |
if(pcx_packet_d[111]==1'b1) // Invalidate request |
begin |
cpx_packet_1[144]<=1; // Valid |
cpx_packet_1[143:140]<=4'b0100; // Invalidate reply is Store ACK |
cpx_packet_1[139]<=1; // L2 miss |
cpx_packet_1[138:137]<=0; // Error |
cpx_packet_1[136]<=pcx_packet_d[117]; // Non-cacheble |
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID |
cpx_packet_1[133:131]<=0; // Way valid |
cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && (pcx_req_d==5'b10000)) ? 1:0; // Four byte fill |
cpx_packet_1[129]<=pcx_atom_d; |
cpx_packet_1[128]<=pcx_packet_d[110]; // Prefetch |
cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,pcx_packet_d[122:118]==5'b00000 ? 2'b01:2'b10,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0}; |
state<=`CPX_READY_1; |
end |
else |
if(pcx_packet_d[122:118]!=5'b01001) // Not INT |
begin |
wb_cycle<=1'b1; |
wb_strobe<=1'b1; |
if((pcx_packet_d[122:118]==5'b00000 && !pcx_req_d[4]) || pcx_packet_d[122:118]==5'b00010 || pcx_packet_d[122:118]==5'b00100 || pcx_packet_d[122:118]==5'b00110) |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b0000}; //DRAM load/streamload, CAS and SWAP always use DRAM and load first |
else |
if(pcx_packet_d[122:118]==5'b10000 && !pcx_req_d[4]) |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b00000}; //DRAM ifill |
else |
if(pcx_packet_d[64+39:64+28]==12'hFFF && pcx_packet_d[64+27:64+24]!=4'b0) // flash remap FFF1->FFF8 |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3]+37'h0000E00000,3'b000}; |
else |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; |
wb_data_o<=pcx_packet_d[63:0]; |
state<=`PCX_REQ_STEP1_1; |
end |
else |
if((pcx_packet_d[12:10]!=3'b000) && !pcx_packet_d[117]) // Not FLUSH int and not this core |
state<=`PCX_IDLE; |
else |
state<=`CPX_READY_1; |
case(pcx_packet_d[122:118]) // Packet type |
5'b00000://Load |
begin |
wb_we<=0; |
if(!pcx_req_d[4]) |
wb_sel<=8'b11111111; // DRAM requests are always 128 bit |
else |
case(pcx_packet_d[106:104]) //Size |
3'b000://Byte |
case(pcx_packet_d[64+2:64]) |
3'b000:wb_sel<=8'b10000000; |
3'b001:wb_sel<=8'b01000000; |
3'b010:wb_sel<=8'b00100000; |
3'b011:wb_sel<=8'b00010000; |
3'b100:wb_sel<=8'b00001000; |
3'b101:wb_sel<=8'b00000100; |
3'b110:wb_sel<=8'b00000010; |
3'b111:wb_sel<=8'b00000001; |
endcase |
3'b001://Halfword |
case(pcx_packet_d[64+2:64+1]) |
2'b00:wb_sel<=8'b11000000; |
2'b01:wb_sel<=8'b00110000; |
2'b10:wb_sel<=8'b00001100; |
2'b11:wb_sel<=8'b00000011; |
endcase |
3'b010://Word |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; |
3'b011://Doubleword |
wb_sel<=8'b11111111; |
3'b100://Quadword |
wb_sel<=8'b11111111; |
3'b111://Cacheline |
wb_sel<=8'b11111111; |
default: |
wb_sel<=8'b01011010; // Unreal eye-catching value for debug |
endcase |
end |
5'b00001://Store |
begin |
wb_we<=1; |
if(pcx_packet_d[110:109]!=2'b00) //Block (or init) store |
wb_sel<=8'b11111111; // Blocks are always 64 bit |
else |
case(pcx_packet_d[106:104]) //Size |
3'b000://Byte |
case(pcx_packet_d[64+2:64]) |
3'b000:wb_sel<=8'b10000000; |
3'b001:wb_sel<=8'b01000000; |
3'b010:wb_sel<=8'b00100000; |
3'b011:wb_sel<=8'b00010000; |
3'b100:wb_sel<=8'b00001000; |
3'b101:wb_sel<=8'b00000100; |
3'b110:wb_sel<=8'b00000010; |
3'b111:wb_sel<=8'b00000001; |
endcase |
3'b001://Halfword |
case(pcx_packet_d[64+2:64+1]) |
2'b00:wb_sel<=8'b11000000; |
2'b01:wb_sel<=8'b00110000; |
2'b10:wb_sel<=8'b00001100; |
2'b11:wb_sel<=8'b00000011; |
endcase |
3'b010://Word |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; |
3'b011://Doubleword |
wb_sel<=8'b11111111; |
default: |
if(`DEBUGGING) |
wb_sel<=8'b01011010; // Unreal eye-catching value for debug |
endcase |
end |
5'b00010://CAS |
begin |
wb_we<=0; //Load first |
wb_sel<=8'b11111111; // CAS loads are as cacheline |
end |
5'b00100://STRLOAD |
begin |
wb_we<=0; |
wb_sel<=8'b11111111; // Stream loads are always 128 bit |
end |
5'b00101://STRSTORE |
begin |
wb_we<=1; |
case(pcx_packet_d[106:104]) //Size |
3'b000://Byte |
case(pcx_packet_d[64+2:64]) |
3'b000:wb_sel<=8'b10000000; |
3'b001:wb_sel<=8'b01000000; |
3'b010:wb_sel<=8'b00100000; |
3'b011:wb_sel<=8'b00010000; |
3'b100:wb_sel<=8'b00001000; |
3'b101:wb_sel<=8'b00000100; |
3'b110:wb_sel<=8'b00000010; |
3'b111:wb_sel<=8'b00000001; |
endcase |
3'b001://Halfword |
case(pcx_packet_d[64+2:64+1]) |
2'b00:wb_sel<=8'b11000000; |
2'b01:wb_sel<=8'b00110000; |
2'b10:wb_sel<=8'b00001100; |
2'b11:wb_sel<=8'b00000011; |
endcase |
3'b010://Word |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; |
3'b011://Doubleword |
wb_sel<=8'b11111111; |
3'b100://Quadword |
wb_sel<=8'b11111111; |
3'b111://Cacheline |
wb_sel<=8'b11111111; |
default: |
wb_sel<=8'b01011010; // Unreal eye-catching value for debug |
endcase |
end |
5'b00110://SWAP/LDSTUB |
begin |
wb_we<=0; // Load first, as CAS |
wb_sel<=8'b11111111; // SWAP/LDSTUB loads are as cacheline |
end |
5'b01001://INT |
if(pcx_packet_d[117]) // Flush |
cpx_packet_1<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer |
else // Tread-to-thread interrupt |
cpx_packet_1<={9'h170,pcx_packet_d[113:112],52'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; |
//5'b01010: FP1 - processed by separate state |
//5'b01011: FP2 - processed by separate state |
//5'b01101: FWDREQ - not implemented |
//5'b01110: FWDREPL - not implemented |
5'b10000://IFILL |
begin |
wb_we<=0; |
if(pcx_req_d[4]) // I/O access |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; |
else |
wb_sel<=8'b11111111; |
end |
default: |
begin |
wb_we<=0; |
wb_sel<=8'b10101010; // Unreal eye-catching value for debug |
end |
endcase |
end |
`PCX_REQ_STEP1_1: |
state<=`PCX_REQ_STEP1_2; // Delay for L1 directory |
`PCX_REQ_STEP1_2: |
begin |
if(wb_ack || wb_ack_d) |
begin |
cpx_packet_1[144]<=1; // Valid |
cpx_packet_1[139]<=(pcx_packet_d[122:118]==5'b00000) || (pcx_packet_d[122:118]==5'b10000) ? 1:0; // L2 always miss on load and ifill |
cpx_packet_1[138:137]<=0; // Error |
cpx_packet_1[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheble is set on store too |
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID |
if((pcx_packet_d[122:118]==5'b00000 && !pcx_packet_d[117] && !pcx_packet_d[110]) || (pcx_packet_d[122:118]==5'b10000)) // Cacheble Load or IFill |
cpx_packet_1[133:131]<={othercachehit[0],wayval0}; |
else |
cpx_packet_1[133:131]<=3'b000; // Way valid |
if(pcx_packet_d[122:118]==5'b00100) // Strload |
cpx_packet_1[130]<=pcx_packet_d[106]; // A |
else |
if(pcx_packet_d[122:118]==5'b00101) // Stream store |
cpx_packet_1[130]<=pcx_packet_d[108]; // A |
else |
cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && pcx_req_d[4]) ? 1:0; // Four byte fill |
if(pcx_packet_d[122:118]==5'b00100) // Strload |
cpx_packet_1[129]<=pcx_packet_d[105]; // B |
else |
cpx_packet_1[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110); // SWAP is single-packet but needs atom in CPX |
cpx_packet_1[128]<=pcx_packet_d[110] && pcx_packet_d[122:118]==5'b00000; // Prefetch |
cpx_packet_2[144]<=1; // Valid |
cpx_packet_2[139]<=0; // L2 miss |
cpx_packet_2[138:137]<=0; // Error |
cpx_packet_2[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheble is set on store too |
cpx_packet_2[135:134]<=pcx_packet_d[113:112]; // Thread ID |
if(pcx_packet_d[122:118]==5'b10000) // IFill |
cpx_packet_2[133:131]<={othercachehit[1],wayval1}; |
else |
cpx_packet_2[133:131]<=3'b000; // Way valid |
cpx_packet_2[130]<=0; // Four byte fill |
cpx_packet_2[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110) || ((pcx_packet_d[122:118]==5'b10000) && !pcx_req_d[4]); |
cpx_packet_2[128]<=0; // Prefetch |
wb_strobe<=0; |
wb_sel<=8'b0; |
wb_addr<=64'b0; |
wb_data_o<=64'b0; |
wb_we<=0; |
case(pcx_packet_d[122:118]) // Packet type |
5'b00000://Load |
begin |
cpx_packet_1[143:140]<=4'b0000; // Type |
if(!pcx_req_d[4]) |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
state<=`PCX_REQ_STEP2; |
end |
else |
case(pcx_packet_d[106:104]) //Size |
3'b000://Byte |
begin |
case(pcx_packet_d[64+2:64]) |
3'b000:cpx_packet_1[127:0]<={wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56]}; |
3'b001:cpx_packet_1[127:0]<={wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48]}; |
3'b010:cpx_packet_1[127:0]<={wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40]}; |
3'b011:cpx_packet_1[127:0]<={wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32]}; |
3'b100:cpx_packet_1[127:0]<={wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24]}; |
3'b101:cpx_packet_1[127:0]<={wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16]}; |
3'b110:cpx_packet_1[127:0]<={wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8]}; |
3'b111:cpx_packet_1[127:0]<={wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0]}; |
endcase |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
3'b001://Halfword |
begin |
case(pcx_packet_d[64+2:64+1]) |
2'b00:cpx_packet_1[127:0]<={wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48]}; |
2'b01:cpx_packet_1[127:0]<={wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32]}; |
2'b10:cpx_packet_1[127:0]<={wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16]}; |
2'b11:cpx_packet_1[127:0]<={wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0]}; |
endcase |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
3'b010://Word |
begin |
if(pcx_packet_d[64+2]==0) |
cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]}; |
else |
cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]}; |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
3'b011://Doubleword |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
3'b100://Quadword |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
wb_cycle<=0; |
state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data |
end |
3'b111://Cacheline |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
wb_cycle<=0; |
state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data |
end |
default: |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
wb_cycle<=0; |
state<=`PCX_UNKNOWN; |
end |
endcase |
end |
5'b00001://Store |
begin |
cpx_packet_1[143:140]<=4'b0100; // Type |
cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,2'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0}; |
// if((pcx_packet_d[110:109]==2'b01) && (pcx_packet_d[64+5:64]==0) && !inval_vect0[3] && !inval_vect1[3]) // Block init store |
// state<=`PCX_BIS; |
// else |
// begin |
wb_cycle<=0; |
state<=`CPX_READY_1; |
// end |
end |
5'b00010://CAS |
begin |
cpx_packet_1[143:140]<=4'b0000; // Load return for first packet |
cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet |
cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0}; |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
state<=`PCX_REQ_STEP2; |
end |
5'b00100://STRLOAD |
begin |
cpx_packet_1[143:140]<=4'b0010; // Type |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
state<=`PCX_REQ_STEP2; |
end |
5'b00101://STRSTORE |
begin |
cpx_packet_1[143:140]<=4'b0110; // Type |
cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0}; |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
5'b00110://SWAP/LDSTUB |
begin |
cpx_packet_1[143:140]<=4'b0000; // Load return for first packet |
cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet |
cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],inval_vect0}; |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
state<=`PCX_REQ_STEP2; |
end |
5'b10000://IFILL |
begin |
cpx_packet_1[143:140]<=4'b0001; // Type |
cpx_packet_2[143:140]<=4'b0001; // Type |
if(pcx_req_d[4]) // I/O access |
begin |
if(pcx_packet_d[64+2]==0) |
cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]}; |
else |
cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]}; |
state<=`CPX_READY_1; |
wb_cycle<=0; |
end |
else |
begin |
cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; |
state<=`PCX_REQ_STEP2; |
end |
end |
default: |
begin |
wb_cycle<=0; |
state<=`PCX_UNKNOWN; |
end |
endcase |
end |
end |
`PCX_REQ_STEP2: // IFill, Load/strload, CAS, SWAP, LDSTUB - always load |
begin |
wb_strobe<=1'b1; |
if(pcx_packet_d[122:118]==5'b10000) |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b01000}; |
else |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b1000}; |
wb_sel<=8'b11111111; // It is always full width for subsequent IFill and load accesses |
state<=`PCX_REQ_STEP2_1; |
end |
`PCX_REQ_STEP2_1: |
if(wb_ack==1) |
begin |
wb_strobe<=0; |
wb_sel<=8'b0; |
wb_addr<=64'b0; |
wb_data_o<=64'b0; |
wb_we<=0; |
cpx_packet_1[63:0]<=wb_data_i; |
if((pcx_packet_d[122:118]!=5'b00000) && (pcx_packet_d[122:118]!=5'b00100)) |
if(pcx_packet_d[122:118]!=5'b00010) // IFill, SWAP |
state<=`PCX_REQ_STEP3; |
else |
state<=`PCX_REQ_CAS_COMPARE; // CAS |
else |
begin |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
end |
`PCX_REQ_CAS_COMPARE: |
begin |
cpx_two_packet<=1; |
if(pcx_packet_d[106:104]==3'b010) // 32-bit |
case(pcx_packet_d[64+3:64+2]) |
2'b00:state<=cpx_packet_1[127:96]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1; |
2'b01:state<=cpx_packet_1[95:64]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1; |
2'b10:state<=cpx_packet_1[63:32]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1; |
2'b11:state<=cpx_packet_1[31:0]==pcx_packet_d[63:32] ? `PCX_REQ_STEP3:`CPX_READY_1; |
endcase |
else |
if(pcx_packet_d[64+3]==0) |
state<=cpx_packet_1[127:64]==pcx_packet_d[63:0] ? `PCX_REQ_STEP3:`CPX_READY_1; |
else |
state<=cpx_packet_1[63:0]==pcx_packet_d[63:0] ? `PCX_REQ_STEP3:`CPX_READY_1; |
end |
`PCX_REQ_STEP3: // 256-bit IFILL; CAS, SWAP and LDSTUB store |
begin |
if(pcx_packet_d[122:118]==5'b10000) |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b10000}; |
else |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // CAS or SWAP save |
cpx_two_packet<=1; |
if(pcx_packet_d[122:118]==5'b10000) |
wb_we<=0; |
else |
wb_we<=1; |
wb_strobe<=1'b1; |
if(pcx_packet_d[122:118]==5'b00010) // CAS |
if(pcx_packet_d[106:104]==3'b010) |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; |
else |
wb_sel<=8'b11111111; //CASX |
else |
if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB |
if(pcx_packet_d[106:104]==3'b000) //LDSTUB |
case(pcx_packet_d[64+2:64]) |
3'b000:wb_sel<=8'b10000000; |
3'b001:wb_sel<=8'b01000000; |
3'b010:wb_sel<=8'b00100000; |
3'b011:wb_sel<=8'b00010000; |
3'b100:wb_sel<=8'b00001000; |
3'b101:wb_sel<=8'b00000100; |
3'b110:wb_sel<=8'b00000010; |
3'b111:wb_sel<=8'b00000001; |
endcase |
else |
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; ///SWAP is always 32-bit |
else |
wb_sel<=8'b11111111; // It is always full width for subsequent IFill accesses |
if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB |
wb_data_o<={pcx_packet_d[63:32],pcx_packet_d[63:32]}; |
// wb_data_o<=pcx_packet_d[63:0]; |
else |
wb_data_o<=pcx_packet_2nd[63:0]; // CAS store second packet data |
// if(pcx_packet_d[106:104]==3'b010) |
// wb_data_o<={pcx_packet_2nd[63:32],pcx_packet_2nd[63:32]}; // CAS store second packet data |
// else |
// wb_data_o<=pcx_packet_2nd[63:0]; |
state<=`PCX_REQ_STEP3_1; |
end |
`PCX_REQ_STEP3_1: |
if(wb_ack==1) |
begin |
wb_strobe<=0; |
wb_sel<=8'b0; |
wb_addr<=64'b0; |
wb_we<=0; |
wb_data_o<=64'b0; |
if(pcx_packet_d[122:118]==5'b10000) // IFill |
begin |
cpx_packet_2[127:64]<=wb_data_i; |
state<=`PCX_REQ_STEP4; |
end |
else |
begin |
wb_cycle<=0; |
state<=`CPX_READY_1; |
end |
end |
`PCX_REQ_STEP4: // 256-bit IFILL only |
begin |
wb_strobe<=1'b1; |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b11000}; |
wb_sel<=8'b11111111; // It is always full width for subsequent accesses |
state<=`PCX_REQ_STEP4_1; |
end |
`PCX_REQ_STEP4_1: |
if(wb_ack==1) |
begin |
wb_cycle<=0; |
wb_strobe<=0; |
wb_sel<=8'b0; |
wb_addr<=64'b0; |
wb_we<=0; |
cpx_packet_2[63:0]<=wb_data_i; |
state<=`CPX_READY_1; |
end |
`PCX_BIS: // Block init store |
begin |
wb_strobe<=1'b1; |
wb_we<=1; |
wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+6],6'b001000}; |
wb_sel<=8'b11111111; |
wb_data_o<=64'b0; |
state<=`PCX_BIS_1; |
end |
`PCX_BIS_1: |
if(wb_ack) |
begin |
wb_strobe<=0; |
if(wb_addr[39:0]<(pcx_packet_d[64+39:64]+8*7)) |
state<=`PCX_BIS_2; |
else |
begin |
wb_cycle<=0; |
wb_sel<=0; |
wb_we<=0; |
wb_addr<=64'b0; |
state<=`CPX_READY_1; |
end |
end |
`PCX_BIS_2: |
begin |
wb_strobe<=1'b1; |
wb_addr[5:0]<=wb_addr[5:0]+8; |
state<=`PCX_BIS_1; |
end |
`PCX_FP_1: |
begin |
fp_pcx<=pcx_packet_d; |
fp_req<=1; |
state<=`PCX_FP_2; |
if(`DEBUGGING) |
begin |
wb_addr<=pcx_packet_d[103:64]; |
wb_data_o<=pcx_packet_d[63:0]; |
wb_sel<=8'h22; |
end |
end |
`PCX_FP_2: |
begin |
fp_pcx<=pcx_packet_2nd; |
state<=`FP_WAIT; |
if(`DEBUGGING) |
begin |
wb_addr<=pcx_packet_2nd[103:64]; |
wb_data_o<=pcx_packet_d[63:0]; |
wb_sel<=8'h23; |
end |
end |
`FP_WAIT: |
begin |
fp_pcx<=124'b0; |
fp_req<=0; |
if(fp_rdy) |
state<=`CPX_FP; |
if(`DEBUGGING) |
wb_sel<=8'h24; |
end |
`CPX_FP: |
if(fp_cpx[144]) // Packet valid |
begin |
cpx_packet_1<=fp_cpx; |
state<=`CPX_READY_1; |
if(`DEBUGGING) |
begin |
wb_addr<=fp_cpx[63:0]; |
wb_data_o<=fp_cpx[127:64]; |
end |
end |
else |
if(!fp_rdy) |
state<=`FP_WAIT; // Else wait for another one if it is not here still |
`CPX_SEND_ETH_IRQ: |
begin |
cpx_packet_1<=145'h1_7_000_000000000000001D_000000000000_001D; |
eth_int_sent<=0; |
state<=`CPX_READY_1; |
end |
`CPX_INT_VEC_DIS: |
begin |
if(pcx_packet_d[12:10]==3'b000) |
cpx_two_packet<=1; // Send interrupt only if it is for this core |
cpx_packet_1[144:140]<=5'b10100; |
cpx_packet_1[139:137]<=0; |
cpx_packet_1[136]<=1; |
cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID |
cpx_packet_1[133:130]<=0; |
cpx_packet_1[129]<=pcx_atom_d; |
cpx_packet_1[128]<=0; |
cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0}; |
cpx_packet_2<={9'h170,54'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; |
state<=`CPX_READY_1; |
end |
`CPX_READY_1: |
begin |
cpx_ready<=1; |
cpx_packet<=cpx_packet_1; |
cnt<=cnt+1; |
if(`DEBUGGING) |
if(multi_hit || multi_hit1) |
wb_sel<=8'h11; |
if(!cpx_two_packet) |
state<=`PCX_IDLE; |
else |
//if(cnt==4'b1111 || pcx_packet_d[103:64]!=40'h9800000800) |
state<=`CPX_READY_2; |
end |
`CPX_READY_2: |
begin |
cpx_ready<=1; |
cpx_packet<=cpx_packet_2; |
state<=`PCX_IDLE; |
end |
`PCX_UNKNOWN: |
begin |
wb_sel<=8'b10100101; // Illegal eye-catching value for debugging |
state<=`PCX_IDLE; |
end |
endcase |
|
// L1 directory instance. Requests are always presented as CPU 0, and the
// directory is strobed while the bridge FSM sits in `GOT_PCX_REQ.
// The request-type inputs are one-hot decodes of pcx_packet[122:118].
l1dir l1dir_inst(
   .clk(clk),
   .reset(!rstn),                                      // l1dir reset is active high, rstn is active low

   .cpu(1'b0),                                         // Issuing CPU number (sized literal: the port is 1 bit wide)
   .strobe(state==`GOT_PCX_REQ),
   .way(pcx_packet[108:107]),                          // Way to allocate for allocating loads
   .address(pcx_packet[64+39:64]),
   .load(pcx_packet[122:118]==5'b00000),
   .ifill(pcx_packet[122:118]==5'b10000),
   .store(pcx_packet[122:118]==5'b00001),
   .cas(pcx_packet[122:118]==5'b00010),
   .swap(pcx_packet[122:118]==5'b00110),
   .strload(pcx_packet[122:118]==5'b00100),
   .strstore(pcx_packet[122:118]==5'b00101),
   .cacheable((!pcx_packet[117]) && (!pcx_req_d[4])),  // NOTE(review): uses the delayed pcx_req_d next to the undelayed pcx_packet - confirm timing
   .prefetch(pcx_packet[110]),
   .invalidate(pcx_packet[111]),
   .blockstore(pcx_packet[109] | pcx_packet[110]),

   .inval_vect0(inval_vect0),                          // Invalidation vector
   .inval_vect1(inval_vect1),
   .othercachehit(othercachehit),                      // Other cache hit in the same CPU, wayval0/wayval1
   .othercpuhit(othercpuhit),                          // Any cache hit in the other CPU, wayval0/wayval1
   .wayval0(wayval0),                                  // Way valid
   .wayval1(wayval1),                                  // Second way valid for ifill
   .ready(ready)                                       // Directory init done (last connection: no trailing comma)
);
|
endmodule |
/os2wb/l1idir.v
0,0 → 1,178
// l1idir: 4-way, 128-set tag directory (presumably shadowing the L1 I-cache
// tags, judging by the module name - confirm against the instantiating module).
//
// Storage is two `cachedir` instances, each used through two ports (a/b);
// port a is addressed at {1'b0,addr} and port b at {1'b1,addr}, which gives
// one 29-bit word per way per index. A stored word is {tag,1'b0}; only bits
// [28:1] take part in the tag compare.
// NOTE(review): cachedir is assumed to be a synchronous dual-port RAM with a
// clock enable - confirm against its definition.
//
// Operations are sampled in the idle state when strobe is high, with the
// priority invalidate > allocate > deallocate > query:
//   invalidate - writes `INVAL_TAG into all four ways at index
//   allocate   - writes tag into the way selected by `way` at index
//   deallocate - reads all four ways at index, then overwrites every way
//                whose read-back tag equals the latched tag with `INVAL_TAG
//   query      - reads all four ways at index
// Strobes arriving while the FSM is outside the idle state are not acted on
// by the FSM.
//
// hit is refreshed two clock edges after the edge that sampled a strobed
// query or deallocate; on any other edge with strobe high it is cleared.
module l1idir(
   input             clk,
   input             reset,       // Asynchronous, active high

   input      [ 6:0] index,       // Set index
   input      [ 1:0] way,         // Way written by allocate
   input      [27:0] tag,
   input             strobe,      // Qualifies the operation inputs below
   input             query,
   input             allocate,    //tag->{way,index}
   input             deallocate,  //if({way,index}==tag) {way,index}<-`INVAL_TAG
   input             invalidate,  //all ways

   output reg [ 2:0] hit,         // {exactly one way matched, number of that way}

   output reg        ready        // directory init completed
);

`define INVAL_TAG 28'h8000000

reg  [27:0] tag_d;                // Tag latched for the compare
reg  [ 6:0] addr;                 // Index presented to all four ways
reg  [ 3:0] we;                   // Per-way write strobes
reg  [ 3:0] re;                   // Per-way read strobes (only feed the RAM enables)
reg  [28:0] di;                   // Write data, {tag,1'b0}

wire [28:0] do0;
wire [28:0] do1;
wire [28:0] do2;
wire [28:0] do3;
reg         query_d;
reg         deallocate_d;
reg         query_d1;
reg         deallocate_d1;

// Latch the tag being looked up so it is still available when the RAM
// outputs are compared.
always @(posedge clk)
   if(strobe && (query || deallocate))
      tag_d<=tag;

// Two-stage delay of the lookup requests; the second stage gates the hit
// register update below.
always @(posedge clk)
   begin
      query_d<=query && strobe;
      deallocate_d<=deallocate && strobe;
      query_d1<=query_d;
      deallocate_d1<=deallocate_d;
   end

// Ways 0 and 1
cachedir icache01 (
   .clock(clk),
   .enable(we[0] || re[0] || we[1] || re[1]),
   .wren_a(we[0]),
   .address_a({1'b0,addr}),
   .data_a(di),
   .q_a(do0),

   .wren_b(we[1]),
   .address_b({1'b1,addr}),
   .data_b(di),
   .q_b(do1)
);

// Ways 2 and 3
cachedir icache23 (
   .clock(clk),
   .enable(we[2] || re[2] || we[3] || re[3]),
   .wren_a(we[2]),
   .address_a({1'b0,addr}),
   .data_a(di),
   .q_a(do2),

   .wren_b(we[3]),
   .address_b({1'b1,addr}),
   .data_b(di),
   .q_b(do3)
);

// One bit per way: stored tag equals the latched tag.
wire [3:0] hitvect={(do3[28:1]==tag_d),(do2[28:1]==tag_d),(do1[28:1]==tag_d),(do0[28:1]==tag_d)};

`define L1IDIR_RESET 3'b000
`define L1IDIR_INIT 3'b001
`define L1IDIR_IDLE 3'b010
`define L1IDIR_READ 3'b011
`define L1IDIR_DEALLOC 3'b100

reg [2:0] state;

always @(posedge clk or posedge reset)
   if(reset)
      begin
         state<=`L1IDIR_RESET;
         ready<=0;
         // Clear the RAM strobes so the enable inputs are never X: re in
         // particular has no other assignment before its first use in idle.
         we<=4'b0000;
         re<=4'b0000;
      end
   else
      case(state)
         `L1IDIR_RESET:
            begin
               // Start the init sweep: write the invalid tag to every way,
               // beginning at index 0.
               addr<=7'b0;
               di<={`INVAL_TAG,1'b0};
               we<=4'b1111;
               state<=`L1IDIR_INIT;
            end
         `L1IDIR_INIT:
            begin
               addr<=addr+1;
               // Index 127 is being written on this edge, so the sweep is done.
               if(addr==7'b1111111)
                  begin
                     we<=4'b0;
                     ready<=1;
                     state<=`L1IDIR_IDLE;
                  end
            end
         `L1IDIR_IDLE:
            // NOTE(review): the final else below pairs with if(query)
            // (nearest-if rule), so we/re are cleared only by a strobe that
            // carries no operation. While strobe is low they hold their
            // previous values; the resulting repeated writes reuse the same
            // addr and di. Confirm this is intended rather than an else
            // meant for if(strobe).
            if(strobe)
               begin
                  if(invalidate)
                     begin
                        we<=4'b1111;
                        addr<=index;
                        di<={`INVAL_TAG,1'b0};
                     end
                  else if(allocate)
                     begin
                        case(way)
                           2'b00:we<=4'b0001;
                           2'b01:we<=4'b0010;
                           2'b10:we<=4'b0100;
                           2'b11:we<=4'b1000;
                        endcase
                        addr<=index;
                        di<={tag,1'b0};
                     end
                  else if(deallocate)
                     begin
                        re<=4'b1111;
                        we<=4'b0000;
                        addr<=index;
                        state<=`L1IDIR_READ;
                     end
                  else if(query)
                     begin
                        addr<=index;
                        re<=4'b1111;
                        we<=4'b0000;
                     end
                  else
                     begin
                        we<=4'b0000;
                        re<=4'b0000;
                     end
               end
         `L1IDIR_READ:
            // Wait one cycle between issuing the read and using hitvect.
            state<=`L1IDIR_DEALLOC;
         `L1IDIR_DEALLOC:
            begin
               // Overwrite exactly the ways that matched with the invalid tag.
               re<=0;
               di<={`INVAL_TAG,1'b0};
               we<=hitvect;
               state<=`L1IDIR_IDLE;
            end
         default:
            begin
               // Unused encodings: drop the RAM strobes and return to idle
               // instead of staying stuck.
               we<=4'b0000;
               re<=4'b0000;
               state<=`L1IDIR_IDLE;
            end
      endcase

// Encode the match vector. Anything other than a single matching way
// (no match or several) reports 3'b000.
always @(posedge clk)
   if(query_d1 || deallocate_d1)
      case(hitvect)
         4'b0001:hit<=3'b100;
         4'b0010:hit<=3'b101;
         4'b0100:hit<=3'b110;
         4'b1000:hit<=3'b111;
         default:hit<=3'b000; // Hits will be ORed then
      endcase
   else
      if(strobe)
         hit<=3'b000;

endmodule