URL
https://opencores.org/ocsvn/s1_core/s1_core/trunk
Subversion Repositories s1_core
[/] [s1_core/] [trunk/] [hdl/] [rtl/] [s1_top/] [os2wb.v] - Rev 114
Compare with Previous | Blame | View Log
`timescale 1ns / 1ps ////////////////////////////////////////////////////////////////////////////////// // Company: (C) Athree, 2009 // Engineer: Dmitry Rozhdestvenskiy // Email: dmitryr@a3.spb.ru divx4log@narod.ru // // Design Name: Bridge from SPARC Core to Wishbone Master // Module Name: os2wb // Project Name: SPARC SoC single-core // // LICENSE: // This is a Free Hardware Design; you can redistribute it and/or // modify it under the terms of the GNU General Public License // version 2 as published by the Free Software Foundation. // The above named program is distributed in the hope that it will // be useful, but WITHOUT ANY WARRANTY; without even the implied // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU General Public License for more details. // ////////////////////////////////////////////////////////////////////////////////// module os2wb( input clk, input rstn, // Core interface input [ 4:0] pcx_req, input pcx_atom, input [123:0] pcx_data, output reg [ 4:0] pcx_grant, output reg cpx_ready, output reg [144:0] cpx_packet, // Wishbone master interface input [ 63:0] wb_data_i, input wb_ack, output reg wb_cycle, output reg wb_strobe, output reg wb_we, output reg [ 7:0] wb_sel, output reg [ 63:0] wb_addr, output reg [ 63:0] wb_data_o, // FPU interface output reg [123:0] fp_pcx, output reg fp_req, input [144:0] fp_cpx, input fp_rdy, // Ethernet interrupt, sensed on posedge, mapped to vector 'd29 input eth_int ); // FSM State typedef enum { FSM_STATE_TEST_DRAM_1 , /* 5'b00000 */ FSM_STATE_TEST_DRAM_2 , /* 5'b00001 */ FSM_STATE_TEST_DRAM_3 , /* 5'b00010 */ FSM_STATE_TEST_DRAM_4 , /* 5'b00011 */ FSM_STATE_INIT_DRAM_1 , /* 5'b00100 */ FSM_STATE_INIT_DRAM_2 , /* 5'b00101 */ FSM_STATE_WAKEUP , /* 5'b00110 */ FSM_STATE_PCX_IDLE , /* 5'b00111 */ FSM_STATE_GOT_PCX_REQ , /* 5'b01000 */ FSM_STATE_PCX_REQ_2ND , /* 5'b01001 */ FSM_STATE_PCX_REQ_STEP1 , /* 5'b01010 */ FSM_STATE_PCX_REQ_STEP1_1 , /* 5'b01011 */ FSM_STATE_PCX_REQ_STEP2 , /* 
5'b01100 */ FSM_STATE_PCX_REQ_STEP2_1 , /* 5'b01101 */ FSM_STATE_PCX_REQ_STEP3 , /* 5'b01110 */ FSM_STATE_PCX_REQ_STEP3_1 , /* 5'b01111 */ FSM_STATE_PCX_REQ_STEP4 , /* 5'b10000 */ FSM_STATE_PCX_REQ_STEP4_1 , /* 5'b10001 */ FSM_STATE_PCX_BIS , /* 5'b10010 */ FSM_STATE_PCX_BIS_1 , /* 5'b10011 */ FSM_STATE_PCX_BIS_2 , /* 5'b10100 */ FSM_STATE_CPX_READY_1 , /* 5'b10101 */ FSM_STATE_CPX_READY_2 , /* 5'b10110 */ FSM_STATE_PCX_UNKNOWN , /* 5'b11000 */ FSM_STATE_PCX_FP_1 , /* 5'b11001 */ FSM_STATE_PCX_FP_2 , /* 5'b11010 */ FSM_STATE_FP_WAIT , /* 5'b11011 */ FSM_STATE_CPX_FP , /* 5'b11100 */ FSM_STATE_CPX_SEND_ETH_IRQ , /* 5'b11101 */ FSM_STATE_CPX_INT_VEC_DIS , /* 5'b11110 */ FSM_STATE_PCX_REQ_CAS_COMPARE /* 5'b11111 */ } fsm_state_t; reg fsm_state_t fsm_state; reg [123:0] pcx_packet_d; // Latched incoming PCX packet reg [123:0] pcx_packet_2nd; // Second packet for atomic (CAS) reg [ 4:0] pcx_req_d; // Latched request reg pcx_atom_d; // Latched atomic flasg reg [144:0] cpx_packet_1; // First CPX packet reg [144:0] cpx_packet_2; // Second CPX packet (for atomics and cached IFILLs) reg cpx_two_packet; // CPX answer is two-packet (!=atomic, SWAP has atomic==0 and answer is two-packet) reg [ 3:0] inval_vect0; // Invalidate, instr/data, way reg [ 3:0] inval_vect1; // IFill may cause two D lines invalidation at a time wire [111:0] store_inv_vec; // Store invalidation vector assign store_inv_vec[111:91]=0; assign store_inv_vec[90:88]=((pcx_packet_d[64+5:64+4]==2'b11) && inval_vect0[3:2]==2'b11) ? {inval_vect0[1:0],1'b1}:3'b000; assign store_inv_vec[87:60]=0; assign store_inv_vec[59:56]=((pcx_packet_d[64+5:64+4]==2'b10) && inval_vect0[3:2]==2'b11) || ((pcx_packet_d[64+5]==1'b1) && inval_vect0[3:2]==2'b10) ? {inval_vect0[1:0],!inval_vect0[2],inval_vect0[2]}:4'b0000; assign store_inv_vec[55:35]=0; assign store_inv_vec[34:32]=((pcx_packet_d[64+5:64+4]==2'b01) && inval_vect0[3:2]==2'b11) ? 
{inval_vect0[1:0],1'b1}:3'b000; assign store_inv_vec[31:4]=0; assign store_inv_vec[3:0]=((pcx_packet_d[64+5:64+4]==2'b00) && inval_vect0[3:2]==2'b11) || ((pcx_packet_d[64+5]==1'b0) && inval_vect0[3:2]==2'b10) ? {inval_vect0[1:0],!inval_vect0[2],inval_vect0[2]}:4'b0000; wire [28:0] dcache0_do0; wire [28:0] dcache0_do1; wire [28:0] dcache1_do0; wire [28:0] dcache1_do1; wire [28:0] dcache2_do0; wire [28:0] dcache2_do1; wire [28:0] dcache3_do0; wire [28:0] dcache3_do1; wire [28:0] icache0_do; wire [28:0] icache1_do; wire [28:0] icache2_do; wire [28:0] icache3_do; `define MEM_SIZE 64'h00000000_10000000 reg cache_init; wire [3:0] dcache0_hit; wire [3:0] dcache1_hit; wire [3:0] icache_hit; reg multi_hit; reg multi_hit1; reg eth_int_d; reg eth_int_send; reg eth_int_sent; reg [3:0] cnt; // PCX channel FIFO wire [129:0] pcx_data_fifo; wire pcx_fifo_empty; wire pcx_fifo_full; // New, unused reg [ 4:0] pcx_req_1; reg [ 4:0] pcx_req_2; reg pcx_atom_1; reg pcx_atom_2; reg pcx_data_123_d; // Moved up compared to Dmitry's reg fifo_rd; wire fifo_wr; wire [123:0] pcx_packet; assign pcx_packet=pcx_data_fifo[123:0]; `ifdef SIMPLY_RISC_DEBUG // For debugging logic printed_once = 0; `endif always @(posedge clk) begin pcx_req_1<=pcx_req; pcx_atom_1<=pcx_atom; pcx_atom_2<=pcx_atom_1; pcx_req_2<=pcx_atom_1 ? 
pcx_req_1:5'b0; pcx_grant<=(pcx_req_1 | pcx_req_2); pcx_data_123_d<=pcx_data[123]; end assign fifo_wr = (pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d); pcx_fifo pcx_fifo_inst( // FIFO should be first word fall-through // It has no full flag as the core will send only limited number of requests, // in original design we used it 32 words deep // Just make it deeper if you experience overflow - // you can't just send no grant on full because the core expects immediate // grant for at least two requests for each zone .aclr(!rstn), .clock(clk), .data({pcx_atom_1,pcx_req_1,pcx_data}), .rdreq(fifo_rd), .wrreq(fifo_wr), // Second atomic packet for FPU may be invalid, but should be sent to FPU // so if the first atomic packet is valid we latch both .empty(pcx_fifo_empty), .full(pcx_fifo_full), .q(pcx_data_fifo) ); // -------------------------- always @(posedge clk or negedge rstn) if(!rstn) eth_int_send<=0; else begin eth_int_d<=eth_int; if(eth_int && !eth_int_d) eth_int_send<=1; else if(eth_int_sent) eth_int_send<=0; end always @(posedge clk or negedge rstn) if(rstn==0) begin `ifdef SIMPLY_RISC_TWEAKS fsm_state <= FSM_STATE_WAKEUP; `ifdef SIMPLY_RISC_DEBUG if (!printed_once) begin $display("FSM Reset"); printed_once = 1; end `endif `else `ifdef FPGA_TEST_DRAM fsm_state <= FSM_STATE_TEST_DRAM_1; `else fsm_state <= FSM_STATE_INIT_DRAM_1; // DRAM initialization is mandatory for FPGA! 
`endif `endif cpx_ready<=0; fifo_rd<=0; cpx_packet<=`CPX_WIDTH'b0; wb_cycle<=0; wb_strobe<=0; wb_we<=0; wb_sel<=0; wb_addr<=64'b0; wb_data_o<=64'b0; pcx_packet_d<=124'b0; fp_pcx<=124'b0; fp_req<=0; end else case(fsm_state) FSM_STATE_TEST_DRAM_1: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Test DRAM 1"); `endif wb_cycle<=1; wb_strobe<=1; wb_sel<=8'hFF; wb_we<=1; fsm_state <= FSM_STATE_TEST_DRAM_2; end FSM_STATE_TEST_DRAM_2: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Test DRAM 2"); `endif if(wb_ack) begin wb_strobe<=0; if(wb_addr<`MEM_SIZE-8) begin wb_addr[31:0]<=wb_addr[31:0]+8; wb_data_o<={wb_addr[31:0]+32'd8,wb_addr[31:0]+32'd8}; fsm_state <= FSM_STATE_TEST_DRAM_1; end else begin fsm_state <= FSM_STATE_TEST_DRAM_3; wb_cycle<=0; wb_sel<=0; wb_we<=0; wb_data_o<=64'b0; wb_addr<=64'b0; end end // if (wb_ack) end // case: FSM_STATE_TEST_DRAM_2 FSM_STATE_TEST_DRAM_3: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Test DRAM 3"); `endif wb_cycle<=1; wb_strobe<=1; wb_sel<=8'hFF; fsm_state <= FSM_STATE_TEST_DRAM_4; end FSM_STATE_TEST_DRAM_4: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Test DRAM 4"); `endif if(wb_ack) begin wb_strobe<=0; if(wb_addr<`MEM_SIZE-8) begin if(wb_data_i=={wb_addr[31:0],wb_addr[31:0]}) begin wb_addr[31:0]<=wb_addr[31:0]+8; fsm_state <= FSM_STATE_TEST_DRAM_3; end end else begin fsm_state <= FSM_STATE_INIT_DRAM_1; wb_cycle<=0; wb_sel<=0; wb_we<=0; wb_data_o<=64'b0; wb_addr<=64'b0; end end // if (wb_ack) end // case: FSM_STATE_TEST_DRAM_4 FSM_STATE_INIT_DRAM_1: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Init DRAM 1"); `endif wb_cycle<=1; wb_strobe<=1; wb_sel<=8'hFF; wb_we<=1; cache_init<=1; // We also init cache directories here fsm_state <= FSM_STATE_INIT_DRAM_2; end FSM_STATE_INIT_DRAM_2: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Init DRAM 2"); `endif if(wb_ack) begin wb_strobe<=0; if(wb_addr<`MEM_SIZE-8) begin wb_addr[31:0]<=wb_addr[31:0]+8; pcx_packet_d[64+11:64+4]<=pcx_packet_d[64+11:64+4]+1; // Address 
for cachedir init fsm_state <= FSM_STATE_INIT_DRAM_1; end else begin fsm_state <= FSM_STATE_WAKEUP; wb_cycle<=0; wb_sel<=0; wb_we<=0; cache_init<=0; wb_addr<=64'b0; end end // if (wb_ack) end // case: FSM_STATE_INIT_DRAM_2 FSM_STATE_WAKEUP: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State WakeUp"); `endif cpx_packet<=`CPX_WIDTH'h1700000000000000000000000000000010001; cpx_ready<=1; fsm_state <= FSM_STATE_PCX_IDLE; end FSM_STATE_PCX_IDLE: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Idle"); `endif cnt<=0; cpx_packet<=`CPX_WIDTH'b0; cpx_ready<=0; cpx_two_packet<=0; inval_vect0[3]<=0; inval_vect1[3]<=0; multi_hit<=0; multi_hit1<=0; if(eth_int_send) begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Idle - Ethernet Int Send"); `endif fsm_state <= FSM_STATE_CPX_SEND_ETH_IRQ; eth_int_sent<=1; end else if(!pcx_fifo_empty) begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Idle - PCX FIFO not empty"); `endif pcx_req_d<=pcx_data_fifo[128:124]; pcx_atom_d<=pcx_data_fifo[129]; fifo_rd<=1; fsm_state <= FSM_STATE_GOT_PCX_REQ; end else begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Idle - Default case, i.e. 
no Ethernet and PCX FIFO empty"); `endif end end FSM_STATE_GOT_PCX_REQ: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Got PCX Req"); `endif pcx_packet_d<=pcx_packet; `ifdef FPGA_DEBUGGING wb_sel[1:0]<=pcx_packet[113:112]; wb_sel[2]<=1; `endif if(pcx_packet[103:64]==40'h9800000800 && pcx_packet[122:118]==5'b00001) begin fsm_state <= FSM_STATE_CPX_INT_VEC_DIS; fifo_rd<=0; end else if(pcx_atom_d==0) begin fifo_rd<=0; if(pcx_packet[122:118]==5'b01010) begin // FP req fsm_state <= FSM_STATE_PCX_FP_1; pcx_packet_2nd[123]<=0; end else fsm_state <= FSM_STATE_PCX_REQ_STEP1; end else fsm_state <= FSM_STATE_PCX_REQ_2ND; end FSM_STATE_PCX_REQ_2ND: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State Got PCX Req 2nd"); `endif pcx_packet_2nd<=pcx_packet; //Latch second packet for atomics `ifdef FPGA_DEBUGGING if(pcx_fifo_empty) wb_sel<=8'h67; `endif fifo_rd<=0; if(pcx_packet_d[122:118]==5'b01010) // FP req fsm_state <= FSM_STATE_PCX_FP_1; else fsm_state <= FSM_STATE_PCX_REQ_STEP1; end FSM_STATE_PCX_REQ_STEP1: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req Step 1"); `endif if(pcx_packet_d[111]==1'b1) // Invalidate request begin cpx_packet_1[144]<=1; // Valid cpx_packet_1[143:140]<=4'b0100; // Invalidate reply is Store ACK cpx_packet_1[139]<=1; // L2 miss cpx_packet_1[138:137]<=0; // Error cpx_packet_1[136]<=pcx_packet_d[117]; // Non-cacheble cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID cpx_packet_1[133:131]<=0; // Way valid cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && (pcx_req_d==5'b10000)) ? 1:0; // Four byte fill cpx_packet_1[129]<=pcx_atom_d; cpx_packet_1[128]<=pcx_packet_d[110]; // Prefetch cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,pcx_packet_d[122:118]==5'b00000 ? 
2'b01:2'b10,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0}; fsm_state <= FSM_STATE_CPX_READY_1; end else if(pcx_packet_d[122:118]!=5'b01001) // Not INT begin wb_cycle<=1'b1; wb_strobe<=1'b1; if((pcx_packet_d[122:118]==5'b00000 && !pcx_req_d[4]) || pcx_packet_d[122:118]==5'b00010 || pcx_packet_d[122:118]==5'b00100 || pcx_packet_d[122:118]==5'b00110) wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b0000}; //DRAM load/streamload, CAS and SWAP always use DRAM and load first else if(pcx_packet_d[122:118]==5'b10000 && !pcx_req_d[4]) wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b00000}; //DRAM ifill else if(pcx_packet_d[64+39:64+28]==12'hFFF && pcx_packet_d[64+27:64+24]!=4'b0) // flash remap FFF1->FFF8 wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3]+37'h0000E00000,3'b000}; else wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; wb_data_o<=pcx_packet_d[63:0]; fsm_state <= FSM_STATE_PCX_REQ_STEP1_1; end else if((pcx_packet_d[12:10]!=3'b000) && !pcx_packet_d[117]) // Not FLUSH int and not this core fsm_state <= FSM_STATE_PCX_IDLE; else fsm_state <= FSM_STATE_CPX_READY_1; case(pcx_packet_d[122:118]) // Packet type 5'b00000://Load begin wb_we<=0; if(!pcx_packet_d[110] && !pcx_packet_d[117]) case(icache_hit) 4'b0000:; 4'b0001:inval_vect0<=4'b1_0_00; 4'b0010:inval_vect0<=4'b1_0_01; 4'b0100:inval_vect0<=4'b1_0_10; 4'b1000:inval_vect0<=4'b1_0_11; default:multi_hit<=1; endcase if(!pcx_req_d[4]) wb_sel<=8'b11111111; // DRAM requests are always 128 bit else case(pcx_packet_d[106:104]) //Size 3'b000://Byte case(pcx_packet_d[64+2:64]) 3'b000:wb_sel<=8'b10000000; 3'b001:wb_sel<=8'b01000000; 3'b010:wb_sel<=8'b00100000; 3'b011:wb_sel<=8'b00010000; 3'b100:wb_sel<=8'b00001000; 3'b101:wb_sel<=8'b00000100; 3'b110:wb_sel<=8'b00000010; 3'b111:wb_sel<=8'b00000001; endcase 3'b001://Halfword case(pcx_packet_d[64+2:64+1]) 2'b00:wb_sel<=8'b11000000; 2'b01:wb_sel<=8'b00110000; 2'b10:wb_sel<=8'b00001100; 2'b11:wb_sel<=8'b00000011; endcase 3'b010://Word 
wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; 3'b011://Doubleword wb_sel<=8'b11111111; 3'b100://Quadword wb_sel<=8'b11111111; 3'b111://Cacheline wb_sel<=8'b11111111; default: wb_sel<=8'b01011010; // Unreal eye-catching value for debug endcase end 5'b00001://Store begin wb_we<=1; case({icache_hit,dcache0_hit}) 8'b00000000:; 8'b00000001:inval_vect0<=4'b1_1_00; 8'b00000010:inval_vect0<=4'b1_1_01; 8'b00000100:inval_vect0<=4'b1_1_10; 8'b00001000:inval_vect0<=4'b1_1_11; 8'b00010000:inval_vect0<=4'b1_0_00; 8'b00100000:inval_vect0<=4'b1_0_01; 8'b01000000:inval_vect0<=4'b1_0_10; 8'b10000000:inval_vect0<=4'b1_0_11; default:multi_hit<=1; endcase if(pcx_packet_d[110:109]!=2'b00) //Block (or init) store wb_sel<=8'b11111111; // Blocks are always 64 bit else case(pcx_packet_d[106:104]) //Size 3'b000://Byte case(pcx_packet_d[64+2:64]) 3'b000:wb_sel<=8'b10000000; 3'b001:wb_sel<=8'b01000000; 3'b010:wb_sel<=8'b00100000; 3'b011:wb_sel<=8'b00010000; 3'b100:wb_sel<=8'b00001000; 3'b101:wb_sel<=8'b00000100; 3'b110:wb_sel<=8'b00000010; 3'b111:wb_sel<=8'b00000001; endcase 3'b001://Halfword case(pcx_packet_d[64+2:64+1]) 2'b00:wb_sel<=8'b11000000; 2'b01:wb_sel<=8'b00110000; 2'b10:wb_sel<=8'b00001100; 2'b11:wb_sel<=8'b00000011; endcase 3'b010://Word wb_sel<=(pcx_packet_d[64+2]==0) ? 
8'b11110000:8'b00001111; 3'b011://Doubleword wb_sel<=8'b11111111; default: `ifdef FPGA_DEBUGGING wb_sel<=8'b01011010; // Unreal eye-catching value for debug `else wb_sel<=8'b01010101; `endif endcase end 5'b00010://CAS begin wb_we<=0; //Load first case({icache_hit,dcache0_hit}) 8'b00000000:; 8'b00000001:inval_vect0<=4'b1_1_00; 8'b00000010:inval_vect0<=4'b1_1_01; 8'b00000100:inval_vect0<=4'b1_1_10; 8'b00001000:inval_vect0<=4'b1_1_11; 8'b00010000:inval_vect0<=4'b1_0_00; 8'b00100000:inval_vect0<=4'b1_0_01; 8'b01000000:inval_vect0<=4'b1_0_10; 8'b10000000:inval_vect0<=4'b1_0_11; default:multi_hit<=1; endcase wb_sel<=8'b11111111; // CAS loads are as cacheline end 5'b00100://STRLOAD begin wb_we<=0; wb_sel<=8'b11111111; // Stream loads are always 128 bit end 5'b00101://STRSTORE begin wb_we<=1; case({icache_hit,dcache0_hit}) 8'b00000000:; 8'b00000001:inval_vect0<=4'b1_1_00; 8'b00000010:inval_vect0<=4'b1_1_01; 8'b00000100:inval_vect0<=4'b1_1_10; 8'b00001000:inval_vect0<=4'b1_1_11; 8'b00010000:inval_vect0<=4'b1_0_00; 8'b00100000:inval_vect0<=4'b1_0_01; 8'b01000000:inval_vect0<=4'b1_0_10; 8'b10000000:inval_vect0<=4'b1_0_11; default:multi_hit<=1; endcase case(pcx_packet_d[106:104]) //Size 3'b000://Byte case(pcx_packet_d[64+2:64]) 3'b000:wb_sel<=8'b10000000; 3'b001:wb_sel<=8'b01000000; 3'b010:wb_sel<=8'b00100000; 3'b011:wb_sel<=8'b00010000; 3'b100:wb_sel<=8'b00001000; 3'b101:wb_sel<=8'b00000100; 3'b110:wb_sel<=8'b00000010; 3'b111:wb_sel<=8'b00000001; endcase 3'b001://Halfword case(pcx_packet_d[64+2:64+1]) 2'b00:wb_sel<=8'b11000000; 2'b01:wb_sel<=8'b00110000; 2'b10:wb_sel<=8'b00001100; 2'b11:wb_sel<=8'b00000011; endcase 3'b010://Word wb_sel<=(pcx_packet_d[64+2]==0) ? 
8'b11110000:8'b00001111; 3'b011://Doubleword wb_sel<=8'b11111111; 3'b100://Quadword wb_sel<=8'b11111111; 3'b111://Cacheline wb_sel<=8'b11111111; default: wb_sel<=8'b01011010; // Unreal eye-catching value for debug endcase end 5'b00110://SWAP/LDSTUB begin case({icache_hit,dcache0_hit}) 8'b00000000:; 8'b00000001:inval_vect0<=4'b1_1_00; 8'b00000010:inval_vect0<=4'b1_1_01; 8'b00000100:inval_vect0<=4'b1_1_10; 8'b00001000:inval_vect0<=4'b1_1_11; 8'b00010000:inval_vect0<=4'b1_0_00; 8'b00100000:inval_vect0<=4'b1_0_01; 8'b01000000:inval_vect0<=4'b1_0_10; 8'b10000000:inval_vect0<=4'b1_0_11; default:multi_hit<=1; endcase wb_we<=0; // Load first, as CAS wb_sel<=8'b11111111; // SWAP/LDSTUB loads are as cacheline end 5'b01001://INT if(pcx_packet_d[117]) // Flush cpx_packet_1<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer else // Tread-to-thread interrupt cpx_packet_1<={9'h170,pcx_packet_d[113:112],52'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; //5'b01010: FP1 - processed by separate state //5'b01011: FP2 - processed by separate state //5'b01101: FWDREQ - not implemented //5'b01110: FWDREPL - not implemented 5'b10000://IFILL begin wb_we<=0; if(!pcx_req_d[4]) // not I/O access begin case(dcache0_hit) 4'b0000:; 4'b0001:inval_vect0<=4'b1_1_00; 4'b0010:inval_vect0<=4'b1_1_01; 4'b0100:inval_vect0<=4'b1_1_10; 4'b1000:inval_vect0<=4'b1_1_11; default:multi_hit<=1; endcase case(dcache1_hit) 4'b0000:; 4'b0001:inval_vect1<=4'b1_1_00; 4'b0010:inval_vect1<=4'b1_1_01; 4'b0100:inval_vect1<=4'b1_1_10; 4'b1000:inval_vect1<=4'b1_1_11; default:multi_hit1<=1; endcase end if(pcx_req_d[4]) // I/O access wb_sel<=(pcx_packet_d[64+2]==0) ? 
8'b11110000:8'b00001111; else wb_sel<=8'b11111111; end default: begin wb_we<=0; wb_sel<=8'b10101010; // Unreal eye-catching value for debug end endcase end FSM_STATE_PCX_REQ_STEP1_1: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req Step 1.1"); `endif if(wb_ack) begin cpx_packet_1[144]<=1; // Valid cpx_packet_1[139]<=(pcx_packet_d[122:118]==5'b00000) || (pcx_packet_d[122:118]==5'b10000) ? 1:0; // L2 always miss on load and ifill cpx_packet_1[138:137]<=0; // Error cpx_packet_1[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheble is set on store too cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID if((pcx_packet_d[122:118]==5'b00000 && !pcx_packet_d[117] && !pcx_packet_d[110]) || (pcx_packet_d[122:118]==5'b10000)) // Cacheble Load or IFill cpx_packet_1[133:131]<={inval_vect0[3],inval_vect0[1:0]}; else cpx_packet_1[133:131]<=3'b000; // Way valid if(pcx_packet_d[122:118]==5'b00100) // Strload cpx_packet_1[130]<=pcx_packet_d[106]; // A else if(pcx_packet_d[122:118]==5'b00101) // Stream store cpx_packet_1[130]<=pcx_packet_d[108]; // A else cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && pcx_req_d[4]) ? 1:0; // Four byte fill if(pcx_packet_d[122:118]==5'b00100) // Strload cpx_packet_1[129]<=pcx_packet_d[105]; // B else cpx_packet_1[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110); // SWAP is single-packet but needs atom in CPX cpx_packet_1[128]<=pcx_packet_d[110] && pcx_packet_d[122:118]==5'b00000; // Prefetch cpx_packet_2[144]<=1; // Valid cpx_packet_2[139]<=0; // L2 miss cpx_packet_2[138:137]<=0; // Error cpx_packet_2[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 
1:0; // Non-cacheble is set on store too cpx_packet_2[135:134]<=pcx_packet_d[113:112]; // Thread ID if(pcx_packet_d[122:118]==5'b10000) // IFill cpx_packet_2[133:131]<={inval_vect1[3],inval_vect1[1:0]}; else cpx_packet_2[133:131]<=3'b000; // Way valid cpx_packet_2[130]<=0; // Four byte fill cpx_packet_2[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110) || ((pcx_packet_d[122:118]==5'b10000) && !pcx_req_d[4]); cpx_packet_2[128]<=0; // Prefetch wb_strobe<=0; wb_sel<=8'b0; wb_addr<=64'b0; wb_data_o<=64'b0; wb_we<=0; case(pcx_packet_d[122:118]) // Packet type 5'b00000://Load begin cpx_packet_1[143:140]<=4'b0000; // Type if(!pcx_req_d[4]) begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; fsm_state <= FSM_STATE_PCX_REQ_STEP2; end else case(pcx_packet_d[106:104]) //Size 3'b000://Byte begin case(pcx_packet_d[64+2:64]) 3'b000:cpx_packet_1[127:0]<={wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56]}; 3'b001:cpx_packet_1[127:0]<={wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48]}; 3'b010:cpx_packet_1[127:0]<={wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40]}; 
3'b011:cpx_packet_1[127:0]<={wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32]}; 3'b100:cpx_packet_1[127:0]<={wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24]}; 3'b101:cpx_packet_1[127:0]<={wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16]}; 3'b110:cpx_packet_1[127:0]<={wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8]}; 3'b111:cpx_packet_1[127:0]<={wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0]}; endcase wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end 3'b001://Halfword begin case(pcx_packet_d[64+2:64+1]) 2'b00:cpx_packet_1[127:0]<={wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48]}; 2'b01:cpx_packet_1[127:0]<={wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32]}; 
2'b10:cpx_packet_1[127:0]<={wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16]}; 2'b11:cpx_packet_1[127:0]<={wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0]}; endcase wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end 3'b010://Word begin if(pcx_packet_d[64+2]==0) cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]}; else cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]}; wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end 3'b011://Doubleword begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end 3'b100://Quadword begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; // 16 byte access to PROM should just duplicate the data end 3'b111://Cacheline begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; // 16 byte access to PROM should just duplicate the data end default: begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; wb_cycle<=0; fsm_state <= FSM_STATE_PCX_UNKNOWN; end endcase end 5'b00001://Store begin cpx_packet_1[143:140]<=4'b0100; // Type cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,2'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec}; // if((pcx_packet_d[110:109]==2'b01) && (pcx_packet_d[64+5:64]==0) && !inval_vect0[3] && !inval_vect1[3]) // Block init store // fsm_state <= FSM_STATE_PCX_BIS; // else // begin wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; // end end 5'b00010://CAS begin cpx_packet_1[143:140]<=4'b0000; // Load return for first packet cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec}; cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; 
fsm_state <= FSM_STATE_PCX_REQ_STEP2; end 5'b00100://STRLOAD begin cpx_packet_1[143:140]<=4'b0010; // Type cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; fsm_state <= FSM_STATE_PCX_REQ_STEP2; end 5'b00101://STRSTORE begin cpx_packet_1[143:140]<=4'b0110; // Type cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec}; wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end 5'b00110://SWAP/LDSTUB begin cpx_packet_1[143:140]<=4'b0000; // Load return for first packet cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec}; cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; fsm_state <= FSM_STATE_PCX_REQ_STEP2; end 5'b10000://IFILL begin cpx_packet_1[143:140]<=4'b0001; // Type cpx_packet_2[143:140]<=4'b0001; // Type if(pcx_req_d[4]) // I/O access begin if(pcx_packet_d[64+2]==0) cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]}; else cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]}; fsm_state <= FSM_STATE_CPX_READY_1; wb_cycle<=0; end else begin cpx_packet_1[127:0]<={wb_data_i,wb_data_i}; fsm_state <= FSM_STATE_PCX_REQ_STEP2; end end default: begin wb_cycle<=0; fsm_state <= FSM_STATE_PCX_UNKNOWN; end endcase end end FSM_STATE_PCX_REQ_STEP2: // IFill, Load/strload, CAS, SWAP, LDSTUB - alwas load begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req Step 2"); `endif wb_strobe<=1'b1; if(pcx_packet_d[122:118]==5'b10000) wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b01000}; else wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b1000}; wb_sel<=8'b11111111; // It is always full width for subsequent IFill and load accesses fsm_state <= FSM_STATE_PCX_REQ_STEP2_1; end FSM_STATE_PCX_REQ_STEP2_1: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req Step 2.1"); `endif if(wb_ack==1) begin wb_strobe<=0; wb_sel<=8'b0; wb_addr<=64'b0; wb_data_o<=64'b0; wb_we<=0; 
cpx_packet_1[63:0]<=wb_data_i; if((pcx_packet_d[122:118]!=5'b00000) && (pcx_packet_d[122:118]!=5'b00100)) if(pcx_packet_d[122:118]!=5'b00010) // IFill, SWAP fsm_state <= FSM_STATE_PCX_REQ_STEP3; else fsm_state <= FSM_STATE_PCX_REQ_CAS_COMPARE; // CAS else begin wb_cycle<=0; fsm_state <= FSM_STATE_CPX_READY_1; end end // if (wb_ack==1) end // case: FSM_STATE_PCX_REQ_STEP2_1 FSM_STATE_PCX_REQ_CAS_COMPARE: begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req CAS Compare"); `endif cpx_two_packet<=1; if(pcx_packet_d[106:104]==3'b010) // 32-bit case(pcx_packet_d[64+3:64+2]) 2'b00: fsm_state <= (cpx_packet_1[127:96] == pcx_packet_d[63:32]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; 2'b01: fsm_state <= (cpx_packet_1[95:64] == pcx_packet_d[63:32]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; 2'b10: fsm_state <= (cpx_packet_1[63:32] == pcx_packet_d[63:32]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; 2'b11: fsm_state <= (cpx_packet_1[31:0] == pcx_packet_d[63:32]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; endcase else if(pcx_packet_d[64+3]==0) fsm_state <= (cpx_packet_1[127:64]==pcx_packet_d[63:0]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; else fsm_state <= (cpx_packet_1[63:0] == pcx_packet_d[63:0]) ? FSM_STATE_PCX_REQ_STEP3 : FSM_STATE_CPX_READY_1; end FSM_STATE_PCX_REQ_STEP3: // 256-bit IFILL; CAS, SWAP and LDSTUB store begin `ifdef SIMPLY_RISC_DEBUG $display("FSM State PCX Req Step 3"); `endif if(pcx_packet_d[122:118]==5'b10000) wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b10000}; else wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // CAS or SWAP save cpx_two_packet<=1; if(pcx_packet_d[122:118]==5'b10000) wb_we<=0; else wb_we<=1; wb_strobe<=1'b1; if(pcx_packet_d[122:118]==5'b00010) // CAS if(pcx_packet_d[106:104]==3'b010) wb_sel<=(pcx_packet_d[64+2]==0) ? 
8'b11110000:8'b00001111;
                  // NOTE(review): the statement completed on the line above, and the two
                  // `if`s that the following `else`s pair with, start before this excerpt.
                  // Judging by the trailing comments this is the CAS / SWAP / LDSTUB / IFill
                  // byte-lane selection of the state that precedes PCX_REQ_STEP3_1 - confirm
                  // against the full file.
                  else
                     wb_sel<=8'b11111111; // CASX
               else
                  if(pcx_packet_d[122:118]==5'b00110) // SWAP or LDSTUB
                     if(pcx_packet_d[106:104]==3'b000) // LDSTUB
                        // One byte lane, chosen by the three low address bits;
                        // offset 0 selects the most significant lane.
                        case(pcx_packet_d[64+2:64])
                           3'b000:wb_sel<=8'b10000000;
                           3'b001:wb_sel<=8'b01000000;
                           3'b010:wb_sel<=8'b00100000;
                           3'b011:wb_sel<=8'b00010000;
                           3'b100:wb_sel<=8'b00001000;
                           3'b101:wb_sel<=8'b00000100;
                           3'b110:wb_sel<=8'b00000010;
                           3'b111:wb_sel<=8'b00000001;
                        endcase
                     else
                        wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; // SWAP is always 32-bit
                  else
                     wb_sel<=8'b11111111; // It is always full width for subsequent IFill accesses
               if(pcx_packet_d[122:118]==5'b00110) // SWAP or LDSTUB
                  // Upper data word replicated on both 32-bit halves of the bus
                  wb_data_o<={pcx_packet_d[63:32],pcx_packet_d[63:32]};
                  // wb_data_o<=pcx_packet_d[63:0];
               else
                  wb_data_o<=pcx_packet_2nd[63:0]; // CAS store second packet data
               // if(pcx_packet_d[106:104]==3'b010)
               //    wb_data_o<={pcx_packet_2nd[63:32],pcx_packet_2nd[63:32]}; // CAS store second packet data
               // else
               //    wb_data_o<=pcx_packet_2nd[63:0];
               fsm_state <= FSM_STATE_PCX_REQ_STEP3_1;
            end

         // Wait for the Wishbone acknowledge of the access started by the previous state.
         // On ack the strobe is dropped and sel/addr/we/data are cleared. An IFill
         // (packet type 5'b10000) latches the read data into cpx_packet_2[127:64] and
         // continues with STEP4; every other packet type ends the bus cycle and goes on
         // to send the reply.
         FSM_STATE_PCX_REQ_STEP3_1:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Req Step 3.1");
`endif
               if(wb_ack==1)
                  begin
                     wb_strobe<=0;
                     wb_sel<=8'b0;
                     wb_addr<=64'b0;
                     wb_we<=0;
                     wb_data_o<=64'b0;
                     if(pcx_packet_d[122:118]==5'b10000) // IFill
                        begin
                           cpx_packet_2[127:64]<=wb_data_i;
                           fsm_state <= FSM_STATE_PCX_REQ_STEP4;
                        end
                     else
                        begin
                           wb_cycle<=0;
                           fsm_state <= FSM_STATE_CPX_READY_1;
                        end
                  end // if (wb_ack==1)
            end // case: FSM_STATE_PCX_REQ_STEP3_1

         // Issue the access at byte offset 5'b11000 (0x18) of the 32-byte aligned
         // request address, with all byte lanes selected.
         FSM_STATE_PCX_REQ_STEP4: // 256-bit IFILL only
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Req Step 4");
`endif
               wb_strobe<=1'b1;
               wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b11000};
               wb_sel<=8'b11111111; // It is always full width for subsequent accesses
               fsm_state <= FSM_STATE_PCX_REQ_STEP4_1;
            end

         // On ack: end the bus cycle, latch the read data into cpx_packet_2[63:0]
         // and go on to send the reply.
         FSM_STATE_PCX_REQ_STEP4_1:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Req Step 4.1");
`endif
               if(wb_ack==1)
                  begin
                     wb_cycle<=0;
                     wb_strobe<=0;
                     wb_sel<=8'b0;
                     wb_addr<=64'b0;
                     wb_we<=0;
                     cpx_packet_2[63:0]<=wb_data_i;
                     fsm_state <= FSM_STATE_CPX_READY_1;
                  end
            end // case: FSM_STATE_PCX_REQ_STEP4_1

         // Start a full-width write of zeros at byte offset 8 of the 64-byte aligned
         // block that contains the request address.
         FSM_STATE_PCX_BIS: // Block init store
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Bis");
`endif
               wb_strobe<=1'b1;
               wb_we<=1;
               wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+6],6'b001000};
               wb_sel<=8'b11111111;
               wb_data_o<=64'b0;
               fsm_state <= FSM_STATE_PCX_BIS_1;
            end

         // On ack: keep writing while the current address is below the request
         // address + 56 (8*7); otherwise release the bus and send the reply.
         // NOTE(review): the loop terminates only if the request address is 64-byte
         // aligned, because BIS_2 increments wb_addr[5:0] alone - presumably this is
         // guaranteed where FSM_STATE_PCX_BIS is entered; confirm.
         FSM_STATE_PCX_BIS_1:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Bis 1");
`endif
               if(wb_ack)
                  begin
                     wb_strobe<=0;
                     if(wb_addr[39:0]<(pcx_packet_d[64+39:64]+8*7))
                        fsm_state <= FSM_STATE_PCX_BIS_2;
                     else
                        begin
                           wb_cycle<=0;
                           wb_sel<=0;
                           wb_we<=0;
                           wb_addr<=64'b0;
                           fsm_state <= FSM_STATE_CPX_READY_1;
                        end
                  end // if (wb_ack)
            end // case: FSM_STATE_PCX_BIS_1

         // Advance to the next 8-byte word of the block and strobe again.
         FSM_STATE_PCX_BIS_2:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Bis 2");
`endif
               wb_strobe<=1'b1;
               wb_addr[5:0]<=wb_addr[5:0]+8;
               fsm_state <= FSM_STATE_PCX_BIS_1;
            end

         // Forward the first request packet to the FPU and raise fp_req.
         // With FPGA_DEBUGGING the otherwise idle Wishbone outputs mirror the
         // address and data fields of the forwarded packet, tagged by wb_sel.
         FSM_STATE_PCX_FP_1:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX FP 1");
`endif
               fp_pcx<=pcx_packet_d;
               fp_req<=1;
               fsm_state <= FSM_STATE_PCX_FP_2;
`ifdef FPGA_DEBUGGING
               wb_addr<=pcx_packet_d[103:64];
               wb_data_o<=pcx_packet_d[63:0];
               wb_sel<=8'h22;
`endif
            end

         // Forward the second request packet to the FPU (fp_req is still set).
         FSM_STATE_PCX_FP_2:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX FP 2");
`endif
               fp_pcx<=pcx_packet_2nd;
               fsm_state <= FSM_STATE_FP_WAIT;
`ifdef FPGA_DEBUGGING
               wb_addr<=pcx_packet_2nd[103:64];
               // Fixed: this used pcx_packet_d[63:0], i.e. the data of the first packet
               // next to the address of the second one. Mirror both fields of the packet
               // that is actually being forwarded, as FSM_STATE_PCX_FP_1 does.
               wb_data_o<=pcx_packet_2nd[63:0];
               wb_sel<=8'h23;
`endif
            end

         // Drop the FPU request and wait until the FPU signals fp_rdy.
         FSM_STATE_FP_WAIT:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State FP Wait");
`endif
               fp_pcx<=124'b0;
               fp_req<=0;
               if(fp_rdy)
                  fsm_state <= FSM_STATE_CPX_FP;
`ifdef FPGA_DEBUGGING
               wb_sel<=8'h24;
`endif
            end

         // Take the FPU reply once its valid bit (fp_cpx[144]) is set and send it
         // to the core as a single CPX packet.
         FSM_STATE_CPX_FP:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State CPX FP");
`endif
               if(fp_cpx[144]) // Packet valid
                  begin
                     cpx_packet_1<=fp_cpx;
                     fsm_state <= FSM_STATE_CPX_READY_1;
`ifdef FPGA_DEBUGGING
                     wb_addr<=fp_cpx[63:0];
                     wb_data_o<=fp_cpx[127:64];
`endif
                  end
               else
                  if(!fp_rdy)
                     fsm_state <= FSM_STATE_FP_WAIT; // Else wait for another one if it is not here still
            end // case: FSM_STATE_CPX_FP

         // Send a constant CPX packet carrying 'h1D (decimal 29) in its two low
         // fields and clear eth_int_sent.
         FSM_STATE_CPX_SEND_ETH_IRQ:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State CPX Send Eth IRQ");
`endif
               cpx_packet_1<=`CPX_WIDTH'h1_7_000_000000000000001D_000000000000_001D;
               eth_int_sent<=0;
               fsm_state <= FSM_STATE_CPX_READY_1;
            end

         // Build the reply to an interrupt request: cpx_packet_1 is assembled field by
         // field from the request, cpx_packet_2 carries pcx_packet_d[17:0] twice and is
         // sent as a second packet only when pcx_packet_d[12:10] is zero.
         FSM_STATE_CPX_INT_VEC_DIS:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State CPX Int Vec Dis");
`endif
               if(pcx_packet_d[12:10]==3'b000)
                  cpx_two_packet<=1; // Send interrupt only if it is for this core
               cpx_packet_1[144:140]<=5'b10100;
               cpx_packet_1[139:137]<=0;
               cpx_packet_1[136]<=1;
               cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
               cpx_packet_1[133:130]<=0;
               cpx_packet_1[129]<=pcx_atom_d;
               cpx_packet_1[128]<=0;
               cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0};
               cpx_packet_2<={9'h170,54'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]};
               fsm_state <= FSM_STATE_CPX_READY_1;
            end

         // Present cpx_packet_1 to the core with cpx_ready set. Return to idle unless
         // a second packet is pending (cpx_two_packet), in which case READY_2 follows.
         FSM_STATE_CPX_READY_1:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State CPX Ready 1");
`endif
               cpx_ready<=1;
               cpx_packet<=cpx_packet_1;
               cnt<=cnt+1;
`ifdef FPGA_DEBUGGING
               if(multi_hit || multi_hit1)
                  wb_sel<=8'h11;
`endif
               if(!cpx_two_packet)
                  fsm_state <= FSM_STATE_PCX_IDLE;
               else
                  //if(cnt==4'b1111 || pcx_packet_d[103:64]!=40'h9800000800)
                  fsm_state <= FSM_STATE_CPX_READY_2;
            end

         // Present cpx_packet_2 to the core, then return to idle.
         FSM_STATE_CPX_READY_2:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State CPX Ready 2");
`endif
               cpx_ready<=1;
               cpx_packet<=cpx_packet_2;
               fsm_state <= FSM_STATE_PCX_IDLE;
            end

         // Unsupported request: flag it on wb_sel and return to idle without a reply.
         FSM_STATE_PCX_UNKNOWN:
            begin
`ifdef SIMPLY_RISC_DEBUG
               $display("FSM State PCX Unknown");
`endif
               wb_sel<=8'b10100101; // Illegal eye-catching value for debugging
               fsm_state <= FSM_STATE_PCX_IDLE;
            end
      endcase

/* Cache directory checking:
   Load: allocate D if cacheable, check I, invalidate&deallocate if found
   Store: check I, invalidate&deallocate if found; check D, invalidate if found
   IFill: allocate I if cacheable, check D, invalidate&deallocate if found
   SWAP/LDSTUB: check I, invalidate&deallocate if found; check D, invalidate&deallocate if
found
   CAS: Like SWAP

   Allocation and querying is made simultaneously at GOT_PCX_REQ
   (memory read mode does not matter as long as allocation and invalidation
   are never made to the same directory, so if memory is written its output
   will not be checked)
   Invalidation vectors are built during PCX_REQ_STEP1, or Invalidate all ways issued
   During PCX_REQ_STEP1_1 directory is deallocated if needed
*/

// ---------------------------------------------------------------------------
// Cache directory control.
// Signals built from pcx_packet act in FSM_STATE_GOT_PCX_REQ (allocation);
// signals built from pcx_packet_d act in the PCX_REQ_STEP1 / STEP1_1 states
// (invalidation and deallocation).
// ---------------------------------------------------------------------------

// Directory enable: lookup/allocate, invalidate-all, initialisation, and the
// acknowledged cycle of PCX_REQ_STEP1_1 (deallocation)
assign dir_en = (fsm_state==FSM_STATE_GOT_PCX_REQ)
             || (fsm_state==FSM_STATE_PCX_REQ_STEP1)
             || cache_init
             || ((fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && wb_ack);

// ICache deallocation flag
assign loadstore =
      ((pcx_packet_d[122:118]==5'b00000) && !(pcx_packet_d[117] || pcx_packet_d[110])) // cacheable load, not prefetch
   || (pcx_packet_d[122:118]==5'b00001)   // Store
   || (pcx_packet_d[122:118]==5'b00010)   // CAS
   || (pcx_packet_d[122:118]==5'b00110)   // SWAP/LDSTUB
   || (pcx_packet_d[122:118]==5'b00101);  // StrStore

// DCache deallocation flag
assign ifillcas =
      (pcx_packet_d[122:118]==5'b00110)   // SWAP
   || (pcx_packet_d[122:118]==5'b00010)   // CAS
   || (pcx_packet_d[122:118]==5'b10000)   // IFill
   || (pcx_packet_d[122:118]==5'b00101)   // StrStore
   || ((pcx_packet_d[122:118]==5'b00001) && (pcx_packet_d[110:109]!=2'b00)); // Block (or init) store

// DCache allocation flag
assign cacheload  = (pcx_packet[122:118]==5'b00000)
                 && !pcx_packet[110]
                 && !pcx_packet[117]
                 && !pcx_packet[111];

// ICache allocation flag
assign cacheifill = (pcx_packet[122:118]==5'b10000)
                 && !pcx_packet[117]
                 && !pcx_packet[111];

// DCache per-way strobes.
//   alloc    : at GOT_PCX_REQ, for the way given by pcx_packet[108:107]
//   dealloc0 : at PCX_REQ_STEP1_1, when inval_vect0 is 4'b1_1_<way>
//   dealloc1 : at PCX_REQ_STEP1_1, when inval_vect1 is 4'b1_1_<way>
assign dcache0_alloc    = cacheload && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b00);
assign dcache0_dealloc0 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_00);
assign dcache0_dealloc1 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_00);

assign dcache1_alloc    = cacheload && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b01);
assign dcache1_dealloc0 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_01);
assign dcache1_dealloc1 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_01);

assign dcache2_alloc    = cacheload && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b10);
assign dcache2_dealloc0 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_10);
assign dcache2_dealloc1 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_10);

assign dcache3_alloc    = cacheload && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b11);
assign dcache3_dealloc0 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_11);
assign dcache3_dealloc1 = ifillcas  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_11);

// ICache per-way strobes.
//   alloc   : at GOT_PCX_REQ, for the way given by pcx_packet[108:107]
//   dealloc : at PCX_REQ_STEP1_1, when inval_vect0 is 4'b1_0_<way>
assign icache0_alloc   = cacheifill && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b00);
assign icache0_dealloc = loadstore  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_00);

assign icache1_alloc   = cacheifill && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b01);
assign icache1_dealloc = loadstore  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_01);

assign icache2_alloc   = cacheifill && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b10);
assign icache2_dealloc = loadstore  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_10);

assign icache3_alloc   = cacheifill && (fsm_state==FSM_STATE_GOT_PCX_REQ)     && (pcx_packet[108:107]==2'b11);
assign icache3_dealloc = loadstore  && (fsm_state==FSM_STATE_PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_11);

// "Invalidate all ways": a load (DCache) or an IFill (ICache) with packet bit 111
// set, issued during PCX_REQ_STEP1
assign dcache_inval_all = (fsm_state==FSM_STATE_PCX_REQ_STEP1) && pcx_packet_d[111] && (pcx_packet_d[122:118]==5'b00000);
assign icache_inval_all = (fsm_state==FSM_STATE_PCX_REQ_STEP1) && pcx_packet_d[111] && (pcx_packet_d[122:118]==5'b10000);

// Tag value written into a directory entry to mark it as not allocated
`define INVAL_TAG 29'h10000000

// DCache least address bit for first bank
// it should be 0 for IFill (1 is hardcoded for second bank)
assign dcache_la = (fsm_state==FSM_STATE_GOT_PCX_REQ)
                 ? ((pcx_packet[122:118]!=5'b10000)   ? pcx_packet[64+4]   : 1'b0)
                 : ((pcx_packet_d[122:118]!=5'b10000) ? pcx_packet_d[64+4] : 1'b0);

wire [ 6:0] dcache_index;
wire [28:0] dcache_data;

// The index is the 6-bit address field [10:5]; it is zero-extended explicitly
// to the declared 7-bit width (the original relied on implicit extension).
assign dcache_index = {1'b0, ((fsm_state==FSM_STATE_GOT_PCX_REQ) ? pcx_packet[64+10:64+5]
                                                                 : pcx_packet_d[64+10:64+5])};
// Real tag (address bits [39:11]) when allocating, INVAL_TAG otherwise
assign dcache_data  = (fsm_state==FSM_STATE_GOT_PCX_REQ) ? pcx_packet[64+39:64+11]
                                                         : `INVAL_TAG;

// DCache directories, one memory per way. Port A addresses {index, dcache_la},
// port B always addresses {index, 1'b1} and only ever writes INVAL_TAG.
// NOTE(review): `cachedir` is defined outside this file view; it is presumably a
// dual-port synchronous RAM - confirm port semantics against its definition.
cachedir dcache0 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (dcache0_alloc || dcache0_dealloc0 || dcache_inval_all || cache_init),
   .address_a({1'b0,dcache_index,dcache_la}),
   .data_a   (dcache_data),
   .q_a      (dcache0_do0),
   .wren_b   (dcache0_dealloc1),
   .address_b({1'b0,dcache_index,1'b1}),
   .data_b   (`INVAL_TAG),
   .q_b      (dcache0_do1)
);

cachedir dcache1 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (dcache1_alloc || dcache1_dealloc0 || dcache_inval_all || cache_init),
   .address_a({1'b0,dcache_index,dcache_la}),
   .data_a   (dcache_data),
   .q_a      (dcache1_do0),
   .wren_b   (dcache1_dealloc1),
   .address_b({1'b0,dcache_index,1'b1}),
   .data_b   (`INVAL_TAG),
   .q_b      (dcache1_do1)
);

cachedir dcache2 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (dcache2_alloc || dcache2_dealloc0 || dcache_inval_all || cache_init),
   .address_a({1'b0,dcache_index,dcache_la}),
   .data_a   (dcache_data),
   .q_a      (dcache2_do0),
   .wren_b   (dcache2_dealloc1),
   .address_b({1'b0,dcache_index,1'b1}),
   .data_b   (`INVAL_TAG),
   .q_b      (dcache2_do1)
);

cachedir dcache3 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (dcache3_alloc || dcache3_dealloc0 || dcache_inval_all || cache_init),
   .address_a({1'b0,dcache_index,dcache_la}),
   .data_a   (dcache_data),
   .q_a      (dcache3_do0),
   .wren_b   (dcache3_dealloc1),
   .address_b({1'b0,dcache_index,1'b1}),
   .data_b   (`INVAL_TAG),
   .q_b      (dcache3_do1)
);

// Per-way tag match against the latched request address; bit N belongs to way N.
// dcache0_hit uses the port A read data, dcache1_hit the port B read data.
assign dcache0_hit = {dcache3_do0==pcx_packet_d[64+39:64+11],
                      dcache2_do0==pcx_packet_d[64+39:64+11],
                      dcache1_do0==pcx_packet_d[64+39:64+11],
                      dcache0_do0==pcx_packet_d[64+39:64+11]};
assign dcache1_hit = {dcache3_do1==pcx_packet_d[64+39:64+11],
                      dcache2_do1==pcx_packet_d[64+39:64+11],
                      dcache1_do1==pcx_packet_d[64+39:64+11],
                      dcache0_do1==pcx_packet_d[64+39:64+11]};

wire [ 6:0] icache_index;
wire [28:0] icache_data;

assign icache_index = (fsm_state==FSM_STATE_GOT_PCX_REQ) ? pcx_packet[64+11:64+5]
                                                         : pcx_packet_d[64+11:64+5];
// Stored entry is {address[39:12], 1'b0} when allocating, INVAL_TAG otherwise
assign icache_data  = (fsm_state==FSM_STATE_GOT_PCX_REQ) ? {pcx_packet[64+39:64+12],1'b0}
                                                         : `INVAL_TAG;

// ICache directories: two ways share one memory. The even way uses port A at
// {2'b00,index}, the odd way uses port B at {2'b01,index}.
cachedir icache01 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (icache0_alloc || icache0_dealloc || icache_inval_all || cache_init),
   .address_a({2'b00,icache_index}),
   .data_a   (icache_data),
   .q_a      (icache0_do),
   .wren_b   (icache1_alloc || icache1_dealloc || icache_inval_all || cache_init),
   .address_b({2'b01,icache_index}),
   .data_b   (icache_data),
   .q_b      (icache1_do)
);

cachedir icache23 (
   .clock    (clk),
   .enable   (dir_en),
   .wren_a   (icache2_alloc || icache2_dealloc || icache_inval_all || cache_init),
   .address_a({2'b00,icache_index}),
   .data_a   (icache_data),
   .q_a      (icache2_do),
   .wren_b   (icache3_alloc || icache3_dealloc || icache_inval_all || cache_init),
   .address_b({2'b01,icache_index}),
   .data_b   (icache_data),
   .q_b      (icache3_do)
);

// Per-way tag match; the low bit of each stored entry is excluded from the compare.
assign icache_hit = {icache3_do[28:1]==pcx_packet_d[64+39:64+12],
                     icache2_do[28:1]==pcx_packet_d[64+39:64+12],
                     icache1_do[28:1]==pcx_packet_d[64+39:64+12],
                     icache0_do[28:1]==pcx_packet_d[64+39:64+12]};

/* Packet type reference (kept from the original as a disabled case skeleton):
case(pcx_packet_d[122:118]) // Packet type
   5'b00000://Load
   5'b00001://Store
   5'b00010://CAS
   5'b00100://STRLOAD
   5'b00101://STRSTORE
   5'b00110://SWAP
   5'b01001://INT
   //5'b01010://FP1
   //5'b01011://FP2
   //5'b01101://FWDREQ
   //5'b01110://FWDREPL
   5'b10000://IFILL
endcase
*/

endmodule