OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /thor/trunk
    from Rev 65 to Rev 66
    Reverse comparison

Rev 65 → Rev 66

/FT64v7/rtl/common/FT64_ICController.v
0,0 → 1,305
// ============================================================================
// __
// \\__/ o\ (C) 2017-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
//
// FT64_ICController.v
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// Core configuration (supplies `WAYS, which the controller below tests).
// Included by plain name, the same way FT64_alu.v includes it, so the path
// does not depend on a Windows-style '\' separator.
`include "FT64_config.vh"
// Logic levels used for the bus strobes. A text macro must not carry its own
// ';' - the statement that uses it already supplies one.
`define HIGH 1'b1
`define LOW 1'b0
 
// ----------------------------------------------------------------------------
// FT64_ICController
//
// Instruction cache miss / invalidate sequencer.
//
// While idle the controller watches hit0/hit1/hit2. On a miss it latches the
// line address (ASID prepended, low five bits cleared) into L1_adr, pulses
// L1_invline and records in icwhich which fetch slot missed. After three
// delay states it samples ihitL2:
//   - L2 hit seen on entry to IC_WaitL2: L2_dato is copied into L1_dat and
//     the L1 write strobes are pulsed.
//   - otherwise it waits until bstate == B_WaitIC and then runs a five beat
//     bus read (64 bits per beat, 48 bits used from the last one) that
//     assembles the 306 bit line in L1_dat. tlbmiss_i / exv_i / err_i end the
//     access early and store a line of 8'h3D (NOP) bytes tagged 1 / 2 / 3 in
//     L1_dat[305:304].
//
// A line invalidate request (invline / invlineAddr) is latched and serviced
// from the IDLE state ahead of any miss.
//
// There is no reset input; registers start from their declaration
// initialisers.
// ----------------------------------------------------------------------------
module FT64_ICController(clk_i, asid, pc0, pc1, pc2, hit0, hit1, hit2, bstate, state,
	invline, invlineAddr,
	thread_en, ihitL2, selL2, L2_ld, L2_cnt, L2_adr, L2_dato, L2_nxt,
	L1_selpc, L1_adr, L1_dat, L1_wr0, L1_wr1, L1_wr2, L1_en, L1_invline, icnxt, icwhich,
	icl_o, cti_o, bte_o, bok_i, cyc_o, stb_o, ack_i, err_i, tlbmiss_i, exv_i, sel_o, adr_o, dat_i);
parameter ABW = 64;
parameter AMSB = ABW-1;
parameter RSTPC = 64'hFFFFFFFFFFFC0100;
parameter TRUE = 1'b1;
parameter FALSE = 1'b0;
// State encodings (IDLE, IC_*, B_*). Pulled in here, ahead of the port
// declarations, because the initialiser of 'state' refers to IDLE.
`include "FT64_busStates.vh"

input clk_i;
input [7:0] asid;                       // prepended to the pc to form cache addresses
input [AMSB:0] pc0;
input [AMSB:0] pc1;
input [AMSB:0] pc2;
input hit0;                             // L1 hit flags for the three fetch slots
input hit1;
input hit2;
input [4:0] bstate;                     // compared against B_WaitIC before starting a bus access
(* mark_debug="true" *)
output reg [3:0] state = IDLE;
input invline;                          // request: invalidate the line at invlineAddr
input [71:0] invlineAddr;
input thread_en;                        // selects pc1/pc2 instead of pc0+offset for slots 1 and 2
input ihitL2;
output reg selL2 = 1'b0;
output reg L2_ld = 1'b0;
output [2:0] L2_cnt;                    // mirrors the beat counter
output reg [71:0] L2_adr = RSTPC;
input [305:0] L2_dato;
output reg L2_nxt;
output L1_selpc;
output reg [71:0] L1_adr = RSTPC;
output reg [305:0] L1_dat = {2'b0,{38{8'h3D}}};	// NOP
output reg L1_wr0;
output reg L1_wr1;
output reg L1_wr2;
output reg [9:0] L1_en;
output reg L1_invline;
output reg icnxt;
output reg [1:0] icwhich;               // 0, 1 or 2: fetch slot whose miss is being serviced
output reg icl_o = 1'b0;
output reg [2:0] cti_o = 3'b000;
output reg [1:0] bte_o = 2'b00;
input bok_i;                            // when low, strobe is dropped between beats (IC_Nack2)
output reg cyc_o = 1'b0;
output reg stb_o = 1'b0;
input ack_i;
input err_i;
input tlbmiss_i;
input exv_i;
output reg [7:0] sel_o;
output reg [71:0] adr_o;
input [63:0] dat_i;

reg [3:0] picstate;                     // state during the previous clock
reg invline_r = 1'b0;                   // an invalidate request is pending
reg [71:0] invlineAddr_r = 72'd0;

// NOTE(review): the names say +6 / +12 but the offsets added are 7 and 14 -
// confirm which is intended. Left exactly as coded.
wire [AMSB:0] pc0plus6 = pc0 + 8'd7;
wire [AMSB:0] pc0plus12 = pc0 + 8'd14;

//assign L2_ld = (state==IC_Ack) && (ack_i|err_i|tlbmiss_i|exv_i);
assign L1_selpc = (state==IDLE||state==IC_Next) && !invline_r;

wire clk = clk_i;
reg [2:0] iccnt;                        // beat counter for the line fill
assign L2_cnt = iccnt;

//BUFH uclkb (.I(clk_i), .O(clk));

always @(posedge clk)
begin
	// These outputs are one clock wide pulses: cleared here, set below on
	// the cycles where they apply.
	L1_wr0 <= FALSE;
	L1_wr1 <= FALSE;
	L1_wr2 <= FALSE;
	L1_en <= 10'h000;
	L1_invline <= FALSE;
	icnxt <= FALSE;
	L2_nxt <= FALSE;

	// Instruction cache state machine.
	// On a miss first see if the instruction is in the L2 cache. No need to go to
	// the BIU on an L1 miss.
	// If not the machine will wait until the BIU loads the L2 cache.

	// Remember the previous state; IC_WaitL2 uses it to tell its first cycle
	// (entered from IC3a) from later ones.
	picstate <= state;
	case(state)
	IDLE:
		begin
			iccnt <= 3'd0;
			if (invline_r) begin
				// Service the pending line invalidate.
				L1_adr <= {invlineAddr_r[71:5],5'b0};
				L1_invline <= TRUE;
				invline_r <= 1'b0;
			end
			// If the bus unit is busy doing an update involving L1_adr or L2_adr
			// we have to wait.
			else begin
				if (!hit0) begin
					L1_adr <= {asid,pc0[AMSB:5],5'h0};
					L1_invline <= TRUE;
					icwhich <= 2'b00;
					state <= IC2;
				end
				else if (!hit1 && `WAYS > 1) begin
					if (thread_en) begin
						L1_adr <= {asid,pc1[AMSB:5],5'h0};
					end
					else begin
						L1_adr <= {asid,pc0plus6[AMSB:5],5'h0};
					end
					L1_invline <= TRUE;
					icwhich <= 2'b01;
					state <= IC2;
				end
				else if (!hit2 && `WAYS > 2) begin
					if (thread_en) begin
						L1_adr <= {asid,pc2[AMSB:5],5'h0};
					end
					else begin
						L1_adr <= {asid,pc0plus12[AMSB:5],5'h0};
					end
					L1_invline <= TRUE;
					icwhich <= 2'b10;
					state <= IC2;
				end
			end
		end
	// Three delay states between presenting L1_adr and sampling ihitL2.
	IC2:	state <= IC3;
	IC3:	state <= IC3a;
	IC3a:	state <= IC_WaitL2;
	// If data was in the L2 cache already there's no need to wait on the
	// BIU to retrieve data. It can be determined if the hit signal was
	// already active when this state was entered in which case waiting
	// will do no good.
	// The IC machine will stall in this state until the BIU has loaded the
	// L2 cache.
	IC_WaitL2:
		if (ihitL2 && picstate==IC3a) begin
			L1_en <= 10'h3FF;
			L1_wr0 <= TRUE;
			L1_wr1 <= TRUE && `WAYS > 1;
			L1_wr2 <= TRUE && `WAYS > 2;
			// L1_dat is otherwise assembled beat by beat during a line fill
			L1_dat <= L2_dato;
			state <= IC5;
		end
		else begin
			if (bstate == B_WaitIC)
				state <= IC_Access;
		end
	// Delay states after the L1 write strobes before fetch is released.
	IC5:	state <= IC6;
	IC6:	state <= IC7;
	IC7:	state <= IC_Next;
	IC_Next:
		begin
			state <= IDLE;
			icnxt <= TRUE;
		end
	// Start the bus read of the missing line.
	IC_Access:
		begin
			iccnt <= 3'd0;
			icl_o <= `HIGH;
			cti_o <= 3'b001;
			bte_o <= 2'b00;
			cyc_o <= `HIGH;
			stb_o <= `HIGH;
			sel_o <= 8'hFF;
			// NOTE(review): only L1_adr[AMSB:5] is driven onto the 72 bit
			// adr_o, so the ASID bits are zero filled here while L2_adr
			// keeps them - confirm that is intended.
			adr_o <= {L1_adr[AMSB:5],5'b0};
			L2_adr <= L1_adr;
			L2_adr[4:0] <= 5'd0;
			selL2 <= TRUE;
			L2_ld <= TRUE;
			state <= IC_Ack;
		end
	// One beat of the line fill has been answered (normally or with a fault).
	IC_Ack:
		if (ack_i|err_i|tlbmiss_i|exv_i) begin
			L2_ld <= TRUE;
			iccnt <= iccnt + 3'd1;
			// Fourth beat accepted: flag the next beat as the last. This is
			// assigned before the fault handling so that a nack() issued in
			// the same cycle leaves cti_o at 3'b000 instead of 3'b111.
			if (iccnt==3'd3)
				cti_o <= 3'b111;
			if (!bok_i) begin
				// Drop the strobe between beats and step to the next
				// 64 bit word; IC_Nack2 raises the strobe again.
				stb_o <= `LOW;
				adr_o[AMSB:3] <= adr_o[AMSB:3] + 2'd1;
				state <= IC_Nack2;
			end
			if (tlbmiss_i) begin
				L1_dat[305:304] <= 2'd1;
				L1_dat[303:0] <= {38{8'h3D}};	// NOP
				nack();
			end
			else if (exv_i) begin
				L1_dat[305:304] <= 2'd2;
				L1_dat[303:0] <= {38{8'h3D}};	// NOP
				nack();
			end
			else if (err_i) begin
				L1_dat[305:304] <= 2'd3;
				L1_dat[303:0] <= {38{8'h3D}};	// NOP
				nack();
			end
			else
				case(iccnt)
				3'd0:	L1_dat[63:0] <= dat_i;
				3'd1:	L1_dat[127:64] <= dat_i;
				3'd2:	L1_dat[191:128] <= dat_i;
				3'd3:	L1_dat[255:192] <= dat_i;
				3'd4:	L1_dat[305:256] <= {2'b00,dat_i[47:0]};
				default:	L1_dat <= L1_dat;
				endcase
			// Fifth beat stored: finish the access and write the L1 line.
			if (iccnt>=3'd4)
				nack();
		end
	IC_Nack2:
		if (~ack_i) begin
			stb_o <= `HIGH;
			state <= IC_Ack;
		end
	// Access finished; wait for ack_i to drop before going idle.
	IC_Nack:
		begin
			iccnt <= iccnt + 3'd1;
			L2_ld <= FALSE;
			selL2 <= FALSE;
			if (~ack_i) begin
				//icl_ctr <= icl_ctr + 40'd1;
				state <= IDLE;
				L2_nxt <= TRUE;
			end
		end
	default:
		begin
			state <= IDLE;
		end
	endcase

	// Latch a new invalidate request. This sits after the state machine so
	// that a request arriving in the very cycle IDLE retires the previous
	// one is kept pending instead of being cleared along with it.
	if (invline) begin
		invline_r <= 1'b1;
		invlineAddr_r <= invlineAddr;
	end
end

// End the bus cycle, pulse the L1 write strobes so the assembled (or fault
// marker) line in L1_dat is written, and move to IC_Nack.
task nack;
begin
	icl_o <= `LOW;
	cti_o <= 3'b000;
	cyc_o <= `LOW;
	stb_o <= `LOW;
	L1_en <= 10'h3FF;
	L1_wr0 <= TRUE;
	L1_wr1 <= TRUE && `WAYS > 1;
	L1_wr2 <= TRUE && `WAYS > 2;
	state <= IC_Nack;
end
endtask

endmodule
/FT64v7/rtl/common/FT64_alu.v
25,10 → 25,9
`include "FT64_defines.vh"
`include "FT64_config.vh"
 
module FT64_alu(rst, clk, ld, abort, instr, sz, tlb, store, a, b, c, t, pc, Ra, tgt, tgt2, ven, vm,
module FT64_alu(rst, clk, ld, abort, instr, sz, store, a, b, c, t, pc, Ra, tgt, tgt2, ven, vm,
csr, o, ob, done, idle, excen, exc, thrd, ptrmask, state, mem, shift,
ol, dl, ASID, icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o, uncached, tlb_miss,
exv_o, rdv_o, wrv_o
ol, dl
`ifdef SUPPORT_BBMS
, pb, cbl, cbu, ro, dbl, dbu, sbl, sbu, en
`endif
47,7 → 46,6
input abort;
input [47:0] instr;
input [2:0] sz;
input tlb;
input store;
input [63:0] a;
input [63:0] b;
73,19 → 71,6
input shift;
input [1:0] ol;
input [1:0] dl;
input [7:0] ASID;
input icl_i;
input cyc_i;
input we_i;
input [ABW-1:0] vadr_i;
output cyc_o;
output we_o;
output [ABW-1:0] padr_o;
output uncached;
output tlb_miss;
output wrv_o;
output rdv_o;
output exv_o;
`ifdef SUPPORT_BBMS
input [63:0] pb;
input [63:0] cbl;
282,48 → 267,6
default: shift10 <= shift9;
endcase
 
wire tlb_done, tlb_idle;
wire [DBW-1:0] tlbo;
 
`ifdef SUPPORT_TLB
FT64_TLB utlb1 (
.rst(rst),
.clk(clk),
.ld(ld & tlb),
.done(tlb_done),
.idle(tlb_idle),
.ol(ol),
.ASID(ASID),
.op(instr[25:22]),
.regno(instr[21:18]),
.dati(a),
.dato(tlbo),
.uncached(uncached),
.icl_i(icl_i),
.cyc_i(cyc_i),
.we_i(we_i),
.vadr_i(vadr_i),
.cyc_o(cyc_o),
.we_o(we_o),
.padr_o(padr_o),
.TLBMiss(tlb_miss),
.wrv_o(wrv_o),
.rdv_o(rdv_o),
.exv_o(exv_o),
.HTLBVirtPageo()
);
`else
assign tlbo = 64'hDEADDEADDEADDEAD;
assign uncached = 1'b0;
assign padr_o = vadr_i;
assign cyc_o = cyc_i;
assign we_o = we_i;
assign tlb_miss = 1'b0;
assign wrv_o = 1'b0;
assign rdv_o = 1'b0;
assign exv_o = 1'b0;
`endif
 
FT64_bitfield #(DBW) ubf1
(
.inst(instr),
1047,9 → 990,24
`ADD:
`ifdef SIMD
case(sz)
3'd0,3'd4:
3'd0:
begin
o[7:0] = a[7:0] + b[7:0];
o[63:8] = {56{o[7]}};
end
3'd1:
begin
o[15:0] = a[15:0] + b[15:0];
o[63:16] = {48{o[15]}};
end
3'd2:
begin
o[31:0] = a[31:0] + b[31:0];
o[63:32] = {32{o[31]}};
end
3'd4:
begin
o[7:0] = a[7:0] + b[7:0];
o[15:8] = a[15:8] + b[15:8];
o[23:16] = a[23:16] + b[23:16];
o[31:24] = a[31:24] + b[31:24];
1058,7 → 1016,7
o[55:48] = a[55:48] + b[55:48];
o[63:56] = a[63:56] + b[63:56];
end
3'd1,3'd5:
3'd5:
begin
o[15:0] = a[15:0] + b[15:0];
o[31:16] = a[31:16] + b[31:16];
1065,7 → 1023,7
o[47:32] = a[47:32] + b[47:32];
o[63:48] = a[63:48] + b[63:48];
end
3'd2,3'd6:
3'd6:
begin
o[31:0] = a[31:0] + b[31:0];
o[63:32] = a[63:32] + b[63:32];
1078,7 → 1036,6
`else
o = a + b;
`endif
// If the operation is SIMD the target register must be passed in arg T.
`SUB:
`ifdef SIMD
case(sz)
1085,8 → 1042,18
3'd0:
begin
o[7:0] = a[7:0] - b[7:0];
o[63:8] = t[63:8];
o[63:8] = {56{o[7]}};
end
3'd1:
begin
o[15:0] = a[15:0] - b[15:0];
o[63:16] = {48{o[15]}};
end
3'd2:
begin
// 32 bit subtract, result sign extended to 64 bits. The replication count
// must be 32 to fill o[63:32]; a 31 bit value would be zero padded and
// leave bit 63 clear for negative results.
o[31:0] = a[31:0] - b[31:0];
o[63:32] = {32{o[31]}};
end
3'd4:
begin
o[7:0] = a[7:0] - b[7:0];
1098,7 → 1065,7
o[55:48] = a[55:48] - b[55:48];
o[63:56] = a[63:56] - b[63:56];
end
3'd1,3'd5:
3'd5:
begin
o[15:0] = a[15:0] - b[15:0];
o[31:16] = a[31:16] - b[31:16];
1105,14 → 1072,14
o[47:32] = a[47:32] - b[47:32];
o[63:48] = a[63:48] - b[63:48];
end
3'd2,3'd6:
3'd6:
begin
o[31:0] = a[31:0] - b[31:0];
o[63:32] = a[63:32] - b[63:32];
end
default:
default:
begin
o[63:0] = a - b;
o = a - b;
end
endcase
`else
1160,25 → 1127,10
`MIN:
`ifdef SIMD
case(sz)
3'd0:
begin
o[7:0] = BIG ? ($signed(a[7:0]) < $signed(b[7:0]) ? a[7:0] : b[7:0]) : 8'hCC;
o[63:8] = BIG ? t[63:8] : 56'hCCCCCCCCCCCCCC;
end
3'd1:
begin
o[15:0] = BIG ? ($signed(a[15:0]) < $signed(b[15:0]) ? a[15:0] : b[15:0]) : 16'hCCCC;
o[63:16] = BIG ? t[63:16] : 48'hCCCCCCCCCCCC;
end
3'd2:
begin
o[31:0] = BIG ? ($signed(a[31:0]) < $signed(b[31:0]) ? a[31:0] : b[31:0]) : 32'hCCCCCCCC;
o[63:32] = BIG ? t[63:32] : 32'hCCCCCCCC;
end
3'd3:
begin
o = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
end
3'd0: o = BIG ? ($signed(a[7:0]) < $signed(b[7:0]) ? {{56{a[7]}},a[7:0]} : {{56{b[7]}},b[7:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd1: o = BIG ? ($signed(a[15:0]) < $signed(b[15:0]) ? {{48{a[15]}},a[15:0]} : {{48{b[15]}},b[15:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd2: o = BIG ? ($signed(a[31:0]) < $signed(b[31:0]) ? {{32{a[31]}},a[31:0]} : {{32{b[31]}},b[31:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd3: o = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
3'd4:
begin
o[7:0] = BIG ? ($signed(a[7:0]) < $signed(b[7:0]) ? a[7:0] : b[7:0]) : 8'hCC;
1213,7 → 1165,11
`MAX:
`ifdef SIMD
case(sz)
3'd0,3'd4:
3'd0: o = BIG ? ($signed(a[7:0]) > $signed(b[7:0]) ? {{56{a[7]}},a[7:0]} : {{56{b[7]}},b[7:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd1: o = BIG ? ($signed(a[15:0]) > $signed(b[15:0]) ? {{48{a[15]}},a[15:0]} : {{48{b[15]}},b[15:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd2: o = BIG ? ($signed(a[31:0]) > $signed(b[31:0]) ? {{32{a[31]}},a[31:0]} : {{32{b[31]}},b[31:0]}) : 64'hCCCCCCCCCCCCCCCC;
3'd3: o = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
3'd4:
begin
o[7:0] = BIG ? ($signed(a[7:0]) > $signed(b[7:0]) ? a[7:0] : b[7:0]) : 64'hCCCCCCCCCCCCCCCC;
o[15:8] = BIG ? ($signed(a[15:8]) > $signed(b[15:8]) ? a[15:8] : b[15:8]) : 64'hCCCCCCCCCCCCCCCC;
1224,7 → 1180,7
o[55:48] = BIG ? ($signed(a[55:48]) > $signed(b[55:48]) ? a[55:48] : b[55:48]) : 64'hCCCCCCCCCCCCCCCC;
o[63:56] = BIG ? ($signed(a[63:56]) > $signed(b[63:56]) ? a[63:56] : b[63:56]) : 64'hCCCCCCCCCCCCCCCC;
end
3'd1,3'd5:
3'd5:
begin
o[15:0] = BIG ? ($signed(a[15:0]) > $signed(b[15:0]) ? a[15:0] : b[15:0]) : 64'hCCCCCCCCCCCCCCCC;
// Second 16 bit lane is bits 31:16 (a 32:16 slice is 17 bits and overlaps the next lane).
o[31:16] = BIG ? ($signed(a[31:16]) > $signed(b[31:16]) ? a[31:16] : b[31:16]) : 64'hCCCCCCCCCCCCCCCC;
1231,13 → 1187,13
o[47:32] = BIG ? ($signed(a[47:32]) > $signed(b[47:32]) ? a[47:32] : b[47:32]) : 64'hCCCCCCCCCCCCCCCC;
o[63:48] = BIG ? ($signed(a[63:48]) > $signed(b[63:48]) ? a[63:48] : b[63:48]) : 64'hCCCCCCCCCCCCCCCC;
end
3'd2,3'd6:
3'd6:
begin
o[31:0] = BIG ? ($signed(a[31:0]) > $signed(b[31:0]) ? a[31:0] : b[31:0]) : 64'hCCCCCCCCCCCCCCCC;
o[63:32] = BIG ? ($signed(a[63:32]) > $signed(b[63:32]) ? a[63:32] : b[63:32]) : 64'hCCCCCCCCCCCCCCCC;
end
3'd3,3'd7:
begin
3'd7:
begin
o[63:0] = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
end
endcase
1264,7 → 1220,6
`RTSNE: o = as!=bs;
endcase
*/
`TLB: o = BIG ? tlbo : 64'hDEADDEADDEADDEAD;
default: o[63:0] = 64'hDEADDEADDEADDEAD;
endcase
`MEMNDX:
1398,25 → 1353,25
end
endcase
`PUSHC:
begin
usa = a - 4'd8;
o = {pb[50:0],BASE_SHIFT} + usa;
end
begin
usa = a - 4'd8;
o = {pb[50:0],BASE_SHIFT} + usa;
end
`LWR,`SWC,`CAS,`CACHE:
begin
usa = a + b;
o = {pb[50:0],BASE_SHIFT} + usa;
end
begin
usa = a + b;
o = {pb[50:0],BASE_SHIFT} + usa;
end
`LV,`SV:
begin
usa = a + b + {ven,3'b0};
o = {pb[50:0],BASE_SHIFT} + usa;
end
begin
usa = a + b + {ven,3'b0};
o = {pb[50:0],BASE_SHIFT} + usa;
end
`CSRRW:
case(instr[27:18])
10'h044: o = BIG ? (csr | {39'd0,thrd,24'h0}) : 64'hDDDDDDDDDDDDDDDD;
default: o = BIG ? csr : 64'hDDDDDDDDDDDDDDDD;
endcase
case(instr[27:18])
10'h044: o = BIG ? (csr | {39'd0,thrd,24'h0}) : 64'hDDDDDDDDDDDDDDDD;
default: o = BIG ? csr : 64'hDDDDDDDDDDDDDDDD;
endcase
`BITFIELD: o = BIG ? bfout : 64'hCCCCCCCCCCCCCCCC;
default: o = 64'hDEADDEADDEADDEAD;
endcase
1510,8 → 1465,6
done <= sao_done;
else if (shift)
done <= adrDone;
else if (tlb & BIG)
done <= tlb_done;
else
done <= TRUE;
end
1535,8 → 1488,6
idle <= sao_idle;
else if (shift)
idle <= adrIdle;
else if (tlb & BIG)
idle <= tlb_idle;
else
idle <= TRUE;
end
1664,11 → 1615,11
exc <= `FLT_WRV;
else
`endif
casez(b[2:0])
3'b100: exc <= o[2:0]!=3'b0 ? `FLT_NONE : `FLT_NONE; // LW / SW
3'b?10: exc <= o[1:0]!=2'b0 ? `FLT_ALN : `FLT_NONE; // LH / LHU / SH
default: exc <= o[ 0] ? `FLT_ALN : `FLT_NONE; // LC / LCU / SC
endcase
casez(b[2:0])
3'b100: exc <= (o[2:0]!=3'b0) ? `FLT_NONE : `FLT_NONE; // LW / SW
3'b?10: exc <= (o[1:0]!=2'b0) ? `FLT_NONE : `FLT_NONE; // LH / LHU / SH
default: exc <= o[ 0] ? `FLT_NONE : `FLT_NONE; // LC / LCU / SC
endcase
end
`LWR,`SWC,`CAS,`CACHE:
begin
1713,24 → 1664,24
3'd2: o[63:0] = $signed(a[31:0]) == $signed(b[31:0]);
3'd3: o[63:0] = $signed(a) == $signed(b);
3'd4: o[63:0] = {
7'h0,$signed(a[7:0]) == $signed(b[7:0]),
7'h0,$signed(a[63:56]) == $signed(b[63:56]),
7'h0,$signed(a[55:48]) == $signed(b[55:48]),
7'h0,$signed(a[47:40]) == $signed(b[47:40]),
7'h0,$signed(a[39:32]) == $signed(b[39:32]),
7'h0,$signed(a[31:24]) == $signed(b[31:24]),
7'h0,$signed(a[23:16]) == $signed(b[23:16]),
7'h0,$signed(a[15:8]) == $signed(b[15:8]),
7'h0,$signed(a[23:16]) == $signed(b[23:16]),
7'h0,$signed(a[31:24]) == $signed(b[31:24]),
7'h0,$signed(a[39:32]) == $signed(b[39:32]),
7'h0,$signed(a[47:40]) == $signed(b[47:40]),
7'h0,$signed(a[55:48]) == $signed(b[55:48]),
7'h0,$signed(a[63:56]) == $signed(b[63:56])
7'h0,$signed(a[7:0]) == $signed(b[7:0])
};
3'd5: o[63:0] = {
15'h0,$signed(a[15:0]) == $signed(b[15:0]),
15'h0,$signed(a[63:48]) == $signed(b[63:48]),
15'h0,$signed(a[47:32]) == $signed(b[47:32]),
15'h0,$signed(a[31:16]) == $signed(b[31:16]),
15'h0,$signed(a[47:32]) == $signed(b[47:32]),
15'h0,$signed(a[63:48]) == $signed(b[63:48])
15'h0,$signed(a[15:0]) == $signed(b[15:0])
};
3'd6: o[63:0] = {
31'h0,$signed(a[31:0]) == $signed(b[31:0]),
31'h0,$signed(a[63:32]) == $signed(b[63:32])
31'h0,$signed(a[63:32]) == $signed(b[63:32]),
31'h0,$signed(a[31:0]) == $signed(b[31:0])
};
3'd7: o[63:0] = $signed(a[63:0]) == $signed(b[63:0]);
endcase
1754,24 → 1705,24
3'd2: o[63:0] = $signed(a[31:0]) < $signed(b[31:0]);
3'd3: o[63:0] = $signed(a) < $signed(b);
3'd4: o[63:0] = {
7'h0,$signed(a[7:0]) < $signed(b[7:0]),
7'h0,$signed(a[63:56]) < $signed(b[63:56]),
7'h0,$signed(a[55:48]) < $signed(b[55:48]),
7'h0,$signed(a[47:40]) < $signed(b[47:40]),
7'h0,$signed(a[39:32]) < $signed(b[39:32]),
7'h0,$signed(a[31:24]) < $signed(b[31:24]),
7'h0,$signed(a[23:16]) < $signed(b[23:16]),
7'h0,$signed(a[15:8]) < $signed(b[15:8]),
7'h0,$signed(a[23:16]) < $signed(b[23:16]),
7'h0,$signed(a[31:24]) < $signed(b[31:24]),
7'h0,$signed(a[39:32]) < $signed(b[39:32]),
7'h0,$signed(a[47:40]) < $signed(b[47:40]),
7'h0,$signed(a[55:48]) < $signed(b[55:48]),
7'h0,$signed(a[63:56]) < $signed(b[63:56])
7'h0,$signed(a[7:0]) < $signed(b[7:0])
};
3'd5: o[63:0] = {
15'h0,$signed(a[15:0]) < $signed(b[15:0]),
15'h0,$signed(a[63:48]) < $signed(b[63:48]),
15'h0,$signed(a[47:32]) < $signed(b[47:32]),
15'h0,$signed(a[31:16]) < $signed(b[31:16]),
15'h0,$signed(a[47:32]) < $signed(b[47:32]),
15'h0,$signed(a[63:48]) < $signed(b[63:48])
15'h0,$signed(a[15:0]) < $signed(b[15:0])
};
3'd6: o[63:0] = {
31'h0,$signed(a[31:0]) < $signed(b[31:0]),
31'h0,$signed(a[63:32]) < $signed(b[63:32])
31'h0,$signed(a[63:32]) < $signed(b[63:32]),
31'h0,$signed(a[31:0]) < $signed(b[31:0])
};
3'd7: o[63:0] = $signed(a[63:0]) < $signed(b[63:0]);
endcase
1795,24 → 1746,24
3'd2: o[63:0] = $signed(a[31:0]) <= $signed(b[31:0]);
3'd3: o[63:0] = $signed(a) <= $signed(b);
3'd4: o[63:0] = {
7'h0,$signed(a[7:0]) <= $signed(b[7:0]),
7'h0,$signed(a[63:56]) <= $signed(b[63:56]),
7'h0,$signed(a[55:48]) <= $signed(b[55:48]),
7'h0,$signed(a[47:40]) <= $signed(b[47:40]),
7'h0,$signed(a[39:32]) <= $signed(b[39:32]),
7'h0,$signed(a[31:24]) <= $signed(b[31:24]),
7'h0,$signed(a[23:16]) <= $signed(b[23:16]),
7'h0,$signed(a[15:8]) <= $signed(b[15:8]),
7'h0,$signed(a[23:16]) <= $signed(b[23:16]),
7'h0,$signed(a[31:24]) <= $signed(b[31:24]),
7'h0,$signed(a[39:32]) <= $signed(b[39:32]),
7'h0,$signed(a[47:40]) <= $signed(b[47:40]),
7'h0,$signed(a[55:48]) <= $signed(b[55:48]),
7'h0,$signed(a[63:56]) <= $signed(b[63:56])
7'h0,$signed(a[7:0]) <= $signed(b[7:0])
};
3'd5: o[63:0] = {
15'h0,$signed(a[15:0]) <= $signed(b[15:0]),
15'h0,$signed(a[63:48]) <= $signed(b[63:48]),
15'h0,$signed(a[47:32]) <= $signed(b[47:32]),
15'h0,$signed(a[31:16]) <= $signed(b[31:16]),
15'h0,$signed(a[47:32]) <= $signed(b[47:32]),
15'h0,$signed(a[63:48]) <= $signed(b[63:48])
15'h0,$signed(a[15:0]) <= $signed(b[15:0])
};
3'd6: o[63:0] = {
31'h0,$signed(a[31:0]) <= $signed(b[31:0]),
31'h0,$signed(a[63:32]) <= $signed(b[63:32])
31'h0,$signed(a[63:32]) <= $signed(b[63:32]),
31'h0,$signed(a[31:0]) <= $signed(b[31:0])
};
3'd7: o[63:0] = $signed(a[63:0]) <= $signed(b[63:0]);
endcase
1831,26 → 1782,29
begin
`ifdef SIMD
case(sz[2:0])
3'd4,3'd0: o = {
7'h0,(a[7:0]) < (b[7:0]),
7'h0,(a[15:8]) < (b[15:8]),
7'h0,(a[23:16]) < (b[23:16]),
7'h0,(a[31:24]) < (b[31:24]),
7'h0,(a[39:32]) < (b[39:32]),
7'h0,(a[47:40]) < (b[47:40]),
7'h0,(a[55:48]) < (b[55:48]),
7'h0,(a[63:56]) < (b[63:56])
};
3'd5,3'd1: o = {
15'h0,(a[15:0]) < (b[15:0]),
15'h0,(a[31:16]) < (b[31:16]),
15'h0,(a[47:32]) < (b[47:32]),
15'h0,(a[63:48]) < (b[63:48])
};
3'd6,3'd2: o = {
31'h0,(a[31:0]) < (b[31:0]),
31'h0,(a[63:32]) < (b[63:32])
};
3'd0: o = (a[7:0]) < (b[7:0]);
3'd1: o = (a[15:0]) < (b[15:0]);
3'd2: o = (a[31:0]) < (b[31:0]);
3'd4: o = {
7'h0,(a[63:56]) < (b[63:56]),
7'h0,(a[55:48]) < (b[55:48]),
7'h0,(a[47:40]) < (b[47:40]),
7'h0,(a[39:32]) < (b[39:32]),
7'h0,(a[31:24]) < (b[31:24]),
7'h0,(a[23:16]) < (b[23:16]),
7'h0,(a[15:8]) < (b[15:8]),
7'h0,(a[7:0]) < (b[7:0])
};
3'd5: o = {
15'h0,(a[63:48]) < (b[63:48]),
15'h0,(a[47:32]) < (b[47:32]),
15'h0,(a[31:16]) < (b[31:16]),
15'h0,(a[15:0]) < (b[15:0])
};
3'd6: o = {
31'h0,(a[63:32]) < (b[63:32]),
31'h0,(a[31:0]) < (b[31:0])
};
3'd7,3'd3: o = (a[63:0]) < (b[63:0]);
endcase
`else
1868,31 → 1822,30
begin
`ifdef SIMD
case(sz[2:0])
3'd0: o[63:0] = (a[7:0]) <= (b[7:0]);
3'd1: o[63:0] = (a[15:0]) <= (b[15:0]);
3'd2: o[63:0] = (a[31:0]) <= (b[31:0]);
3'd3: o[63:0] = (a) <= (b);
3'd4: o[63:0] = {
7'h0,(a[7:0]) <= (b[7:0]),
7'h0,(a[15:8]) <= (b[15:8]),
7'h0,(a[23:16]) <= (b[23:16]),
7'h0,(a[31:24]) <= (b[31:24]),
7'h0,(a[39:32]) <= (b[39:32]),
7'h0,(a[47:40]) <= (b[47:40]),
7'h0,(a[55:48]) <= (b[55:48]),
7'h0,(a[63:56]) <= (b[63:56])
};
3'd5: o[63:0] = {
15'h0,(a[15:0]) <= (b[15:0]),
15'h0,(a[31:16]) <= (b[31:16]),
15'h0,(a[47:32]) <= (b[47:32]),
15'h0,(a[63:48]) <= (b[63:48])
};
3'd6: o[63:0] = {
31'h0,(a[31:0]) <= (b[31:0]),
31'h0,(a[63:32]) <= (b[63:32])
};
3'd7: o[63:0] = (a[63:0]) <= (b[63:0]);
3'd0: o = (a[7:0]) <= (b[7:0]);
3'd1: o = (a[15:0]) <= (b[15:0]);
3'd2: o = (a[31:0]) <= (b[31:0]);
3'd4: o = {
7'h0,(a[63:56]) <= (b[63:56]),
7'h0,(a[55:48]) <= (b[55:48]),
7'h0,(a[47:40]) <= (b[47:40]),
7'h0,(a[39:32]) <= (b[39:32]),
7'h0,(a[31:24]) <= (b[31:24]),
7'h0,(a[23:16]) <= (b[23:16]),
7'h0,(a[15:8]) <= (b[15:8]),
7'h0,(a[7:0]) <= (b[7:0])
};
3'd5: o = {
15'h0,(a[63:48]) <= (b[63:48]),
15'h0,(a[47:32]) <= (b[47:32]),
15'h0,(a[31:16]) <= (b[31:16]),
15'h0,(a[15:0]) <= (b[15:0])
};
3'd6: o = {
31'h0,(a[63:32]) <= (b[63:32]),
31'h0,(a[31:0]) <= (b[31:0])
};
3'd7,3'd3: o = (a[63:0]) <= (b[63:0]);
endcase
`else
o[63:0] = (a[63:0]) <= (b[63:0]);
/FT64v7/rtl/common/FT64_busStates.vh
0,0 → 1,42
// ----------------------------------------------------------------------------
// State encodings shared through `include.
// This file holds bare parameter declarations and is meant to be included
// inside a module body.
// ----------------------------------------------------------------------------

// Five bit bus state codes (compared against the 'bstate' signal).
// Codes 5, 7, 9, 10 and 26 to 28 are not assigned.
parameter
	BIDLE                 = 5'd0,
	B_StoreAck            = 5'd1,
	B_DCacheLoadStart     = 5'd2,
	B_DCacheLoadStb       = 5'd3,
	B_DCacheLoadWait      = 5'd4,
	B_DCacheLoadResetBusy = 5'd6,
	B8                    = 5'd8,
	B11                   = 5'd11,
	B_RMWAck              = 5'd12,
	B_DLoadAck            = 5'd13,
	B14                   = 5'd14,
	B15                   = 5'd15,
	B16                   = 5'd16,
	B17                   = 5'd17,
	B18                   = 5'd18,
	B_LSNAck              = 5'd19,
	B2a                   = 5'd20,
	B2b                   = 5'd21,
	B2c                   = 5'd22,
	B_DCacheLoadAck       = 5'd23,
	B20                   = 5'd24,
	B21                   = 5'd25,
	B_WaitSeg             = 5'd29,
	B_DLoadNack           = 5'd30,
	B_WaitIC              = 5'd31;

// Four bit instruction cache controller state codes; all sixteen values
// are assigned. Note that IC3a is numbered 11, not next to IC3.
parameter
	IDLE      = 4'd0,
	IC1       = 4'd1,
	IC2       = 4'd2,
	IC3       = 4'd3,
	IC_WaitL2 = 4'd4,
	IC5       = 4'd5,
	IC6       = 4'd6,
	IC7       = 4'd7,
	IC_Next   = 4'd8,
	IC9       = 4'd9,
	IC10      = 4'd10,
	IC3a      = 4'd11,
	IC_Access = 4'd12,
	IC_Ack    = 4'd13,
	IC_Nack   = 4'd14,
	IC_Nack2  = 4'd15;
/FT64v7/rtl/common/FT64_dcache.v
1,6 → 1,6
// ============================================================================
// __
// \\__/ o\ (C) 2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2018-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
31,10 → 31,10
input dce; // data cache enable
input wclk;
input wr;
input [7:0] sel;
input [31:0] sel;
input [37:0] wadr;
output whit;
input [63:0] i;
input [255:0] i;
input [255:0] li; // line input
input rclk;
input [2:0] rdsize;
69,7 → 69,7
(
.wclk(wclk),
.dce(dce),
.wr(wr && wadr[4:3]==2'b11),
.wr(wr),
.wadr(wadr),
.rclk(rclk),
.radr(radr),
103,9 → 103,9
input rst;
input clka;
input ena;
input [7:0] wea;
input [31:0] wea;
input [13:0] addra;
input [63:0] dina;
input [255:0] dina;
input clkb;
input enb;
input [13:0] addrb;
126,25 → 126,11
 
genvar g;
generate begin
for (g = 0; g < 4; g = g + 1)
for (g = 0; g < 32; g = g + 1)
always @(posedge clka)
begin
if (ena & wea[0] & addra[4:3]==g) mem[addra[13:5]][g*64+7:g*64] <= dina[7:0];
if (ena & wea[1] & addra[4:3]==g) mem[addra[13:5]][g*64+15:g*64+8] <= dina[15:8];
if (ena & wea[2] & addra[4:3]==g) mem[addra[13:5]][g*64+23:g*64+16] <= dina[23:16];
if (ena & wea[3] & addra[4:3]==g) mem[addra[13:5]][g*64+31:g*64+24] <= dina[31:24];
if (ena & wea[4] & addra[4:3]==g) mem[addra[13:5]][g*64+39:g*64+32] <= dina[39:32];
if (ena & wea[5] & addra[4:3]==g) mem[addra[13:5]][g*64+47:g*64+40] <= dina[47:40];
if (ena & wea[6] & addra[4:3]==g) mem[addra[13:5]][g*64+55:g*64+48] <= dina[55:48];
if (ena & wea[7] & addra[4:3]==g) mem[addra[13:5]][g*64+63:g*64+56] <= dina[63:56];
if (ena & wea[0] & addra[4:3]==g) valid[addra[13:5]][g*8] <= 1'b1;
if (ena & wea[1] & addra[4:3]==g) valid[addra[13:5]][g*8+1] <= 1'b1;
if (ena & wea[2] & addra[4:3]==g) valid[addra[13:5]][g*8+2] <= 1'b1;
if (ena & wea[3] & addra[4:3]==g) valid[addra[13:5]][g*8+3] <= 1'b1;
if (ena & wea[4] & addra[4:3]==g) valid[addra[13:5]][g*8+4] <= 1'b1;
if (ena & wea[5] & addra[4:3]==g) valid[addra[13:5]][g*8+5] <= 1'b1;
if (ena & wea[6] & addra[4:3]==g) valid[addra[13:5]][g*8+6] <= 1'b1;
if (ena & wea[7] & addra[4:3]==g) valid[addra[13:5]][g*8+7] <= 1'b1;
if (ena & wea[g]) mem[addra[13:5]][g*8+7:g*8] <= dina[g*8+7:g*8];
if (ena & wea[g]) valid[addra[13:5]][g] <= 1'b1;
end
end
endgenerate
/FT64v7/rtl/common/FT64_defines.vh
353,7 → 353,7
`define CSR_CAS 10'h02C
`define CSR_TVEC 10'b00000110???
`define CSR_IM_STACK 10'h040
`define CSR_OL_STACK 10'h041
`define CSR_ODL_STACK 10'h041
`define CSR_PL_STACK 10'h042
`define CSR_RS_STACK 10'h043
`define CSR_STATUS 10'h044
461,6 → 461,7
 
`define DRAMSLOT_AVAIL 3'b000
`define DRAMSLOT_BUSY 3'b001
`define DRAMSLOT_BUSY2 3'b010
`define DRAMSLOT_REQBUS 3'b101
`define DRAMSLOT_HASBUS 3'b110
`define DRAMREQ_READY 3'b111
/FT64v7/rtl/common/FT64_icache.v
375,23 → 375,26
reg [9:0] en1, en2;
reg invline1, invline2;
 
wire iclk;
BUFH ucb1 (.I(clk), .O(iclk));
 
// Must update the cache memory on the cycle after a write to the tag memmory.
// Otherwise lineno won't be valid. Tag memory takes two clock cycles to update.
always @(posedge clk)
always @(posedge iclk)
wr1 <= wr;
always @(posedge clk)
always @(posedge iclk)
wr2 <= wr1;
always @(posedge clk)
always @(posedge iclk)
i1 <= i[305:0];
always @(posedge clk)
always @(posedge iclk)
i2 <= i1;
always @(posedge clk)
always @(posedge iclk)
en1 <= en;
always @(posedge clk)
always @(posedge iclk)
en2 <= en1;
always @(posedge clk)
always @(posedge iclk)
invline1 <= invline;
always @(posedge clk)
always @(posedge iclk)
invline2 <= invline1;
 
generate begin : tags
400,7 → 403,7
FT64_L1_icache_mem #(.pLines(pLines)) u1
(
.rst(rst),
.clk(clk),
.clk(iclk),
.wr(wr1),
.en(en1),
.i(i1),
414,7 → 417,7
FT64_L1_icache_cmptag4way #(.pLines(pLines)) u3
(
.rst(rst),
.clk(clk),
.clk(iclk),
.nxt(nxt),
.wr(wr),
.adr(adr),
427,7 → 430,7
FT64_L1_icache_mem u1
(
.rst(rst),
.clk(clk),
.clk(iclk),
.wr(wr2),
.en(en2),
.i(i2),
441,7 → 444,7
FT64_L1_icache_camtag u2
(
.rst(rst),
.clk(clk),
.clk(iclk),
.nxt(nxt),
.wlineno(wlineno),
.wadr(wadr),
538,7 → 541,7
// address bit 4).
// -----------------------------------------------------------------------------
 
module FT64_L2_icache(rst, clk, nxt, wr, wr_ack, rd_ack, xsel, adr, cnt, exv_i, i, err_i, o, hit, invall, invline);
module FT64_L2_icache(rst, clk, nxt, wr, adr, cnt, exv_i, i, err_i, o, hit, invall, invline);
parameter CAMTAGS = 1'b0; // 32 way
parameter FOURWAY = 1'b1;
parameter AMSB = 63;
546,9 → 549,6
input clk;
input nxt;
input wr;
output wr_ack;
output rd_ack;
input xsel;
input [AMSB+8:0] adr;
input [2:0] cnt;
input exv_i;
575,7 → 575,7
always @(posedge clk)
wr2 <= wr1;
always @(posedge clk)
sel1 <= {xsel,adr[4:3]};
sel1 <= cnt;
always @(posedge clk)
sel2 <= sel1;
always @(posedge clk)
585,20 → 585,7
always @(posedge clk)
f2 <= f1;
reg [3:0] rdackx;
always @(posedge clk)
if (rst)
rdackx <= 4'b0;
else begin
if (last_adr != adr || wr || wr1 || wr2)
rdackx <= 4'b0;
else
rdackx <= {rdackx,~(wr|wr1|wr2)};
end
 
assign rd_ack = rdackx[3] & ~(last_adr!=adr || wr || wr1 || wr2);
 
always @(posedge clk)
i1 <= i;
always @(posedge clk)
i2 <= i1;
657,7 → 644,6
endgenerate
 
assign hit = taghit & lv;
assign wr_ack = wr2;
 
endmodule
 
/FT64v7/rtl/common/FT64_ipt.v
27,7 → 27,7
`define TRUE 1'b1
`define FALSE 1'b0
`endif
//`define BYPASS 1'b1
`define BYPASS 1'b1
 
module FT64_ipt(rst, clk, pkeys_i, ol_i, bte_i, cti_i, cs_i, icl_i, cyc_i, stb_i, ack_o, we_i, sel_i, vadr_i, dat_i, dat_o,
bte_o, cti_o, cyc_o, ack_i, we_o, sel_o, padr_o, exv_o, rdv_o, wrv_o, prv_o, page_fault);
156,6 → 156,12
sel_o <= sel_i;
`ifdef BYPASS
always @(posedge clk)
pt_wr <= 1'b0;
always @(posedge clk)
pt_ad <= 16'h0;
always @(posedge clk)
pt_dati <= 42'h0;
always @(posedge clk)
cyc_o <= cyc_i;
always @(posedge clk)
we_o <= we_i;
171,6 → 177,8
prv_o <= 1'b0;
always @(posedge clk)
page_fault <= 1'b0;
always @(posedge clk)
ack_o <= 1'b0;
`else
always @(posedge clk)
if (rst) begin
428,7 → 436,8
 
// Wait a clock cycle for a page fault to register.
S_WAIT1:
goto(S_IDLE);
if (!ack_i)
goto(S_IDLE);
S_ACK:
if (ack_i) begin
/FT64v7/rtl/common/FT64_pic.v
90,6 → 90,7
);
parameter pIOAddress = 32'hFFDC_0F00;
 
wire clk;
reg [31:0] trig;
reg [31:0] ie; // interrupt enable register
reg rdy1;
117,12 → 118,15
wire cs = cyc_i && stb_i && adr_i[31:8]==pIOAddress[31:8];
assign vol_o = cs;
 
always @(posedge clk_i)
assign clk = clk_i;
//BUFH ucb1 (.I(clk_i), .O(clk));
 
always @(posedge clk)
rdy1 <= cs;
assign ack_o = cs ? (wr_i ? 1'b1 : rdy1) : 1'b0;
 
// write registers
always @(posedge clk_i)
always @(posedge clk)
if (rst_i) begin
ie <= 32'h0;
rste <= 32'h0;
155,7 → 159,7
end
 
// read registers
always @(posedge clk_i)
always @(posedge clk)
begin
if (irqenc!=5'd0)
$display("PIC: %d",irqenc);
174,7 → 178,7
assign nmio = nmii & ie[0];
 
// Edge detect circuit
always @(posedge clk_i)
always @(posedge clk)
begin
for (n = 1; n < 32; n = n + 1)
begin
188,7 → 192,7
// irq requests are latched on every rising clock edge to prevent
// misreads
// nmi is not encoded
always @(posedge clk_i)
always @(posedge clk)
begin
irqenc <= 5'd0;
for (n = 31; n > 0; n = n - 1)
/FT64v7/rtl/twoway/FT64.v
66,7 → 66,7
output [31:0] pcr_o;
output [63:0] pcr2_o;
output [63:0] pkeys_o;
output reg icl_o;
output icl_o;
output reg cr_o;
output reg sr_o;
input rbi_i;
120,6 → 120,8
parameter IQS_DONE = 3'd5;
parameter IQS_CMT = 3'd6;
 
`include "..\common\FT64_busStates.vh"
 
wire clk;
//BUFG uclkb1
//(
137,6 → 139,7
 
wire dc_ack;
wire acki = ack_i|dc_ack;
wire tlb_miss;
wire [RBIT:0] Ra0, Ra1, Ra2;
wire [RBIT:0] Rb0, Rb1, Rb2;
wire [RBIT:0] Rc0, Rc1, Rc2;
281,13 → 284,20
for (n = 0; n < 16; n = n + 1)
prf_source[n] <= 1'b0;
end
`ifdef SUPPORT_SMT
wire [1:0] ol [0:NTHREAD];
wire [1:0] dl [0:NTHREAD];
`else
wire [1:0] ol;
wire [1:0] dl;
`endif
wire [`ABITS] pc0a;
wire [`ABITS] pc1a;
wire [`ABITS] pc2a;
`ifdef SUPPORT_BBMS
wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0];
wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0];
wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {pb[50:0],13'd0} + pc2a[47:0];
wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol[0]==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0];
wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol[1]==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0];
wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol[2]==2'b00) ? pc2a : {pb[50:0],13'd0} + pc2a[47:0];
`else
wire [`ABITS] pc0 = pc0a;
wire [`ABITS] pc1 = pc1a;
357,6 → 367,16
(* mark_debug = "true" *)
reg [15:0] cause[0:15];
`ifdef SUPPORT_SMT
reg [31:0] im_stack [0:NTHREAD];
wire [3:0] im = im_stack[0][3:0];
reg [15:0] ol_stack [0:NTHREAD];
reg [15:0] dl_stack [0:NTHREAD];
assign ol[0] = ol_stack[0][1:0];
assign ol[1] = ol_stack[1][1:0];
assign ol[2] = ol_stack[2][1:0];
assign dl[0] = dl_stack[0][1:0];
assign dl[1] = dl_stack[1][1:0];
assign dl[2] = dl_stack[2][1:0];
reg [`ABITS] epc [0:NTHREAD];
reg [`ABITS] epc0 [0:NTHREAD];
reg [`ABITS] epc1 [0:NTHREAD];
368,9 → 388,6
reg [`ABITS] epc7 [0:NTHREAD];
reg [`ABITS] epc8 [0:NTHREAD]; // exception pc and stack
reg [63:0] mstatus [0:NTHREAD]; // machine status
wire [3:0] im = mstatus[0][3:0];
wire [1:0] ol [0:NTHREAD];
wire [1:0] dl [0:NTHREAD];
assign ol[0] = mstatus[0][5:4]; // operating level
assign dl[0] = mstatus[0][21:20];
wire [7:0] cpl [0:NTHREAD];
380,9 → 397,6
assign cpl[1] = mstatus[1][13:6]; // current privilege level
assign dl[1] = mstatus[1][21:20];
wire [7:0] ASID = mstatus[0][47:40];
reg [15:0] ol_stack [0:NTHREAD];
reg [15:0] dl_stack [0:NTHREAD];
reg [31:0] im_stack [0:NTHREAD];
reg [63:0] pl_stack [0:NTHREAD];
reg [63:0] rs_stack [0:NTHREAD];
reg [63:0] brs_stack [0:NTHREAD];
394,6 → 408,8
//assign ol_o = mprv ? ol_stack[0][2:0] : ol[0];
wire vca = mstatus[0][32]; // vector chaining active
`else
reg [31:0] im_stack = 32'hFFFFFFFF;
wire [3:0] im = im_stack[3:0];
reg [`ABITS] epc ;
reg [`ABITS] epc0 ;
reg [`ABITS] epc1 ;
405,17 → 421,13
reg [`ABITS] epc7 ;
reg [`ABITS] epc8 ; // exception pc and stack
reg [63:0] mstatus ; // machine status
wire [3:0] im = mstatus[3:0];
wire [1:0] ol ;
wire [1:0] dl;
assign ol = mstatus[5:4]; // operating level
assign dl = mstatus[21:20];
reg [15:0] ol_stack;
reg [15:0] dl_stack;
assign ol = ol_stack[1:0]; // operating level
assign dl = dl_stack[1:0];
wire [7:0] cpl ;
assign cpl = mstatus[13:6]; // current privilege level
wire [5:0] rgs ;
reg [15:0] ol_stack ;
reg [15:0] dl_stack ;
reg [31:0] im_stack ;
reg [63:0] pl_stack ;
reg [63:0] rs_stack ;
reg [63:0] brs_stack ;
788,6 → 800,7
wire [63:0] alu0b_bus;
wire [63:0] alu0_out;
wire [`QBITSP1] alu0_id;
(* mark_debug="true" *)
wire [`XBITS] alu0_exc;
wire alu0_v;
wire alu0_branchmiss;
1017,77 → 1030,38
reg [63:0] commit2_bus;
 
reg StoreAck1;
reg [4:0] bstate;
parameter BIDLE = 5'd0;
parameter B_StoreAck = 5'd1;
parameter B_DCacheLoadStart = 5'd2;
parameter B_DCacheLoadStb = 5'd3;
parameter B_DCacheLoadWait1 = 5'd4;
parameter B_DCacheLoadWait2 = 5'd5;
parameter B_DCacheLoadResetBusy = 5'd6;
parameter B_ICacheAck = 5'd7;
parameter B8 = 5'd8;
parameter B_ICacheNack = 5'd9;
parameter B_ICacheNack2 = 5'd10;
parameter B11 = 5'd11;
parameter B12 = 5'd12;
parameter B_DLoadAck = 5'd13;
parameter B14 = 5'd14;
parameter B15 = 5'd15;
parameter B16 = 5'd16;
parameter B17 = 5'd17;
parameter B18 = 5'd18;
parameter B_LSNAck = 5'd19;
parameter B2a = 5'd20;
parameter B2b = 5'd21;
parameter B2c = 5'd22;
parameter B_DCacheLoadAck = 5'd23;
parameter B20 = 5'd24;
parameter B21 = 5'd25;
parameter B_DCacheLoadWait3 = 5'd26;
parameter B_LoadDesc = 5'd27;
parameter B_LoadDescStb = 5'd28;
parameter B_WaitSeg = 5'd29;
parameter B_DLoadNack = 5'd30;
reg [4:0] bstate = BIDLE;
wire [3:0] icstate;
parameter SEG_IDLE = 2'd0;
parameter SEG_CHK = 2'd1;
parameter SEG_UPD = 2'd2;
parameter SEG_DONE = 2'd3;
reg [1:0] bwhich;
reg [3:0] icstate,picstate;
parameter IDLE = 4'd0;
parameter IC1 = 4'd1;
parameter IC2 = 4'd2;
parameter IC3 = 4'd3;
parameter IC_WaitL2 = 4'd4;
parameter IC5 = 4'd5;
parameter IC6 = 4'd6;
parameter IC7 = 4'd7;
parameter IC_Next = 4'd8;
parameter IC9 = 4'd9;
parameter IC10 = 4'd10;
parameter IC3a = 4'd11;
reg invic, invdc;
reg [1:0] icwhich;
reg icnxt,L2_nxt;
reg invicl;
wire [1:0] icwhich;
wire icnxt;
wire L2_nxt;
wire ihit0,ihit1,ihit2,ihitL2;
wire ihit = ihit0&ihit1&ihit2;
reg phit;
wire threadx;
always @*
phit <= ihit&&icstate==IDLE;
reg [2:0] iccnt;
phit <= (ihit&&icstate==IDLE) && !invicl;
(* mark_debug="true" *)
reg icack;
reg L1_wr0,L1_wr1,L1_wr2;
reg L1_invline;
wire L1_wr0,L1_wr1,L1_wr2;
wire L1_invline;
wire [1:0] ic0_fault,ic1_fault,ic2_fault;
reg [9:0] L1_en;
reg [71:0] L1_adr, L2_adr;
reg [305:0] L1_dati;
wire [9:0] L1_en;
wire [71:0] L1_adr;
wire [71:0] L2_adr;
wire [305:0] L2_dato;
reg L2_xsel;
wire selL2;
 
wire icclk;
BUFH ucb1 (.I(clk), .O(icclk));
 
generate begin : gRegfileInst
if (`WAYS > 2) begin : gb1
FT64_regfile2w9r_oc #(.RBIT(RBIT)) urf1
1193,9 → 1167,6
endcase
endfunction
 
wire [`ABITS] pc0plus6 = pc0 + 32'd7;
wire [`ABITS] pc0plus12 = pc0 + 32'd14;
 
generate begin : gInsnVar
if (`WAYS > 1) begin
always @*
1214,6 → 1185,65
end
endgenerate
 
// Signals connecting the core to the L1 instruction cache controller (uL1ctrl).
// L1_selpc selects the address presented to the L1 instruction caches:
// the ASID-qualified program counter when set, L1_adr otherwise.
wire L1_selpc;
// Bus-cycle signals produced by the controller; the bus state machine copies
// them onto the core's bus outputs while it is in state B_WaitIC.
// (presumably Wishbone-style cycle type / burst type signals - TODO confirm)
wire [2:0] icti;
wire [1:0] ibte;
wire icyc;
wire istb;
wire [7:0] isel;
wire [71:0] iadr;
// Write strobe for the L2 cache (drives its .wr port).
wire L2_ld;
// Line data written into the L1 instruction caches (drives their .i port).
wire [305:0] L1_dat;
// Count supplied to the L2 cache .cnt port during a load.
wire [2:0] L2_cnt;
// Address handed to the controller's invlineAddr port.
// NOTE(review): presumably the line address to invalidate when invicl is set - confirm.
reg [71:0] invlineAddr;
 
// Instruction cache controller instance.
// Takes the three fetch addresses and their L1 hit flags, and produces the L1/L2
// cache control signals plus a set of bus-cycle signals (icti, ibte, icyc, istb,
// isel, iadr) that the core's bus state machine forwards in state B_WaitIC.
FT64_ICController uL1ctrl
(
.clk_i(clk),
.asid(ASID),
// Fetch addresses and the per-cache L1 hit indications.
.pc0(pc0),
.pc1(pc1),
.pc2(pc2),
.hit0(ihit0),
.hit1(ihit1),
.hit2(ihit2),
// Current state of the core's bus state machine (input) and the
// controller's own state (output, observed by the core as icstate).
.bstate(bstate),
.state(icstate),
// Line invalidate request from the core.
.invline(invicl),
.invlineAddr(invlineAddr),
.thread_en(thread_en),
// L1 instruction cache control.
.L1_selpc(L1_selpc),
.L1_adr(L1_adr),
.L1_dat(L1_dat),
.L1_wr0(L1_wr0),
.L1_wr1(L1_wr1),
.L1_wr2(L1_wr2),
.L1_en(L1_en),
.L1_invline(L1_invline),
// L2 cache control.
.ihitL2(ihitL2),
.selL2(selL2),
.L2_ld(L2_ld),
.L2_cnt(L2_cnt),
.L2_adr(L2_adr),
.L2_dato(L2_dato),
.L2_nxt(L2_nxt),
.icnxt(icnxt),
.icwhich(icwhich),
// External bus interface used for cache line loads.
.icl_o(icl_o),
.cti_o(icti),
.bte_o(ibte),
.bok_i(bok_i),
.cyc_o(icyc),
.stb_o(istb),
.ack_i(acki),
.err_i(err_i),
.tlbmiss_i(tlb_miss),
.exv_i(exv_i),
.sel_o(isel),
.adr_o(iadr),
.dat_i(dat_i)
);
 
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0
(
.rst(rst),
1222,9 → 1252,9
.wr(L1_wr0),
.wr_ack(),
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? {pcr[7:0],pc0} : L1_adr),
.adr(L1_selpc ? {ASID,pc0} : L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L1_dat),
.o(insn0a),
.fault(ic0_fault),
.hit(ihit0),
1241,9 → 1271,9
.wr(L1_wr1),
.wr_ack(),
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc1}: {pcr[7:0],pc0plus6} ): L1_adr),
.adr(L1_selpc ? (thread_en ? {ASID,pc1}: {ASID,pc0plus6} ): L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L1_dat),
.o(insn1b),
.fault(ic1_fault),
.hit(ihit1),
1263,9 → 1293,9
.wr(L1_wr2),
.wr_ack(),
.en(L1_en),
.adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc2} : {pcr[7:0],pc0plus12}) : L1_adr),
.adr(L1_selpc ? (thread_en ? {ASID,pc2} : {ASID,pc0plus12}) : L1_adr),
.wadr(L1_adr),
.i(L1_dati),
.i(L1_dat),
.o(insn2b),
.fault(ic2_fault),
.hit(ihit2),
1283,10 → 1313,9
.rst(rst),
.clk(clk),
.nxt(L2_nxt),
.wr(bstate==B_ICacheAck && (ack_i|err_i)),
.xsel(L2_xsel),
.adr(L2_adr),
.cnt(iccnt),
.wr(L2_ld),
.adr(selL2 ? L2_adr: L1_adr),
.cnt(L2_cnt),
.exv_i(exvq),
.i(dat_i),
.err_i(errq),
1861,10 → 1890,15
else
insn0[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic0_fault[1])
else if (insn0a[15:0]==16'h0000)
insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic0_fault[0])
insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
else
case(ic0_fault)
2'd0: ; // no fault, don't alter instruction
2'd1: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK};
2'd2: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
2'd3: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
endcase
end
else begin
insn0 <= {8'h00,`NOP_INSN};
1887,10 → 1921,12
else
insn1[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic1_fault[1])
insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic1_fault[0])
insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
case(ic1_fault)
2'd0: ; // no fault, don't alter instruction
2'd1: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK};
2'd2: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
2'd3: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
endcase
end
else begin
insn1 <= {8'h00,`NOP_INSN};
1911,10 → 1947,12
else
insn2[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK};
end
else if (ic2_fault[1])
insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
else if (ic2_fault[0])
insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
case(ic2_fault)
2'd0: ; // no fault, don't alter instruction
2'd1: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK};
2'd2: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK};
2'd3: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK};
endcase
end
else
insn2 <= `NOP_INSN;
1929,6 → 1967,8
 
reg preload;
reg [1:0] dccnt;
reg [3:0] dcwait = 4'd3;
reg [3:0] dcwait_ctr = 4'd3;
wire dhit0, dhit1, dhit2;
wire dhit0a, dhit1a, dhit2a;
wire dhit00, dhit10, dhit20;
1936,6 → 1976,9
reg [`ABITS] dc_wadr;
reg [63:0] dc_wdat;
reg isStore;
reg [31:0] dcsel;
reg [255:0] dcbuf;
reg dcwr;
 
// If the data is in the write buffer, give the buffer a chance to
// write out the data before trying to load from the cache.
1960,9 → 2003,9
assign dhit2 = dhit2a && !wb_hit2;
wire whit0, whit1, whit2;
 
wire wr_dcache0 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit0);
wire wr_dcache1 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit1);
wire wr_dcache2 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit2);
wire wr_dcache0 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit0);
wire wr_dcache1 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit1);
wire wr_dcache2 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit2);
 
FT64_dcache udc0
(
1970,13 → 2013,13
.wclk(clk),
.dce(dce),
.wr(wr_dcache0),
.sel(sel_o),
.wadr({pcr[7:0],vadr}),
.sel(dcsel),
.wadr({ASID,vadr}),
.whit(whit0),
.i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
.i(dcbuf),
.rclk(clk),
.rdsize(dram0_memsize),
.radr({pcr[7:0],dram0_addr}),
.radr({ASID,dram0_addr}),
.o(dc0_out),
.rhit(dhit0a)
);
1988,13 → 2031,13
.wclk(clk),
.dce(dce),
.wr(wr_dcache1),
.sel(sel_o),
.wadr({pcr[7:0],vadr}),
.sel(dcsel),
.wadr({ASID,vadr}),
.whit(whit1),
.i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
.i(dcbuf),
.rclk(clk),
.rdsize(dram1_memsize),
.radr({pcr[7:0],dram1_addr}),
.radr({ASID,dram1_addr}),
.o(dc1_out),
.rhit(dhit1a)
);
2006,13 → 2049,13
.wclk(clk),
.dce(dce),
.wr(wr_dcache2),
.sel(sel_o),
.wadr({pcr[7:0],vadr}),
.sel(dcsel),
.wadr({ASID,vadr}),
.whit(whit2),
.i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
.i(dcbuf),
.rclk(clk),
.rdsize(dram2_memsize),
.radr({pcr[7:0],dram2_addr}),
.radr({ASID,dram2_addr}),
.o(dc2_out),
.rhit(dhit2a)
);
3036,7 → 3079,7
endcase
else
CacheCmd = 5'd0;
`CACHE: CacheCmd = isn[15:11];
`CACHE: CacheCmd = isn[17:13];
default: CacheCmd = 5'd0;
endcase
endfunction
5305,7 → 5348,7
end
end
 
reg [2:0] wbptr;
reg [2:0] wbptr = 2'd0;
// Stomp logic for branch miss.
/*
FT64_stomp #(QENTRIES) ustmp1
5356,7 → 5399,8
wire id1_clk, id2_clk, id3_clk;
 
// Always at least one decoder
assign id1_clk = clk_i;
BUFH uidclk (.I(clk_i), .O(id1_clk));
//assign id1_clk = clk_i;
//BUFGCE uclkb2
//(
// .I(clk_i),
5516,8 → 5560,8
`CSR_BADINSTR: csr_r <= bad_instr[{alu0_thrd,csrno[11:10]}];
`CSR_CAUSE: csr_r <= {48'd0,cause[{alu0_thrd,csrno[11:10]}]};
`ifdef SUPPORT_SMT
`CSR_ODL_STACK: csr_r <= {16'h0,dl_stack[alu0_thrd],16'h0,ol_stack[alu0_thrd]};
`CSR_IM_STACK: csr_r <= im_stack[alu0_thrd];
`CSR_OL_STACK: csr_r <= {dl_stack[alu0_thrd],ol_stack[alu0_thrd]};
`CSR_PL_STACK: csr_r <= pl_stack[alu0_thrd];
`CSR_RS_STACK: csr_r <= rs_stack[alu0_thrd];
`CSR_STATUS: csr_r <= mstatus[alu0_thrd][63:0];
5531,8 → 5575,8
`CSR_EPC6: csr_r <= epc6[alu0_thrd];
`CSR_EPC7: csr_r <= epc7[alu0_thrd];
`else
`CSR_ODL_STACK: csr_r <= {16'h0,dl_stack,16'h0,ol_stack};
`CSR_IM_STACK: csr_r <= im_stack;
`CSR_OL_STACK: csr_r <= {dl_stack,ol_stack};
`CSR_PL_STACK: csr_r <= pl_stack;
`CSR_RS_STACK: csr_r <= rs_stack;
`CSR_STATUS: csr_r <= mstatus[63:0];
5601,16 → 5645,18
alu1_xs <= 64'd0;
`endif
 
wire alu_clk = clk;
//BUFH uclka (.I(clk), .O(alu_clk));
 
//always @*
// read_csr(alu0_instr[29:18],csr_r,alu0_thrd);
FT64_alu #(.BIG(1'b1),.SUP_VECTOR(SUP_VECTOR)) ualu0 (
.rst(rst),
.clk(clk),
.clk(alu_clk),
.ld(alu0_ld),
.abort(alu0_abort),
.instr(alu0_instr),
.sz(alu0_sz),
.tlb(alu0_tlb),
.store(alu0_store),
.a(alu0_argA),
.b(alu0_argB),
5630,22 → 5676,9
.thrd(alu0_thrd),
.mem(alu0_mem),
.shift(alu0_shft), // 48 bit shift inst.
.ol(ol),
.ASID(ASID),
.icl_i(icl_o),
.cyc_i(cyc),
.we_i(we),
.vadr_i(vadr),
.cyc_o(cyc_o),
.we_o(we_o),
.padr_o(adr_o),
.uncached(),
.tlb_miss(tlb_miss),
.exv_o(exv_i),
.wrv_o(wrv_i),
.rdv_o(rdv_i)
.ol(ol)
`ifdef SUPPORT_BBMS
.pb(dl==2'b00 ? 64'd0 : pb),
, .pb(dl==2'b00 ? 64'd0 : pb),
.cbl(cbl),
.cbu(cbu),
.ro(ro),
5665,7 → 5698,6
.abort(alu1_abort),
.instr(alu1_instr),
.sz(alu1_sz),
.tlb(1'b0),
.store(alu1_store),
.a(alu1_argA),
.b(alu1_argB),
5685,21 → 5717,9
.thrd(1'b0),
.mem(alu1_mem),
.shift(alu1_shft),
.ol(2'b0),
.ASID(8'h0),
.cyc_i(1'b0),
.we_i(1'b0),
.vadr_i(64'd0),
.cyc_o(),
.we_o(),
.padr_o(),
.uncached(),
.tlb_miss(),
.exv_o(),
.wrv_o(),
.rdv_o()
.ol(2'b0)
`ifdef SUPPORT_BBMS
.pb(dl==2'b00 ? 64'd0 : pb),
, .pb(dl==2'b00 ? 64'd0 : pb),
.cbl(cbl),
.cbu(cbu),
.ro(ro),
5714,6 → 5734,51
end
endgenerate
 
// TLB interface.
// tlb_done is ANDed with alu0_done to qualify ALU0 completion, and tlbo is
// placed on ralu0_bus when alu0_tlb is set.
wire tlb_done;
wire tlb_idle;
wire [63:0] tlbo;
wire uncached;
`ifdef SUPPORT_TLB
// TLB instance: takes the core's virtual address / cycle / write-enable
// (vadr, cyc, we) and drives the external adr_o / cyc_o / we_o outputs.
FT64_TLB utlb1 (
.clk(clk),
// Start a TLB operation only when ALU0 is loaded and alu0_tlb is set
// (alu0_tlb presumably flags a TLB instruction - TODO confirm).
.ld(alu0_ld & alu0_tlb),
.done(tlb_done),
.idle(tlb_idle),
.ol(ol),
.ASID(ASID),
// Operation and register number are taken from fields of the ALU0 instruction.
.op(alu0_instr[25:22]),
.regno(alu0_instr[21:18]),
.dati(alu0_argA),
.dato(tlbo),
.uncached(uncached),
.icl_i(icl_o),
.cyc_i(cyc),
.we_i(we),
.vadr_i(vadr),
.cyc_o(cyc_o),
.we_o(we_o),
.padr_o(adr_o),
.TLBMiss(tlb_miss),
.wrv_o(wrv_o),
.rdv_o(rdv_o),
.exv_o(exv_o),
.HTLBVirtPageo()
);
`else
// No TLB configured: report the TLB as always done and idle, pass the address,
// cycle and write-enable straight through, and never signal a miss or a
// write/read/execute violation.
assign tlb_done = 1'b1;
assign tlb_idle = 1'b1;
// Recognizable dummy value returned in place of TLB read data.
assign tlbo = 64'hDEADDEADDEADDEAD;
assign uncached = 1'b0;
assign adr_o = vadr;
assign cyc_o = cyc;
assign we_o = we;
assign tlb_miss = 1'b0;
assign wrv_o = 1'b0;
assign rdv_o = 1'b0;
assign exv_o = 1'b0;
assign exv_i = 1'b0; // for now
`endif
 
always @*
begin
alu0_cmt <= 1'b1;
6264,12 → 6329,12
// branchmiss_thrd <= excmiss ? excthrd : fcu_thrd;
//end
wire alu0_done_pe, alu1_done_pe, pe_wait;
edge_det uedalu0d (.clk(clk), .ce(1'b1), .i(alu0_done), .pe(alu0_done_pe), .ne(), .ee());
edge_det uedalu0d (.clk(clk), .ce(1'b1), .i(alu0_done&tlb_done), .pe(alu0_done_pe), .ne(), .ee());
edge_det uedalu1d (.clk(clk), .ce(1'b1), .i(alu1_done), .pe(alu1_done_pe), .ne(), .ee());
edge_det uedwait1 (.clk(clk), .ce(1'b1), .i((waitctr==48'd1) || signal_i[fcu_argA[4:0]|fcu_argI[4:0]]), .pe(pe_wait), .ne(), .ee());
 
// Bus randomization to mitigate meltdown attacks
wire [63:0] ralu0_bus = |alu0_exc ? {4{lfsro}} : alu0_bus;
wire [63:0] ralu0_bus = |alu0_exc ? {4{lfsro}} : alu0_tlb ? tlbo : alu0_bus;
wire [63:0] ralu1_bus = |alu1_exc ? {4{lfsro}} : alu1_bus;
wire [63:0] rfpu1_bus = |fpu1_exc ? {4{lfsro}} : fpu1_bus;
wire [63:0] rfpu2_bus = |fpu2_exc ? {4{lfsro}} : fpu2_bus;
6278,19 → 6343,30
wire [63:0] rdramB_bus = dramB_bus;
wire [63:0] rdramC_bus = dramC_bus;
 
// Hold the core in reset until rst_ctr reaches 10, i.e. for ten clock cycles after rst is released
reg [31:0] rst_ctr;
always @(posedge clk)
if (rst) begin
if (rst)
rst_ctr <= 32'd0;
else begin
if (rst_ctr < 32'd10)
rst_ctr <= rst_ctr + 24'd1;
end
 
always @(posedge clk)
if (rst|(rst_ctr < 32'd10)) begin
`ifdef SUPPORT_SMT
mstatus[0] <= 64'h4000F; // select register set #16 for thread 0
mstatus[1] <= 64'h4800F; // select register set #18 for thread 1
rs_stack[0] <= 64'd16;
brs_stack[0] <= 64'd16;
rs_stack[1] <= 64'd18;
brs_stack[1] <= 64'd18;
mstatus[0] <= 64'h4000F; // select register set #16 for thread 0
mstatus[1] <= 64'h4800F; // select register set #18 for thread 1
rs_stack[0] <= 64'd16;
brs_stack[0] <= 64'd16;
rs_stack[1] <= 64'd18;
brs_stack[1] <= 64'd18;
`else
mstatus <= 64'h4000F; // select register set #16 for thread 0
rs_stack <= 64'd16;
brs_stack <= 64'd16;
im_stack <= 32'hFFFFFFFF;
mstatus <= 64'h4000F; // select register set #16 for thread 0
rs_stack <= 64'd16;
brs_stack <= 64'd16;
`endif
for (n = 0; n < QENTRIES; n = n + 1) begin
iqentry_state[n] <= IQS_INVALID;
6363,9 → 6439,11
dram0_id <= 1'b0;
dram1_id <= 1'b0;
dram2_id <= 1'b0;
L1_adr <= RSTPC;
L2_adr <= RSTPC;
dram0_store <= 1'b0;
dram1_store <= 1'b0;
dram2_store <= 1'b0;
invic <= FALSE;
invicl <= FALSE;
tail0 <= 3'd0;
tail1 <= 3'd1;
for (n = 0; n < QENTRIES; n = n + 1)
6408,7 → 6486,6
dramC_v <= 0;
I <= 0;
CC <= 0;
icstate <= IDLE;
bstate <= BIDLE;
tick <= 64'd0;
ol_o <= 2'b0;
6422,8 → 6499,6
sr_o <= `LOW;
cr_o <= `LOW;
vadr <= RSTPC;
icl_o <= `LOW; // instruction cache load
L1_dati <= 306'd0;
cr0 <= 64'd0;
cr0[13:8] <= 6'd0; // select compressed instruction group #0
cr0[30] <= TRUE; // enable data caching
6475,7 → 6550,7
wb_id[n] <= {QENTRIES{1'b0}};
wb_ol[n] <= 2'b00;
wb_sel[n] <= 8'h00;
wb_addr[n] <= 32'd0;
wb_addr[n] <= 64'd0;
wb_data[n] <= 64'd0;
end
wb_en <= `TRUE;
6579,6 → 6654,8
nop_fetchbuf <= 4'h0;
excmiss <= FALSE;
invic <= FALSE;
if (L1_invline)
invicl <= FALSE;
tick <= tick + 64'd1;
alu0_ld <= FALSE;
alu1_ld <= FALSE;
6993,7 → 7070,7
iqentry_tgt [ alu0_id[`QBITS] ] <= alu0_tgt;
iqentry_res [ alu0_id[`QBITS] ] <= ralu0_bus;
iqentry_exc [ alu0_id[`QBITS] ] <= alu0_exc;
if (!iqentry_mem[ alu0_id[`QBITS] ] && alu0_done) begin
if (!iqentry_mem[ alu0_id[`QBITS] ] && alu0_done && tlb_done) begin
// iqentry_done[ alu0_id[`QBITS] ] <= `TRUE;
iqentry_state[alu0_id[`QBITS]] <= IQS_CMT;
end
7746,145 → 7823,60
 
 
rf_source[0] <= 0;
L1_wr0 <= FALSE;
L1_wr1 <= FALSE;
L1_wr2 <= FALSE;
L1_en <= 10'h000;
L1_invline <= FALSE;
icnxt <= FALSE;
L2_nxt <= FALSE;
// Instruction cache state machine.
// On a miss first see if the instruction is in the L2 cache. No need to go to
// the BIU on an L1 miss.
// If not the machine will wait until the BIU loads the L2 cache.
 
// Capture the previous ic state, used to determine how long to wait in
// icstate #4.
picstate <= icstate;
case(icstate)
IDLE:
// If the bus unit is busy doing an update involving L1_adr or L2_adr
// we have to wait.
if (bstate != B_ICacheAck && bstate != B_ICacheNack && bstate != B_ICacheNack2) begin
if (!ihit0) begin
L1_adr <= {pcr[7:0],pc0[AMSB:5],5'h0};
L2_adr <= {pcr[7:0],pc0[AMSB:5],5'h0};
L1_invline <= TRUE;
icwhich <= 2'b00;
iccnt <= 3'b00;
icstate <= IC2;
end
else if (!ihit1 && `WAYS > 1) begin
if (thread_en) begin
L1_adr <= {pcr[7:0],pc1[AMSB:5],5'h0};
L2_adr <= {pcr[7:0],pc1[AMSB:5],5'h0};
end
else begin
L1_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0};
L2_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0};
end
L1_invline <= TRUE;
icwhich <= 2'b01;
iccnt <= 3'b00;
icstate <= IC2;
end
else if (!ihit2 && `WAYS > 2) begin
if (thread_en) begin
L1_adr <= {pcr[7:0],pc2[AMSB:5],5'h0};
L2_adr <= {pcr[7:0],pc2[AMSB:5],5'h0};
end
else begin
L1_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0};
L2_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0};
end
L1_invline <= TRUE;
icwhich <= 2'b10;
iccnt <= 3'b00;
icstate <= IC2;
end
end
IC2: icstate <= IC3;
IC3: icstate <= IC3a;
IC3a: icstate <= IC_WaitL2;
// If data was in the L2 cache already there's no need to wait on the
// BIU to retrieve data. It can be determined if the hit signal was
// already active when this state was entered in which case waiting
// will do no good.
// The IC machine will stall in this state until the BIU has loaded the
// L2 cache.
IC_WaitL2:
if (ihitL2 && picstate==IC3a) begin
L1_en <= 10'h3FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
// L1_adr <= L2_adr;
// L1_dati is loaded during an L2 icache load operation
// if (picstate==IC3a)
L1_dati <= L2_dato;
icstate <= IC5;
end
else if (bstate!=B_ICacheNack)
;
else begin
L1_en <= 10'h3FF;
L1_wr0 <= TRUE;
L1_wr1 <= TRUE && `WAYS > 1;
L1_wr2 <= TRUE && `WAYS > 2;
// L1_adr <= L2_adr;
// L1_dati set below while loading cache line
//L1_dati <= L2_dato;
icstate <= IC5;
end
 
IC5: icstate <= IC6;
IC6: icstate <= IC7;
IC7: icstate <= IC_Next;
IC_Next:
begin
icstate <= IDLE;
icnxt <= TRUE;
end
default: icstate <= IDLE;
endcase
 
// A store will never be stomped on because they aren't issued until it's
// guaranteed there will be no change of flow.
// A load or other long running instruction might be stomped on by a change
// of program flow. Stomped on loads already in progress can be aborted early.
// In the case of an aborted load, random data is returned and any exceptions
// are nullified.
if (dram0_load)
case(dram0)
`DRAMSLOT_AVAIL: ;
`DRAMSLOT_BUSY:
// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
dram0 <= dram0 + !dram0_unc;
// else begin
// dram0 <= `DRAMSLOT_AVAIL;
// dram0_load <= `FALSE;
// end
3'd2:
// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
else begin
dram0 <= `DRAMREQ_READY;
dram0_load <= `FALSE;
xdati[63:0] <= {4{lfsro}};
end
3'd2,3'd3:
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
dram0 <= dram0 + 3'd1;
// else begin
// dram0 <= `DRAMSLOT_AVAIL;
// dram0_load <= `FALSE;
// end
3'd3:
// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
dram0 <= dram0 + 3'd1;
// else begin
// dram0 <= `DRAMSLOT_AVAIL;
// dram0_load <= `FALSE;
// end
else begin
dram0 <= `DRAMREQ_READY;
dram0_load <= `FALSE;
xdati[63:0] <= {4{lfsro}};
end
3'd4:
// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin
if (dhit0)
dram0 <= `DRAMREQ_READY;
else
dram0 <= `DRAMSLOT_REQBUS;
// end
// else begin
// dram0 <= `DRAMSLOT_AVAIL;
// dram0_load <= `FALSE;
// end
`DRAMSLOT_REQBUS: ;
`DRAMSLOT_HASBUS: ;
end
else begin
dram0 <= `DRAMREQ_READY;
dram0_load <= `FALSE;
xdati[63:0] <= {4{lfsro}};
end
`DRAMSLOT_REQBUS:
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
;
else begin
dram0 <= `DRAMREQ_READY;
dram0_load <= `FALSE;
xdati[63:0] <= {4{lfsro}};
end
`DRAMSLOT_HASBUS:
if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
;
else begin
dram0 <= `DRAMREQ_READY;
dram0_load <= `FALSE;
xdati[63:0] <= {4{lfsro}};
end
`DRAMREQ_READY: dram0 <= `DRAMSLOT_AVAIL;
endcase
 
7923,16 → 7915,10
3'd3:
dram2 <= dram2 + 3'd1;
3'd4:
// if (iqentry_v[dram2_id[`QBITS]] && !iqentry_stomp[dram2_id[`QBITS]]) begin
if (dhit2)
dram2 <= `DRAMREQ_READY;
else
dram2 <= `DRAMSLOT_REQBUS;
// end
/* else begin
dram2 <= `DRAMSLOT_AVAIL;
dram2_load <= `FALSE;
end*/
`DRAMSLOT_REQBUS: ;
`DRAMSLOT_HASBUS: ;
`DRAMREQ_READY: dram2 <= `DRAMSLOT_AVAIL;
7975,10 → 7961,10
fnSelect(dram0_instr,dram0_addr),
dram0_ol,
dram0_addr,
fnDato(dram0_instr,dram0_data)
fnDato(dram0_instr,dram0_data),
wbptr
);
// iqentry_done[ dram0_id[`QBITS] ] <= `VAL;
// iqentry_out[ dram0_id[`QBITS] ] <= `INV;
wbptr <= wbptr + 2'd1;
iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE;
end
end
7992,8 → 7978,10
fnSelect(dram1_instr,dram1_addr),
dram1_ol,
dram1_addr,
fnDato(dram1_instr,dram1_data)
fnDato(dram1_instr,dram1_data),
wbptr
);
wbptr <= wbptr + 2'd1;
iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE;
end
end
8007,8 → 7995,10
fnSelect(dram2_instr,dram2_addr),
dram2_ol,
dram2_addr,
fnDato(dram2_instr,dram2_data)
fnDato(dram2_instr,dram2_data),
wbptr
);
wbptr <= wbptr + 2'd1;
iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE;
end
end
8035,10 → 8025,12
sel_o <= wb_sel[0];
vadr <= wb_addr[0];
dat_o <= wb_data[0];
dcbuf <= {4{wb_data[0]}};
dcsel <= wb_sel[0] << {wb_addr[0][4:3],3'b0};
ol_o <= wb_ol[0];
wbo_id <= wb_id[0];
isStore <= TRUE;
bstate <= wb_rmw[0] ? B12 : B_StoreAck;
bstate <= wb_rmw[0] ? B_RMWAck : B_StoreAck;
wb_v[0] <= `INV;
end
if (wb_v[0]==`INV && !writing_wb) begin
8058,7 → 8050,8
end
 
`endif
if (~|wb_v && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
if (~|wb_v && dram0==`DRAMSLOT_BUSY && dram0_rmw
&& !iqentry_stomp[dram0_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_smatch0|dbg_lmatch0) begin
dramA_v <= `TRUE;
8080,13 → 8073,16
cyc <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram0_instr,dram0_addr);
dcbuf <= {4{fnDato(dram0_instr,dram0_data)}};
dcsel <= fnSelect(dram0_instr,dram0_addr) << {dram0_addr[4:3],3'b0};
vadr <= dram0_addr;
dat_o <= fnDato(dram0_instr,dram0_data);
ol_o <= dram0_ol;
bstate <= B12;
bstate <= B_RMWAck;
end
end
else if (~|wb_v && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin
else if (~|wb_v && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1
&& !iqentry_stomp[dram1_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_smatch1|dbg_lmatch1) begin
dramB_v <= `TRUE;
8111,10 → 8107,13
vadr <= dram1_addr;
dat_o <= fnDato(dram1_instr,dram1_data);
ol_o <= dram1_ol;
bstate <= B12;
dcbuf <= {4{fnDato(dram1_instr,dram1_data)}};
dcsel <= fnSelect(dram1_instr,dram1_addr) << {dram1_addr[4:3],3'b0};
bstate <= B_RMWAck;
end
end
else if (~|wb_v && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin
else if (~|wb_v && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2
&& !iqentry_stomp[dram2_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_smatch2|dbg_lmatch2) begin
dramC_v <= `TRUE;
8139,7 → 8138,9
vadr <= dram2_addr;
dat_o <= fnDato(dram2_instr,dram2_data);
ol_o <= dram2_ol;
bstate <= B12;
dcbuf <= {4{fnDato(dram2_instr,dram2_data)}};
dcsel <= fnSelect(dram2_instr,dram2_addr) << {dram2_addr[4:3],3'b0};
bstate <= B_RMWAck;
end
end
`ifndef HAS_WB
8169,6 → 8170,8
ol_o <= dram0_ol;
isStore <= TRUE;
bstate <= B_StoreAck;
dcbuf <= {4{fnDato(dram0_instr,dram0_data)}};
dcsel <= fnSelect(dram0_instr,dram0_addr) << {dram0_addr[4:3],3'b0};
end
// cr_o <= IsSWC(dram0_instr);
end
8197,6 → 8200,8
dat_o <= fnDato(dram1_instr,dram1_data);
ol_o <= dram1_ol;
isStore <= TRUE;
dcbuf <= {4{fnDato(dram1_instr,dram1_data)}};
dcsel <= fnSelect(dram1_instr,dram1_addr) << {dram1_addr[4:3],3'b0};
bstate <= B_StoreAck;
end
// cr_o <= IsSWC(dram0_instr);
8226,6 → 8231,8
dat_o <= fnDato(dram2_instr,dram2_data);
ol_o <= dram2_ol;
isStore <= TRUE;
dcbuf <= {4{fnDato(dram2_instr,dram2_data)}};
dcsel <= fnSelect(dram2_instr,dram2_addr) << {dram2_addr[4:3],3'b0};
bstate <= B_StoreAck;
end
// cr_o <= IsSWC(dram0_instr);
8233,7 → 8240,8
end
`endif
// Check for read misses on the data cache
else if (~|wb_v && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
else if (~|wb_v && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load
&& !iqentry_stomp[dram0_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
8251,7 → 8259,8
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
else if (~|wb_v && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1
&& !iqentry_stomp[dram1_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
8269,7 → 8278,8
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
else if (~|wb_v && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2
&& !iqentry_stomp[dram2_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
8287,7 → 8297,8
bstate <= B_DCacheLoadStart;
end
end
else if (~|wb_v && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
else if (~|wb_v && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load
&& !iqentry_stomp[dram0_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch0) begin
dramA_v <= `TRUE;
8300,6 → 8311,7
`endif
if (!acki) begin
bwhich <= 2'b00;
dram0 <= `DRAMSLOT_HASBUS;
cyc <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram0_instr,dram0_addr);
8310,7 → 8322,8
bstate <= B_DLoadAck;
end
end
else if (~|wb_v && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin
else if (~|wb_v && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1
&& !iqentry_stomp[dram1_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch1) begin
dramB_v <= `TRUE;
8323,6 → 8336,7
`endif
if (!acki) begin
bwhich <= 2'b01;
dram1 <= `DRAMSLOT_HASBUS;
cyc <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram1_instr,dram1_addr);
8333,7 → 8347,8
bstate <= B_DLoadAck;
end
end
else if (~|wb_v && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin
else if (~|wb_v && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2
&& !iqentry_stomp[dram2_id[`QBITS]]) begin
`ifdef SUPPORT_DBG
if (dbg_lmatch2) begin
dramC_v <= `TRUE;
8346,6 → 8361,7
`endif
if (!acki) begin
bwhich <= 2'b10;
dram2 <= `DRAMSLOT_HASBUS;
cyc <= `HIGH;
stb_o <= `HIGH;
sel_o <= fnSelect(dram2_instr,dram2_addr);
8359,6 → 8375,8
// Check for L2 cache miss
else if (~|wb_v && !ihitL2 && !acki)
begin
bstate <= B_WaitIC;
/*
cti_o <= 3'b001;
bte_o <= 2'b00;//2'b01; // 4 beat burst wrap
cyc <= `HIGH;
8369,16 → 8387,30
icack <= 1'b0;
// adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
// L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
vadr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
vadr <= {L1_adr[AMSB:5],5'h0};
`ifdef SUPPORT_SMT
`else
ol_o <= ol;//???
`endif
L2_adr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
L2_adr <= {L1_adr[AMSB:5],5'h0};
L2_xsel <= 1'b0;
selL2 <= TRUE;
bstate <= B_ICacheAck;
*/
end
end
// Bus state entered from BIDLE on an L2 instruction cache miss.
// While in this state the bus outputs mirror the signals generated by the
// instruction cache controller (icti, ibte, icyc, istb, isel, iadr) and the
// write enable is held low.
B_WaitIC:
begin
cti_o <= icti;
bte_o <= ibte;
cyc <= icyc;
stb_o <= istb;
sel_o <= isel;
vadr <= iadr;
we <= 1'b0;
// Return to idle once the controller asserts L2_nxt.
// NOTE(review): presumably L2_nxt marks the end of the L2 line load - confirm in FT64_ICController.
if (L2_nxt)
bstate <= BIDLE;
end
 
// Terminal state for a store operation.
// Note that if only a single memory channel is selected, bwhich will be a
8451,25 → 8483,63
// Select should be selecting all byte lanes for a cache load
sel_o <= 8'hFF;
// bwhich should always be one of the three channels.
// If single bit upset, continue to select channel zero when
// there's only one available.
case(bwhich)
2'd0: begin
vadr <= {dram0_addr[AMSB:5],5'b0};
ol_o <= dram0_ol;
end
2'd1: if (`NUM_MEM > 1) begin
vadr <= {dram1_addr[AMSB:5],5'b0};
ol_o <= dram1_ol;
if (iqentry_stomp[dram1_id[`QBITS]]) begin
wb_nack();
dram1 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
else begin
vadr <= {dram0_addr[AMSB:5],5'b0};
ol_o <= dram0_ol;
if (iqentry_stomp[dram0_id[`QBITS]]) begin
wb_nack();
dram0 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
2'd2: if (`NUM_MEM > 2) begin
vadr <= {dram2_addr[AMSB:5],5'b0};
ol_o <= dram2_ol;
if (iqentry_stomp[dram2_id[`QBITS]]) begin
wb_nack();
dram2 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
else if (`NUM_MEM > 1) begin
vadr <= {dram1_addr[AMSB:5],5'b0};
ol_o <= dram1_ol;
if (iqentry_stomp[dram1_id[`QBITS]]) begin
wb_nack();
dram1 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
else begin
vadr <= {dram0_addr[AMSB:5],5'b0};
ol_o <= dram0_ol;
if (iqentry_stomp[dram0_id[`QBITS]]) begin
wb_nack();
dram0 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
default:
begin
$display("Invalid memory channel selection");
$stop;
wb_nack();
bstate <= BIDLE;
vadr <= {dram0_addr[AMSB:5],5'b0};
ol_o <= dram0_ol;
if (iqentry_stomp[dram0_id[`QBITS]]) begin
wb_nack();
dram0 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
end
endcase
end
8476,172 → 8546,189
 
// Data cache load terminal state
B_DCacheLoadAck:
if (acki|err_i|tlb_miss|rdv_i) begin
if (!bok_i) begin
stb_o <= `LOW;
bstate <= B_DCacheLoadStb;
end
errq <= errq | err_i;
rdvq <= rdvq | rdv_i;
if (!preload) // A preload instruction ignores any error
case(bwhich)
2'd0: iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd1: iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd2: iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
default: ;
endcase
dccnt <= dccnt + 2'd1;
vadr[4:3] <= vadr[4:3] + 2'd1;
if (dccnt==2'd2)
cti_o <= 3'b111;
if (dccnt==2'd3) begin
wb_nack();
bstate <= B_DCacheLoadWait1;
end
end
begin
dcsel <= 32'hFFFFFFFF;
if (acki|err_i|tlb_miss|rdv_i) begin
if (!bok_i) begin
stb_o <= `LOW;
bstate <= B_DCacheLoadStb;
end
errq <= errq | err_i;
rdvq <= rdvq | rdv_i;
if (!preload) // A preload instruction ignores any error
if (dccnt==3'd3)
case(bwhich)
2'd0:
if (iqentry_stomp[dram0_id[`QBITS]])
iqentry_exc[dram0_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd1:
if (iqentry_stomp[dram1_id[`QBITS]])
iqentry_exc[dram1_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
2'd2:
if (iqentry_stomp[dram2_id[`QBITS]])
iqentry_exc[dram2_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
default:
if (iqentry_stomp[dram0_id[`QBITS]])
iqentry_exc[dram0_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE;
endcase
case(dccnt)
2'd0: dcbuf[63:0] <= dat_i;
2'd1: dcbuf[127:64] <= dat_i;
2'd2: dcbuf[191:128] <= dat_i;
2'd3: dcbuf[255:192] <= dat_i;
endcase
dccnt <= dccnt + 2'd1;
vadr[4:3] <= vadr[4:3] + 2'd1;
if (dccnt==2'd2)
cti_o <= 3'b111;
if (dccnt==2'd3) begin
wb_nack();
dcwr <= 1'b1;
dcwait_ctr <= dcwait;
bstate <= B_DCacheLoadWait;
end
end
end
B_DCacheLoadStb:
begin
stb_o <= `HIGH;
bstate <= B_DCacheLoadAck;
case(bwhich)
2'd0:
if (iqentry_stomp[dram0_id[`QBITS]]) begin
wb_nack();
dram0 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
2'd1:
if (iqentry_stomp[dram1_id[`QBITS]]) begin
wb_nack();
dram1 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
2'd2:
if (iqentry_stomp[dram2_id[`QBITS]]) begin
wb_nack();
dram2 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
default:
if (iqentry_stomp[dram0_id[`QBITS]]) begin
wb_nack();
dram0 <= `DRAMREQ_READY;
bstate <= BIDLE;
end
endcase
end
// Fixed two-step delay chain: Wait1 -> Wait2 -> B_DCacheLoadResetBusy.
B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2;
B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy;
//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy;
// Counted wait entered after the last beat of a data cache line load (the
// ack state sets dcwr and loads dcwait_ctr from dcwait before coming here).
B_DCacheLoadWait:
begin
// Remove the cache write select and write enable.
dcsel <= 32'h0;
dcwr <= 1'b0;
// Count down one per clock while in this state.
dcwait_ctr <= dcwait_ctr - 4'd1;
// Bit 3 of the counter is used as the "wrapped below zero" indicator.
// NOTE(review): this presumes dcwait is always less than 8, otherwise bit 3
// is already set on entry - confirm against where dcwait is defined.
if (dcwait_ctr[3]) // detect underflow
bstate <= B_DCacheLoadResetBusy;
end
// There could be more than one memory cycle active. We reset the state
// of all the machines to retest for a hit because otherwise sequential
// of the other machines to retest for a hit because otherwise sequential
// loading of memory will cause successive machines to miss resulting in
// multiple dcache loads that aren't needed.
// Last state of a data cache line load. The channel that requested the load
// (selected by bwhich) is set to `DRAMREQ_READY, every other channel that is
// in use and addresses the cache line just loaded is put back to
// `DRAMSLOT_BUSY, and the bus state machine returns to BIDLE.
B_DCacheLoadResetBusy:
begin
// A channel that is not `DRAMSLOT_AVAIL and whose address matches vadr in
// bits AMSB:5 (the same line) is returned to `DRAMSLOT_BUSY.
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
// The `DRAMREQ_READY assignments below are made later in the same block, so
// for the requesting channel they override the `DRAMSLOT_BUSY assignment
// above (the last nonblocking assignment to a register wins).
if (`NUM_MEM > 1)
case(bwhich)
// Channel 1 made the request.
2'b01:
begin
dram1 <= `DRAMREQ_READY;
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
end
// Channel 2 made the request; when the core is configured with fewer than
// three channels this value of bwhich is handled as channel 0.
2'b10:
if (`NUM_MEM > 2) begin
dram2 <= `DRAMREQ_READY;
if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
end
else begin
dram0 <= `DRAMREQ_READY;
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
end
// Channel 0 (or any other value of bwhich) made the request.
default:
begin
dram0 <= `DRAMREQ_READY;
if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
end
endcase
// Single channel configuration: only channel 0 can have made the request.
else begin
dram0 <= `DRAMREQ_READY;
end
bstate <= BIDLE;
end
 
// Ack state for instruction cache load
// Once the first ack is received in burst mode, further acks are not necessary
// as the core counts the number of data items. Occasionally missing acks were
// causing a problem.
B_ICacheAck:
if (acki|err_i|tlb_miss|exv_i|icack) begin
if (!bok_i) begin
stb_o <= `LOW;
bstate <= B_ICacheNack2;
end
else
icack <= 1'b1;
errq <= errq | err_i;
exvq <= exvq | exv_i;
if (tlb_miss) begin
L1_dati <= {19{`INSN_FLT_TLB}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
B_RMWAck:
if (acki|err_i|tlb_miss|rdv_i) begin
if (isCAS) begin
iqentry_res [ casid[`QBITS] ] <= (dat_i == cas);
iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
// iqentry_done[ casid[`QBITS] ] <= `VAL;
// iqentry_out [ casid[`QBITS] ] <= `INV;
iqentry_state [ casid[`QBITS] ] <= IQS_DONE;
iqentry_instr[ casid[`QBITS]] <= `NOP_INSN;
if (err_i | rdv_i)
iqentry_ma[casid[`QBITS]] <= vadr;
if (dat_i == cas) begin
stb_o <= `LOW;
we <= `TRUE;
bstate <= B15;
check_abort_load();
end
else begin
cas <= dat_i;
cyc <= `LOW;
stb_o <= `LOW;
case(bwhich)
2'b00: dram0 <= `DRAMREQ_READY;
2'b01: dram1 <= `DRAMREQ_READY;
2'b10: dram2 <= `DRAMREQ_READY;
default: ;
endcase
bstate <= B_LSNAck;
check_abort_load();
end
end
else if (exv_i) begin
L1_dati <= {19{`INSN_FLT_EXF}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
else if (isRMW) begin
rmw_instr <= iqentry_instr[casid[`QBITS]];
rmw_argA <= dat_i;
if (isSpt) begin
rmw_argB <= 64'd1 << iqentry_a1[casid[`QBITS]][63:58];
rmw_argC <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
iqentry_a3[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58] :
iqentry_a2[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58];
end
else if (isInc) begin
rmw_argB <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ? {{59{iqentry_instr[casid[`QBITS]][22]}},iqentry_instr[casid[`QBITS]][22:18]} :
{{59{iqentry_instr[casid[`QBITS]][17]}},iqentry_instr[casid[`QBITS]][17:13]};
end
else begin // isAMO
iqentry_res [ casid[`QBITS] ] <= dat_i;
rmw_argB <= iqentry_instr[casid[`QBITS]][31] ? {{59{iqentry_instr[casid[`QBITS]][20:16]}},iqentry_instr[casid[`QBITS]][20:16]} : iqentry_a2[casid[`QBITS]];
end
iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
stb_o <= `LOW;
bstate <= B20;
check_abort_load();
end
else if (err_i) begin
L1_dati <= {19{`INSN_FLT_IBE}};
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
end
else
case(iccnt)
3'd0: L1_dati[63:0] <= dat_i;
3'd1: L1_dati[127:64] <= dat_i;
3'd2: L1_dati[191:128] <= dat_i;
3'd3: L1_dati[255:192] <= dat_i;
3'd4: L1_dati[305:256] <= {2'b00,dat_i[47:0]};
default: L1_dati <= L1_dati;
endcase
iccnt <= iccnt + 3'd1;
if (iccnt==3'd3)
cti_o <= 3'b111;
if (iccnt==3'd4) begin
wb_nack();
icl_o <= `LOW;
bstate <= B_ICacheNack;
end
else begin
L2_adr[4:3] <= L2_adr[4:3] + 2'd1;
if (L2_adr[4:3]==2'b11)
L2_xsel <= 1'b1;
end
end
// Between-beat state of the instruction cache load, entered from B_ICacheAck
// with the strobe low when bok_i is not asserted. Once acki has gone low the
// strobe is raised again, vadr is advanced by one in bits AMSB:3 (the next
// 64-bit word) and the machine returns to B_ICacheAck.
B_ICacheNack2:
if (~acki) begin
stb_o <= `HIGH;
vadr[AMSB:3] <= vadr[AMSB:3] + 2'd1;
bstate <= B_ICacheAck;
end
// Final state of the instruction cache load. L2_xsel is cleared on every
// clock spent here; once acki is low the load counter icl_ctr is incremented,
// L2_nxt is set and the bus state machine returns to BIDLE.
B_ICacheNack:
begin
L2_xsel <= 1'b0;
if (~acki) begin
icl_ctr <= icl_ctr + 40'd1;
bstate <= BIDLE;
L2_nxt <= TRUE;
// NOTE(review): 32'hCCCCCCC8 looks like an easily recognized filler value
// for the address while the bus is idle - confirm the intent.
vadr <= 32'hCCCCCCC8;
end
end
 
// Acknowledge state for the read half of a compare-and-swap (isCAS) or other
// read-modify-write (isRMW) memory operation. The state waits for acki,
// err_i, tlb_miss or rdv_i.
//
// isCAS: the queue entry casid receives the comparison result and the fault
//        code and is marked IQS_DONE. If the value read equals cas the write
//        half follows (B15); otherwise cas is loaded with the value read,
//        the bus cycle is ended and the channel selected by bwhich is
//        released (B_LSNAck).
// isRMW: the value read and the second operand are latched into the rmw_*
//        registers, then the machine continues with B20.
B12:
	if (acki|err_i|tlb_miss|rdv_i) begin
		if (isCAS) begin
			iqentry_res [ casid[`QBITS] ] <= (dat_i == cas);
			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
//			iqentry_done[ casid[`QBITS] ] <= `VAL;
//			iqentry_out [ casid[`QBITS] ] <= `INV;
			iqentry_state [ casid[`QBITS] ] <= IQS_DONE;
			iqentry_instr[ casid[`QBITS]] <= `NOP_INSN;
			// Record the faulting address for the exception handler.
			if (err_i | rdv_i) iqentry_ma[casid[`QBITS]] <= vadr;
			if (dat_i == cas) begin
				// Match: keep the cycle open and perform the write.
				stb_o <= `LOW;
				we <= `TRUE;
				bstate <= B15;
			end
			else begin
				// Mismatch: capture the value read and end the cycle.
				cas <= dat_i;
				cyc <= `LOW;
				stb_o <= `LOW;
				case(bwhich)
				2'b00:	dram0 <= `DRAMREQ_READY;
				2'b01:	dram1 <= `DRAMREQ_READY;
				2'b10:	dram2 <= `DRAMREQ_READY;
				default:	;
				endcase
				bstate <= B_LSNAck;
			end
		end
		else if (isRMW) begin
			rmw_instr <= iqentry_instr[casid[`QBITS]];
			rmw_argA <= dat_i;
			if (isSpt) begin
				rmw_argB <= 64'd1 << iqentry_a1[casid[`QBITS]][63:58];
				rmw_argC <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
					iqentry_a3[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58] :
					iqentry_a2[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58];
			end
			else if (isInc) begin
				// Five bit immediate, sign extended to 64 bits.
				rmw_argB <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ? {{59{iqentry_instr[casid[`QBITS]][22]}},iqentry_instr[casid[`QBITS]][22:18]} :
					{{59{iqentry_instr[casid[`QBITS]][17]}},iqentry_instr[casid[`QBITS]][17:13]};
			end
			else begin // isAMO
				iqentry_res [ casid[`QBITS] ] <= dat_i;
				// Immediate form (bit 31 set): sign extend the five bit field
				// from its top bit, bit 20. Only that single bit may be
				// replicated; replicating the whole field [20:16] produces a
				// repeating five bit pattern instead of a sign extension.
				rmw_argB <= iqentry_instr[casid[`QBITS]][31] ? {{59{iqentry_instr[casid[`QBITS]][20]}},iqentry_instr[casid[`QBITS]][20:16]} : iqentry_a2[casid[`QBITS]];
			end
			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
			stb_o <= `LOW;
			bstate <= B20;
		end
	end
 
// Regular load
B_DLoadAck:
if (acki|err_i|tlb_miss|rdv_i) begin
8669,43 → 8756,65
end
else
dram0 <= `DRAMREQ_READY;
iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (iqentry_stomp[dram0_id[`QBITS]])
iqentry_exc [dram0_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
end
2'b01: if (`NUM_MEM > 1) begin
dram1 <= `DRAMREQ_READY;
iqentry_exc [ dram1_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (iqentry_stomp[dram1_id[`QBITS]])
iqentry_exc [dram1_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc [ dram1_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
end
2'b10: if (`NUM_MEM > 2) begin
dram2 <= `DRAMREQ_READY;
iqentry_exc [ dram2_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
if (iqentry_stomp[dram2_id[`QBITS]])
iqentry_exc [dram2_id[`QBITS]] <= `FLT_NONE;
else
iqentry_exc [ dram2_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
end
default: ;
endcase
bstate <= B_LSNAck;
check_abort_load();
end
B_DLoadNack:
if (~acki) begin
stb_o <= `HIGH;
bstate <= B_DLoadAck;
check_abort_load();
end
 
// Three cycles to determine if there's a cache hit during a store.
B16: begin
case(bwhich)
2'd0: if (dhit0) begin dram0 <= `DRAMREQ_READY; bstate <= B17; end
2'd1: if (dhit1) begin dram1 <= `DRAMREQ_READY; bstate <= B17; end
2'd2: if (dhit2) begin dram2 <= `DRAMREQ_READY; bstate <= B17; end
default: bstate <= BIDLE;
endcase
end
B17: bstate <= B18;
B18: bstate <= B_LSNAck;
B16:
begin
case(bwhich)
2'd0: if (dhit0) begin dram0 <= `DRAMREQ_READY; bstate <= B17; end
2'd1: if (dhit1) begin dram1 <= `DRAMREQ_READY; bstate <= B17; end
2'd2: if (dhit2) begin dram2 <= `DRAMREQ_READY; bstate <= B17; end
default: bstate <= BIDLE;
endcase
check_abort_load();
end
B17:
begin
bstate <= B18;
check_abort_load();
end
B18:
begin
bstate <= B_LSNAck;
check_abort_load();
end
B_LSNAck:
begin
bstate <= BIDLE;
StoreAck1 <= `FALSE;
isStore <= `FALSE;
end
begin
bstate <= BIDLE;
StoreAck1 <= `FALSE;
isStore <= `FALSE;
check_abort_load();
end
B20:
if (~acki) begin
stb_o <= `HIGH;
8712,11 → 8821,13
we <= `HIGH;
dat_o <= fnDato(rmw_instr,rmw_res);
bstate <= B_StoreAck;
check_abort_load();
end
B21:
if (~acki) begin
stb_o <= `HIGH;
bstate <= B12;
bstate <= B_RMWAck;
check_abort_load();
end
default: bstate <= BIDLE;
endcase
9113,6 → 9224,17
*/
assign exc_o = iqentry_exc[heads[0]][7:0];
 
// Abandon the memory operation in progress when iqentry_stomp is set for the
// queue entry that owns the channel selected by bwhich: the bus state machine
// is sent back to BIDLE and that channel is marked `DRAMREQ_READY. Nothing is
// changed when the entry is not stomped. Channel 0 is also used for every
// value of bwhich other than 1 and 2.
task check_abort_load;
begin
	case(bwhich)
	2'd1:
		if (iqentry_stomp[dram1_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram1 <= `DRAMREQ_READY;
		end
	2'd2:
		if (iqentry_stomp[dram2_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram2 <= `DRAMREQ_READY;
		end
	default:
		if (iqentry_stomp[dram0_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram0 <= `DRAMREQ_READY;
		end
	endcase
end
endtask
 
// Update the write buffer.
task wb_update;
input [`QBITS] id;
9121,6 → 9243,7
input [1:0] ol;
input [`ABITS] addr;
input [63:0] data;
input [2:0] wbptr;
begin
if (wbm && wbptr > 1 && wb_addr[wbptr-1][AMSB:3]==addr[AMSB:3]
&& wb_ol[wbptr-1]==ol && wb_rmw[wbptr-1]==rmw && wb_v[wbptr-1]) begin
9153,7 → 9276,6
wb_sel[wbptr] <= sel;
wb_addr[wbptr] <= {addr[AMSB:3],3'b0};
wb_data[wbptr] <= data;
wbptr <= wbptr + 2'd1;
end
end
endtask
9548,11 → 9670,14
input [7:0] causecd;
begin
excmiss <= TRUE;
`ifdef SUPPORT_SMT
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
excthrd <= iqentry_thrd[head];
badaddr[{thread,2'd0}] <= iqentry_ma[head];
bad_instr[{thread,2'd0}] <= iqentry_instr[head];
im_stack <= {im_stack[27:0],4'hF};
`ifdef SUPPORT_SMT
excthrd <= iqentry_thrd[head];
ol_stack[thread] <= {ol_stack[thread][13:0],2'b00};
dl_stack[thread] <= {dl_stack[thread][13:0],2'b00};
epc0[thread] <= iqentry_pc[head];
epc1[thread] <= epc0[thread];
epc2[thread] <= epc1[thread];
9562,9 → 9687,6
epc6[thread] <= epc5[thread];
epc7[thread] <= epc6[thread];
epc8[thread] <= epc7[thread];
im_stack[thread] <= {im_stack[thread][27:0],im};
ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS};
brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS};
9573,10 → 9695,9
mstatus[thread][13:6] <= 8'h00;
mstatus[thread][19:14] <= `EXC_RGS;
`else
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
excthrd <= 1'b0;
badaddr[{1'b0,2'd0}] <= iqentry_ma[head];
bad_instr[3'd0] <= iqentry_instr[head];
ol_stack <= {ol_stack[13:0],2'b00};
dl_stack <= {dl_stack[13:0],2'b00};
epc0 <= iqentry_pc[head];
epc1 <= epc0;
epc2 <= epc1;
9586,9 → 9707,6
epc6 <= epc5;
epc7 <= epc6;
epc8 <= epc7;
im_stack <= {im_stack[27:0],im};
ol_stack <= {ol_stack[13:0],ol};
dl_stack <= {dl_stack[13:0],dl};
pl_stack <= {pl_stack[55:0],cpl};
rs_stack <= {rs_stack[59:0],`EXC_RGS};
brs_stack <= {rs_stack[59:0],`EXC_RGS};
9626,7 → 9744,10
// hardware interrupt at a higher priority than the current priority.
if ((|iqentry_instr[head][25:21]) || iqentry_instr[head][20:17] > im) begin
excmiss <= TRUE;
im_stack <= {im_stack[27:0],4'hF};
`ifdef SUPPORT_SMT
ol_stack[thread] <= {ol_stack[thread][13:0],2'b00};
dl_stack[thread] <= {dl_stack[thread][13:0],2'b00};
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
excthrd <= iqentry_thrd[head];
epc0[thread] <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
9638,9 → 9759,6
epc6[thread] <= epc5[thread];
epc7[thread] <= epc6[thread];
epc8[thread] <= epc7[thread];
im_stack[thread] <= {im_stack[thread][27:0],im};
ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
rs_stack[thread] <= {rs_stack[thread][59:0],`BRK_RGS};
brs_stack[thread] <= {brs_stack[thread][59:0],`BRK_RGS};
9664,6 → 9782,8
brs_stack[thread][5:0] <= `BRK_RGS;
end
`else
ol_stack <= {ol_stack[13:0],2'b00};
dl_stack <= {dl_stack[13:0],2'b00};
excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
excthrd <= 1'b0;
epc0 <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
9675,9 → 9795,6
epc6 <= epc5;
epc7 <= epc6;
epc8 <= epc7;
im_stack <= {im_stack[27:0],im};
ol_stack <= {ol_stack[13:0],ol};
dl_stack <= {dl_stack[13:0],dl};
pl_stack <= {pl_stack[55:0],cpl};
rs_stack <= {rs_stack[59:0],`BRK_RGS};
brs_stack <= {brs_stack[59:0],`BRK_RGS};
9798,6 → 9915,7
case(iqentry_instr[head][`INSTRUCTION_S2])
`CACHEX:
case(iqentry_instr[head][22:18])
5'h02: begin invicl <= TRUE; invlineAddr <= {ASID,iqentry_res[head]}; end
5'h03: invic <= TRUE;
5'h10: cr0[30] <= FALSE;
5'h11: cr0[30] <= TRUE;
9830,6 → 9948,7
`endif
`CACHE:
case(iqentry_instr[head][17:13])
5'h02: begin invicl <= TRUE; invlineAddr <= {ASID,iqentry_res[head]}; end
5'h03: invic <= TRUE;
5'h10: cr0[30] <= FALSE;
5'h11: cr0[30] <= TRUE;
9954,7 → 10073,7
`CSR_CAUSE: dat <= {48'd0,cause[{thread,csrno[11:10]}]};
`ifdef SUPPORT_SMT
`CSR_IM_STACK: dat <= im_stack[thread];
`CSR_OL_STACK: dat <= {dl_stack[thread],ol_stack[thread]};
`CSR_OL_STACK: dat <= {16'h0,dl_stack[thread],16'h0,ol_stack[thread]};
`CSR_PL_STACK: dat <= pl_stack[thread];
`CSR_RS_STACK: dat <= rs_stack[thread];
`CSR_STATUS: dat <= mstatus[thread][63:0];
9968,7 → 10087,7
`CSR_EPC7: dat <= epc7[thread];
`else
`CSR_IM_STACK: dat <= im_stack;
`CSR_OL_STACK: dat <= {dl_stack,ol_stack};
`CSR_ODL_STACK: dat <= {16'h0,dl_stack,16'h0,ol_stack};
`CSR_PL_STACK: dat <= pl_stack;
`CSR_RS_STACK: dat <= rs_stack;
`CSR_STATUS: dat <= mstatus[63:0];
10077,7 → 10196,7
`CSR_TVEC: tvec[csrno[2:0]] <= dat[31:0];
`ifdef SUPPORT_SMT
`CSR_IM_STACK: im_stack[thread] <= dat[31:0];
`CSR_OL_STACK: begin
`CSR_ODL_STACK: begin
ol_stack[thread] <= dat[15:0];
dl_stack[thread] <= dat[31:16];
end
10094,9 → 10213,9
`CSR_EPC7: epc7[thread] <= dat;
`else
`CSR_IM_STACK: im_stack <= dat[31:0];
`CSR_OL_STACK: begin
`CSR_ODL_STACK: begin
ol_stack <= dat[15:0];
dl_stack <= dat[31:16];
dl_stack <= dat[47:32];
end
`CSR_PL_STACK: pl_stack <= dat;
`CSR_RS_STACK: rs_stack <= dat;
/FT64v7/rtl/twoway/FT64_TLB.v
2,7 → 2,7
`include "FT64_config.vh"
//=============================================================================
// __
// \\__/ o\ (C) 2011-2018 Robert Finch, Waterloo
// \\__/ o\ (C) 2011-2019 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch<remove>@finitron.ca
// ||
27,12 → 27,13
// TLB
// The TLB contains 256 entries and is 16-way set associative.
// The TLB is shared between the instruction and data streams.
// The code is carefully constructed to not require reset signals.
//
//=============================================================================
//
`define TLBMissPage {DBW-13{1'b1}}
 
module FT64_TLB(rst, clk, ld, done, idle, ol,
module FT64_TLB(clk, ld, done, idle, ol,
ASID, op, regno, dati, dato,
uncached,
icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o,
50,7 → 51,6
parameter INC3 = 4'd4;
parameter AGE1 = 4'd5;
parameter AGE2 = 4'd6;
input rst;
input clk;
input ld;
output done;
57,7 → 57,7
output idle;
input [1:0] ol; // operating level
input [ABW-1:0] vadr_i;
output reg [ABW-1:0] padr_o;
output reg [ABW-1:0] padr_o = 64'hFFFFFFFFFFFC0100;
output uncached;
 
input icl_i;
78,7 → 78,7
 
integer n;
 
reg [3:0] state;
reg [1:0] state = IDLE;
// An operation is done when the state machine is idle with no load request
// pending, or when it has reached state TWO. The test for IDLE has to be
// grouped with !ld as a whole; written as state==(IDLE && !ld) the state
// register is compared against the one bit result of (IDLE && !ld) instead.
assign done = (state==IDLE && !ld) || state==TWO;
assign idle = state==IDLE && !ld;
 
97,11 → 97,11
reg HTLBValid;
reg [ABW-1:0] miss_addr;
 
reg TLBenabled;
reg [7:0] i;
reg TLBenabled = 1'b0;
reg [7:0] i = 8'h00;
reg [DBW-1:0] Index;
reg [3:0] Random;
reg [3:0] Wired;
reg [3:0] Random = 4'hF;
reg [3:0] Wired = 4'd0;
reg [2:0] PageSize;
reg [15:0] Match;
 
120,22 → 120,22
reg [ENTRIES-1:0] TLBValid;
reg [DBW-1:0] imiss_addr;
reg [DBW-1:0] dmiss_addr;
reg [DBW-1:0] PageTblAddr;
reg [DBW-1:0] PageTblCtrl;
reg [DBW-1:0] PageTblAddr = {DBW{1'b0}};
reg [DBW-1:0] PageTblCtrl = {DBW{1'b0}};
 
reg [23:0] age_lmt;
reg [23:0] age_ctr;
reg [23:0] age_lmt = 24'd20000;
reg [23:0] age_ctr = 24'd0;
wire age_tick = age_ctr < 24'd5;
reg cyc_en, age_en;
reg [3:0] ar_state;
reg ar_wr;
reg [7:0] age_adr, ar_adr;
reg cyc_en = 1'b1, age_en = 1'b1;
reg [3:0] ar_state = IDLE;
reg ar_wr = 1'b0;
reg [7:0] age_adr = 8'h00, ar_adr = 8'h00;
reg [32:0] count;
reg [31:0] ar_dati;
wire [31:0] ar_dato;
reg [31:0] ar_cdato;
reg getset_age;
reg doLoad;
reg doLoad = 1'b0;
 
/*
initial begin
225,11 → 225,9
);
 
always @(posedge clk)
if (rst) begin
age_ctr <= 24'd0;
end
else begin
if (age_ctr==24'd0)
begin
// age_ctr > age_lmt when counter hits -1, saves comparing to zero as well
if (age_ctr > age_lmt)
age_ctr <= age_lmt;
else
age_ctr <= age_ctr - 4'd1;
237,10 → 235,7
 
// Handle Random register
always @(posedge clk)
if (rst) begin
Random <= 4'hF;
end
else begin
begin
if (Random==Wired)
Random <= 4'hF;
else
254,10 → 249,7
end
 
always @(posedge clk)
if (rst) begin
state <= IDLE;
end
else begin
begin
case(state)
IDLE:
if (ld)
278,29 → 270,17
 
// Set index to page table
always @(posedge clk)
if (rst) begin
i <= 8'd0;
if (state==ONE) begin
case(op)
`TLB_RD,`TLB_WI:
i <= {Index[7:4],(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
`TLB_WR:
i <= {Random,(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
endcase
end
else begin
if (state==ONE) begin
case(op)
`TLB_RD,`TLB_WI:
i <= {Index[7:4],(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
`TLB_WR:
i <= {Random,(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
endcase
end
end
 
always @(posedge clk)
if (rst) begin
TLBenabled <= 1'b0;
Wired <= 4'd0;
PageTblAddr <= {DBW{1'b0}};
PageTblCtrl <= {DBW{1'b0}};
age_lmt <= 24'd20000;
end
else begin
begin
if (miss_addr == {DBW{1'b0}} && TLBMiss)
miss_addr <= vadr_i;
 
427,16 → 407,7
TLBAgeRam uar1(clk,ar_wr,ar_adr,ar_dati,ar_dato);
 
always @(posedge clk)
if (rst) begin
age_adr <= 4'd0;
ar_wr <= 1'b0;
ar_adr <= 4'd0;
ar_state <= IDLE;
cyc_en <= 1'b1;
age_en <= 1'b1;
doLoad <= 1'b0;
end
else begin
begin
ar_wr <= 1'b0;
getset_age <= 1'b0;
if (ld)
518,28 → 489,25
 
assign uncached = TLBC[{q[3:0],vadrs[3:0]}]==3'd1;// || unmappedDataArea;
 
assign TLBMiss = TLBenabled & (!unmappedArea & (q[4] | ~TLBValid[{q[3:0],vadrs[3:0]}]) ||
assign TLBMiss = (ol!=2'b00) && TLBenabled && (!unmappedArea & (q[4] | ~TLBValid[{q[3:0],vadrs[3:0]}]) ||
(ol!=2'b00 && hitIOPage));
 
always @(posedge clk)
cyc_o <= cyc_i & (~TLBMiss | ~TLBenabled);
cyc_o <= cyc_i && (!TLBMiss || !TLBenabled || (ol == 2'b00));
 
always @(posedge clk)
we_o <= we_i & ((~TLBMiss & tlbWo1) | ~TLBenabled);
we_o <= we_i & ((~TLBMiss & tlbWo1) | ~TLBenabled || (ol==2'b00));
 
always @(posedge clk)
wrv_o <= we_i & ~TLBMiss & ~tlbWo1 & TLBenabled;
wrv_o <= we_i & ~TLBMiss & ~tlbWo1 & TLBenabled && (ol != 2'b00);
 
always @(posedge clk)
rdv_o <= ~we_i & ~TLBMiss & ~tlbRo1 & TLBenabled;
rdv_o <= ~we_i & ~TLBMiss & ~tlbRo1 & TLBenabled && (ol != 2'b00);
 
always @(posedge clk)
exv_o <= icl_i & ~TLBMiss & ~tlbXo1 & TLBenabled;
exv_o <= icl_i & ~TLBMiss & ~tlbXo1 & TLBenabled && (ol != 2'b00);
 
always @(posedge clk)
if (rst)
padr_o <= 32'hFFFC0100;
else begin
if (TLBenabled && ol != 2'b00) begin
case(PageSize)
3'd0: padr_o[ABW-1:13] <= unmappedArea ? vadr_i[ABW-1:13] : TLBMiss ? `TLBMissPage: PFN;
554,7 → 522,6
end
else
padr_o <= vadr_i;
end
 
endmodule
 
/FT64v7/rtl/twoway/FT64_fetchbuf_x1.v
114,6 → 114,9
 
reg [55:0] cinsn0;
 
wire iclk = clk;
//BUFH ucb1 (.I(clk), .O(iclk));
 
//`include "FT64_decode.vh"
 
function IsBranch;
336,7 → 339,7
 
reg did_branch;
 
always @(posedge clk)
always @(posedge iclk)
if (rst) begin
pc0 <= RSTPC;
fetchbufA_v <= 1'b0;
532,9 → 535,9
fetchbufA_v <= `VAL;
fetchbufA_pc <= pc0;
if (phit && ~freezePC)
pc0 <= pc0 + insln0;
else
pc0 <= pc0;
pc0[31:0] <= pc0[31:0] + insln0;
// else
// pc0 <= pc0;
end
endtask
 
545,9 → 548,9
fetchbufB_v <= `VAL;
fetchbufB_pc <= pc0;
if (phit && ~freezePC)
pc0 <= pc0 + insln0;
else
pc0 <= pc0;
pc0[31:0] <= pc0[31:0] + insln0;
// else
// pc0 <= pc0;
end
endtask
 
/FT64v7/software/AS64/bin/AS64.exe Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
/FT64v7/software/CC64/bin/CC64.exe Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.