URL
https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor
[/] [thor/] [trunk/] [FT64v7/] [rtl/] [twoway/] [FT64.v] - Rev 66
Compare with Previous | Blame | View Log
// ============================================================================ // __ // \\__/ o\ (C) 2017-2019 Robert Finch, Waterloo // \ __ / All rights reserved. // \/_// robfinch<remove>@finitron.ca // || // // FT64.v // Features include: // - 16/32/48 bit instructions // - vector instruction set, // - SIMD instructions // - data width of 64 bits // - 32 general purpose registers // - 32 floating point registers // - 32 vector registers, length 63 // - powerful branch prediction // - branch target buffer (BTB) // - return address predictor (RSB) // - bus interface unit // - instruction and data caches // - fine-grained simultaneous multi-threading (SMT) // - bus randomizer on exceptional conditions // // This source file is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published // by the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This source file is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. // // Approx 41,000 LUTs. 66,000 LC's. 
// ============================================================================
//
`include "FT64_config.vh"
`include "FT64_defines.vh"

// FT64 core: module header, port declarations and compile-time parameters.
// The port list is non-ANSI; directions and widths follow the header.
module FT64(
    hartid, rst, clk_i, clk4x, tm_clk_i, irq_i, vec_i,
    bte_o, cti_o, bok_i, cyc_o, stb_o, ack_i, err_i, we_o, sel_o, adr_o,
    dat_o, dat_i,
    ol_o, pcr_o, pcr2_o, pkeys_o, icl_o, sr_o, cr_o, rbi_i, signal_i, exc_o);

// ---- clocks, reset, interrupt inputs ----
input [63:0] hartid;
input rst;
input clk_i;
input clk4x;
input tm_clk_i;
input [3:0] irq_i;
input [7:0] vec_i;

// ---- external bus (presumably WISHBONE-style naming -- confirm) ----
output reg [1:0] bte_o;
output reg [2:0] cti_o;
input bok_i;
output cyc_o;
output reg stb_o;
input ack_i;
input err_i;
output we_o;
output reg [7:0] sel_o;
output [`ABITS] adr_o;
output reg [63:0] dat_o;
input [63:0] dat_i;

// ---- status / control side-band ----
output reg [1:0] ol_o;
output [31:0] pcr_o;
output [63:0] pcr2_o;
output [63:0] pkeys_o;
output icl_o;
output reg cr_o;
output reg sr_o;
input rbi_i;
input [31:0] signal_i;
(* mark_debug="true" *)
output [7:0] exc_o;

// ---- configuration parameters ----
parameter TM_CLKFREQ = 20000000;
parameter QENTRIES = `QENTRIES;             // queue depth, taken from the config header
parameter RSTPC = 64'hFFFFFFFFFFFC0100;
parameter BRKPC = 64'hFFFFFFFFFFFC0000;
`ifdef SUPPORT_SMT
parameter PREGS = 256;                      // physical register count (SMT build)
parameter AREGS = 256;                      // architectural register count (SMT build)
`else
parameter PREGS = 128;
parameter AREGS = 128;
`endif
parameter RBIT = 11;                        // MSB index of a register specifier
parameter DEBUG = 1'b0;
parameter NMAP = QENTRIES;
parameter BRANCH_PRED = 1'b0;
parameter SUP_TXE = 1'b0;
`ifdef SUPPORT_VECTOR
parameter SUP_VECTOR = 1'b1;
`else
parameter SUP_VECTOR = 1'b0;
`endif
parameter DBW = 64;
parameter ABW = 64;
parameter AMSB = ABW-1;
parameter NTHREAD = 1;

// ---- general purpose loop variables ----
reg [7:0] i;
integer n;
integer j, k;
genvar g, h;

parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

// Memory access sizes
parameter byt = 3'd0;
parameter wyde = 3'd1;
parameter tetra = 3'd2;
parameter octa = 3'd3;
parameter hexi = 3'd4;

// IQ states
parameter IQS_INVALID = 3'd0;
parameter IQS_QUEUED = 3'd1;
parameter IQS_OUT = 3'd2;
parameter IQS_AGEN = 3'd3;
parameter IQS_MEM = 3'd4;
parameter IQS_DONE = 3'd5;
parameter IQS_CMT = 3'd6;
`include "..\common\FT64_busStates.vh"

// ----------------------------------------------------------------------------
// Clocking and bus helper signals.
// ----------------------------------------------------------------------------
wire clk;
//BUFG uclkb1
//(
// .I(clk_i),
// .O(clk)
//);
assign clk = clk_i;        // the BUFG above is disabled; clk is a plain alias of clk_i

wire exv_i;
wire rdv_i;
wire wrv_i;
reg [ABW-1:0] vadr;
reg cyc;
reg we;
wire dc_ack;
wire acki = ack_i|dc_ack;  // acknowledge from the external bus or from dc_ack
wire tlb_miss;

// ----------------------------------------------------------------------------
// Register specifiers and register file read data, one set per queue slot.
// ----------------------------------------------------------------------------
wire [RBIT:0] Ra0, Ra1, Ra2;
wire [RBIT:0] Rb0, Rb1, Rb2;
wire [RBIT:0] Rc0, Rc1, Rc2;
wire [RBIT:0] Rt0, Rt1, Rt2;
wire [63:0] rfoa0,rfob0,rfoc0,rfoc0a,rfot0;
wire [63:0] rfoa1,rfob1,rfoc1,rfoc1a,rfot1;
wire [63:0] rfoa2,rfob2,rfoc2,rfoc2a,rfot2;

// Shortened register specifiers: the SMT build keeps bits [7:0], the
// non-SMT build keeps bit 7 together with bits [5:0].
`ifdef SUPPORT_SMT
wire [7:0] Ra0s = {Ra0[7:0]};
wire [7:0] Ra1s = {Ra1[7:0]};
wire [7:0] Ra2s = {Ra2[7:0]};
wire [7:0] Rb0s = {Rb0[7:0]};
wire [7:0] Rb1s = {Rb1[7:0]};
wire [7:0] Rb2s = {Rb2[7:0]};
wire [7:0] Rc0s = {Rc0[7:0]};
wire [7:0] Rc1s = {Rc1[7:0]};
wire [7:0] Rc2s = {Rc2[7:0]};
wire [7:0] Rt0s = {Rt0[7:0]};
wire [7:0] Rt1s = {Rt1[7:0]};
wire [7:0] Rt2s = {Rt2[7:0]};
`else
wire [6:0] Ra0s = {Ra0[7],Ra0[5:0]};
wire [6:0] Ra1s = {Ra1[7],Ra1[5:0]};
wire [6:0] Ra2s = {Ra2[7],Ra2[5:0]};
wire [6:0] Rb0s = {Rb0[7],Rb0[5:0]};
wire [6:0] Rb1s = {Rb1[7],Rb1[5:0]};
wire [6:0] Rb2s = {Rb2[7],Rb2[5:0]};
wire [6:0] Rc0s = {Rc0[7],Rc0[5:0]};
wire [6:0] Rc1s = {Rc1[7],Rc1[5:0]};
wire [6:0] Rc2s = {Rc2[7],Rc2[5:0]};
wire [6:0] Rt0s = {Rt0[7],Rt0[5:0]};
wire [6:0] Rt1s = {Rt1[7],Rt1[5:0]};
wire [6:0] Rt2s = {Rt2[7],Rt2[5:0]};
/*
wire [5:0] Ra0s = {Ra0[5:0]};
wire [5:0] Ra1s = {Ra1[5:0]};
wire [5:0] Rb0s = {Rb0[5:0]};
wire [5:0] Rb1s = {Rb1[5:0]};
wire [5:0] Rc0s = {Rc0[5:0]};
wire [5:0] Rc1s = {Rc1[5:0]};
wire [5:0] Rt0s = {Rt0[5:0]};
wire [5:0] Rt1s = {Rt1[5:0]};
*/
`endif

reg [63:0] wbrcd;
wire [5:0] brgs;           // index used for the base/bound table lookups below

// ----------------------------------------------------------------------------
// Base/bound tables (SUPPORT_BBMS).  64 entries each, read through a clocked
// lookup indexed by brgs.  Without SUPPORT_BBMS the looked-up values are
// constants: lower bounds and privilege levels zero, upper bounds all ones.
// ----------------------------------------------------------------------------
`ifdef SUPPORT_BBMS
reg [15:0] thrd_handle [0:63];
reg [63:0] prg_base [0:63];
reg [63:0] prg_limit [0:63];
reg [63:0] en_barrier [0:63];   // environment bound
reg [63:0] cl_barrier [0:63];
reg [63:0] cu_barrier [0:63];
reg [63:0] ro_barrier [0:63];
reg [63:0] dl_barrier [0:63];
reg [63:0] du_barrier [0:63];
reg [63:0] sl_barrier [0:63];
reg [63:0] su_barrier [0:63];
reg [7:0] env_priv [0:63];
reg [7:0] cod_priv [0:63];
reg [7:0] rdo_priv [0:63];
reg [7:0] dat_priv [0:63];
reg [7:0] stk_priv [0:63];
reg [15:0] th;
reg [63:0] pb;
reg [63:0] cbl;
reg [63:0] cbu;
reg [63:0] ro;
reg [63:0] dbl;
reg [63:0] dbu;
reg [63:0] sbl;
reg [63:0] sbu;
reg [63:0] en;
reg [7:0] env_pl;
reg [7:0] cod_pl;
reg [7:0] rdo_pl;
reg [7:0] dat_pl;
reg [7:0] stk_pl;

// Power-up contents of the tables.
initial begin
    for (n = 0; n < 64; n = n + 1) begin
        thrd_handle[n] <= 1'd0;
        prg_base[n] <= 1'd0;
        // en_barrier is read by the lookup below but was never initialized;
        // zero matches the constant used for 'en' when SUPPORT_BBMS is off.
        en_barrier[n] <= 64'd0;
        cl_barrier[n] <= 1'd0;
        cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
        ro_barrier[n] <= 1'd0;
        dl_barrier[n] <= 1'd0;
        du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
        sl_barrier[n] <= 1'd0;
        su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
        env_priv[n] <= 8'h00;
        cod_priv[n] <= 8'h00;
        rdo_priv[n] <= 8'h00;
        dat_priv[n] <= 8'h00;
        stk_priv[n] <= 8'h00;
    end
end

// Registered lookup of every table entry selected by brgs.
always @(posedge clk_i)
begin
    th <= thrd_handle[brgs];
    pb <= prg_base[brgs];
    cbl <= cl_barrier[brgs];
    cbu <= cu_barrier[brgs];
    ro <= ro_barrier[brgs];
    dbl <= dl_barrier[brgs];
    dbu <= du_barrier[brgs];
    sbl <= sl_barrier[brgs];
    sbu <= su_barrier[brgs];
    en <= en_barrier[brgs];
    env_pl <= env_priv[brgs];
    cod_pl <= cod_priv[brgs];
    rdo_pl <= rdo_priv[brgs];
    dat_pl <= dat_priv[brgs];
    stk_pl <= stk_priv[brgs];
end
//wire [23:0] currentPrgSelector = prg_selector[brgs];
`else
wire [63:0] pb = 1'd0;
wire [63:0] cbl = 1'd0;
wire [63:0] cbu = 64'hFFFFFFFFFFFFFFFF;
wire [63:0] ro = 1'd0;
wire [63:0] dbl = 1'd0;
wire [63:0] dbu = 64'hFFFFFFFFFFFFFFFF;
wire [63:0] sbl = 1'd0;
wire [63:0] sbu = 64'hFFFFFFFFFFFFFFFF;
wire [63:0] en = 1'd0;
wire [7:0] env_pl = 8'h00;
wire [7:0] cod_pl = 8'h00;
wire [7:0] rdo_pl = 8'h00;
wire [7:0] dat_pl = 8'h00;
wire [7:0] stk_pl = 8'h00;
`endif

// ----------------------------------------------------------------------------
// Register valid / source tracking.
// ----------------------------------------------------------------------------
reg [PREGS-1:0] rf_v;
reg [`QBITSP1] rf_source[0:AREGS-1];
reg [15:0] prf_v;
reg [`QBITSP1] prf_source[0:15];

initial begin
    for (n = 0; n < AREGS; n = n + 1)
        rf_source[n] = 1'b0;
    for (n = 0; n < 16; n = n + 1)
        prf_source[n] <= 1'b0;
end

// Operating level / debug level: per-thread arrays in the SMT build.
`ifdef SUPPORT_SMT
wire [1:0] ol [0:NTHREAD];
wire [1:0] dl [0:NTHREAD];
`else
wire [1:0] ol;
wire [1:0] dl;
`endif

// ----------------------------------------------------------------------------
// Program counters.  With SUPPORT_BBMS a PC is passed through when its bits
// [47:40] are 8'hFF or the ol term compares equal to 2'b00; otherwise the
// low 48 bits are added to {pb[50:0],13'd0}.
// NOTE(review): ol[n] selects an array element in the SMT build but a single
// bit in the non-SMT build, and index 2 exceeds [0:NTHREAD] when NTHREAD is 1
// -- confirm the intended comparison.
// ----------------------------------------------------------------------------
wire [`ABITS] pc0a;
wire [`ABITS] pc1a;
wire [`ABITS] pc2a;
`ifdef SUPPORT_BBMS
wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol[0]==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0];
wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol[1]==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0];
wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol[2]==2'b00) ? pc2a : {pb[50:0],13'd0} + pc2a[47:0];
`else
wire [`ABITS] pc0 = pc0a;
wire [`ABITS] pc1 = pc1a;
wire [`ABITS] pc2 = pc2a;
`endif

reg excmiss;
reg [`ABITS] excmisspc;
reg excthrd;
reg exception_set;
reg rdvq;                  // accumulated read violation
reg errq;                  // accumulated err_i input status
reg exvq;

// Vector
reg [5:0] vqe0, vqe1, vqe2;   // vector element being queued
reg [5:0] vqet0, vqet1, vqet2;
reg [7:0] vl;              // vector length
reg [63:0] vm [0:7];       // vector mask registers
reg [1:0] m2;
reg [31:0] wb_merges;

// CSR's
reg [63:0] cr0;
wire snr = cr0[17];        // sequence number reset
wire dce = cr0[30];        // data cache enable
wire bpe = cr0[32];        // branch predictor enable
wire wbm = cr0[34];
wire sple = cr0[35];       // speculative load enable
wire ctgtxe = cr0[33];
wire pred_on = 1'b0;

// Functional unit availability bits, decoded from pmr.
reg [63:0] pmr;
wire id1_available = pmr[0];
wire id2_available = pmr[1];
wire id3_available = pmr[2];
wire alu0_available = pmr[8];
wire alu1_available = pmr[9];
wire fpu1_available = pmr[16];
wire fpu2_available = pmr[17];
wire mem1_available = pmr[24];
wire mem2_available = pmr[25];
wire mem3_available = pmr[26];
wire fcu_available = pmr[32];

// Simply setting this flag to zero should strip out almost all the logic
// associated with SMT.
`ifdef SUPPORT_SMT
wire thread_en = cr0[16];
`else
wire thread_en = 1'b0;
`endif
wire vechain = cr0[18];

// Performance CSR's
reg [39:0] iq_ctr;
reg [39:0] irq_ctr;        // count of number of interrupts
reg [39:0] bm_ctr;         // branch miss counter
reg [39:0] br_ctr;         // branch counter
reg [39:0] icl_ctr;        // instruction cache load counter

reg [7:0] fcu_timeout;
reg [63:0] tick;
reg [63:0] wc_time;
reg [31:0] pcr;
reg [63:0] pcr2;
assign pcr_o = pcr;
assign pcr2_o = pcr2;
reg [63:0] aec;
(* mark_debug = "true" *)
reg [15:0] cause[0:15];

// ----------------------------------------------------------------------------
// Machine status, exception PCs and level stacks.
// The SMT build keeps one copy per thread ([0:NTHREAD]).
// ----------------------------------------------------------------------------
`ifdef SUPPORT_SMT
reg [31:0] im_stack [0:NTHREAD];
wire [3:0] im = im_stack[0][3:0];
reg [15:0] ol_stack [0:NTHREAD];
reg [15:0] dl_stack [0:NTHREAD];
// NOTE(review): ol[0], ol[1], dl[0] and dl[1] are each driven twice in this
// branch (once from ol_stack/dl_stack here, once from mstatus further down),
// and index 2 lies outside [0:NTHREAD] when NTHREAD is 1.  Left unchanged
// because the intended source cannot be determined from this file section.
assign ol[0] = ol_stack[0][1:0];
assign ol[1] = ol_stack[1][1:0];
assign ol[2] = ol_stack[2][1:0];
assign dl[0] = dl_stack[0][1:0];
assign dl[1] = dl_stack[1][1:0];
assign dl[2] = dl_stack[2][1:0];
reg [`ABITS] epc [0:NTHREAD];
reg [`ABITS] epc0 [0:NTHREAD];
reg [`ABITS] epc1 [0:NTHREAD];
reg [`ABITS] epc2 [0:NTHREAD];
reg [`ABITS] epc3 [0:NTHREAD];
reg [`ABITS] epc4 [0:NTHREAD];
reg [`ABITS] epc5 [0:NTHREAD];
reg [`ABITS] epc6 [0:NTHREAD];
reg [`ABITS] epc7 [0:NTHREAD];
reg [`ABITS] epc8 [0:NTHREAD];          // exception pc and stack
reg [63:0] mstatus [0:NTHREAD];         // machine status
assign ol[0] = mstatus[0][5:4];         // operating level
assign dl[0] = mstatus[0][21:20];
wire [7:0] cpl [0:NTHREAD];
assign cpl[0] = mstatus[0][13:6];       // current privilege level
wire [5:0] rgs [0:NTHREAD];
assign ol[1] = mstatus[1][5:4];         // operating level
assign cpl[1] = mstatus[1][13:6];       // current privilege level
assign dl[1] = mstatus[1][21:20];
wire [7:0] ASID = mstatus[0][47:40];
reg [63:0] pl_stack [0:NTHREAD];
reg [63:0] rs_stack [0:NTHREAD];
reg [63:0] brs_stack [0:NTHREAD];
reg [63:0] fr_stack [0:NTHREAD];
assign rgs[0] = rs_stack[0][5:0];
assign rgs[1] = rs_stack[1][5:0];
// NOTE(review): brgs is assigned only in the non-SMT branch below; no driver
// for it is visible in this branch -- confirm.
wire mprv = mstatus[0][55];
wire [5:0] fprgs = mstatus[0][25:20];
//assign ol_o = mprv ? ol_stack[0][2:0] : ol[0];
wire vca = mstatus[0][32];              // vector chaining active
`else
reg [31:0] im_stack = 32'hFFFFFFFF;
wire [3:0] im = im_stack[3:0];
reg [`ABITS] epc ;
reg [`ABITS] epc0 ;
reg [`ABITS] epc1 ;
reg [`ABITS] epc2 ;
reg [`ABITS] epc3 ;
reg [`ABITS] epc4 ;
reg [`ABITS] epc5 ;
reg [`ABITS] epc6 ;
reg [`ABITS] epc7 ;
reg [`ABITS] epc8 ;                     // exception pc and stack
reg [63:0] mstatus ;                    // machine status
reg [15:0] ol_stack;
reg [15:0] dl_stack;
assign ol = ol_stack[1:0];              // operating level
assign dl = dl_stack[1:0];
wire [7:0] cpl ;
assign cpl = mstatus[13:6];             // current privilege level
wire [5:0] rgs ;
reg [63:0] pl_stack ;
reg [63:0] rs_stack ;
reg [63:0] brs_stack ;
reg [63:0] fr_stack ;
assign rgs = rs_stack[5:0];
assign brgs = brs_stack[5:0];
wire mprv = mstatus[55];
wire [7:0] ASID = mstatus[47:40];
wire [5:0] fprgs = mstatus[25:20];
//assign ol_o = mprv ? ol_stack[2:0] : ol;
wire vca = mstatus[32];                 // vector chaining active
`endif

reg [63:0] keys;
assign pkeys_o = keys;
reg [63:0] tcb;
reg [47:0] bad_instr[0:15];
reg [`ABITS] badaddr[0:15];
reg [`ABITS] tvec[0:7];
reg [63:0] sema;
reg [63:0] vm_sema;
reg [63:0] cas;            // compare and swap
reg [63:0] ve_hold;
reg isCAS, isAMO, isInc, isSpt, isRMW;
reg [`QBITS] casid;
reg [4:0] regLR = 5'd29;

// ----------------------------------------------------------------------------
// Floating point control and status bits; packed into fp_status below.
// ----------------------------------------------------------------------------
reg [2:0] fp_rm;
reg fp_inexe;
reg fp_dbzxe;
reg fp_underxe;
reg fp_overxe;
reg fp_invopxe;
reg fp_giopxe;
reg fp_nsfp = 1'b0;
reg fp_fractie;
reg fp_raz;

reg fp_neg;
reg fp_pos;
reg fp_zero;
reg fp_inf;

reg fp_inex;               // inexact exception
reg fp_dbzx;               // divide by zero exception
reg fp_underx;             // underflow exception
reg fp_overx;              // overflow exception
reg fp_giopx;              // global invalid operation exception
reg fp_sx;                 // summary exception
reg fp_swtx;               // software triggered exception
reg fp_gx;
reg fp_invopx;

reg fp_infzerox;
reg fp_zerozerox;
reg fp_subinfx;
reg fp_infdivx;
reg fp_NaNCmpx;
reg fp_cvtx;
reg fp_sqrtx;
reg fp_snanx;

// 32-bit status word, most significant field first.
wire [31:0] fp_status = {
    fp_rm,
    fp_inexe,
    fp_dbzxe,
    fp_underxe,
    fp_overxe,
    fp_invopxe,
    fp_nsfp,

    fp_fractie,
    fp_raz,
    1'b0,
    fp_neg,
    fp_pos,
    fp_zero,
    fp_inf,

    fp_swtx,
    fp_inex,
    fp_dbzx,
    fp_underx,
    fp_overx,
    fp_giopx,
    fp_gx,
    fp_sx,

    fp_cvtx,
    fp_sqrtx,
    fp_NaNCmpx,
    fp_infzerox,
    fp_zerozerox,
    fp_infdivx,
    fp_subinfx,
    fp_snanx
    };

reg [63:0] fpu_csr;
wire [5:0] fp_rgs = fpu_csr[37:32];

//reg [25:0] m[0:8191];
reg [3:0] panic;           // indexes the message structure
reg [128:0] message [0:15];   // indexed by panic

wire int_commit;
reg StatusHWI;
(* mark_debug = "true" *)
reg [55:0] insn0, insn1, insn2;
wire [55:0] insn0a, insn1b, insn2b;
reg [55:0] insn1a, insn2a;
// Only need enough bits in the sequence number to cover the instructions in
// the queue plus an extra count for skipping on branch misses. In this case
// that would be four bits minimum (count 0 to 8).
wire [63:0] rdat0,rdat1,rdat2;
reg [127:0] xdati;

reg canq1, canq2, canq3;
(* mark_debug = "true" *)
reg queued1;
reg queued2;
reg queued3;
(* mark_debug = "true" *)
reg queuedNop;

reg [47:0] codebuf[0:63];
reg [QENTRIES-1:0] setpred;

// ----------------------------------------------------------------------------
// instruction queue (ROB)
// Bit-vector fields hold one bit per queue entry; array fields hold one word
// per queue entry.
// ----------------------------------------------------------------------------
// State and state decodes
reg [2:0] iqentry_state [0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_v;           // entry valid? -- this should be the first bit
reg [QENTRIES-1:0] iqentry_done;
reg [QENTRIES-1:0] iqentry_out;
reg [QENTRIES-1:0] iqentry_agen;
reg [`SNBITS] iqentry_sn [0:QENTRIES-1];   // instruction sequence number
reg [QENTRIES-1:0] iqentry_iv;          // instruction is valid
reg [`QBITSP1] iqentry_is [0:QENTRIES-1];  // source of instruction
reg [QENTRIES-1:0] iqentry_thrd;        // which thread the instruction is in
reg [QENTRIES-1:0] iqentry_pt;          // predict taken
reg [QENTRIES-1:0] iqentry_bt;          // update branch target buffer
reg [QENTRIES-1:0] iqentry_takb;        // take branch record
reg [QENTRIES-1:0] iqentry_jal;
reg [2:0] iqentry_sz [0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_alu = 8'h00; // alu type instruction
reg [QENTRIES-1:0] iqentry_alu0;        // only valid on alu #0
reg [QENTRIES-1:0] iqentry_fpu;         // floating point instruction
reg [QENTRIES-1:0] iqentry_fc;          // flow control instruction
reg [QENTRIES-1:0] iqentry_canex = 8'h00;   // true if it's an instruction that can exception
reg [QENTRIES-1:0] iqentry_oddball = 8'h00; // writes to register file
reg [QENTRIES-1:0] iqentry_load;        // is a memory load instruction
reg [QENTRIES-1:0] iqentry_loadv;       // is a volatile memory load instruction
reg [QENTRIES-1:0] iqentry_loadseg;
reg [QENTRIES-1:0] iqentry_store;       // is a memory store instruction
reg [QENTRIES-1:0] iqentry_preload;     // is a memory preload instruction
reg [QENTRIES-1:0] iqentry_ldcmp;
reg [QENTRIES-1:0] iqentry_mem;         // touches memory: 1 if LW/SW
reg [QENTRIES-1:0] iqentry_memndx;      // indexed memory operation
reg [2:0] iqentry_memsz [0:QENTRIES-1]; // size of memory op
reg [QENTRIES-1:0] iqentry_rmw;         // memory RMW op
reg [QENTRIES-1:0] iqentry_push;
reg [QENTRIES-1:0] iqentry_memdb;
reg [QENTRIES-1:0] iqentry_memsb;
reg [QENTRIES-1:0] iqentry_rtop;
reg [QENTRIES-1:0] iqentry_sei;
reg [QENTRIES-1:0] iqentry_aq;          // memory acquire
reg [QENTRIES-1:0] iqentry_rl;          // memory release
reg [QENTRIES-1:0] iqentry_shft;
reg [QENTRIES-1:0] iqentry_jmp;         // changes control flow: 1 if BEQ/JALR
reg [QENTRIES-1:0] iqentry_br;          // Bcc (for predictor)
reg [QENTRIES-1:0] iqentry_ret;
reg [QENTRIES-1:0] iqentry_irq;
reg [QENTRIES-1:0] iqentry_brk;
reg [QENTRIES-1:0] iqentry_rti;
reg [QENTRIES-1:0] iqentry_sync;        // sync instruction
reg [QENTRIES-1:0] iqentry_fsync;
reg [QENTRIES-1:0] iqentry_tlb;
reg [QENTRIES-1:0] iqentry_cmp;
reg [QENTRIES-1:0] iqentry_rfw = 1'b0;  // writes to register file
reg [QENTRIES-1:0] iqentry_prfw = 1'b0;
reg [7:0] iqentry_we [0:QENTRIES-1];    // enable strobe
reg [63:0] iqentry_res [0:QENTRIES-1];  // instruction result
// NOTE(review): the four iqentry_seg_* lines are read as live declarations
// with empty trailing comments -- confirm against the repository copy.
reg [63:0] iqentry_seg_base [0:QENTRIES-1];   //
reg [63:0] iqentry_seg_lb [0:QENTRIES-1];     //
reg [63:0] iqentry_seg_ub [0:QENTRIES-1];     //
reg [63:0] iqentry_seg_acr [0:QENTRIES-1];    //
reg [63:0] iqentry_ares [0:QENTRIES-1]; // alternate instruction result
reg [47:0] iqentry_instr[0:QENTRIES-1]; // instruction opcode
reg [2:0] iqentry_insln[0:QENTRIES-1];  // instruction length
reg [7:0] iqentry_exc [0:QENTRIES-1];   // only for branches ... indicates a HALT instruction
reg [RBIT:0] iqentry_tgt[0:QENTRIES-1]; // Rt field or ZERO -- this is the instruction's target (if any)
reg [7:0] iqentry_vl [0:QENTRIES-1];
reg [5:0] iqentry_ven [0:QENTRIES-1];   // vector element number
reg [AMSB:0] iqentry_ma [0:QENTRIES-1]; // memory address
reg [63:0] iqentry_a0 [0:QENTRIES-1];   // argument 0 (immediate)
reg [63:0] iqentry_a1 [0:QENTRIES-1];   // argument 1
reg [QENTRIES-1:0] iqentry_a1_v;        // arg1 valid
reg [`QBITSP1] iqentry_a1_s [0:QENTRIES-1];   // arg1 source (iq entry # with top bit representing ALU/DRAM bus)
reg [63:0] iqentry_a2 [0:QENTRIES-1];   // argument 2
reg iqentry_a2_v [0:QENTRIES-1];        // arg2 valid
reg [`QBITSP1] iqentry_a2_s [0:QENTRIES-1];   // arg2 source (iq entry # with top bit representing ALU/DRAM bus)
reg [63:0] iqentry_a3 [0:QENTRIES-1];   // argument 3
reg iqentry_a3_v [0:QENTRIES-1];        // arg3 valid
reg [`QBITSP1] iqentry_a3_s [0:QENTRIES-1];   // arg3 source (iq entry # with top bit representing ALU/DRAM bus)
reg [`ABITS] iqentry_pc [0:QENTRIES-1]; // program counter for this instruction
reg [RBIT:0] iqentry_Ra [0:QENTRIES-1];
reg [RBIT:0] iqentry_Rb [0:QENTRIES-1];
reg [RBIT:0] iqentry_Rc [0:QENTRIES-1];
// debugging
//reg [4:0] iqentry_ra [0:7]; // Ra

// Clear the argument source fields of every queue entry.
// The loop body is bracketed with begin/end so that all three arrays are
// cleared for each n; without the brackets only iqentry_a1_s was inside the
// loop and the other two were written once at the out-of-range index QENTRIES.
initial begin
    for (n = 0; n < QENTRIES; n = n + 1) begin
        iqentry_a1_s[n] <= 5'd0;
        iqentry_a2_s[n] <= 5'd0;
        iqentry_a3_s[n] <= 5'd0;
    end
end

reg [QENTRIES-1:0] iqentry_source = {QENTRIES{1'b0}};
reg [QENTRIES-1:0] iqentry_imm;
reg [QENTRIES-1:0] iqentry_memready;
reg [QENTRIES-1:0] iqentry_memopsvalid;

// Issue bookkeeping, one bit per queue entry unless noted.
reg [QENTRIES-1:0] memissue = {QENTRIES{1'b0}};
reg [1:0] missued;
reg [7:0] last_issue0, last_issue1, last_issue2;
reg [QENTRIES-1:0] iqentry_memissue;
reg [QENTRIES-1:0] iqentry_stomp;
reg [3:0] stompedOnRets;
reg [QENTRIES-1:0] iqentry_alu0_issue;
reg [QENTRIES-1:0] iqentry_alu1_issue;
reg [QENTRIES-1:0] iqentry_alu2_issue;
reg [QENTRIES-1:0] iqentry_id1issue;
reg [QENTRIES-1:0] iqentry_id2issue;
reg [QENTRIES-1:0] iqentry_id3issue;
reg [1:0] iqentry_mem_islot [0:QENTRIES-1];
reg [QENTRIES-1:0] iqentry_fcu_issue;
reg [QENTRIES-1:0] iqentry_fpu1_issue;
reg [QENTRIES-1:0] iqentry_fpu2_issue;

reg [PREGS-1:1] livetarget;
reg [PREGS-1:1] iqentry_livetarget [0:QENTRIES-1];
reg [PREGS-1:1] iqentry_latestID [0:QENTRIES-1];
reg [PREGS-1:1] iqentry_cumulative [0:QENTRIES-1];
wire [PREGS-1:1] iq_out [0:QENTRIES-1];

reg [`QBITS] tail0;
reg [`QBITS] tail1;
reg [`QBITS] tail2;
reg [`QBITS] heads[0:QENTRIES-1];

// To detect a head change at time of commit. Some values need to be pulsed
// with a single pulse.
reg [`QBITS] ohead[0:2];
reg ocommit0_v, ocommit1_v, ocommit2_v;
reg [11:0] cmt_timer;

wire take_branch0;
wire take_branch1;

// ----------------------------------------------------------------------------
// Fetch buffer outputs.
// ----------------------------------------------------------------------------
reg [3:0] nop_fetchbuf;
wire fetchbuf;             // determines which pair to read from & write to
wire [3:0] fb_panic;

wire [47:0] fetchbuf0_instr;
wire [2:0] fetchbuf0_insln;
wire [`ABITS] fetchbuf0_pc;
(* mark_debug = "true" *)
wire fetchbuf0_v;
wire fetchbuf0_thrd;
wire fetchbuf0_mem;
wire fetchbuf0_rfw;
wire [47:0] fetchbuf1_instr;
wire [2:0] fetchbuf1_insln;
wire [`ABITS] fetchbuf1_pc;
wire fetchbuf1_v;
wire fetchbuf1_thrd;
wire fetchbuf1_mem;
wire fetchbuf1_rfw;
wire [47:0] fetchbuf2_instr;
wire [2:0] fetchbuf2_insln;
wire [`ABITS] fetchbuf2_pc;
wire fetchbuf2_v;
wire fetchbuf2_thrd;
wire fetchbuf2_mem;
wire fetchbuf2_rfw;

wire [47:0] fetchbufA_instr;
wire [`ABITS] fetchbufA_pc;
wire fetchbufA_v;
wire [47:0] fetchbufB_instr;
wire [`ABITS] fetchbufB_pc;
wire fetchbufB_v;
wire [47:0] fetchbufC_instr;
wire [`ABITS] fetchbufC_pc;
wire fetchbufC_v;
wire [47:0] fetchbufD_instr;
wire [`ABITS] fetchbufD_pc;
wire fetchbufD_v;
wire [47:0] fetchbufE_instr;
wire [`ABITS] fetchbufE_pc;
wire fetchbufE_v;
wire [47:0] fetchbufF_instr;
wire [`ABITS] fetchbufF_pc;
wire fetchbufF_v;

//reg did_branchback0;
//reg did_branchback1;

// ----------------------------------------------------------------------------
// Instruction decoder slots.
// ----------------------------------------------------------------------------
reg id1_v;
reg [`QBITSP1] id1_id;
reg [47:0] id1_instr;
reg [5:0] id1_ven;
reg [7:0] id1_vl;
reg id1_thrd;
reg id1_pt;
reg [4:0] id1_Rt;
wire [143:0] id1_bus;

reg id2_v;
reg [`QBITSP1] id2_id;
reg [47:0] id2_instr;
reg [5:0] id2_ven;
reg [7:0] id2_vl;
reg id2_thrd;
reg id2_pt;
reg [4:0] id2_Rt;
wire [143:0] id2_bus;

reg id3_v;
reg [`QBITSP1] id3_id;
reg [47:0] id3_instr;
reg [5:0] id3_ven;
reg [7:0] id3_vl;
reg id3_thrd;
reg id3_pt;
reg [4:0] id3_Rt;
wire [143:0] id3_bus;

// ----------------------------------------------------------------------------
// ALU #0 / ALU #1 operand and result signals.
// ----------------------------------------------------------------------------
reg [63:0] alu0_xs = 64'd0;
reg [63:0] alu1_xs = 64'd0;

reg [3:0] alu0_pred;
reg alu0_cmt;
wire alu0_abort;
reg alu0_ld;
reg alu0_dataready;
wire alu0_done;
wire alu0_idle;
reg [`QBITSP1] alu0_sourceid;
reg [47:0] alu0_instr;
reg alu0_tlb;
reg alu0_mem;
reg alu0_load;
reg alu0_store;
reg alu0_push;
reg alu0_shft;
reg [RBIT:0] alu0_Ra;
reg [63:0] alu0_argA;
reg [63:0] alu0_argB;
reg [63:0] alu0_argC;
reg [63:0] alu0_argT;
reg [63:0] alu0_argI;      // only used by BEQ
reg [2:0] alu0_sz;
reg [RBIT:0] alu0_tgt;
reg [5:0] alu0_ven;
reg alu0_thrd;
reg [`ABITS] alu0_pc;
reg [63:0] alu0_bus;
wire [63:0] alu0b_bus;
wire [63:0] alu0_out;
wire [`QBITSP1] alu0_id;
(* mark_debug="true" *)
wire [`XBITS] alu0_exc;
wire alu0_v;
wire alu0_branchmiss;
wire [`ABITS] alu0_misspc;

reg [3:0] alu1_pred;
reg alu1_cmt;
wire alu1_abort;
reg alu1_ld;
reg alu1_dataready;
wire alu1_done;
wire alu1_idle;
reg [`QBITSP1] alu1_sourceid;
reg [47:0] alu1_instr;
reg alu1_mem;
reg alu1_load;
reg alu1_store;
reg alu1_push;
reg alu1_shft;
reg [RBIT:0] alu1_Ra;
reg [63:0] alu1_argA;
reg [63:0] alu1_argB;
reg [63:0] alu1_argC;
reg [63:0] alu1_argT;
reg [63:0] alu1_argI;      // only used by BEQ
reg [2:0] alu1_sz;
reg [RBIT:0] alu1_tgt;
reg [5:0] alu1_ven;
reg [`ABITS] alu1_pc;
reg alu1_thrd;
reg [63:0] alu1_bus;
wire [63:0] alu1b_bus;
wire [63:0] alu1_out;
wire [`QBITSP1] alu1_id;
wire [`XBITS] alu1_exc;
wire alu1_v;
wire alu1_branchmiss;
wire [`ABITS] alu1_misspc;

// ----------------------------------------------------------------------------
// FPU #1 / FPU #2.  Several outputs are tied to constants in this section.
// ----------------------------------------------------------------------------
wire [`XBITS] fpu_exc;
reg [3:0] fpu1_pred;
reg fpu1_cmt;
reg fpu1_ld;
reg fpu1_dataready = 1'b1;
wire fpu1_done = 1'b1;
wire fpu1_idle;
reg [`QBITSP1] fpu1_sourceid;
reg [47:0] fpu1_instr;
reg [63:0] fpu1_argA;
reg [63:0] fpu1_argB;
reg [63:0] fpu1_argC;
reg [63:0] fpu1_argT;
reg [63:0] fpu1_argI;      // only used by BEQ
reg [RBIT:0] fpu1_tgt;
reg [`ABITS] fpu1_pc;
wire [63:0] fpu1_out = 64'h0;
reg [63:0] fpu1_bus = 64'h0;
wire [`QBITSP1] fpu1_id;
wire [`XBITS] fpu1_exc = 9'h000;
wire fpu1_v;
wire [31:0] fpu1_status;

reg [3:0] fpu2_pred;
reg fpu2_cmt;
reg fpu2_ld;
reg fpu2_dataready = 1'b1;
wire fpu2_done = 1'b1;
wire fpu2_idle;
reg [`QBITSP1] fpu2_sourceid;
reg [47:0] fpu2_instr;
reg [63:0] fpu2_argA;
reg [63:0] fpu2_argB;
reg [63:0] fpu2_argC;
reg [63:0] fpu2_argT;
reg [63:0] fpu2_argI;      // only used by BEQ
reg [RBIT:0] fpu2_tgt;
reg [`ABITS] fpu2_pc;
wire [63:0] fpu2_out = 64'h0;
reg [63:0] fpu2_bus = 64'h0;
wire [`QBITSP1] fpu2_id;
wire [`XBITS] fpu2_exc = 9'h000;
wire fpu2_v;
wire [31:0] fpu2_status;

// ----------------------------------------------------------------------------
// Flow control unit (FCU).
// ----------------------------------------------------------------------------
reg [7:0] fccnt;
reg [47:0] waitctr;
reg [3:0] fcu_pred;
reg fcu_cmt;
reg fcu_ld;
reg fcu_dataready;
reg fcu_done;
reg fcu_idle = 1'b1;
reg [`QBITSP1] fcu_sourceid;
reg [47:0] fcu_instr;
reg [47:0] fcu_prevInstr;
reg [2:0] fcu_insln;
reg fcu_pt;                // predict taken
reg fcu_branch;
reg fcu_call;
reg fcu_ret;
reg fcu_jal;
reg fcu_brk;
reg fcu_rti;
reg [63:0] fcu_argA;
reg [63:0] fcu_argB;
reg [63:0] fcu_argC;
reg [63:0] fcu_argI;       // only used by BEQ
reg [63:0] fcu_argT;
reg [63:0] fcu_argT2;
reg [63:0] fcu_epc;
reg [23:0] fcu_ecs;        // excepted code segment
reg [23:0] fcu_rs;         // return selector
reg [`ABITS] fcu_pc;
reg [`ABITS] fcu_nextpc;
reg [`ABITS] fcu_brdisp;
wire [63:0] fcu_out;
reg [63:0] fcu_bus;
wire [`QBITSP1] fcu_id;
reg [`XBITS] fcu_exc;
wire fcu_v;
reg fcu_thrd;
reg fcu_branchmiss;
reg fcu_clearbm;
reg [`ABITS] fcu_misspc;

reg [63:0] rmw_argA;
reg [63:0] rmw_argB;
reg [63:0] rmw_argC;
wire [63:0] rmw_res;
reg [47:0] rmw_instr;

// write buffer
reg [63:0] wb_data [0:`WB_DEPTH-1];
reg [`ABITS] wb_addr [0:`WB_DEPTH-1];
reg [1:0] wb_ol [0:`WB_DEPTH-1];
reg [`WB_DEPTH-1:0] wb_v;
reg [`WB_DEPTH-1:0] wb_rmw;
reg [QENTRIES-1:0] wb_id [0:`WB_DEPTH-1];
reg [QENTRIES-1:0] wbo_id;
reg [7:0] wb_sel [0:`WB_DEPTH-1];
reg wb_en;
reg wb_shift;

reg branchmiss = 1'b0;
reg branchmiss_thrd = 1'b0;
reg [`ABITS] misspc;
reg [`QBITS] missid;

wire take_branch;
wire take_branchA;
wire take_branchB;
wire take_branchC;
wire take_branchD;

// ----------------------------------------------------------------------------
// Memory (DRAM) request channels.
// ----------------------------------------------------------------------------
wire dram_avail;
reg [2:0] dram0;           // state of the DRAM request (latency = 4; can have three in pipeline)
reg [2:0] dram1;           // state of the DRAM request (latency = 4; can have three in pipeline)
reg [2:0] dram2;           // state of the DRAM request (latency = 4; can have three in pipeline)
reg [63:0] dram0_data;
reg [`ABITS] dram0_addr;
reg [47:0] dram0_instr;
reg dram0_rmw;
reg dram0_preload;
reg [RBIT:0] dram0_tgt;
reg [`QBITSP1] dram0_id;
reg dram0_unc;
reg [2:0] dram0_memsize;
reg dram0_load;            // is a load operation
reg dram0_loadseg;
reg dram0_store;
reg [1:0] dram0_ol;
reg [63:0] dram1_data;
reg [`ABITS] dram1_addr;
reg [47:0] dram1_instr;
reg dram1_rmw;
reg dram1_preload;
reg [RBIT:0] dram1_tgt;
reg [`QBITSP1] dram1_id;
reg dram1_unc;
reg [2:0] dram1_memsize;
reg dram1_load;
reg dram1_loadseg;
reg dram1_store;
reg [1:0] dram1_ol;
reg [63:0] dram2_data;
reg [`ABITS] dram2_addr;
reg [47:0] dram2_instr;
reg dram2_rmw;
reg dram2_preload;
reg [RBIT:0] dram2_tgt;
reg [`QBITSP1] dram2_id;
reg dram2_unc;
reg [2:0] dram2_memsize;
reg dram2_load;
reg dram2_loadseg;
reg dram2_store;
reg [1:0] dram2_ol;

reg dramA_v;
reg [`QBITSP1] dramA_id;
reg [63:0] dramA_bus;
reg dramB_v;
reg [`QBITSP1] dramB_id;
reg [63:0] dramB_bus;
reg dramC_v;
reg [`QBITSP1] dramC_id;
reg [63:0] dramC_bus;

wire outstanding_stores;
reg [63:0] I;              // instruction count
reg [63:0] CC;             // commit count

// ----------------------------------------------------------------------------
// Commit buses.  commit0/commit1 feed the register file write ports below.
// ----------------------------------------------------------------------------
reg commit0_v;
reg [`QBITSP1] commit0_id;
reg [RBIT:0] commit0_tgt;
reg [7:0] commit0_we = 8'h00;
reg [63:0] commit0_bus;
reg commit1_v;
reg [`QBITSP1] commit1_id;
reg [RBIT:0] commit1_tgt;
reg [7:0] commit1_we = 8'h00;
reg [63:0] commit1_bus;
reg commit2_v;
reg [`QBITSP1] commit2_id;
reg [RBIT:0] commit2_tgt;
reg [7:0] commit2_we = 8'h00;
reg [63:0] commit2_bus;

reg StoreAck1;
reg [4:0] bstate = BIDLE;
wire [3:0] icstate;
parameter SEG_IDLE = 2'd0;
parameter SEG_CHK = 2'd1;
parameter SEG_UPD = 2'd2;
parameter SEG_DONE = 2'd3;

// ----------------------------------------------------------------------------
// Instruction cache control signals.
// ----------------------------------------------------------------------------
reg [1:0] bwhich;
reg invic, invdc;
reg invicl;
wire [1:0] icwhich;
wire icnxt;
wire L2_nxt;
wire ihit0,ihit1,ihit2,ihitL2;
wire ihit = ihit0&ihit1&ihit2;
reg phit;
wire threadx;
// phit: all ways hit, the cache controller is idle, and no line invalidate
// is requested.
always @*
    phit <= (ihit&&icstate==IDLE) && !invicl;
(* mark_debug="true" *)
reg icack;
wire L1_wr0,L1_wr1,L1_wr2;
wire L1_invline;
wire [1:0] ic0_fault,ic1_fault,ic2_fault;
wire [9:0] L1_en;
wire [71:0] L1_adr;
wire [71:0] L2_adr;
wire [305:0] L2_dato;
wire selL2;

wire icclk;
BUFH ucb1 (.I(clk), .O(icclk));

// ----------------------------------------------------------------------------
// Register file.  The variant is selected by `WAYS.  rfocN is the C operand
// read data unless RcN[11:6] is 6'h3F, in which case a vector mask register
// (vm) is substituted.
// ----------------------------------------------------------------------------
generate
begin : gRegfileInst
    if (`WAYS > 2) begin : gb1
        FT64_regfile2w9r_oc #(.RBIT(RBIT)) urf1
        (
            .clk(clk),
            .clk4x(clk4x),
            .wr0(commit0_v),
            .wr1(commit1_v),
            .we0(commit0_we),
            .we1(commit1_we),
            .wa0(commit0_tgt),
            .wa1(commit1_tgt),
            .i0(commit0_bus),
            .i1(commit1_bus),
            .rclk(~clk),
            .ra0(Ra0),
            .ra1(Rb0),
            .ra2(Rc0),
            .o0(rfoa0),
            .o1(rfob0),
            .o2(rfoc0a),
            .ra3(Ra1),
            .ra4(Rb1),
            .ra5(Rc1),
            .o3(rfoa1),
            .o4(rfob1),
            .o5(rfoc1a),
            .ra6(Ra2),
            .ra7(Rb2),
            .ra8(Rc2),
            .o6(rfoa2),
            .o7(rfob2),
            .o8(rfoc2a)
        );
        assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
        assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a;
        assign rfoc2 = Rc2[11:6]==6'h3F ? vm[Rc2[2:0]] : rfoc2a;
    end
    else if (`WAYS > 1) begin : gb1
        FT64_regfile2w6r_oc #(.RBIT(RBIT)) urf1
        (
            .clk(clk),
            .clk4x(clk4x),
            .wr0(commit0_v),
            .wr1(commit1_v),
            .we0(commit0_we),
            .we1(commit1_we),
            .wa0(commit0_tgt),
            .wa1(commit1_tgt),
            .i0(commit0_bus),
            .i1(commit1_bus),
            .rclk(~clk),
            .ra0(Ra0),
            .ra1(Rb0),
            .ra2(Rc0),
            .o0(rfoa0),
            .o1(rfob0),
            .o2(rfoc0a),
            .ra3(Ra1),
            .ra4(Rb1),
            .ra5(Rc1),
            .o3(rfoa1),
            .o4(rfob1),
            .o5(rfoc1a)
        );
        assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
        assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a;
    end
    else begin : gb1
        FT64_regfile1w4r_oc #(.RBIT(RBIT)) urf1
        (
            .clk(clk),
            .wr0(commit0_v),
            .wa0(commit0_tgt),
            .we0(8'hFF),
            .i0(commit0_bus),
            .rclk(~clk),
            .ra0(Ra0),
            .ra1(Rb0),
            .ra2(Rc0),
            .ra3(Rt0),
            .o0(rfoa0),
            .o1(rfob0),
            .o2(rfoc0a),
            .o3(rfot0)
        );
        // This assignment used to sit after the if/else chain, which gave
        // rfoc0 a second continuous driver whenever `WAYS > 1.  It belongs
        // to the one-way configuration only.
        assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
    end
end
endgenerate

// Returns an instruction length selected by ins[7:6]: 0 -> 4, 1 -> 6,
// anything else -> 2.  With SUPPORT_DCI a `CMPRSSD opcode returns 2.
// pred_on is OR'd into the result.
function [3:0] fnInsLength;
input [47:0] ins;
`ifdef SUPPORT_DCI
if (ins[`INSTRUCTION_OP]==`CMPRSSD)
    fnInsLength = 4'd2 | pred_on;
else
`endif
    case(ins[7:6])
    2'd0:   fnInsLength = 4'd4 | pred_on;
    2'd1:   fnInsLength = 4'd6 | pred_on;
    default:    fnInsLength = 4'd2 | pred_on;
    endcase
endfunction

// Second and third instruction words.  With threading enabled each comes
// straight from its own cache output; otherwise the concatenated cache
// outputs are shifted right by eight times the preceding instruction
// length(s).
generate
begin : gInsnVar
    if (`WAYS > 1) begin
        always @*
            if (thread_en)
                insn1a <= insn1b;
            else
                insn1a <= {insn1b,insn0a} >> {fnInsLength(insn0a),3'b0};
    end
    if (`WAYS > 2) begin
        always @*
            if (thread_en)
                insn2a <= insn2b;
            else
                insn2a <= {insn2b,insn1b,insn0a} >> {fnInsLength(insn0a) + fnInsLength(insn1a),3'b0};
    end
end
endgenerate

wire L1_selpc;
wire [2:0] icti;
wire [1:0] ibte;
wire icyc;
wire istb;
wire [7:0] isel;
wire [71:0] iadr;
wire L2_ld;
wire [305:0] L1_dat;
wire [2:0] L2_cnt;
reg [71:0] invlineAddr;

// Instruction cache controller.
FT64_ICController uL1ctrl
(
    .clk_i(clk),
    .asid(ASID),
    .pc0(pc0),
    .pc1(pc1),
    .pc2(pc2),
    .hit0(ihit0),
    .hit1(ihit1),
    .hit2(ihit2),
    .bstate(bstate),
    .state(icstate),
    .invline(invicl),
    .invlineAddr(invlineAddr),
    .thread_en(thread_en),
    .L1_selpc(L1_selpc),
    .L1_adr(L1_adr),
    .L1_dat(L1_dat),
    .L1_wr0(L1_wr0),
    .L1_wr1(L1_wr1),
    .L1_wr2(L1_wr2),
    .L1_en(L1_en),
    .L1_invline(L1_invline),
    .ihitL2(ihitL2),
    .selL2(selL2),
    .L2_ld(L2_ld),
    .L2_cnt(L2_cnt),
    .L2_adr(L2_adr),
    .L2_dato(L2_dato),
    .L2_nxt(L2_nxt),
    .icnxt(icnxt),
    .icwhich(icwhich),
    .icl_o(icl_o),
    .cti_o(icti),
    .bte_o(ibte),
    .bok_i(bok_i),
    .cyc_o(icyc),
    .stb_o(istb),
    .ack_i(acki),
    .err_i(err_i),
    .tlbmiss_i(tlb_miss),
    .exv_i(exv_i),
    .sel_o(isel),
    .adr_o(iadr),
    .dat_i(dat_i)
);

// L1 instruction cache, way 0 (always present).
FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0
(
    .rst(rst),
    .clk(clk),
    .nxt(icnxt),
    .wr(L1_wr0),
    .wr_ack(),
    .en(L1_en),
    .adr(L1_selpc ? {ASID,pc0} : L1_adr),
    .wadr(L1_adr),
    .i(L1_dat),
    .o(insn0a),
    .fault(ic0_fault),
    .hit(ihit0),
    .invall(invic),
    .invline(L1_invline)
);

// Additional L1 instruction caches for the wider configurations.  When a
// way is absent its hit signal is tied high so that ihit is unaffected.
generate
begin : gICacheInst
    if (`WAYS > 1) begin
        FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic1
        (
            .rst(rst),
            .clk(clk),
            .nxt(icnxt),
            .wr(L1_wr1),
            .wr_ack(),
            .en(L1_en),
            .adr(L1_selpc ? (thread_en ? {ASID,pc1}: {ASID,pc0plus6} ): L1_adr),
            .wadr(L1_adr),
            .i(L1_dat),
            .o(insn1b),
            .fault(ic1_fault),
            .hit(ihit1),
            .invall(invic),
            .invline(L1_invline)
        );
    end
    else begin
        assign ihit1 = 1'b1;
    end
    if (`WAYS > 2) begin
        FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic2
        (
            .rst(rst),
            .clk(clk),
            .nxt(icnxt),
            .wr(L1_wr2),
            .wr_ack(),
            .en(L1_en),
            .adr(L1_selpc ? (thread_en ? {ASID,pc2} : {ASID,pc0plus12}) : L1_adr),
            .wadr(L1_adr),
            .i(L1_dat),
            .o(insn2b),
            .fault(ic2_fault),
            .hit(ihit2),
            .invall(invic),
            .invline(L1_invline)
        );
    end
    else
        assign ihit2 = 1'b1;
end
endgenerate

// L2 instruction cache.
FT64_L2_icache uic2
(
    .rst(rst),
    .clk(clk),
    .nxt(L2_nxt),
    .wr(L2_ld),
    .adr(selL2 ? L2_adr: L1_adr),
    .cnt(L2_cnt),
    .exv_i(exvq),
    .i(dat_i),
    .err_i(errq),
    .o(L2_dato),
    .hit(ihitL2),
    .invall(invic),
    .invline()
);

wire predict_taken;
wire predict_taken0;
wire predict_taken1;
wire predict_taken2;
wire predict_takenA;
wire predict_takenB;
wire predict_takenC;
wire predict_takenD;
wire predict_takenE;
wire predict_takenF;
wire predict_takenA1;
wire predict_takenB1;
wire predict_takenC1;
wire predict_takenD1;

wire [`ABITS] btgtA, btgtB, btgtC, btgtD, btgtE, btgtF;

// Branch target buffer write strobes: the entry at the given head is valid,
// in the commit state, and is a flow control instruction.
// btbwr1/btbwr2 were declared inside a separate named generate scope, where
// they are not visible by simple name to the BTB instance that connects
// them; they are declared at module scope here and forced low when the
// corresponding way is not configured.
wire btbwr0 = iqentry_v[heads[0]] && iqentry_state[heads[0]]==IQS_CMT &&
            (iqentry_fc[heads[0]]);
wire btbwr1 = (`WAYS > 1) &&
            iqentry_v[heads[1]] && iqentry_state[heads[1]]==IQS_CMT &&
            (iqentry_fc[heads[1]]);
wire btbwr2 = (`WAYS > 2) &&
            iqentry_v[heads[2]] && iqentry_state[heads[2]]==IQS_CMT &&
            (iqentry_fc[heads[2]]);

wire fcu_clk;
`ifdef FCU_ENH
//BUFGCE ufcuclk
//(
// .I(clk_i),
// .CE(fcu_available),
// .O(fcu_clk)
//);
`endif
assign fcu_clk = clk_i;    // the gated clock buffer above is disabled

generate
begin: gBTBInst
if (`WAYS > 2) begin
`ifdef FCU_ENH
  // Three-way configuration: all three commit slots can write the BTB and
  // all six fetch-buffer PCs are looked up.
  // NOTE(review): btbwr1 / btbwr2 must be visible at this scope; confirm they
  // are not declared inside another generate block.
  FT64_BTB #(.AMSB(AMSB)) ubtb1
  (
    .rst(rst),
    .wclk(fcu_clk),
    .wr0(btbwr0),
    .wadr0(iqentry_pc[heads[0]]),
    .wdat0(iqentry_ma[heads[0]]),
    .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
    .wr1(btbwr1),
    .wadr1(iqentry_pc[heads[1]]),
    .wdat1(iqentry_ma[heads[1]]),
    .valid1((iqentry_br[heads[1]] ? iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]),
    .wr2(btbwr2),
    .wadr2(iqentry_pc[heads[2]]),
    .wdat2(iqentry_ma[heads[2]]),
    .valid2((iqentry_br[heads[2]] ? iqentry_takb[heads[2]] : iqentry_bt[heads[2]]) & iqentry_v[heads[2]]),
    .rclk(~clk),
    .pcA(fetchbufA_pc),
    .btgtA(btgtA),
    .pcB(fetchbufB_pc),
    .btgtB(btgtB),
    .pcC(fetchbufC_pc),
    .btgtC(btgtC),
    .pcD(fetchbufD_pc),
    .btgtD(btgtD),
    .pcE(fetchbufE_pc),
    .btgtE(btgtE),
    .pcF(fetchbufF_pc),
    .btgtF(btgtF),
    .npcA(BRKPC),
    .npcB(BRKPC),
    .npcC(BRKPC),
    .npcD(BRKPC),
    .npcE(BRKPC),
    .npcF(BRKPC)
  );
`else
  // Branch targets are picked up by fetchbuf logic and need to be present.
  // Without a target predictor they are just set to the reset address.
  // This virtually guarantees a miss.
  assign btgtA = RSTPC;
  assign btgtB = RSTPC;
  assign btgtC = RSTPC;
  assign btgtD = RSTPC;
  assign btgtE = RSTPC;
  assign btgtF = RSTPC;
`endif
end
else if (`WAYS > 1) begin
`ifdef FCU_ENH
  // Two-way configuration: commit slots 0 and 1 write the BTB; fetch buffers
  // A-D are looked up. The unused third write port is tied off (same values
  // as the one-way instance below) instead of being left floating.
  FT64_BTB #(.AMSB(AMSB)) ubtb1
  (
    .rst(rst),
    .wclk(fcu_clk),
    .wr0(btbwr0),
    .wadr0(iqentry_pc[heads[0]]),
    .wdat0(iqentry_ma[heads[0]]),
    .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
    .wr1(btbwr1),
    .wadr1(iqentry_pc[heads[1]]),
    .wdat1(iqentry_ma[heads[1]]),
    .valid1((iqentry_br[heads[1]] ? iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]),
    .wr2(1'b0),
    .wadr2(RSTPC),
    .wdat2(RSTPC),
    .valid2(1'b0),
    .rclk(~clk),
    .pcA(fetchbufA_pc),
    .btgtA(btgtA),
    .pcB(fetchbufB_pc),
    .btgtB(btgtB),
    .pcC(fetchbufC_pc),
    .btgtC(btgtC),
    .pcD(fetchbufD_pc),
    .btgtD(btgtD),
    .pcE(32'd0),
    .btgtE(),
    .pcF(32'd0),
    .btgtF(),
    .npcA(BRKPC),
    .npcB(BRKPC),
    .npcC(BRKPC),
    .npcD(BRKPC),
    .npcE(BRKPC),
    .npcF(BRKPC)
  );
`else
  // Branch targets are picked up by fetchbuf logic and need to be present.
  // Without a target predictor they are just set to the reset address.
  // This virtually guarantees a miss.
  assign btgtA = RSTPC;
  assign btgtB = RSTPC;
  assign btgtC = RSTPC;
  assign btgtD = RSTPC;
`endif
end
else begin
`ifdef FCU_ENH
  // One-way configuration: only commit slot 0 writes the BTB; write ports 1
  // and 2 are tied off and only fetch buffers A and B are looked up.
  // (The port list previously contained ';' after .wr1()/.wr2(), which is a
  // syntax error inside a port connection list.)
  FT64_BTB #(.AMSB(AMSB)) ubtb1
  (
    .rst(rst),
    .wclk(fcu_clk),
    .wr0(btbwr0),
    .wadr0(iqentry_pc[heads[0]]),
    .wdat0(iqentry_ma[heads[0]]),
    .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
    .wr1(1'b0),
    .wadr1(RSTPC),
    .wdat1(RSTPC),
    .valid1(1'b0),
    .wr2(1'b0),
    .wadr2(RSTPC),
    .wdat2(RSTPC),
    .valid2(1'b0),
    .rclk(~clk),
    .pcA(fetchbufA_pc),
    .btgtA(btgtA),
    .pcB(fetchbufB_pc),
    .btgtB(btgtB),
    .pcC(32'd0),
    .btgtC(),
    .pcD(32'd0),
    .btgtD(),
    .pcE(32'd0),
    .btgtE(),
    .pcF(32'd0),
    .btgtF(),
    .hitA(),
    .hitB(),
    .hitC(),
    .hitD(),
    .hitE(),
    .hitF(),
    .npcA(BRKPC),
    .npcB(BRKPC),
    .npcC(BRKPC),
    .npcD(BRKPC),
    .npcE(BRKPC),
    .npcF(BRKPC)
  );
`else
  // Branch targets are picked up by fetchbuf logic and need to be present.
  // Without a target predictor they are just set to the reset address.
  // This virtually guarantees a miss.
assign btgtA = RSTPC; assign btgtB = RSTPC; `endif end end endgenerate generate begin: gBPInst if (`WAYS > 2) begin `ifdef FCU_ENH FT64_BranchPredictor ubp1 ( .rst(rst), .clk(fcu_clk), .en(bpe), .xisBranch0(iqentry_br[heads[0]] & commit0_v), .xisBranch1(iqentry_br[heads[1]] & commit1_v), .xisBranch2(iqentry_br[heads[2]] & commit2_v), .pcA(fetchbufA_pc), .pcB(fetchbufB_pc), .pcC(fetchbufC_pc), .pcD(fetchbufD_pc), .pcE(fetchbufE_pc), .pcF(fetchbufF_pc), .xpc0(iqentry_pc[heads[0]]), .xpc1(iqentry_pc[heads[1]]), .xpc2(iqentry_pc[heads[2]]), .takb0(commit0_v & iqentry_takb[heads[0]]), .takb1(commit1_v & iqentry_takb[heads[1]]), .takb2(commit2_v & iqentry_takb[heads[2]]), .predict_takenA(predict_takenA), .predict_takenB(predict_takenB), .predict_takenC(predict_takenC), .predict_takenD(predict_takenD), .predict_takenE(predict_takenE), .predict_takenF(predict_takenF) ); `else // Predict based on sign of displacement assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31]; assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31]; assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31]; assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31]; assign predict_takenE = fetchbufE_instr[6] ? fetchbufE_instr[47] : fetchbufE_instr[31]; assign predict_takenF = fetchbufF_instr[6] ? 
fetchbufF_instr[47] : fetchbufF_instr[31]; `endif end else if (`WAYS > 1) begin `ifdef FCU_ENH FT64_BranchPredictor ubp1 ( .rst(rst), .clk(fcu_clk), .en(bpe), .xisBranch0(iqentry_br[heads[0]] & commit0_v), .xisBranch1(iqentry_br[heads[1]] & commit1_v), .xisBranch2(iqentry_br[heads[2]] & commit2_v), .pcA(fetchbufA_pc), .pcB(fetchbufB_pc), .pcC(fetchbufC_pc), .pcD(fetchbufD_pc), .pcE(32'd0), .pcF(32'd0), .xpc0(iqentry_pc[heads[0]]), .xpc1(iqentry_pc[heads[1]]), .xpc2(iqentry_pc[heads[2]]), .takb0(commit0_v & iqentry_takb[heads[0]]), .takb1(commit1_v & iqentry_takb[heads[1]]), .takb2(commit2_v & iqentry_takb[heads[2]]), .predict_takenA(predict_takenA), .predict_takenB(predict_takenB), .predict_takenC(predict_takenC), .predict_takenD(predict_takenD), .predict_takenE(), .predict_takenF() ); `else // Predict based on sign of displacement assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31]; assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31]; assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31]; assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31]; `endif end else begin `ifdef FCU_ENH FT64_BranchPredictor ubp1 ( .rst(rst), .clk(fcu_clk), .en(bpe), .xisBranch0(iqentry_br[heads[0]] & commit0_v), .xisBranch1(iqentry_br[heads[1]] & commit1_v), .xisBranch2(iqentry_br[heads[2]] & commit2_v), .pcA(fetchbufA_pc), .pcB(fetchbufB_pc), .pcC(32'd0), .pcD(32'd0), .pcE(32'd0), .pcF(32'd0), .xpc0(iqentry_pc[heads[0]]), .xpc1(iqentry_pc[heads[1]]), .xpc2(iqentry_pc[heads[2]]), .takb0(commit0_v & iqentry_takb[heads[0]]), .takb1(commit1_v & iqentry_takb[heads[1]]), .takb2(commit2_v & iqentry_takb[heads[2]]), .predict_takenA(predict_takenA), .predict_takenB(predict_takenB), .predict_takenC(), .predict_takenD(), .predict_takenE(), .predict_takenF() ); `else // Predict based on sign of displacement assign predict_takenA = fetchbufA_instr[6] ? 
fetchbufA_instr[47] : fetchbufA_instr[31]; assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31]; `endif end end endgenerate //----------------------------------------------------------------------------- // Debug //----------------------------------------------------------------------------- `ifdef SUPPORT_DBG wire [DBW-1:0] dbg_stat1x; reg [DBW-1:0] dbg_stat; reg [DBW-1:0] dbg_ctrl; reg [ABW-1:0] dbg_adr0; reg [ABW-1:0] dbg_adr1; reg [ABW-1:0] dbg_adr2; reg [ABW-1:0] dbg_adr3; reg dbg_imatchA0,dbg_imatchA1,dbg_imatchA2,dbg_imatchA3,dbg_imatchA; reg dbg_imatchB0,dbg_imatchB1,dbg_imatchB2,dbg_imatchB3,dbg_imatchB; wire dbg_lmatch00 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram0_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram0_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram0_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram0_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_lmatch01 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram1_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram1_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram1_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram1_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_lmatch02 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram2_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram2_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram2_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram2_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_lmatch10 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram0_addr[AMSB:3]==dbg_adr1[AMSB:3] && ((dbg_ctrl[23:22]==2'b00 && dram0_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram0_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram0_addr[2]==dbg_adr1[2]) || dbg_ctrl[23:22]==2'b11) ; wire dbg_lmatch11 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram1_addr[AMSB:3]==dbg_adr1[AMSB:3] && 
((dbg_ctrl[23:22]==2'b00 && dram1_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram1_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram1_addr[2]==dbg_adr1[2]) || dbg_ctrl[23:22]==2'b11) ; wire dbg_lmatch12 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram2_addr[AMSB:3]==dbg_adr1[AMSB:3] && ((dbg_ctrl[23:22]==2'b00 && dram2_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram2_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram2_addr[2]==dbg_adr1[2]) || dbg_ctrl[23:22]==2'b11) ; wire dbg_lmatch20 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram0_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram0_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram0_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram0_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_lmatch21 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram1_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram1_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram1_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram1_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_lmatch22 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram2_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram2_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram2_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram2_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_lmatch30 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram0_addr[AMSB:3]==dbg_adr3[AMSB:3] && ((dbg_ctrl[31:30]==2'b00 && dram0_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram0_addr[2:1]==dbg_adr3[2:1]) || (dbg_ctrl[31:30]==2'b10 && dram0_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_lmatch31 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram1_addr[AMSB:3]==dbg_adr3[AMSB:3] && ((dbg_ctrl[31:30]==2'b00 && dram1_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram1_addr[2:1]==dbg_adr3[2:1]) || 
(dbg_ctrl[31:30]==2'b10 && dram1_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_lmatch32 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram2_addr[AMSB:3]==dbg_adr3[AMSB:3] && ((dbg_ctrl[31:30]==2'b00 && dram2_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram2_addr[2:1]==dbg_adr3[2:1]) || (dbg_ctrl[31:30]==2'b10 && dram2_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_lmatch0 = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30; wire dbg_lmatch1 = dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31; wire dbg_lmatch2 = dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32; wire dbg_lmatch = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30| dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31| dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32 ; wire dbg_smatch00 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram0_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram0_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram0_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram0_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_smatch01 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram1_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram1_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram1_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram1_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_smatch02 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram2_addr[AMSB:3]==dbg_adr0[AMSB:3] && ((dbg_ctrl[19:18]==2'b00 && dram2_addr[2:0]==dbg_adr0[2:0]) || (dbg_ctrl[19:18]==2'b01 && dram2_addr[2:1]==dbg_adr0[2:1]) || (dbg_ctrl[19:18]==2'b10 && dram2_addr[2]==dbg_adr0[2]) || dbg_ctrl[19:18]==2'b11) ; wire dbg_smatch10 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram0_addr[AMSB:3]==dbg_adr1[AMSB:3] && ((dbg_ctrl[23:22]==2'b00 && dram0_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram0_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram0_addr[2]==dbg_adr1[2]) 
|| dbg_ctrl[23:22]==2'b11) ; wire dbg_smatch11 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram1_addr[AMSB:3]==dbg_adr1[AMSB:3] && ((dbg_ctrl[23:22]==2'b00 && dram1_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram1_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram1_addr[2]==dbg_adr1[2]) || dbg_ctrl[23:22]==2'b11) ; wire dbg_smatch12 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram2_addr[AMSB:3]==dbg_adr1[AMSB:3] && ((dbg_ctrl[23:22]==2'b00 && dram2_addr[2:0]==dbg_adr1[2:0]) || (dbg_ctrl[23:22]==2'b01 && dram2_addr[2:1]==dbg_adr1[2:1]) || (dbg_ctrl[23:22]==2'b10 && dram2_addr[2]==dbg_adr1[2]) || dbg_ctrl[23:22]==2'b11) ; wire dbg_smatch20 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram0_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram0_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram0_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram0_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_smatch21 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram1_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram1_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram1_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram1_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_smatch22 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram2_addr[AMSB:3]==dbg_adr2[AMSB:3] && ((dbg_ctrl[27:26]==2'b00 && dram2_addr[2:0]==dbg_adr2[2:0]) || (dbg_ctrl[27:26]==2'b01 && dram2_addr[2:1]==dbg_adr2[2:1]) || (dbg_ctrl[27:26]==2'b10 && dram2_addr[2]==dbg_adr2[2]) || dbg_ctrl[27:26]==2'b11) ; wire dbg_smatch30 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram0_addr[AMSB:3]==dbg_adr3[AMSB:3] && ((dbg_ctrl[31:30]==2'b00 && dram0_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram0_addr[2:1]==dbg_adr3[2:1]) || (dbg_ctrl[31:30]==2'b10 && dram0_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_smatch31 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram1_addr[AMSB:3]==dbg_adr3[AMSB:3] && 
((dbg_ctrl[31:30]==2'b00 && dram1_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram1_addr[2:1]==dbg_adr3[2:1]) || (dbg_ctrl[31:30]==2'b10 && dram1_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_smatch32 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram2_addr[AMSB:3]==dbg_adr3[AMSB:3] && ((dbg_ctrl[31:30]==2'b00 && dram2_addr[2:0]==dbg_adr3[2:0]) || (dbg_ctrl[31:30]==2'b01 && dram2_addr[2:1]==dbg_adr3[2:1]) || (dbg_ctrl[31:30]==2'b10 && dram2_addr[2]==dbg_adr3[2]) || dbg_ctrl[31:30]==2'b11) ; wire dbg_smatch0 = dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30; wire dbg_smatch1 = dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31; wire dbg_smatch2 = dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32; wire dbg_smatch = dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30| dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31| dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32 ; wire dbg_stat0 = dbg_imatchA0 | dbg_imatchB0 | dbg_lmatch00 | dbg_lmatch01 | dbg_lmatch02 | dbg_smatch00 | dbg_smatch01 | dbg_smatch02; wire dbg_stat1 = dbg_imatchA1 | dbg_imatchB1 | dbg_lmatch10 | dbg_lmatch11 | dbg_lmatch12 | dbg_smatch10 | dbg_smatch11 | dbg_smatch12; wire dbg_stat2 = dbg_imatchA2 | dbg_imatchB2 | dbg_lmatch20 | dbg_lmatch21 | dbg_lmatch22 | dbg_smatch20 | dbg_smatch21 | dbg_smatch22; wire dbg_stat3 = dbg_imatchA3 | dbg_imatchB3 | dbg_lmatch30 | dbg_lmatch31 | dbg_lmatch32 | dbg_smatch30 | dbg_smatch31 | dbg_smatch32; assign dbg_stat1x = {dbg_stat3,dbg_stat2,dbg_stat1,dbg_stat0}; wire debug_on = |dbg_ctrl[3:0]|dbg_ctrl[7]|dbg_ctrl[63]; always @* begin if (dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf0_pc==dbg_adr0) dbg_imatchA0 = `TRUE; if (dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf0_pc==dbg_adr1) dbg_imatchA1 = `TRUE; if (dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf0_pc==dbg_adr2) dbg_imatchA2 = `TRUE; if (dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf0_pc==dbg_adr3) dbg_imatchA3 = `TRUE; if 
(dbg_imatchA0|dbg_imatchA1|dbg_imatchA2|dbg_imatchA3) dbg_imatchA = `TRUE; end always @* begin if (dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf1_pc==dbg_adr0) dbg_imatchB0 = `TRUE; if (dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf1_pc==dbg_adr1) dbg_imatchB1 = `TRUE; if (dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf1_pc==dbg_adr2) dbg_imatchB2 = `TRUE; if (dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf1_pc==dbg_adr3) dbg_imatchB3 = `TRUE; if (dbg_imatchB0|dbg_imatchB1|dbg_imatchB2|dbg_imatchB3) dbg_imatchB = `TRUE; end `endif //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // freezePC squashes the pc increment if there's an irq. // If there is a segment prefix present then defer the freezing of the pc. // If a hardware interrupt instruction is encountered in the instruction stream // flag it as a privilege violation. wire freezePC = (irq_i > im) && !int_commit; always @* if (freezePC) begin insn0 <= {32'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK}; end else if (phit) begin // if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[25:21]==5'd0 && insn0a[`INSTRUCTION_L2]==2'b00) // insn0 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK}; // else insn0 <= insn0a; if (insn0a[15:0]==16'hFF00) begin // BRK #255 if (~|irq_i) insn0 <= {8'h00,`NOP_INSN}; else insn0[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK}; end else if (insn0a[15:0]==16'h0000) insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK}; else case(ic0_fault) 2'd0: ; // no fault, don't alter instruction 2'd1: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK}; 2'd2: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK}; 2'd3: insn0 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK}; endcase end else begin insn0 <= {8'h00,`NOP_INSN}; end generate begin : gInsnMux if (`WAYS > 1) begin always @* if (freezePC && !thread_en) begin insn1 <= {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK}; end else if 
(phit) begin // if (insn1a[`INSTRUCTION_OP]==`BRK && insn1a[25:21]==5'd0 && insn1a[`INSTRUCTION_L2]==2'b00) // insn1 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK}; // else insn1 <= insn1a; if (insn1a[15:0]==16'hFF00) begin if (~|irq_i) insn1 <= {8'h00,`NOP_INSN}; else insn1[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK}; end case(ic1_fault) 2'd0: ; // no fault, don't alter instruction 2'd1: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK}; 2'd2: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK}; 2'd3: insn1 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK}; endcase end else begin insn1 <= {8'h00,`NOP_INSN}; end end if (`WAYS > 2) begin always @* if (freezePC && !thread_en) insn2 <= {6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK}; else if (phit) begin // if (insn2a[`INSTRUCTION_OP]==`BRK && insn1a[25:21]==5'd0 && insn2a[`INSTRUCTION_L2]==2'b00) // insn2 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK}; // else insn2 <= insn2a; if (insn2a[15:0]==16'hFF00) begin if (~|irq_i) insn2 <= {8'h00,`NOP_INSN}; else insn2[20:0] <= {irq_i,1'b0,vec_i,2'b00,`BRK}; end case(ic2_fault) 2'd0: ; // no fault, don't alter instruction 2'd1: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_TLB,2'b00,`BRK}; 2'd2: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_EXF,2'b00,`BRK}; 2'd3: insn2 <= {32'h00,6'd0,5'd0,4'h0,1'b0,`FLT_IBE,2'b00,`BRK}; endcase end else insn2 <= `NOP_INSN; end end endgenerate wire [63:0] dc0_out, dc1_out, dc2_out; assign rdat0 = dram0_unc ? xdati[63:0] : dc0_out; assign rdat1 = dram1_unc ? xdati[63:0] : dc1_out; assign rdat2 = dram2_unc ? xdati[63:0] : dc2_out; reg preload; reg [1:0] dccnt; reg [3:0] dcwait = 4'd3; reg [3:0] dcwait_ctr = 4'd3; wire dhit0, dhit1, dhit2; wire dhit0a, dhit1a, dhit2a; wire dhit00, dhit10, dhit20; wire dhit01, dhit11, dhit21; reg [`ABITS] dc_wadr; reg [63:0] dc_wdat; reg isStore; reg [31:0] dcsel; reg [255:0] dcbuf; reg dcwr; // If the data is in the write buffer, give the buffer a chance to // write out the data before trying to load from the cache. 
reg wb_hit0, wb_hit1, wb_hit2;
// wb_hitN is set when any valid write-buffer entry has the same address
// (compared on bits AMSB:3) as memory channel N's address. Channels 1 and 2
// are only checked when `NUM_MEM configures them. The block is combinational,
// so blocking assignments are used.
always @*
begin
  wb_hit0 = FALSE;
  wb_hit1 = FALSE;
  wb_hit2 = FALSE;
  for (n = 0; n < `WB_DEPTH; n = n + 1) begin
    if (wb_v[n] && wb_addr[n][AMSB:3]==dram0_addr[AMSB:3])
      wb_hit0 = TRUE;
    if (`NUM_MEM > 1 && wb_v[n] && wb_addr[n][AMSB:3]==dram1_addr[AMSB:3])
      wb_hit1 = TRUE;
    if (`NUM_MEM > 2 && wb_v[n] && wb_addr[n][AMSB:3]==dram2_addr[AMSB:3])
      wb_hit2 = TRUE;
  end
end

// A data-cache read hit only counts when the write buffer holds no matching entry.
assign dhit0 = dhit0a && !wb_hit0;
assign dhit1 = dhit1a && !wb_hit1;
assign dhit2 = dhit2a && !wb_hit2;
wire whit0, whit1, whit2;

// Cache write enables: an explicit cache write (dcwr), or a store being
// acknowledged on the bus whose address hits in that cache copy (whitN).
wire wr_dcache0 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit0);
wire wr_dcache1 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit1);
wire wr_dcache2 = (dcwr)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B_LSNAck && isStore)) && whit2);

// One data-cache copy per memory channel; all copies share the same write
// address/data and differ only in their read port.
FT64_dcache udc0
(
  .rst(rst),
  .wclk(clk),
  .dce(dce),
  .wr(wr_dcache0),
  .sel(dcsel),
  .wadr({ASID,vadr}),
  .whit(whit0),
  .i(dcbuf),
  .rclk(clk),
  .rdsize(dram0_memsize),
  .radr({ASID,dram0_addr}),
  .o(dc0_out),
  .rhit(dhit0a)
);
generate begin : gDCacheInst
if (`NUM_MEM > 1) begin
  FT64_dcache udc1
  (
    .rst(rst),
    .wclk(clk),
    .dce(dce),
    .wr(wr_dcache1),
    .sel(dcsel),
    .wadr({ASID,vadr}),
    .whit(whit1),
    .i(dcbuf),
    .rclk(clk),
    .rdsize(dram1_memsize),
    .radr({ASID,dram1_addr}),
    .o(dc1_out),
    .rhit(dhit1a)
  );
end
if (`NUM_MEM > 2) begin
  FT64_dcache udc2
  (
    .rst(rst),
    .wclk(clk),
    .dce(dce),
    .wr(wr_dcache2),
    .sel(dcsel),
    .wadr({ASID,vadr}),
    .whit(whit2),
    .i(dcbuf),
    .rclk(clk),
    .rdsize(dram2_memsize),
    .radr({ASID,dram2_addr}),
    .o(dc2_out),
    .rhit(dhit2a)
  );
end
end
endgenerate

`ifdef SUPPORT_SMT
// fnRa: register-file specifier for source operand A of instruction 'isn'
// (SMT variant: register-set selectors are indexed by thread 'thrd').
// The result is {6-bit set/element select, 1-bit vector flag, 5-bit register}.
//   isn   - instruction bits
//   vqei  - vector element index used for vector operands
//   vli   - vector length, used to bound the VSHLV/VSHRV element index
//   thrd  - thread selecting rgs[] / fp_rgs[] / rs_stack[]
function [RBIT:0] fnRa;
input [47:0] isn;
input [5:0] vqei;
input [5:0] vli;
input thrd;
case(isn[`INSTRUCTION_OP])
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VCIDX,`VSCAN:	fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
  `VMxx:
    case(isn[25:23])
    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST:
            fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
    `VMFILL:fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
    default:fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
    endcase
  `VSHLV:	fnRa = (vqei+1+isn[15:11] >= vli) ? 11'h000 : {vli-vqei-isn[15:11]-1,1'b1,isn[`INSTRUCTION_RA]};
  `VSHRV:	fnRa = (vqei+isn[15:11] >= vli) ? 11'h000 : {vqei+isn[15:11],1'b1,isn[`INSTRUCTION_RA]};
  `VSxx,`VSxxU,`VSxxS,`VSxxSU:	fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
  default:	fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
  endcase
`R2:
  casez(isn[`INSTRUCTION_S2])
  `MOV:
    case(isn[25:23])
    3'd0:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    3'd1:	fnRa = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RA]};
    3'd2:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    3'd3:	fnRa = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RA]};
    3'd4:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    3'd5:	fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    3'd6:	fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    default:fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    endcase
  `VMOV:
    case (isn[`INSTRUCTION_S1])
    5'h0:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    5'h1:	fnRa = {6'h3F,1'b1,isn[`INSTRUCTION_RA]};
    // Default added so the function result is always assigned; this mirrors
    // the non-SMT version of fnRa.
    default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
    endcase
  default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
  endcase
`FLOAT:		fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
endcase
endfunction

// fnRb: register-file specifier for source operand B (SMT variant).
//   fb              - selects rfoa1i (1) or rfoa0i (0) for VEX
//   rfoa0i / rfoa1i - element selects used by VEX
function [RBIT:0] fnRb;
input [47:0] isn;
input fb;
input [5:0] vqei;
input [5:0] rfoa0i;
input [5:0] rfoa1i;
input thrd;
case(isn[`INSTRUCTION_OP])
`R2:
  case(isn[`INSTRUCTION_S2])
  `VEX:	fnRb = fb ?
{rfoa1i,1'b1,isn[`INSTRUCTION_RB]} : {rfoa0i,1'b1,isn[`INSTRUCTION_RB]};
  `LVX,`SVX:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
  default:	fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
  endcase
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VMxx:
    case(isn[25:23])
    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
            fnRb = {6'h3F,1'b1,2'b0,isn[20:18]};
    default:	fnRb = 12'h000;
    endcase
  `VXCHG:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
  `VSxx,`VSxxU:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
  `VSxxS,`VSxxSU:	fnRb = {vqei,1'b0,isn[`INSTRUCTION_RB]};
  // Vector-scalar forms read operand B from the thread's scalar register set.
  // (A duplicated `VXORS entry in this item list was dropped.)
  `VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS:
    fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
  `VSHL,`VSHR,`VASR:
    fnRb = {isn[25],isn[22]}==2'b00 ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]};
  default:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
  endcase
`FLOAT:		fnRb = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
default:	fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
endcase
endfunction

// fnRc: register-file specifier for source operand C (SMT variant).
//   isn  - instruction bits
//   vqei - vector element index used for vector operands
//   thrd - thread selecting rgs[] / fp_rgs[]
function [RBIT:0] fnRc;
input [47:0] isn;
input [5:0] vqei;
input thrd;
case(isn[`INSTRUCTION_OP])
`R2:		fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
`MEMNDX:	fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};	// SVX not implemented
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VSxx,`VSxxS,`VSxxU,`VSxxSU:	fnRc = {6'h3F,1'b1,2'b0,isn[25:23]};
  default:	fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]};
  endcase
`FLOAT:		fnRc = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
default:	fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
endcase
endfunction

// fnRt: register-file specifier for the target register (SMT variant).
// Instructions that write no register return zero.
//   isn  - instruction bits
//   vqei - vector element index used for vector targets
//   vli  - vector length, used to bound the VSHLV/VSHRV element index
//   thrd - thread selecting rgs[] / fp_rgs[] / rs_stack[]
function [RBIT:0] fnRt;
input [47:0] isn;
input [5:0] vqei;
input [5:0] vli;
input thrd;
casez(isn[`INSTRUCTION_OP])
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VMxx:
    case(isn[25:23])
    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    `VMPOP:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    default:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    endcase
  `VSxx,`VSxxU,`VSxxS,`VSxxSU:	fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
  `VSHLV:	fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]};
  `VSHRV:	fnRt = (vqei >= vli) ?
                11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]};
  `VEINS:	fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};	// ToDo: add element # from Ra
  `V2BITS:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  // Macro name corrected from `INSTRUCTION_Rt (mis-cased) to `INSTRUCTION_RT,
  // the spelling used by every other arm of this function.
  default:	fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};
  endcase

`R2:
  if (isn[`INSTRUCTION_L2]==2'b01)
    case(isn[47:42])
    `CMOVEZ:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    `CMOVNZ:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    default:	fnRt = 12'd0;
    endcase
  else
  casez(isn[`INSTRUCTION_S2])
  `MOV:
    case(isn[25:23])
    3'd0:	fnRt = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RT]};
    3'd1:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    3'd2:	fnRt = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RT]};
    3'd3:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    3'd4:	fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    3'd5:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    3'd6:	fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    default:fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    endcase
  `VMOV:
    case (isn[`INSTRUCTION_S1])
    5'h0:	fnRt = {6'h3F,1'b1,isn[`INSTRUCTION_RT]};
    5'h1:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    default:	fnRt = 12'h000;
    endcase
  `R1:
    case(isn[22:18])
    `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH:
              fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
    `MEMDB,`MEMSB,`SYNC:
              fnRt = 12'd0;
    default:	fnRt = 12'd0;
    endcase
  `CMOVEZ:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `CMOVNZ:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `MUX:		fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `MIN:		fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `MAX:		fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  // NOTE(review): the non-SMT version of fnRt uses isn[`INSTRUCTION_RT] for
  // LVX; confirm that bits [20:16] are the intended field here.
  `LVX:		fnRt = {vqei,1'b1,isn[20:16]};
  `SHIFTR:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `SHIFT31,`SHIFT63:
            fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `SEI:		fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  `WAIT,`RTI,`CHK:
            fnRt = 12'd0;
  default:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
  endcase
`MEMNDX:
  begin
    if (!isn[31])
      case({isn[31:28],isn[22:21]})
      `LVX,
      `CACHEX,
`LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = 12'd0; endcase else case({isn[31:28],isn[17:16]}) `PUSH: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; `SBX,`SCX,`SHX,`SWX,`SWCX,`CACHEX: fnRt = 12'd0; default: fnRt = 12'd0; endcase end `FLOAT: case(isn[31:26]) `FTX,`FCX,`FEX,`FDX,`FRM: fnRt = 12'd0; `FSYNC: fnRt = 12'd0; default: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; endcase `BRK: fnRt = 12'd0; `REX: fnRt = 12'd0; `CHK: fnRt = 12'd0; //`EXEC: fnRt = 12'd0; `Bcc: fnRt = 12'd0; `BLcc: fnRt = 12'd0; `BBc: fnRt = 12'd0; `NOP: fnRt = 12'd0; `BEQI: fnRt = 12'd0; `BNEI: fnRt = 12'd0; `SB,`Sx,`SWC,`CACHE: fnRt = 12'd0; `JMP: fnRt = 12'd0; `CALL: fnRt = {rgs[thrd],1'b0,5'd29}; // regLR `LV: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; `AMO: fnRt = isn[31] ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]} : {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; `AUIPC,`LUI: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; endcase endfunction `else function [RBIT:0] fnRa; input [47:0] isn; input [5:0] vqei; input [5:0] vli; input thrd; case(isn[`INSTRUCTION_OP]) `IVECTOR: case(isn[`INSTRUCTION_S2]) `VCIDX,`VSCAN: fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]}; `VMxx: case(isn[25:23]) `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST: fnRa = {6'h3F,1'b1,2'b0,isn[10:8]}; `VMFILL:fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]}; default:fnRa = {6'h3F,1'b1,2'b0,isn[10:8]}; endcase `VSHLV: fnRa = (vqei+1+isn[15:11] >= vli) ? 11'h000 : {vli-vqei-isn[15:11]-1,1'b1,isn[`INSTRUCTION_RA]}; `VSHRV: fnRa = (vqei+isn[15:11] >= vli) ? 
11'h000 : {vqei+isn[15:11],1'b1,isn[`INSTRUCTION_RA]}; `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]}; default: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]}; endcase `R2: casez(isn[`INSTRUCTION_S2]) `MOV: case(isn[25:23]) 3'd0: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; 3'd1: fnRa = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RA]}; 3'd2: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; 3'd3: fnRa = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RA]}; 3'd4: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; 3'd5: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; 3'd6: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; default:fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; endcase `VMOV: case (isn[`INSTRUCTION_S1]) 5'h0: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; 5'h1: fnRa = {6'h3F,1'b1,isn[`INSTRUCTION_RA]}; default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; endcase default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; endcase `FLOAT: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; endcase endfunction function [RBIT:0] fnRb; input [47:0] isn; input fb; input [5:0] vqei; input [5:0] rfoa0i; input [5:0] rfoa1i; input thrd; case(isn[`INSTRUCTION_OP]) `RR: case(isn[`INSTRUCTION_S2]) `VEX: fnRb = fb ? {rfoa1i,1'b1,isn[`INSTRUCTION_RB]} : {rfoa0i,1'b1,isn[`INSTRUCTION_RB]}; `LVX,`SVX: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; default: fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]}; endcase `IVECTOR: case(isn[`INSTRUCTION_S2]) `VMxx: case(isn[25:23]) `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: fnRb = {6'h3F,1'b1,2'b0,isn[20:18]}; default: fnRb = 12'h000; endcase `VXCHG: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; `VSxx,`VSxxU: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; `VSxxS,`VSxxSU: fnRb = {vqei,1'b0,isn[`INSTRUCTION_RB]}; `VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS,`VXORS: fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]}; `VSHL,`VSHR,`VASR: fnRb = {isn[25],isn[22]}==2'b00 ? 
{rgs,1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]};
  default:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
  endcase
`FLOAT:		fnRb = {fp_rgs,1'b0,isn[`INSTRUCTION_RB]};
default:	fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]};
endcase
endfunction

// fnRc: register-file specifier for source operand C (non-SMT variant; the
// 'thrd' input is accepted but the single rgs / fp_rgs selectors are used).
//   isn  - instruction bits
//   vqei - vector element index used for vector operands
function [RBIT:0] fnRc;
input [47:0] isn;
input [5:0] vqei;
input thrd;
case(isn[`INSTRUCTION_OP])
`R2:		fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]};
`MEMNDX:	fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]};	// SVX not implemented
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VSxx,`VSxxS,`VSxxU,`VSxxSU:	fnRc = {6'h3F,1'b1,2'b0,isn[25:23]};
  default:	fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]};
  endcase
`FLOAT:		fnRc = {fp_rgs,1'b0,isn[`INSTRUCTION_RC]};
default:	fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]};
endcase
endfunction

// fnRt: register-file specifier for the target register (non-SMT variant).
// Instructions that write no register return zero.
//   isn  - instruction bits
//   vqei - vector element index used for vector targets
//   vli  - vector length, used to bound the VSHLV/VSHRV element index
function [RBIT:0] fnRt;
input [47:0] isn;
input [5:0] vqei;
input [5:0] vli;
input thrd;
casez(isn[`INSTRUCTION_OP])
`IVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VMxx:
    case(isn[25:23])
    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    `VMPOP:	fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
    default:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    endcase
  `VSxx,`VSxxU,`VSxxS,`VSxxSU:	fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
  `VSHLV:	fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]};
  `VSHRV:	fnRt = (vqei >= vli) ? 11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]};
  `VEINS:	fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};	// ToDo: add element # from Ra
  `V2BITS:	fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
  default:	fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};
  endcase
`FVECTOR:
  case(isn[`INSTRUCTION_S2])
  `VMxx:
    case(isn[25:23])
    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    // Target field changed from `INSTRUCTION_RB to `INSTRUCTION_RT so this
    // arm agrees with the otherwise identical IVECTOR arm above.
    // NOTE(review): confirm against the FVECTOR VMPOP instruction encoding.
    `VMPOP:	fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
    default:
              fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
    endcase
  `VSxx,`VSxxU,`VSxxS,`VSxxSU:	fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
  `VSHLV:	fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]};
  `VSHRV:	fnRt = (vqei >= vli) ?
11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]}; `VEINS: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; // ToDo: add element # from Ra `V2BITS: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; endcase `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(isn[47:42]) `CMOVEZ: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `CMOVNZ: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = 12'd0; endcase else casez(isn[`INSTRUCTION_S2]) `MOV: case(isn[25:23]) 3'd0: fnRt = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RT]}; 3'd1: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; 3'd2: fnRt = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RT]}; 3'd3: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; 3'd4: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]}; 3'd5: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; 3'd6: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]}; default:fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; endcase `VMOV: case (isn[`INSTRUCTION_S1]) 5'h0: fnRt = {6'h3F,1'b1,isn[`INSTRUCTION_RT]}; 5'h1: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = 12'h000; endcase `R1: case(isn[22:18]) `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `MEMDB,`MEMSB,`SYNC: fnRt = 12'd0; default: fnRt = 12'd0; endcase `MUX: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `MIN: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `MAX: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `LVX: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; `SHIFTR: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `SHIFT31,`SHIFT63: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `SEI: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; `WAIT,`RTI,`CHK: fnRt = 12'd0; default: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; endcase `MEMNDX: begin if (!isn[31]) case({isn[31:28],isn[22:21]}) `LVX, `CACHEX, `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; default: fnRt = 12'd0; endcase else case({isn[31:28],isn[17:16]}) `PUSH: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; 
`SBX,`SCX,`SHX,`SWX,`SWCX,`CACHEX:
      fnRt = 12'd0;
    default:
      fnRt = 12'd0;
    endcase
  end
`FLOAT:
  case(isn[31:26])
  `FTX,`FCX,`FEX,`FDX,`FRM:
    fnRt = 12'd0;
  `FSYNC:
    fnRt = 12'd0;
  default:
    fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]};
  endcase
`BRK:   fnRt = 12'd0;
`REX:   fnRt = 12'd0;
`CHK:   fnRt = 12'd0;
//`EXEC:  fnRt = 12'd0;
`Bcc:   fnRt = 12'd0;
`BLcc:  fnRt = 12'd0;
`BBc:   fnRt = 12'd0;
`NOP:   fnRt = 12'd0;
`BEQI:  fnRt = 12'd0;
`BNEI:  fnRt = 12'd0;
`SB,`Sx,`SWC,`CACHE:
  fnRt = 12'd0;
`JMP:   fnRt = 12'd0;
`CALL:  fnRt = {rgs,1'b0,5'd29};  // regLR
`LV:    fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};
// Both arms of the former "isn[31] ? ... : ..." selection were identical,
// so the target is the Rt field regardless of isn[31].
`AMO:   fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
`AUIPC,`LUI:
  fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
default:
  fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]};
endcase
endfunction
`endif

// fnWe: byte-lane write-enable mask for the target register.
// Returns 8'h00 for the R2-format CMP operation and 8'hFF (all eight lanes)
// for every other instruction. A finer-grained, size-dependent decode is
// retained below in commented-out form; it is not active.
function [7:0] fnWe;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`R2:
  case(isn[`INSTRUCTION_S2])
  `CMP:    fnWe = 8'h00;
  default: fnWe = 8'hFF;
  endcase
default:
  fnWe = 8'hFF;
endcase
/*
casez(isn[`INSTRUCTION_OP])
`R2:
  case(isn[`INSTRUCTION_S2])
  `R1:
    case(isn[22:18])
    `ABS,`CNTLZ,`CNTLO,`CNTPOP:
      case(isn[25:23])
      3'b000: fnWe = 8'h01;
      3'b001: fnWe = 8'h03;
      3'b010: fnWe = 8'h0F;
      3'b011: fnWe = 8'hFF;
      default: fnWe = 8'hFF;
      endcase
    default: fnWe = 8'hFF;
    endcase
  `SHIFT31: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
  `SHIFT63: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
  `SLT,`SLTU,`SLE,`SLEU,
  `ADD,`SUB,
  `AND,`OR,`XOR,
  `NAND,`NOR,`XNOR,
  `DIV,`DIVU,`DIVSU,
  `MOD,`MODU,`MODSU,
  `MUL,`MULU,`MULSU,
  `MULH,`MULUH,`MULSUH,
  `FXMUL:
    case(isn[25:23])
    3'b000: fnWe = 8'h01;
    3'b001: fnWe = 8'h03;
    3'b010: fnWe = 8'h0F;
    3'b011: fnWe = 8'hFF;
    default: fnWe = 8'hFF;
    endcase
  default: fnWe = 8'hFF;
  endcase
default: fnWe = 8'hFF;
endcase
*/
endfunction

// Detect if a source is automatically valid
function Source1Valid;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`BRK: Source1Valid = isn[16] ?
isn[`INSTRUCTION_RA]==5'd0 : TRUE; `Bcc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `BLcc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `BBc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `BEQI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `BNEI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `CHK: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `RR: case(isn[`INSTRUCTION_S2]) `SHIFT31: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SHIFT63: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SHIFTR: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; default: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; endcase `MEMNDX:Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `ADDI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SEQI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SLTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SLTUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SGTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SGTUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `ANDI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `ORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `XORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `XNORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `MULI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `MULUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `MULFI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `DIVI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `DIVUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `AMO: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LB: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LBU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `Lx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LxU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LWR: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LV: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LVx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `LVxU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SB: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `Sx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SWC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `SV: Source1Valid = 
isn[`INSTRUCTION_RA]==5'd0; `PUSHC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `INC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `CAS: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `CACHE: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `JAL: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `RET: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `CSRRW: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; `BITFIELD: case(isn[47:44]) `BFINSI: Source1Valid = TRUE; default: Source1Valid = isn[`INSTRUCTION_RA]==5'd0 || isn[30]==1'b0; endcase `IVECTOR: Source1Valid = FALSE; default: Source1Valid = TRUE; endcase endfunction function Source2Valid; input [47:0] isn; casez(isn[`INSTRUCTION_OP]) `BRK: Source2Valid = TRUE; `Bcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `BLcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `BBc: Source2Valid = TRUE; `BEQI: Source2Valid = TRUE; `BNEI: Source2Valid = TRUE; `CHK: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `R2: casez(isn[`INSTRUCTION_S2]) `TLB: Source2Valid = TRUE; `R1: Source2Valid = TRUE; `MOV: Source2Valid = TRUE; `SHIFT31: Source2Valid = TRUE; `SHIFT63: Source2Valid = TRUE; `LVX,`SVX: Source2Valid = FALSE; default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; endcase `MEMNDX: begin if (!isn[31]) case({isn[31:28],isn[22:21]}) `LVX: Source2Valid = FALSE; `CACHEX, `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: Source2Valid = TRUE; default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; endcase else case({isn[31:28],isn[17:16]}) `SVX: Source2Valid = FALSE; default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; endcase end `ADDI: Source2Valid = TRUE; `SEQI: Source2Valid = TRUE; `SLTI: Source2Valid = TRUE; `SLTUI: Source2Valid = TRUE; `SGTI: Source2Valid = TRUE; `SGTUI: Source2Valid = TRUE; `ANDI: Source2Valid = TRUE; `ORI: Source2Valid = TRUE; `XORI: Source2Valid = TRUE; `XNORI: Source2Valid = TRUE; `MULUI: Source2Valid = TRUE; `MULFI: Source2Valid = TRUE; `LB: Source2Valid = TRUE; `LBU: Source2Valid = TRUE; `Lx: Source2Valid = 
TRUE; `LxU: Source2Valid = TRUE; `LWR: Source2Valid = TRUE; `LVx: Source2Valid = TRUE; `LVxU: Source2Valid = TRUE; `INC: Source2Valid = TRUE; `SB: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `Sx: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `SWC: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `PUSHC: Source2Valid = TRUE; `CAS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `JAL: Source2Valid = TRUE; `RET: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `IVECTOR: case(isn[`INSTRUCTION_S2]) `VABS: Source2Valid = TRUE; `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: Source2Valid = FALSE; `VADDS,`VSUBS,`VANDS,`VORS,`VXORS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `VBITS2V: Source2Valid = TRUE; `V2BITS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; `VSHL,`VSHR,`VASR: Source2Valid = isn[22:21]==2'd2; default: Source2Valid = FALSE; endcase `LV: Source2Valid = TRUE; `SV: Source2Valid = FALSE; `AMO: Source2Valid = isn[31] || isn[`INSTRUCTION_RB]==5'd0; `BITFIELD: Source2Valid = isn[`INSTRUCTION_RB]==5'd0 || isn[31]==1'b0; default: Source2Valid = TRUE; endcase endfunction function Source3Valid; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `IVECTOR: case(isn[`INSTRUCTION_S2]) `VEX: Source3Valid = TRUE; default: Source3Valid = TRUE; endcase `CHK: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(isn[47:42]) `CMOVEZ,`CMOVNZ: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; default: Source3Valid = TRUE; endcase else case(isn[`INSTRUCTION_S2]) `MAJ: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; default: Source3Valid = TRUE; endcase `MEMNDX: if (!isn[31]) case({isn[31:28],isn[22:21]}) `CACHEX, `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; default: Source3Valid = TRUE; endcase else case({isn[31:28],isn[17:16]}) `PUSH: Source3Valid = TRUE; `SBX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `SCX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `SHX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `SWX: 
Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `SWCX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; `CASX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; default: Source3Valid = TRUE; endcase `BITFIELD: Source3Valid = isn[`INSTRUCTION_RC]==5'd0 || isn[32]==1'b0; default: Source3Valid = TRUE; endcase endfunction // Used to indicate to the queue logic that the instruction needs to be // recycled to the queue VL number of times. function IsVector; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: case(isn[`INSTRUCTION_S2]) `LVX,`SVX: IsVector = TRUE; default: IsVector = FALSE; endcase `IVECTOR: case(isn[`INSTRUCTION_S2]) `VMxx: case(isn[25:23]) `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: IsVector = FALSE; default: IsVector = TRUE; endcase `VEINS: IsVector = FALSE; `VEX: IsVector = FALSE; default: IsVector = TRUE; endcase `LV,`SV: IsVector = TRUE; default: IsVector = FALSE; endcase endfunction function IsVeins; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `IVECTOR: IsVeins = isn[`INSTRUCTION_S2]==`VEINS; default: IsVeins = FALSE; endcase endfunction function IsVex; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `IVECTOR: IsVex = isn[`INSTRUCTION_S2]==`VEX; default: IsVex = FALSE; endcase endfunction function IsVCmprss; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `IVECTOR: IsVCmprss = isn[`INSTRUCTION_S2]==`VCMPRSS || isn[`INSTRUCTION_S2]==`VCIDX; default: IsVCmprss = FALSE; endcase endfunction function IsVShifti; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `IVECTOR: case(isn[`INSTRUCTION_S2]) `VSHL,`VSHR,`VASR: IsVShifti = {isn[25],isn[22]}==2'd2; default: IsVShifti = FALSE; endcase default: IsVShifti = FALSE; endcase endfunction function IsVLS; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (IsLoad(isn)) case({isn[31:28],isn[22:21]}) `LVX,`LVWS: IsVLS = TRUE; default: IsVLS = FALSE; endcase else case({isn[31:28],isn[17:16]}) `SVX,`SVWS: IsVLS = TRUE; default: IsVLS = FALSE; endcase `LV,`SV: IsVLS = TRUE; default: IsVLS = FALSE; endcase endfunction function [1:0] fnM2; 
input [31:0] isn; case(isn[`INSTRUCTION_OP]) `RR: fnM2 = isn[24:23]; default: fnM2 = 2'b00; endcase endfunction function IsMem; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: IsMem = TRUE; `AMO: IsMem = TRUE; `LB: IsMem = TRUE; `LBU: IsMem = TRUE; `Lx: IsMem = TRUE; `LxU: IsMem = TRUE; `LWR: IsMem = TRUE; `LV,`SV: IsMem = TRUE; `INC: IsMem = TRUE; `SB: IsMem = TRUE; `Sx: IsMem = TRUE; `SWC: IsMem = TRUE; `PUSHC: IsMem = TRUE; `CAS: IsMem = TRUE; `LVx: IsMem = TRUE; `LVxU: IsMem = TRUE; default: IsMem = FALSE; endcase endfunction function IsMemNdx; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: IsMemNdx = TRUE; default: IsMemNdx = FALSE; endcase endfunction function IsLoad; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: IsLoad = !isn[31]; `LB: IsLoad = TRUE; `LBU: IsLoad = TRUE; `Lx: IsLoad = TRUE; `LxU: IsLoad = TRUE; `LWR: IsLoad = TRUE; `LV: IsLoad = TRUE; `LVx: IsLoad = TRUE; `LVxU: IsLoad = TRUE; default: IsLoad = FALSE; endcase endfunction function IsInc; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[17:16]}) `INC: IsInc = TRUE; default: IsInc = FALSE; endcase else IsInc = FALSE; `INC: IsInc = TRUE; default: IsInc = FALSE; endcase endfunction function IsSWC; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[17:16]}) `SWCX: IsSWC = TRUE; default: IsSWC = FALSE; endcase else IsSWC = FALSE; `SWC: IsSWC = TRUE; default: IsSWC = FALSE; endcase endfunction // Aquire / release bits are only available on indexed SWC / LWR function IsSWCX; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[17:16]}) `SWCX: IsSWCX = TRUE; default: IsSWCX = FALSE; endcase else IsSWCX = FALSE; default: IsSWCX = FALSE; endcase endfunction function IsLWR; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[22:21]}) `LWRX: IsLWR = TRUE; 
default: IsLWR = FALSE; endcase else IsLWR = FALSE; `LWR: IsLWR = TRUE; default: IsLWR = FALSE; endcase endfunction function IsLWRX; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[22:21]}) `LWRX: IsLWRX = TRUE; default: IsLWRX = FALSE; endcase else IsLWRX = FALSE; default: IsLWRX = FALSE; endcase endfunction function IsCAS; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[17:16]}) `CASX: IsCAS = TRUE; default: IsCAS = FALSE; endcase else IsCAS = FALSE; `CAS: IsCAS = TRUE; default: IsCAS = FALSE; endcase endfunction function IsAMO; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `AMO: IsAMO = TRUE; default: IsAMO = FALSE; endcase endfunction // Really IsPredictableBranch // Does not include BccR's function IsBranch; input [47:0] isn; casez(isn[`INSTRUCTION_OP]) `Bcc: IsBranch = TRUE; `BLcc: IsBranch = TRUE; `BBc: IsBranch = TRUE; `BEQI: IsBranch = TRUE; `BNEI: IsBranch = TRUE; `CHK: IsBranch = TRUE; default: IsBranch = FALSE; endcase endfunction function IsWait; input [47:0] isn; IsWait = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`WAIT; endfunction function IsCall; input [47:0] isn; IsCall = isn[`INSTRUCTION_OP]==`CALL && isn[7]==1'b0; endfunction function IsJmp; input [47:0] isn; IsJmp = isn[`INSTRUCTION_OP]==`JMP && isn[7]==1'b0; endfunction function IsFlowCtrl; input [47:0] isn; casez(isn[`INSTRUCTION_OP]) `BRK: IsFlowCtrl = TRUE; `R2: case(isn[`INSTRUCTION_S2]) `RTI: IsFlowCtrl = TRUE; default: IsFlowCtrl = FALSE; endcase `Bcc: IsFlowCtrl = TRUE; `BLcc: IsFlowCtrl = TRUE; `BBc: IsFlowCtrl = TRUE; `BEQI: IsFlowCtrl = TRUE; `BNEI: IsFlowCtrl = TRUE; `CHK: IsFlowCtrl = TRUE; `JAL: IsFlowCtrl = TRUE; `JMP: IsFlowCtrl = TRUE; `CALL: IsFlowCtrl = TRUE; `RET: IsFlowCtrl = TRUE; default: IsFlowCtrl = FALSE; endcase endfunction function IsCache; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if 
(isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[22:21]}) `CACHEX: IsCache = TRUE; default: IsCache = FALSE; endcase else IsCache = FALSE; `CACHE: IsCache = TRUE; default: IsCache = FALSE; endcase endfunction function [4:0] CacheCmd; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[22:21]}) `CACHEX: CacheCmd = isn[17:13]; default: CacheCmd = 5'd0; endcase else CacheCmd = 5'd0; `CACHE: CacheCmd = isn[17:13]; default: CacheCmd = 5'd0; endcase endfunction function IsMemsb; input [47:0] isn; IsMemsb = (isn[`INSTRUCTION_OP]==`RR && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMSB); endfunction function IsSEI; input [47:0] isn; IsSEI = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`SEI); endfunction function IsLV; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b00) case({isn[31:28],isn[22:21]}) `LVX: IsLV = TRUE; default: IsLV = FALSE; endcase else IsLV = FALSE; `LV: IsLV = TRUE; default: IsLV = FALSE; endcase endfunction function IsRet; input [47:0] isn; IsRet = isn[`INSTRUCTION_OP]==`RET; endfunction function IsRFW; input [47:0] isn; input [5:0] vqei; input [5:0] vli; input thrd; if (fnRt(isn,vqei,vli,thrd)==12'd0) IsRFW = FALSE; else casez(isn[`INSTRUCTION_OP]) `IVECTOR: IsRFW = TRUE; `FVECTOR: IsRFW = TRUE; `R2: if (isn[`INSTRUCTION_L2]==2'b00) casez(isn[`INSTRUCTION_S2]) `TLB: IsRFW = TRUE; `R1: case(isn[22:18]) `MEMDB,`MEMSB,`SYNC,`SETWB,5'h14,5'h15: IsRFW = FALSE; default: IsRFW = TRUE; endcase `ADD: IsRFW = TRUE; `SUB: IsRFW = TRUE; `SEQ: IsRFW = TRUE; `SLT: IsRFW = TRUE; `SLTU: IsRFW = TRUE; `SLE: IsRFW = TRUE; `SLEU: IsRFW = TRUE; `AND: IsRFW = TRUE; `OR: IsRFW = TRUE; `XOR: IsRFW = TRUE; `NAND: IsRFW = TRUE; `NOR: IsRFW = TRUE; `XNOR: IsRFW = TRUE; `MULU: IsRFW = TRUE; `MULSU: IsRFW = TRUE; `MUL: IsRFW = TRUE; `MULUH: IsRFW = TRUE; `MULSUH: IsRFW = TRUE; `MULH: IsRFW = TRUE; `MULF: IsRFW = TRUE; 
`FXMUL: IsRFW = TRUE; `DIVU: IsRFW = TRUE; `DIVSU: IsRFW = TRUE; `DIV:IsRFW = TRUE; `MODU: IsRFW = TRUE; `MODSU: IsRFW = TRUE; `MOD:IsRFW = TRUE; `MOV: IsRFW = TRUE; `VMOV: IsRFW = TRUE; `SHIFTR,`SHIFT31,`SHIFT63: IsRFW = TRUE; `MIN,`MAX: IsRFW = TRUE; `SEI: IsRFW = TRUE; default: IsRFW = FALSE; endcase else if (isn[`INSTRUCTION_L2]==2'b01) case(isn[47:42]) `CMOVEZ: IsRFW = TRUE; `CMOVNZ: IsRFW = TRUE; default: IsRFW = FALSE; endcase else if (isn[7]==1'b1) // The following instructions might come from a compressed version. casez(isn[`INSTRUCTION_S2]) `ADD: IsRFW = TRUE; `SUB: IsRFW = TRUE; `AND: IsRFW = TRUE; `OR: IsRFW = TRUE; `XOR: IsRFW = TRUE; `MOV: IsRFW = TRUE; `SHIFTR,`SHIFT31,`SHIFT63: IsRFW = TRUE; default: IsRFW = FALSE; endcase else IsRFW = FALSE; `MEMNDX: if (isn[`INSTRUCTION_L2]==2'b10) begin if (!isn[31]) IsRFW = TRUE; else case({isn[31:28],isn[17:16]}) `PUSH: IsRFW = TRUE; `CASX: IsRFW = TRUE; default: IsRFW = FALSE; endcase end else if (isn[`INSTRUCTION_L2]==2'b00) begin if (!isn[31]) case({isn[31:28],isn[22:21]}) `LBX: IsRFW = TRUE; `LBUX: IsRFW = TRUE; `LCX: IsRFW = TRUE; `LCUX: IsRFW = TRUE; `LHX: IsRFW = TRUE; `LHUX: IsRFW = TRUE; `LWX: IsRFW = TRUE; `LVBX: IsRFW = TRUE; `LVBUX: IsRFW = TRUE; `LVCX: IsRFW = TRUE; `LVCUX: IsRFW = TRUE; `LVHX: IsRFW = TRUE; `LVHUX: IsRFW = TRUE; `LVWX: IsRFW = TRUE; `LWRX: IsRFW = TRUE; `LVX: IsRFW = TRUE; default: IsRFW = FALSE; endcase else case({isn[31:28],isn[17:16]}) `PUSH: IsRFW = TRUE; `CASX: IsRFW = TRUE; default: IsRFW = FALSE; endcase end else IsRFW = FALSE; `BBc: IsRFW = FALSE; `BITFIELD: IsRFW = TRUE; `ADDI: IsRFW = TRUE; `SEQI: IsRFW = TRUE; `SLTI: IsRFW = TRUE; `SLTUI: IsRFW = TRUE; `SGTI: IsRFW = TRUE; `SGTUI: IsRFW = TRUE; `ANDI: IsRFW = TRUE; `ORI: IsRFW = TRUE; `XORI: IsRFW = TRUE; `XNORI: IsRFW = TRUE; `MULUI: IsRFW = TRUE; `MULI: IsRFW = TRUE; `MULFI: IsRFW = TRUE; `DIVUI: IsRFW = TRUE; `DIVI: IsRFW = TRUE; `MODI: IsRFW = TRUE; `JAL: IsRFW = TRUE; `CALL: IsRFW = TRUE; `RET: IsRFW = TRUE; `LB: 
IsRFW = TRUE; `LBU: IsRFW = TRUE; `Lx: IsRFW = TRUE; `LxU: IsRFW = TRUE; `LWR: IsRFW = TRUE; `LV: IsRFW = TRUE; `LVx: IsRFW = TRUE; `LVxU: IsRFW = TRUE; `PUSHC: IsRFW = TRUE; `CAS: IsRFW = TRUE; `AMO: IsRFW = TRUE; `CSRRW: IsRFW = TRUE; `AUIPC: IsRFW = TRUE; `LUI: IsRFW = TRUE; default: IsRFW = FALSE; endcase endfunction function IsShifti; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b00) case(isn[`INSTRUCTION_S2]) `SHIFT31,`SHIFT63: IsShifti = TRUE; default: IsShifti = FALSE; endcase else IsShifti = FALSE; default: IsShifti = FALSE; endcase endfunction function IsShift; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b00) case(isn[31:26]) `SHIFTR: IsShift = TRUE; `SHIFT31: IsShift = TRUE; `SHIFT63: IsShift = TRUE; default: IsShift = FALSE; endcase else IsShift = FALSE; default: IsShift = FALSE; endcase endfunction function IsShift48; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(isn[47:42]) `SHIFTR: IsShift48 = TRUE; default: IsShift48 = FALSE; endcase else IsShift48 = FALSE; default: IsShift48 = FALSE; endcase endfunction function IsRtop; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(isn[47:42]) `RTOP: IsRtop = TRUE; default: IsRtop = FALSE; endcase else IsRtop = FALSE; default: IsRtop = FALSE; endcase endfunction function IsMul; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b00) case(isn[`INSTRUCTION_S2]) `MULU,`MULSU,`MUL: IsMul = TRUE; `MULUH,`MULSUH,`MULH: IsMul = TRUE; default: IsMul = FALSE; endcase else IsMul = FALSE; `MULUI,`MULI: IsMul = TRUE; default: IsMul = FALSE; endcase endfunction function IsDivmod; input [47:0] isn; case(isn[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b00) case(isn[`INSTRUCTION_S2]) `DIVU,`DIVSU,`DIV: IsDivmod = TRUE; `MODU,`MODSU,`MOD: IsDivmod = TRUE; default: IsDivmod = FALSE; endcase else IsDivmod = FALSE; `DIVUI,`DIVI,`MODI: IsDivmod = TRUE; 
default: IsDivmod = FALSE; endcase endfunction function [7:0] fnSelect; input [47:0] ins; input [`ABITS] adr; begin case(ins[`INSTRUCTION_OP]) `MEMNDX: if (ins[`INSTRUCTION_L2]==2'b10) begin if (ins[31]) begin case({ins[31:28],ins[17:16]}) `PUSH: fnSelect = 8'hFF; default: fnSelect = 8'h00; endcase end else fnSelect = 8'h00; end else if (ins[`INSTRUCTION_L2]==2'b00) begin if (!ins[31]) case({ins[31:28],ins[22:21]}) `LBX,`LBUX,`LVBX,`LVBUX: case(adr[2:0]) 3'd0: fnSelect = 8'h01; 3'd1: fnSelect = 8'h02; 3'd2: fnSelect = 8'h04; 3'd3: fnSelect = 8'h08; 3'd4: fnSelect = 8'h10; 3'd5: fnSelect = 8'h20; 3'd6: fnSelect = 8'h40; 3'd7: fnSelect = 8'h80; endcase `LCX,`LCUX,`LVCX,`LVCUX: case(adr[2:1]) 2'd0: fnSelect = 8'h03; 2'd1: fnSelect = 8'h0C; 2'd2: fnSelect = 8'h30; 2'd3: fnSelect = 8'hC0; endcase `LHX,`LHUX,`LVHX,`LVHUX: case(adr[2]) 1'b0: fnSelect = 8'h0F; 1'b1: fnSelect = 8'hF0; endcase `INC,`LVWX, `LWX,`LWRX,`LVX: fnSelect = 8'hFF; default:fnSelect = 8'hFF; endcase else case({ins[31:28],ins[17:16]}) `SBX: case(adr[2:0]) 3'd0: fnSelect = 8'h01; 3'd1: fnSelect = 8'h02; 3'd2: fnSelect = 8'h04; 3'd3: fnSelect = 8'h08; 3'd4: fnSelect = 8'h10; 3'd5: fnSelect = 8'h20; 3'd6: fnSelect = 8'h40; 3'd7: fnSelect = 8'h80; endcase `SCX: case(adr[2:1]) 2'd0: fnSelect = 8'h03; 2'd1: fnSelect = 8'h0C; 2'd2: fnSelect = 8'h30; 2'd3: fnSelect = 8'hC0; endcase `SHX: case(adr[2]) 1'b0: fnSelect = 8'h0F; 1'b1: fnSelect = 8'hF0; endcase `INC, `SWX,`SWCX,`SVX,`CASX,`PUSH: fnSelect = 8'hFF; default: fnSelect = 8'h00; endcase end else fnSelect = 8'h00; `LB,`LBU,`SB: case(adr[2:0]) 3'd0: fnSelect = 8'h01; 3'd1: fnSelect = 8'h02; 3'd2: fnSelect = 8'h04; 3'd3: fnSelect = 8'h08; 3'd4: fnSelect = 8'h10; 3'd5: fnSelect = 8'h20; 3'd6: fnSelect = 8'h40; 3'd7: fnSelect = 8'h80; endcase `Lx,`LxU,`LVx,`LVxU: casez(ins[20:18]) 3'b100: fnSelect = 8'hFF; 3'b?10: fnSelect = adr[2] ? 
8'hF0 : 8'h0F; 3'b??1: case(adr[2:1]) 2'd0: fnSelect = 8'h03; 2'd1: fnSelect = 8'h0C; 2'd2: fnSelect = 8'h30; 2'd3: fnSelect = 8'hC0; endcase default: fnSelect = 8'h00; endcase `Sx: casez(ins[15:13]) 3'b100: fnSelect = 8'hFF; 3'b?10: fnSelect = adr[2] ? 8'hF0 : 8'h0F; 3'b??1: case(adr[2:1]) 2'd0: fnSelect = 8'h03; 2'd1: fnSelect = 8'h0C; 2'd2: fnSelect = 8'h30; 2'd3: fnSelect = 8'hC0; endcase default: fnSelect = 8'h00; endcase `PUSHC, `INC, `LWR,`SWC,`CAS: fnSelect = 8'hFF; `LV,`SV: fnSelect = 8'hFF; `AMO: case(ins[23:21]) 3'd0: fnSelect = {8'h01 << adr[2:0]}; 3'd1: fnSelect = {8'h03 << {adr[2:1],1'b0}}; 3'd2: fnSelect = {8'h0F << {adr[2],2'b00}}; 3'd3: fnSelect = 8'hFF; default: fnSelect = 8'hFF; endcase default: fnSelect = 8'h00; endcase end endfunction /* function [63:0] fnDatc; input [47:0] ins; input [63:0] dat; case(ins[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(ins[47:42]) `FINDB: fnDatc = dat[7:0]; `FINDC: fnDatc = dat[15:0]; `FINDH: fnDatc = dat[31:0]; `FINDW: fnDatc = dat[63:0]; default: fnDatc = dat[63:0]; endcase else fnDatc = dat[63:0]; default: fnDatc = dat[63:0]; endcase endfunction */ /* function [63:0] fnMemInc; input [47:0] ins; case(ins[`INSTRUCTION_OP]) `R2: if (isn[`INSTRUCTION_L2]==2'b01) case(ins[47:42]) `FINDB: fnMemInc = 32'd1; `FINDC: fnMemInc = 32'd2; `FINDH: fnMemInc = 32'd4; `FINDW: fnMemInc = 32'd8; default: fnMemInc = 32'd8; endcase else fnMemInc = 32'd8; default: fnMemInc = 32'd8; endcase endfunction */ function [63:0] fnDatiAlign; input [47:0] ins; input [`ABITS] adr; input [63:0] dat; case(ins[`INSTRUCTION_OP]) `MEMNDX: if (ins[`INSTRUCTION_L2]==2'b00) case({ins[31:28],ins[22:21]}) `LBX,`LVBX: case(adr[2:0]) 3'd0: fnDatiAlign = {{56{dat[7]}},dat[7:0]}; 3'd1: fnDatiAlign = {{56{dat[15]}},dat[15:8]}; 3'd2: fnDatiAlign = {{56{dat[23]}},dat[23:16]}; 3'd3: fnDatiAlign = {{56{dat[31]}},dat[31:24]}; 3'd4: fnDatiAlign = {{56{dat[39]}},dat[39:32]}; 3'd5: fnDatiAlign = {{56{dat[47]}},dat[47:40]}; 3'd6: fnDatiAlign = 
{{56{dat[55]}},dat[55:48]};
      3'd7: fnDatiAlign = {{56{dat[63]}},dat[63:56]};
      endcase
    // Unsigned byte loads: select the byte addressed by adr[2:0], zero-extend.
    `LBUX,`LVBUX:
      case(adr[2:0])
      3'd0: fnDatiAlign = {{56{1'b0}},dat[7:0]};
      3'd1: fnDatiAlign = {{56{1'b0}},dat[15:8]};
      3'd2: fnDatiAlign = {{56{1'b0}},dat[23:16]};
      3'd3: fnDatiAlign = {{56{1'b0}},dat[31:24]};
      3'd4: fnDatiAlign = {{56{1'b0}},dat[39:32]};
      3'd5: fnDatiAlign = {{56{1'b0}},dat[47:40]};
      3'd6: fnDatiAlign = {{56{1'b0}},dat[55:48]};
      // Was {56{2'b0}}: a 120-bit value that only produced the right
      // answer because the assignment truncates to 64 bits.
      3'd7: fnDatiAlign = {{56{1'b0}},dat[63:56]};
      endcase
    // Signed 16-bit loads: select by adr[2:1], sign-extend.
    `LCX,`LVCX:
      case(adr[2:1])
      2'd0: fnDatiAlign = {{48{dat[15]}},dat[15:0]};
      2'd1: fnDatiAlign = {{48{dat[31]}},dat[31:16]};
      2'd2: fnDatiAlign = {{48{dat[47]}},dat[47:32]};
      2'd3: fnDatiAlign = {{48{dat[63]}},dat[63:48]};
      endcase
    // Unsigned 16-bit loads: select by adr[2:1], zero-extend.
    `LCUX,`LVCUX:
      case(adr[2:1])
      2'd0: fnDatiAlign = {{48{1'b0}},dat[15:0]};
      2'd1: fnDatiAlign = {{48{1'b0}},dat[31:16]};
      2'd2: fnDatiAlign = {{48{1'b0}},dat[47:32]};
      2'd3: fnDatiAlign = {{48{1'b0}},dat[63:48]};
      endcase
    // Signed 32-bit loads: select by adr[2], sign-extend.
    `LHX,`LVHX:
      case(adr[2])
      1'b0: fnDatiAlign = {{32{dat[31]}},dat[31:0]};
      1'b1: fnDatiAlign = {{32{dat[63]}},dat[63:32]};
      endcase
    // Unsigned 32-bit loads: select by adr[2], zero-extend.
    `LHUX,`LVHUX:
      case(adr[2])
      1'b0: fnDatiAlign = {{32{1'b0}},dat[31:0]};
      1'b1: fnDatiAlign = {{32{1'b0}},dat[63:32]};
      endcase
    // Full-width accesses pass the bus data through unchanged.
    `LWX,`LWRX,`LVX,`CAS,`LVWX:
      fnDatiAlign = dat;
    default:
      fnDatiAlign = dat;
    endcase
  else
    fnDatiAlign = dat;
// Signed byte load: select the byte addressed by adr[2:0], sign-extend.
`LB:
  case(adr[2:0])
  3'd0: fnDatiAlign = {{56{dat[7]}},dat[7:0]};
  3'd1: fnDatiAlign = {{56{dat[15]}},dat[15:8]};
  3'd2: fnDatiAlign = {{56{dat[23]}},dat[23:16]};
  3'd3: fnDatiAlign = {{56{dat[31]}},dat[31:24]};
  3'd4: fnDatiAlign = {{56{dat[39]}},dat[39:32]};
  3'd5: fnDatiAlign = {{56{dat[47]}},dat[47:40]};
  3'd6: fnDatiAlign = {{56{dat[55]}},dat[55:48]};
  3'd7: fnDatiAlign = {{56{dat[63]}},dat[63:56]};
  endcase
// Unsigned byte load: select the byte addressed by adr[2:0], zero-extend.
`LBU:
  case(adr[2:0])
  3'd0: fnDatiAlign = {{56{1'b0}},dat[7:0]};
  3'd1: fnDatiAlign = {{56{1'b0}},dat[15:8]};
  3'd2: fnDatiAlign = {{56{1'b0}},dat[23:16]};
  3'd3: fnDatiAlign = {{56{1'b0}},dat[31:24]};
  3'd4: fnDatiAlign = {{56{1'b0}},dat[39:32]};
  3'd5: fnDatiAlign = {{56{1'b0}},dat[47:40]};
  3'd6: fnDatiAlign =
{{56{1'b0}},dat[55:48]};
  // Was {56{2'b0}}: a 120-bit value that only produced the right answer
  // because the assignment truncates to 64 bits.
  3'd7: fnDatiAlign = {{56{1'b0}},dat[63:56]};
  endcase
// Signed sized loads; ins[20:18] selects the access width.
`Lx,`LVx:
  casez(ins[20:18])
  3'b100: fnDatiAlign = dat;
  3'b?10:
    case(adr[2])
    1'b0: fnDatiAlign = {{32{dat[31]}},dat[31:0]};
    1'b1: fnDatiAlign = {{32{dat[63]}},dat[63:32]};
    endcase
  3'b??1:
    case(adr[2:1])
    2'd0: fnDatiAlign = {{48{dat[15]}},dat[15:0]};
    2'd1: fnDatiAlign = {{48{dat[31]}},dat[31:16]};
    2'd2: fnDatiAlign = {{48{dat[47]}},dat[47:32]};
    2'd3: fnDatiAlign = {{48{dat[63]}},dat[63:48]};
    endcase
  default: fnDatiAlign = dat;
  endcase
// Unsigned sized loads; same width decode, zero-extended.
`LxU,`LVxU:
  casez(ins[20:18])
  3'b100: fnDatiAlign = dat;
  3'b?10:
    case(adr[2])
    1'b0: fnDatiAlign = {{32{1'b0}},dat[31:0]};
    1'b1: fnDatiAlign = {{32{1'b0}},dat[63:32]};
    endcase
  3'b??1:
    case(adr[2:1])
    2'd0: fnDatiAlign = {{48{1'b0}},dat[15:0]};
    2'd1: fnDatiAlign = {{48{1'b0}},dat[31:16]};
    2'd2: fnDatiAlign = {{48{1'b0}},dat[47:32]};
    2'd3: fnDatiAlign = {{48{1'b0}},dat[63:48]};
    endcase
  default: fnDatiAlign = dat;
  endcase
`LWR,`LV,`CAS,`AMO:
  fnDatiAlign = dat;
default:
  fnDatiAlign = dat;
endcase
endfunction

// fnDato: replicate store data across the 64-bit bus according to the
// store size, so the addressed byte lanes carry the value whichever lanes
// are selected. Byte stores replicate dat[7:0] eight times, 16-bit stores
// replicate dat[15:0] four times, 32-bit stores replicate dat[31:0] twice;
// everything else passes dat through unchanged.
function [63:0] fnDato;
input [47:0] isn;
input [63:0] dat;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
  if (isn[`INSTRUCTION_L2]==2'b00)
    case({isn[31:28],isn[17:16]})
    `SBX:    fnDato = {8{dat[7:0]}};
    `SCX:    fnDato = {4{dat[15:0]}};
    `SHX:    fnDato = {2{dat[31:0]}};
    default: fnDato = dat;
    endcase
  else
    fnDato = dat;
`SB:
  fnDato = {8{dat[7:0]}};
`Sx:
  casez(isn[15:13])
  3'b100:  fnDato = dat;
  3'b?10:  fnDato = {2{dat[31:0]}};
  3'b??1:  fnDato = {4{dat[15:0]}};
  default: fnDato = dat;
  endcase
`AMO:
  case(isn[23:21])
  3'd0:    fnDato = {8{dat[7:0]}};
  3'd1:    fnDato = {4{dat[15:0]}};
  3'd2:    fnDato = {2{dat[31:0]}};
  3'd3:    fnDato = dat;
  default: fnDato = dat;
  endcase
default:
  fnDato = dat;
endcase
endfunction

// IsTLB: TRUE when `isn` is the R2-format TLB operation.
function IsTLB;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`R2:
  case(isn[`INSTRUCTION_S2])
  `TLB:    IsTLB = TRUE;
  default: IsTLB = FALSE;
  endcase
default:
  IsTLB = FALSE;
endcase
endfunction

// Indicate if the ALU instruction is valid immediately (single cycle operation)
function IsSingleCycle;
input [47:0] isn; IsSingleCycle = !(IsMul(isn)|IsDivmod(isn)|IsTLB(isn)|IsShift48(isn)); endfunction generate begin : gDecocderInst for (g = 0; g < QENTRIES; g = g + 1) begin `ifdef SUPPORT_SMT decoder8 iq0(.num({iqentry_tgt[g][8:7],iqentry_tgt[g][5:0]}), .out(iq_out[g])); `else decoder7 iq0(.num({iqentry_tgt[g][7],iqentry_tgt[g][5:0]}), .out(iq_out[g])); `endif end end endgenerate initial begin: Init // // // set up panic messages message[ `PANIC_NONE ] = "NONE "; message[ `PANIC_FETCHBUFBEQ ] = "FETCHBUFBEQ "; message[ `PANIC_INVALIDISLOT ] = "INVALIDISLOT "; message[ `PANIC_IDENTICALDRAMS ] = "IDENTICALDRAMS "; message[ `PANIC_OVERRUN ] = "OVERRUN "; message[ `PANIC_HALTINSTRUCTION ] = "HALTINSTRUCTION "; message[ `PANIC_INVALIDMEMOP ] = "INVALIDMEMOP "; message[ `PANIC_INVALIDFBSTATE ] = "INVALIDFBSTATE "; message[ `PANIC_INVALIDIQSTATE ] = "INVALIDIQSTATE "; message[ `PANIC_BRANCHBACK ] = "BRANCHBACK "; message[ `PANIC_MEMORYRACE ] = "MEMORYRACE "; message[ `PANIC_ALU0ONLY ] = "ALU0 Only "; for (n = 0; n < 64; n = n + 1) codebuf[n] <= 48'h0; end // --------------------------------------------------------------------------- // FETCH // --------------------------------------------------------------------------- // assign fetchbuf0_mem = IsMem(fetchbuf0_instr) & ~IsRet(fetchbuf0_instr);// & IsLoad(fetchbuf0_instr); assign fetchbuf0_rfw = IsRFW(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd); generate begin: gFetchbufDec if (`WAYS > 1) begin assign fetchbuf1_mem = IsMem(fetchbuf1_instr) & ~IsRet(fetchbuf1_instr);// & IsLoad(fetchbuf1_instr); assign fetchbuf1_rfw = IsRFW(fetchbuf1_instr,vqe1,vl,fetchbuf1_thrd); end if (`WAYS > 2) begin assign fetchbuf2_mem = IsMem(fetchbuf2_instr) & ~IsRet(fetchbuf2_instr);// & IsLoad(fetchbuf2_instr); assign fetchbuf2_rfw = IsRFW(fetchbuf2_instr,vqe2,vl,fetchbuf2_thrd); end end endgenerate generate begin : gFetchbufInst if (`WAYS > 2) begin : gb1 FT64_fetchbuf_x3 #(AMSB,RSTPC) ufb1 ( .rst(rst), .clk4x(clk4x), .clk(clk), .fcu_clk(fcu_clk), 
.cs_i(vadr[31:16]==16'hFFFF),
    .cyc_i(cyc),
    .stb_i(stb_o),
    .ack_o(dc_ack),
    .we_i(we),
    .adr_i(vadr[15:0]),
    .dat_i(dat_o[47:0]),
    .cmpgrp(cr0[10:8]),
    .freezePC(freezePC),
    .regLR(regLR),
    .thread_en(thread_en),
    // Each named port may be connected only once. The third-way and E/F
    // buffer connections below previously repeated the preceding port name
    // (.insn1, .queued2, .fetchbufD_v, .fetchbufD_pc); they now use the
    // names that follow the pattern of the neighbouring ports.
    // NOTE(review): confirm these names against FT64_fetchbuf_x3's port list.
    .insn0(insn0),
    .insn1(insn1),
    .insn2(insn2),
    .phit(phit),
    .threadx(threadx),
    .branchmiss(branchmiss),
    .misspc(misspc),
    .branchmiss_thrd(branchmiss_thrd),
    .predict_takenA(predict_takenA),
    .predict_takenB(predict_takenB),
    .predict_takenC(predict_takenC),
    .predict_takenD(predict_takenD),
    .predict_takenE(predict_takenE),
    .predict_takenF(predict_takenF),
    .predict_taken0(predict_taken0),
    .predict_taken1(predict_taken1),
    .predict_taken2(predict_taken2),
    .queued1(queued1),
    .queued2(queued2),
    .queued3(queued3),
    .queuedNop(queuedNop),
    .pc0(pc0a),
    .pc1(pc1a),
    .fetchbuf(fetchbuf),
    .fetchbufA_v(fetchbufA_v),
    .fetchbufB_v(fetchbufB_v),
    .fetchbufC_v(fetchbufC_v),
    .fetchbufD_v(fetchbufD_v),
    .fetchbufE_v(fetchbufE_v),
    .fetchbufF_v(fetchbufF_v),
    .fetchbufA_pc(fetchbufA_pc),
    .fetchbufB_pc(fetchbufB_pc),
    .fetchbufC_pc(fetchbufC_pc),
    .fetchbufD_pc(fetchbufD_pc),
    .fetchbufE_pc(fetchbufE_pc),
    .fetchbufF_pc(fetchbufF_pc),
    .fetchbufA_instr(fetchbufA_instr),
    .fetchbufB_instr(fetchbufB_instr),
    .fetchbufC_instr(fetchbufC_instr),
    .fetchbufD_instr(fetchbufD_instr),
    .fetchbufE_instr(fetchbufE_instr),
    .fetchbufF_instr(fetchbufF_instr),
    // NOTE(review): fetchbuf2_instr is used by the decode assigns for
    // `WAYS > 2 but is not connected here, unlike fetchbuf2_thrd/_pc/_v/_insln.
    // Check whether FT64_fetchbuf_x3 has a fetchbuf2_instr port to connect.
    .fetchbuf0_instr(fetchbuf0_instr),
    .fetchbuf1_instr(fetchbuf1_instr),
    .fetchbuf0_thrd(fetchbuf0_thrd),
    .fetchbuf1_thrd(fetchbuf1_thrd),
    .fetchbuf2_thrd(fetchbuf2_thrd),
    .fetchbuf0_pc(fetchbuf0_pc),
    .fetchbuf1_pc(fetchbuf1_pc),
    .fetchbuf2_pc(fetchbuf2_pc),
    .fetchbuf0_v(fetchbuf0_v),
    .fetchbuf1_v(fetchbuf1_v),
    .fetchbuf2_v(fetchbuf2_v),
    .fetchbuf0_insln(fetchbuf0_insln),
    .fetchbuf1_insln(fetchbuf1_insln),
    .fetchbuf2_insln(fetchbuf2_insln),
    .codebuf0(codebuf[insn0[13:8]]),
    .codebuf1(codebuf[insn1[13:8]]),
    .codebuf2(codebuf[insn2[13:8]]),
    .btgtA(btgtA),
    .btgtB(btgtB),
    .btgtC(btgtC),
    .btgtD(btgtD),
    .btgtE(btgtE),
    .btgtF(btgtF),
    .nop_fetchbuf(nop_fetchbuf),
.take_branch0(take_branch0), .take_branch1(take_branch1), .take_branch2(take_branch2), .stompedRets(stompedOnRets), .pred_on(pred_on), .panic(fb_panic) ); end else if (`WAYS > 1) begin : gb1 FT64_fetchbuf #(AMSB,RSTPC) ufb1 ( .rst(rst), .clk4x(clk4x), .clk(clk), .fcu_clk(fcu_clk), .cs_i(vadr[31:16]==16'hFFFF), .cyc_i(cyc), .stb_i(stb_o), .ack_o(dc_ack), .we_i(we), .adr_i(vadr[15:0]), .dat_i(dat_o[47:0]), .cmpgrp(cr0[10:8]), .freezePC(freezePC), .regLR(regLR), .thread_en(thread_en), .insn0(insn0), .insn1(insn1), .phit(phit), .threadx(threadx), .branchmiss(branchmiss), .misspc(misspc), .branchmiss_thrd(branchmiss_thrd), .predict_takenA(predict_takenA), .predict_takenB(predict_takenB), .predict_takenC(predict_takenC), .predict_takenD(predict_takenD), .predict_taken0(predict_taken0), .predict_taken1(predict_taken1), .queued1(queued1), .queued2(queued2), .queuedNop(queuedNop), .pc0(pc0a), .pc1(pc1a), .fetchbuf(fetchbuf), .fetchbufA_v(fetchbufA_v), .fetchbufB_v(fetchbufB_v), .fetchbufC_v(fetchbufC_v), .fetchbufD_v(fetchbufD_v), .fetchbufA_pc(fetchbufA_pc), .fetchbufB_pc(fetchbufB_pc), .fetchbufC_pc(fetchbufC_pc), .fetchbufD_pc(fetchbufD_pc), .fetchbufA_instr(fetchbufA_instr), .fetchbufB_instr(fetchbufB_instr), .fetchbufC_instr(fetchbufC_instr), .fetchbufD_instr(fetchbufD_instr), .fetchbuf0_instr(fetchbuf0_instr), .fetchbuf1_instr(fetchbuf1_instr), .fetchbuf0_thrd(fetchbuf0_thrd), .fetchbuf1_thrd(fetchbuf1_thrd), .fetchbuf0_pc(fetchbuf0_pc), .fetchbuf1_pc(fetchbuf1_pc), .fetchbuf0_v(fetchbuf0_v), .fetchbuf1_v(fetchbuf1_v), .fetchbuf0_insln(fetchbuf0_insln), .fetchbuf1_insln(fetchbuf1_insln), .codebuf0(codebuf[insn0[13:8]]), .codebuf1(codebuf[insn1[13:8]]), .btgtA(btgtA), .btgtB(btgtB), .btgtC(btgtC), .btgtD(btgtD), .nop_fetchbuf(nop_fetchbuf), .take_branch0(take_branch0), .take_branch1(take_branch1), .stompedRets(stompedOnRets), .pred_on(pred_on), .panic(fb_panic) ); end else begin : gb1 FT64_fetchbuf_x1 #(AMSB,RSTPC) ufb1 ( .rst(rst), .clk4x(clk4x), .clk(clk), 
.fcu_clk(fcu_clk), .cs_i(vadr[31:16]==16'hFFFF), .cyc_i(cyc), .stb_i(stb_o), .ack_o(dc_ack), .we_i(we), .adr_i(vadr[15:0]), .dat_i(dat_o[47:0]), .cmpgrp(cr0[10:8]), .freezePC(freezePC), .regLR(regLR), .thread_en(thread_en), .insn0(insn0), .phit(phit), .threadx(threadx), .branchmiss(branchmiss), .misspc(misspc), .branchmiss_thrd(branchmiss_thrd), .predict_takenA(predict_takenA), .predict_takenB(predict_takenB), .predict_taken0(predict_taken0), .queued1(queued1), .queuedNop(queuedNop), .pc0(pc0a), .fetchbuf(fetchbuf), .fetchbufA_v(fetchbufA_v), .fetchbufB_v(fetchbufB_v), .fetchbufA_pc(fetchbufA_pc), .fetchbufB_pc(fetchbufB_pc), .fetchbufA_instr(fetchbufA_instr), .fetchbufB_instr(fetchbufB_instr), .fetchbuf0_instr(fetchbuf0_instr), .fetchbuf0_thrd(fetchbuf0_thrd), .fetchbuf0_pc(fetchbuf0_pc), .fetchbuf0_v(fetchbuf0_v), .fetchbuf0_insln(fetchbuf0_insln), .fetchbuf0_pbyte(fetchbuf0_pbyte), .codebuf0(codebuf[insn0[13:8]]), .btgtA(btgtA), .btgtB(btgtB), .nop_fetchbuf(nop_fetchbuf), .take_branch0(take_branch0), .stompedRets(stompedOnRets), .pred_on(pred_on), .panic(fb_panic) ); assign fetchbuf1_v = `INV; end end endgenerate // Stores might exception so we don't want the heads to advance if a subsequent // instruction is store even though there's no target register. wire cmt_head1 = (!iqentry_rfw[heads[1]] && !iqentry_oddball[heads[1]] && ~|iqentry_exc[heads[1]]); wire cmt_head2 = (!iqentry_rfw[heads[2]] && !iqentry_oddball[heads[2]] && ~|iqentry_exc[heads[2]]); // Determine the head increment amount, this must match code later on. 
// hi_amt: number of queue heads (0..3) to advance past this cycle.
// The casez selector packs, for each of heads[0..2], the pair
// {entry valid, entry state == IQS_CMT}:
//   0? = slot empty, 10 = valid but not yet in the commit state, 11 = valid
//   and committable.
// An empty slot may only be skipped while it is not the enqueue position
// (heads[n] != tail0). `NUM_CMT limits how many entries can be committed per
// cycle; cmt_head1/cmt_head2 flag entries that retire without a register
// file write.
// Purely combinational, hence blocking assignments and a default assigned
// first so that no latch is inferred.
reg [2:0] hi_amt;
always @*
begin
    hi_amt = 3'd0;
    casez ({ iqentry_v[heads[0]], iqentry_state[heads[0]]==IQS_CMT,
             iqentry_v[heads[1]], iqentry_state[heads[1]]==IQS_CMT,
             iqentry_v[heads[2]], iqentry_state[heads[2]]==IQS_CMT})

    // retire 3
    6'b0?_0?_0?:
        if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0)
            hi_amt = 3'd3;
        else if (heads[0] != tail0 && heads[1] != tail0)
            hi_amt = 3'd2;
        else if (heads[0] != tail0)
            hi_amt = 3'd1;
    6'b0?_0?_10:
        if (heads[0] != tail0 && heads[1] != tail0)
            hi_amt = 3'd2;
        else if (heads[0] != tail0)
            hi_amt = 3'd1;
        else
            hi_amt = 3'd0;
    6'b0?_0?_11:
        if (`NUM_CMT > 2 || cmt_head2)
            hi_amt = 3'd3;
        else
            hi_amt = 3'd2;

    // retire 1 (wait for regfile for heads[1])
    6'b0?_10_??:
        hi_amt = 3'd1;

    // retire 2
    6'b0?_11_0?,
    6'b0?_11_10:
        if (`NUM_CMT > 1 || cmt_head1)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;
    6'b0?_11_11:
        if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
            hi_amt = 3'd3;
        else if (`NUM_CMT > 1 || cmt_head1)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;

    // heads[0] is valid but not committable: nothing retires (default 0).
    6'b10_??_??: ;

    6'b11_0?_0?:
        if (heads[1] != tail0 && heads[2] != tail0)
            hi_amt = 3'd3;
        else if (heads[1] != tail0)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;
    6'b11_0?_10:
        if (heads[1] != tail0)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;
    6'b11_0?_11:
        if (heads[1] != tail0) begin
            if (`NUM_CMT > 2 || cmt_head2)
                hi_amt = 3'd3;
            else
                hi_amt = 3'd2;
        end
        else
            hi_amt = 3'd1;
    6'b11_10_??:
        hi_amt = 3'd1;
    6'b11_11_0?:
        if (`NUM_CMT > 1 && heads[2] != tail0)
            hi_amt = 3'd3;
        else if (cmt_head1 && heads[2] != tail0)
            hi_amt = 3'd3;
        else if (`NUM_CMT > 1 || cmt_head1)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;
    6'b11_11_10:
        if (`NUM_CMT > 1 || cmt_head1)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;
    6'b11_11_11:
        if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
            hi_amt = 3'd3;
        else if (`NUM_CMT > 1 || cmt_head1)
            hi_amt = 3'd2;
        else
            hi_amt = 3'd1;

    // Every 0/1 combination is listed above; this is only reached when the
    // selector contains unknown bits in simulation.
    default:
        begin
            hi_amt = 3'd0;
            $display("hi_amt: Uncoded case %h",{ iqentry_v[heads[0]], iqentry_state[heads[0]],
                iqentry_v[heads[1]], iqentry_state[heads[1]],
                iqentry_v[heads[2]], iqentry_state[heads[2]]});
        end
    endcase
end

// Amount subtracted from sequence numbers
// Selected as the sequence number of the last valid entry among the heads
// being advanced past (zero when none of them is valid).
reg [`SNBITS] tosub;
always @*
case(hi_amt)
3'd3: tosub = (iqentry_v[heads[2]] ? iqentry_sn[heads[2]]
             : iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
             : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
             : 4'b0);
3'd2: tosub = (iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
             : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
             : 4'b0);
3'd1: tosub = (iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
             : 4'b0);
default: tosub = 4'd0;
endcase

//initial begin: stop_at
//#1000000; panic <= `PANIC_OVERRUN;
//end

//
// BRANCH-MISS LOGIC: livetarget
//
// livetarget implies that there is a not-to-be-stomped instruction that targets the register in question
// therefore, if it is zero it implies the rf_v value should become VALID on a branchmiss
//

// OR-reduce the per-entry live-target masks, register by register.
// Index 0 is deliberately not computed here (the loop starts at 1).
always @*
for (j = 1; j < PREGS; j = j + 1) begin
    livetarget[j] = 1'b0;
    for (n = 0; n < QENTRIES; n = n + 1)
        livetarget[j] = livetarget[j] | iqentry_livetarget[n][j];
end

// An entry contributes its decoded target (iq_out) only when it is valid, is
// not being stomped, and belongs to the thread that took the branch miss.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
    iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} &
                            {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} &
                            iq_out[n];

//
// BRANCH-MISS LOGIC: latestID
//
// latestID is the instruction queue ID of the newest instruction (latest) that targets
// a particular register. looks a lot like scheduling logic, but in reverse.
//

// iqentry_cumulative[n]: OR of the live-target masks of entries n .. missid,
// walking the queue circularly.
always @*
for (n = 0; n < QENTRIES; n = n + 1) begin
    iqentry_cumulative[n] = 1'b0;
    for (j = n; j < n + QENTRIES; j = j + 1) begin
        if (missid==(j % QENTRIES))
            for (k = n; k <= j; k = k + 1)
                iqentry_cumulative[n] = iqentry_cumulative[n] | iqentry_livetarget[k % QENTRIES];
    end
end

// Entry n keeps its live-target mask when it is the missing branch itself or
// when its mask does not overlap the cumulative mask starting at the next
// entry; otherwise its mask is cleared.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
    iqentry_latestID[n] = (missid == n || ((iqentry_livetarget[n] & iqentry_cumulative[(n+1)%QENTRIES]) == {PREGS{1'b0}}))
                          ? iqentry_livetarget[n]
                          : {PREGS{1'b0}};

// An entry is a register source after the miss if any latestID bit is set.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
    iqentry_source[n] = | iqentry_latestID[n];

reg vqueued2;

// Register specifiers for the instructions in fetch slots 0 and 1. The
// thread number is OR'ed in above the low seven bits. Slot 1 uses vqe0 + 1
// (vqet0 + 1 for the target) instead of its own element counter when
// vqueued2 is set.
assign Ra0 = fnRa(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
assign Rb0 = fnRb(fetchbuf0_instr,1'b0,vqe0,rfoa0[5:0],rfoa1[5:0],fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
assign Rc0 = fnRc(fetchbuf0_instr,vqe0,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
assign Rt0 = fnRt(fetchbuf0_instr,vqet0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
assign Ra1 = fnRa(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
assign Rb1 = fnRb(fetchbuf1_instr,1'b1,vqueued2 ? vqe0 + 1 : vqe1,rfoa0[5:0],rfoa1[5:0],fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
assign Rc1 = fnRc(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
assign Rt1 = fnRt(fetchbuf1_instr,vqueued2 ? vqet0 + 1 : vqet1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};

//
// additional logic for ISSUE
//
// for the moment, we look at ALU-input buffers to allow back-to-back issue of
// dependent instructions ... we do not, however, look ahead for DRAM requests
// that will become valid in the next cycle. instead, these have to propagate
// their results into the IQ entry directly, at which point it becomes issue-able
//

// note that, for all intents & purposes, iqentry_done == iqentry_agen ... no need to duplicate

wire [QENTRIES-1:0] args_valid;
wire [QENTRIES-1:0] could_issue;
wire [QENTRIES-1:0] could_issueid;

// Note that bypassing is provided only from the first fpu.
generate begin : issue_logic

// Readiness of each of the three operands, one bit per queue entry.
wire [QENTRIES-1:0] arg1_ok;
wire [QENTRIES-1:0] arg2_ok;
wire [QENTRIES-1:0] arg3_ok;

for (g = 0; g < QENTRIES; g = g + 1)
begin
    // Operand 1: already valid in the queue, or (with FU_BYPASS) being
    // produced this cycle by alu0, alu1 or fpu1. An ALU result is only
    // taken when the ALU is not doing a memory op, or is doing a push.
    assign arg1_ok[g] =
        iqentry_a1_v[g]
`ifdef FU_BYPASS
        || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
        || ((iqentry_a1_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
        || ((iqentry_a1_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
`endif
        ;

    // Operand 2: as operand 1, except that a memory op never waits here.
    assign arg2_ok[g] =
        iqentry_a2_v[g]
        || iqentry_mem[g]   // a2 does not need to be valid immediately for a mem op (agen), it is checked by iqentry_memready logic
`ifdef FU_BYPASS
        || (iqentry_a2_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
        || ((iqentry_a2_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
        || ((iqentry_a2_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
`endif
        ;

    // Operand 3: a non-indexed memory op that has not reached address
    // generation does not need it. Only the two ALUs are bypassed.
    assign arg3_ok[g] =
        iqentry_a3_v[g]
        || (iqentry_mem[g] & ~iqentry_agen[g] & ~iqentry_memndx[g])    // a3 needs to be valid for indexed instruction
//      || (iqentry_mem[g] & ~iqentry_agen[g])
`ifdef FU_BYPASS
        || (iqentry_a3_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
        || ((iqentry_a3_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
`endif
        ;

    // All three operands must be available.
    assign args_valid[g] = arg1_ok[g] && arg2_ok[g] && arg3_ok[g];

    // Issuable to a functional unit: valid, still queued, decoded
    // (iqentry_iv set) and operands ready.
    assign could_issue[g] = iqentry_v[g]
                         && iqentry_iv[g]
                         && iqentry_state[g]==IQS_QUEUED
                         && args_valid[g];
                         //&& (iqentry_mem[g] ? !iqentry_agen[g] : 1'b1);

    // Issuable to a decode unit: valid but not yet decoded.
    assign could_issueid[g] = !iqentry_iv[g]
                           && (iqentry_v[g]);// || (g==tail0 && canq1))// || (g==tail1 && canq2))
//      && (iqentry_a1_v[g]
//          || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready)
//          || (iqentry_a1_s[g] == alu1_sourceid && alu1_dataready));
end
end
endgenerate

// The (old) simulator didn't handle the asynchronous race loop properly in the
// original code. It would issue two instructions to the same islot. So the
// issue logic has been re-written to eliminate the asynchronous loop.
// Can't issue to the ALU if it's busy doing a long running operation like a
// ToDo: fix the memory synchronization, see fp_issue below `ifndef INLINE_DECODE always @* begin iqentry_id1issue = {QENTRIES{1'b0}}; if (id1_available) begin for (n = 0; n < QENTRIES; n = n + 1) if (could_issueid[heads[n]] && iqentry_id1issue=={QENTRIES{1'b0}}) iqentry_id1issue[heads[n]] = `TRUE; end end generate begin : gIDUIssue if (`NUM_IDU > 1) begin always @* begin iqentry_id2issue = {QENTRIES{1'b0}}; if (id2_available) begin for (n = 0; n < QENTRIES; n = n + 1) if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && iqentry_id2issue=={QENTRIES{1'b0}}) iqentry_id2issue[heads[n]] = `TRUE; end end end if (`NUM_IDU > 2) begin always @* begin iqentry_id3issue = {QENTRIES{1'b0}}; if (id3_available) begin for (n = 0; n < QENTRIES; n = n + 1) if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && !iqentry_id2issue[heads[n]] && iqentry_id3issue=={QENTRIES{1'b0}}) iqentry_id3issue[heads[n]] = `TRUE; end end end end endgenerate `endif // not INLINE_DECODE // Detect if there are any valid queue entries prior to the given queue entry. reg [QENTRIES-1:0] prior_valid; //generate begin : gPriorValid always @* for (j = 0; j < QENTRIES; j = j + 1) begin prior_valid[heads[j]] = 1'b0; if (j > 0) for (n = j-1; n >= 0; n = n - 1) prior_valid[heads[j]] = prior_valid[heads[j]]|iqentry_v[heads[n]]; end //end //endgenerate // Detect if there are any valid sync instructions prior to the given queue // entry. reg [QENTRIES-1:0] prior_sync; //generate begin : gPriorSync always @* for (j = 0; j < QENTRIES; j = j + 1) begin prior_sync[heads[j]] = 1'b0; if (j > 0) for (n = j-1; n >= 0; n = n - 1) prior_sync[heads[j]] = prior_sync[heads[j]]|(iqentry_v[heads[n]] & iqentry_sync[heads[n]]); end //end //endgenerate // Detect if there are any valid fsync instructions prior to the given queue // entry. 
// For heads[j], set when any entry nearer the head (heads[0..j-1]) is a
// valid fsync instruction.
reg [QENTRIES-1:0] prior_fsync;
//generate begin : gPriorFsync
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
    prior_fsync[heads[j]] = 1'b0;
    for (n = 0; n < j; n = n + 1)
        prior_fsync[heads[j]] = prior_fsync[heads[j]] | (iqentry_v[heads[n]] & iqentry_fsync[heads[n]]);
end
//end
//endgenerate

// ALU issue selection.
// Start search for instructions to process at head of queue (oldest instruction).
// Each ALU takes at most one entry: a bit is only set while the unit's issue
// vector is still all zero.
always @*
begin
    iqentry_alu0_issue = {QENTRIES{1'b0}};
    iqentry_alu1_issue = {QENTRIES{1'b0}};

    if (alu0_available & alu0_idle) begin
        for (n = 0; n < QENTRIES; n = n + 1) begin
            if (iqentry_alu0_issue == {QENTRIES{1'b0}}
             && could_issue[heads[n]]
             && iqentry_alu[heads[n]]
             // If there are no valid queue entries prior it doesn't matter if there is
             // a sync.
             && !(prior_sync[heads[n]] && prior_valid[heads[n]])
            )
                iqentry_alu0_issue[heads[n]] = `TRUE;
        end
    end

    if (alu1_available && alu1_idle && `NUM_ALU > 1) begin
//      if ((could_issue & ~iqentry_alu0_issue & ~iqentry_alu0) != {QENTRIES{1'b0}}) begin
        for (n = 0; n < QENTRIES; n = n + 1) begin
            if (iqentry_alu1_issue == {QENTRIES{1'b0}}
             && could_issue[heads[n]]
             && iqentry_alu[heads[n]]
             && !iqentry_alu0[heads[n]]         // alu0 only
             && !iqentry_alu0_issue[heads[n]]   // not already taken by alu0
             && !(prior_sync[heads[n]] && prior_valid[heads[n]])
            )
                iqentry_alu1_issue[heads[n]] = `TRUE;
        end
//      end
    end
end

// FPU issue selection.
// Start search for instructions to process at head of queue (oldest instruction).
// Same scheme as for the ALUs; a prior fsync blocks issue as well as a prior
// sync.
always @*
begin
    iqentry_fpu1_issue = {QENTRIES{1'b0}};
    iqentry_fpu2_issue = {QENTRIES{1'b0}};

    if (fpu1_available && fpu1_idle && `NUM_FPU > 0) begin
        for (n = 0; n < QENTRIES; n = n + 1) begin
            if (iqentry_fpu1_issue == {QENTRIES{1'b0}}
             && could_issue[heads[n]]
             && iqentry_fpu[heads[n]]
             // If there are no valid queue entries prior it doesn't matter if there is
             // a sync.
             && !((prior_sync[heads[n]]|prior_fsync[heads[n]]) && prior_valid[heads[n]])
            )
                iqentry_fpu1_issue[heads[n]] = `TRUE;
        end
    end

    if (fpu2_available && fpu2_idle && `NUM_FPU > 1) begin
        for (n = 0; n < QENTRIES; n = n + 1) begin
            if (iqentry_fpu2_issue == {QENTRIES{1'b0}}
             && could_issue[heads[n]]
             && iqentry_fpu[heads[n]]
             && !iqentry_fpu1_issue[heads[n]]   // not already taken by fpu1
             && !((prior_sync[heads[n]]|prior_fsync[heads[n]]) && prior_valid[heads[n]])
            )
                iqentry_fpu2_issue[heads[n]] = `TRUE;
        end
    end
end

reg [QENTRIES-1:0] nextqd;

// Next queue id
/*
reg [`QBITS] nids [0:QENTRIES-1];
always @*
for (n = 0; n < QENTRIES; n = n + 1) begin
    nids[n] = n[`QBITS];
    for (j = n; j != (n+1) % QENTRIES; j = (j - 1) % QENTRIES)
        if (iqentry_thrd[(j+1)%QENTRIES]==iqentry_thrd[n])
            nids[n] = (j + 1) % QENTRIES;
    // Add one more compare and set
end
*/

// nids[j]: index of the next queue slot after j (circularly) that holds the
// same thread as slot j; j itself when no other slot matches. The scan runs
// backwards from j so that the nearest following slot is written last.
reg [`QBITS] nids [0:QENTRIES-1];
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
    // We can't both start and stop at j
    for (n = j; n != (j+1)%QENTRIES; n = (n + (QENTRIES-1)) % QENTRIES)
    begin
        if (iqentry_thrd[n]==iqentry_thrd[j])
            nids[j] = n;
    end
    // Do the last one
    if (iqentry_thrd[(j+1)%QENTRIES]==iqentry_thrd[j])
        nids[j] = (j+1)%QENTRIES;
end

/*
assign nids[0] = nid0;
assign nids[1] = nid1;
assign nids[2] = nid2;
assign nids[3] = nid3;
assign nids[4] = nid4;
assign nids[5] = nid5;
assign nids[6] = nid6;
assign nids[7] = nid7;
assign nids[8] = nid8;
assign nids[9] = nid9;
*/

// Search the queue for the next entry on the same thread.
// nid: queue index of the nearest slot after fcu_id (circularly) whose thread
// equals fcu_thrd; fcu_id itself when there is none. The offsets are scanned
// from farthest to nearest so the nearest match is assigned last.
reg [`QBITS] nid;
always @*
begin
    nid = fcu_id;
    for (n = QENTRIES-1; n > 0; n = n - 1)
        if (iqentry_thrd[(fcu_id + n) % QENTRIES]==fcu_thrd)
            nid = (fcu_id + n) % QENTRIES;
end
/*
always @*
if (iqentry_thrd[idp1(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp1(fcu_id);
else if (iqentry_thrd[idp2(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp2(fcu_id);
else if (iqentry_thrd[idp3(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp3(fcu_id);
else if (iqentry_thrd[idp4(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp4(fcu_id);
else if (iqentry_thrd[idp5(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp5(fcu_id);
else if (iqentry_thrd[idp6(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp6(fcu_id);
else if (iqentry_thrd[idp7(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp7(fcu_id);
else if (iqentry_thrd[idp8(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp8(fcu_id);
else if (iqentry_thrd[idp9(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
    nid = idp9(fcu_id);
else
    nid = fcu_id;
*/

// nextqd[n]: set when the next same-thread slot (nids[n]) holds a larger
// sequence number than slot n, or when slot n itself is valid.
// Combinational, so a blocking assignment is used like the other always @*
// blocks around it; the FCU issue logic below reads it in the same pass.
// NOTE(review): because of the "|| iqentry_v[n]" term this is true for every
// valid entry, and could_issue already requires a valid entry, so the nextqd
// test in the FCU issue condition never blocks anything - confirm that this
// is the intended (disabled) state.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
    nextqd[n] = iqentry_sn[nids[n]] > iqentry_sn[n] || iqentry_v[n];
//assign nextqd = 8'hFF;

// Don't issue to the fcu until the following instruction is enqueued.
// However, if the queue is full then issue anyway. A branch miss will likely occur.
// Start search for instructions at head of queue (oldest instruction).
// At most one entry is selected; nothing issues while the flow control unit
// is busy (fcu_done low) or a branch miss is being processed.
always @*
begin
    iqentry_fcu_issue = {QENTRIES{1'b0}};

    if (fcu_done & ~branchmiss) begin
        for (n = 0; n < QENTRIES; n = n + 1) begin
            if (could_issue[heads[n]] && iqentry_fc[heads[n]]
             && (nextqd[heads[n]] || iqentry_br[heads[n]])
             && iqentry_fcu_issue == {QENTRIES{1'b0}}
             // A prior sync only matters when there is a valid prior entry.
             && (!prior_sync[heads[n]] || !prior_valid[heads[n]])
            )
                iqentry_fcu_issue[heads[n]] = `TRUE;
        end
    end
end

// Test if a given address is in the write buffer. This is done only for the
// first two queue slots to save logic on comparators.
reg inwb0; always @* begin inwb0 = FALSE; `ifdef HAS_WB for (n = 0; n < `WB_DEPTH; n = n + 1) if (iqentry_ma[heads[0]][AMSB:3]==wb_addr[n][AMSB:3] && wb_v[n]) inwb0 = TRUE; `endif end reg inwb1; always @* begin inwb1 = FALSE; `ifdef HAS_WB for (n = 0; n < `WB_DEPTH; n = n + 1) if (iqentry_ma[heads[1]][AMSB:3]==wb_addr[n][AMSB:3] && wb_v[n]) inwb1 = TRUE; `endif end always @* begin for (n = 0; n < QENTRIES; n = n + 1) begin iqentry_v[n] = iqentry_state[n] != IQS_INVALID; iqentry_done[n] = iqentry_state[n]==IQS_DONE || iqentry_state[n]==IQS_CMT; iqentry_out[n] = iqentry_state[n]==IQS_OUT; iqentry_agen[n] = iqentry_state[n]==IQS_AGEN; end end // // determine if the instructions ready to issue can, in fact, issue. // "ready" means that the instruction has valid operands but has not gone yet reg [1:0] issue_count, missue_count; generate begin : gMemIssue always @* begin issue_count = 0; memissue[ heads[0] ] = iqentry_memready[ heads[0] ] && !(iqentry_load[heads[0]] && inwb0); // first in line ... go as soon as ready if (memissue[heads[0]]) issue_count = issue_count + 1; memissue[ heads[1] ] = ~iqentry_stomp[heads[1]] && iqentry_memready[ heads[1] ] // addr and data are valid && issue_count < `NUM_MEM // ... and no preceding instruction is ready to go //&& ~iqentry_memready[heads[0]] // ... and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[1]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) // ... if a release, any prior memory ops must be done before this one && (iqentry_rl[heads[1]] ? iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]] : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) // ... 
and there's nothing in the write buffer during a load && !(iqentry_load[heads[1]] && (inwb1 || iqentry_store[heads[0]])) // ... and, if it is a store, there is no chance of it being undone && ((iqentry_load[heads[1]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])); if (memissue[heads[1]]) issue_count = issue_count + 1; memissue[ heads[2] ] = ~iqentry_stomp[heads[2]] && iqentry_memready[ heads[2] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] // ... and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) // ... if a release, any prior memory ops must be done before this one && (iqentry_rl[heads[2]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[2]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]])) // ... 
and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[2]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])); if (memissue[heads[2]]) issue_count = issue_count + 1; memissue[ heads[3] ] = ~iqentry_stomp[heads[3]] && iqentry_memready[ heads[3] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] // ... and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) // ... if a release, any prior memory ops must be done before this one && (iqentry_rl[heads[3]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) : 1'b1) // ... 
if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[3]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]])) // ... and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[3]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])); if (memissue[heads[3]]) issue_count = issue_count + 1; if (QENTRIES > 4) begin memissue[ heads[4] ] = ~iqentry_stomp[heads[4]] && iqentry_memready[ heads[4] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] // ... 
and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) // ... if a release, any prior memory ops must be done before this one && (iqentry_rl[heads[4]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[4]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]])) // ... 
and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_v[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[4]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])); if (memissue[heads[4]]) issue_count = issue_count + 1; end if (QENTRIES > 5) begin memissue[ heads[5] ] = ~iqentry_stomp[heads[5]] && iqentry_memready[ heads[5] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] //&& ~iqentry_memready[heads[4]] // ... 
and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) // ... if a release, any prior memory ops must be done before this one && (iqentry_rl[heads[5]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) // ... 
and there's nothing in the write buffer during a load && !(iqentry_load[heads[5]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] || iqentry_store[heads[4]])) // ... and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || 
!iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[5]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])); if (memissue[heads[5]]) issue_count = issue_count + 1; end `ifdef FULL_ISSUE_LOGIC if (QENTRIES > 6) begin memissue[ heads[6] ] = ~iqentry_stomp[heads[6]] && iqentry_memready[ heads[6] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] //&& ~iqentry_memready[heads[4]] //&& ~iqentry_memready[heads[5]] // ... and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3]))) && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3]))) && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || 
iqentry_done[heads[5]] || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3]))) && (iqentry_rl[heads[6]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[6]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] || iqentry_store[heads[4]] || iqentry_store[heads[5]])) // ... 
and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || 
!iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[6]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]])); if (memissue[heads[6]]) issue_count = issue_count + 1; end if (QENTRIES > 7) begin memissue[ heads[7] ] = ~iqentry_stomp[heads[7]] && iqentry_memready[ heads[7] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] //&& ~iqentry_memready[heads[4]] //&& ~iqentry_memready[heads[5]] //&& ~iqentry_memready[heads[6]] // ... 
and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) && (iqentry_rl[heads[7]] ? 
(iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[7]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]])) // ... 
and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || 
!iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) // ... 
and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[7]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]])); if (memissue[heads[7]]) issue_count = issue_count + 1; end if (QENTRIES > 8) begin memissue[ heads[8] ] = ~iqentry_stomp[heads[8]] && iqentry_memready[ heads[8] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] //&& ~iqentry_memready[heads[4]] //&& ~iqentry_memready[heads[5]] //&& ~iqentry_memready[heads[6]] // ... and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & 
iqentry_out[heads[4]]) || iqentry_done[heads[4]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]] || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]]))) && (iqentry_rl[heads[8]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]]) // ... 
and there's nothing in the write buffer during a load && !(iqentry_load[heads[8]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]])) // ... and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || 
!iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]]) ) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || 
iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]]) ) ) // ... and, if it is a SW, there is no chance of it being undone && ((iqentry_load[heads[8]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]]) && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]]) ); if (memissue[heads[8]]) issue_count = issue_count + 1; end if (QENTRIES > 9) begin memissue[ heads[9] ] = ~iqentry_stomp[heads[9]] && iqentry_memready[ heads[9] ] // addr and data are valid // ... and no preceding instruction is ready to go && issue_count < `NUM_MEM //&& ~iqentry_memready[heads[0]] //&& ~iqentry_memready[heads[1]] //&& ~iqentry_memready[heads[2]] //&& ~iqentry_memready[heads[3]] //&& ~iqentry_memready[heads[4]] //&& ~iqentry_memready[heads[5]] //&& ~iqentry_memready[heads[6]] // ... 
and there is no address-overlap with any preceding instruction && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]] || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]]))) && (!iqentry_mem[heads[8]] || (iqentry_agen[heads[8]] & iqentry_out[heads[8]]) || iqentry_done[heads[8]] || 
((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[8]][AMSB:3] || iqentry_out[heads[8]] || iqentry_done[heads[8]]))) && (iqentry_rl[heads[9]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]]) && (iqentry_done[heads[8]] || !iqentry_v[heads[8]] || !iqentry_mem[heads[8]]) : 1'b1) // ... if a preivous op has the aquire bit set && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]]) && !(iqentry_aq[heads[8]] && iqentry_v[heads[8]]) // ... and there's nothing in the write buffer during a load && !(iqentry_load[heads[9]] && (wb_v!=1'b0 || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]] || iqentry_store[heads[8]])) // ... 
and there isn't a barrier, or everything before the barrier is done or invalid && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) || ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]])) ) && (!(iqentry_iv[heads[8]] && iqentry_memsb[heads[8]]) 
|| ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (iqentry_done[heads[6]] || !iqentry_v[heads[6]]) && (iqentry_done[heads[7]] || !iqentry_v[heads[7]]) ) ) && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) ) && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) ) && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) ) && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) ) && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || 
!iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) ) && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]])) ) && (!(iqentry_iv[heads[8]] && iqentry_memdb[heads[8]]) || ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]]) && (!iqentry_mem[heads[7]] || iqentry_done[heads[7]] || !iqentry_v[heads[7]]) ) ) // ... 
and, if it is a store, there is no chance of it being undone && ((iqentry_load[heads[9]] && sple) || !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]]) && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]]) && !(iqentry_fc[heads[8]]||iqentry_canex[heads[8]]) ); if (memissue[heads[9]]) issue_count = issue_count + 1; end end end endgenerate `endif // Starts search for instructions to issue at the head of the queue and // progresses from there. This ensures that the oldest instructions are // selected first for processing. always @* begin last_issue0 = QENTRIES; last_issue1 = QENTRIES; last_issue2 = QENTRIES; for (n = 0; n < QENTRIES; n = n + 1) if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]] && !iqentry_done[heads[n]] && iqentry_v[heads[n]]) begin if (mem1_available && dram0 == `DRAMSLOT_AVAIL) begin last_issue0 = heads[n]; end end for (n = 0; n < QENTRIES; n = n + 1) if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin if (mem2_available && heads[n] != last_issue0 && `NUM_MEM > 1) begin if (dram1 == `DRAMSLOT_AVAIL) begin last_issue1 = heads[n]; end end end for (n = 0; n < QENTRIES; n = n + 1) if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin if (mem3_available && heads[n] != last_issue0 && heads[n] != last_issue1 && `NUM_MEM > 2) begin if (dram2 == `DRAMSLOT_AVAIL) begin last_issue2 = heads[n]; end end end end reg [2:0] wbptr = 2'd0; // Stomp logic for branch miss. 
/*
FT64_stomp #(QENTRIES) ustmp1
(
	.branchmiss(branchmiss),
	.branchmiss_thrd(branchmiss_thrd),
	.missid(missid),
	.head0(heads[0]),
	.thrd(iqentry_thrd),
	.iqentry_v(iqentry_v),
	.stomp(iqentry_stomp)
);
*/
// On a branch miss, mark every valid queue entry of the missing thread whose
// sequence number is greater than that of the missed instruction.
always @*
begin
	iqentry_stomp <= 1'b0;
	if (branchmiss) begin
		for (n = 0; n < QENTRIES; n = n + 1) begin
			if (iqentry_v[n] && iqentry_thrd[n]==branchmiss_thrd) begin
				if (iqentry_sn[n] > iqentry_sn[missid[`QBITS]])
					iqentry_stomp[n] <= `TRUE;
			end
		end
	end
	/*
	if (fcu_branchmiss) begin
		for (n = 0; n < QENTRIES; n = n + 1) begin
			if (iqentry_v[n] && iqentry_thrd[n]==fcu_thrd) begin
				if (iqentry_sn[n] > iqentry_sn[fcu_id[`QBITS]])
					iqentry_stomp[n] <= `TRUE;
			end
		end
	end
	*/
end

// Count the stomped queue entries that are return instructions.
always @*
begin
	stompedOnRets = 1'b0;
	for (n = 0; n < QENTRIES; n = n + 1)
		if (iqentry_stomp[n] && iqentry_ret[n])
			stompedOnRets = stompedOnRets + 4'd1;
end

reg id1_vi, id2_vi, id3_vi;
wire [4:0] id1_ido, id2_ido, id3_ido;
wire id1_vo, id2_vo, id3_vo;
wire id1_clk, id2_clk, id3_clk;

// Always at least one decoder
BUFH uidclk (.I(clk_i), .O(id1_clk));
//assign id1_clk = clk_i;
//BUFGCE uclkb2
//(
//	.I(clk_i),
//	.CE(id1_available),
//	.O(id1_clk)
//);

FT64_idecoder uid1
(
	.clk(id1_clk),
	.idv_i(id1_vi),
	.id_i(id1_id),
`ifdef INLINE_DECODE
	.instr(fetchbuf0_instr),
	.Rt(Rt0[4:0]),
	.predict_taken(predict_taken0),
	.thrd(fetchbuf0_thrd),
	.vl(vl),
`else
	.instr(id1_instr),
	.Rt(id1_Rt),
	.predict_taken(id1_pt),
	.thrd(id1_thrd),
	.vl(id1_vl),
`endif
	//ToDo: fix for vectors length and element number
	.ven(id1_ven),
	.bus(id1_bus),
	.id_o(id1_ido),
	.idv_o(id1_vo),
	.debug_on(debug_on),
	.pred_on(pred_on)
);
/*
`ifdef INLINE_DECODE
		id1_Rt <= Rt0[4:0];
		id1_vl <= vl;
		id1_ven <= venno;
		id1_id <= tail;
		id1_pt <= predict_taken0;
		id1_thrd <= fetchbuf0_thrd;
		setinsn1(tail,id1_bus);
`endif
*/
// Optional second and third decoders, selected by `NUM_IDU.
generate begin : gIDUInst
if (`NUM_IDU > 1) begin
//BUFGCE uclkb3
//(
//	.I(clk_i),
//	.CE(id2_available),
//	.O(id2_clk)
//);
assign id2_clk = clk_i;

FT64_idecoder uid2
(
	.clk(id2_clk),
	.idv_i(id2_vi),
	.id_i(id2_id),
`ifdef INLINE_DECODE
	.instr(fetchbuf1_instr),
	.Rt(Rt1[4:0]),
	.predict_taken(predict_taken1),
	.thrd(fetchbuf1_thrd),
	.vl(vl),
`else
	.instr(id2_instr),
	.Rt(id2_Rt),
	.predict_taken(id2_pt),
	.thrd(id2_thrd),
	.vl(id2_vl),
`endif
	.ven(id2_ven),
	.bus(id2_bus),
	.id_o(id2_ido),
	.idv_o(id2_vo),
	.debug_on(debug_on),
	.pred_on(pred_on)
);
end
if (`NUM_IDU > 2) begin
//BUFGCE uclkb4
//(
//	.I(clk_i),
//	.CE(id3_available),
//	.O(id3_clk)
//);
assign id3_clk = clk_i;

// Third decoder. The instance was previously also named uid2, duplicating the
// name of the second decoder above; it now has its own identifier.
FT64_idecoder uid3
(
	.clk(id3_clk),
	.idv_i(id3_vi),
	.id_i(id3_id),
`ifdef INLINE_DECODE
	.instr(fetchbuf2_instr),
	.Rt(Rt2[4:0]),
	.predict_taken(predict_taken2),
	.thrd(fetchbuf2_thrd),
	.vl(vl),
`else
	.instr(id3_instr),
	.Rt(id3_Rt),
	.predict_taken(id3_pt),
	.thrd(id3_thrd),
	.vl(id3_vl),
`endif
	.ven(id3_ven),
	.bus(id3_bus),
	.id_o(id3_ido),
	.idv_o(id3_vo),
	.debug_on(debug_on),
	.pred_on(pred_on)
);
end
end
endgenerate

//
// EXECUTE
//
wire [15:0] lfsro;
lfsr #(16,16'hACE4) u1 (rst, clk, 1'b1, 1'b0, lfsro);

// CSR read multiplexer. The CSR number comes from alu0_instr[29:18]; the read
// returns zero when csrno[11:10] is below the current operating level value.
reg [63:0] csr_r;
wire [11:0] csrno = alu0_instr[29:18];
always @*
begin
`ifdef SUPPORT_SMT
	if (csrno[11:10] >= ol[alu0_thrd])
`else
	if (csrno[11:10] >= ol)
`endif
	casez(csrno[9:0])
	`CSR_CR0:		csr_r <= cr0;
	`CSR_HARTID:	csr_r <= hartid;
	`CSR_TICK:		csr_r <= tick;
	`CSR_PCR:		csr_r <= pcr;
	`CSR_PCR2:		csr_r <= pcr2;
	`CSR_PMR:		csr_r <= pmr;
	`CSR_WBRCD:		csr_r <= wbrcd;
	`CSR_SEMA:		csr_r <= sema;
	`CSR_KEYS:		csr_r <= keys;
	`CSR_TCB:		csr_r <= tcb;
	`CSR_FSTAT:		csr_r <= {fp_rgs,fp_status};
`ifdef SUPPORT_DBG
	`CSR_DBAD0:		csr_r <= dbg_adr0;
	`CSR_DBAD1:		csr_r <= dbg_adr1;
	`CSR_DBAD2:		csr_r <= dbg_adr2;
	`CSR_DBAD3:		csr_r <= dbg_adr3;
	`CSR_DBCTRL:	csr_r <= dbg_ctrl;
	`CSR_DBSTAT:	csr_r <= dbg_stat;
`endif
	`CSR_CAS:		csr_r <= cas;
	`CSR_TVEC:		csr_r <= tvec[csrno[2:0]];
	`CSR_BADADR:	csr_r <= badaddr[{alu0_thrd,csrno[11:10]}];
	`CSR_BADINSTR:	csr_r <= bad_instr[{alu0_thrd,csrno[11:10]}];
	`CSR_CAUSE:		csr_r <= {48'd0,cause[{alu0_thrd,csrno[11:10]}]};
`ifdef SUPPORT_SMT
	`CSR_ODL_STACK:	csr_r <= {16'h0,dl_stack[alu0_thrd],16'h0,ol_stack[alu0_thrd]};
	`CSR_IM_STACK:	csr_r <= im_stack[alu0_thrd];
	`CSR_PL_STACK:	csr_r <= pl_stack[alu0_thrd];
	`CSR_RS_STACK:	csr_r <= rs_stack[alu0_thrd];
	`CSR_STATUS:	csr_r <= mstatus[alu0_thrd][63:0];
	`CSR_BRS_STACK:	csr_r <= brs_stack[alu0_thrd];
	`CSR_EPC0:		csr_r <= epc0[alu0_thrd];
	`CSR_EPC1:		csr_r <= epc1[alu0_thrd];
	`CSR_EPC2:		csr_r <= epc2[alu0_thrd];
	`CSR_EPC3:		csr_r <= epc3[alu0_thrd];
	`CSR_EPC4:		csr_r <= epc4[alu0_thrd];
	`CSR_EPC5:		csr_r <= epc5[alu0_thrd];
	`CSR_EPC6:		csr_r <= epc6[alu0_thrd];
	`CSR_EPC7:		csr_r <= epc7[alu0_thrd];
`else
	`CSR_ODL_STACK:	csr_r <= {16'h0,dl_stack,16'h0,ol_stack};
	`CSR_IM_STACK:	csr_r <= im_stack;
	`CSR_PL_STACK:	csr_r <= pl_stack;
	`CSR_RS_STACK:	csr_r <= rs_stack;
	`CSR_STATUS:	csr_r <= mstatus[63:0];
	`CSR_BRS_STACK:	csr_r <= brs_stack;
	`CSR_EPC0:		csr_r <= epc0;
	`CSR_EPC1:		csr_r <= epc1;
	`CSR_EPC2:		csr_r <= epc2;
	`CSR_EPC3:		csr_r <= epc3;
	`CSR_EPC4:		csr_r <= epc4;
	`CSR_EPC5:		csr_r <= epc5;
	`CSR_EPC6:		csr_r <= epc6;
	`CSR_EPC7:		csr_r <= epc7;
`endif
	`CSR_CODEBUF:	csr_r <= codebuf[csrno[5:0]];
`ifdef SUPPORT_BBMS
	`CSR_TB:		csr_r <= tb;
	`CSR_CBL:		csr_r <= cbl;
	`CSR_CBU:		csr_r <= cbu;
	`CSR_RO:		csr_r <= ro;
	`CSR_DBL:		csr_r <= dbl;
	`CSR_DBU:		csr_r <= dbu;
	`CSR_SBL:		csr_r <= sbl;
	`CSR_SBU:		csr_r <= sbu;
	`CSR_ENU:		csr_r <= en;
`endif
	`CSR_Q_CTR:		csr_r <= iq_ctr;
	`CSR_BM_CTR:	csr_r <= bm_ctr;
	`CSR_ICL_CTR:	csr_r <= icl_ctr;
	`CSR_IRQ_CTR:	csr_r <= irq_ctr;
	`CSR_TIME:		csr_r <= wc_times;
	// NOTE(review): the string literals below are reproduced exactly as found;
	// they may originally have been space-padded to eight characters - confirm
	// against the upstream file.
	`CSR_INFO:
		case(csrno[3:0])
		4'd0:	csr_r <= "Finitron";	// manufacturer
		4'd1:	csr_r <= " ";
		4'd2:	csr_r <= "64 bit ";	// CPU class
		4'd3:	csr_r <= " ";
		4'd4:	csr_r <= "FT64 ";	// Name
		4'd5:	csr_r <= " ";
		4'd6:	csr_r <= 64'd1;		// model #
		4'd7:	csr_r <= 64'd1;		// serial number
		4'd8:	csr_r <= {32'd16384,32'd16384};	// cache sizes instruction,csr_ra
		4'd9:	csr_r <= 64'd0;
		default:	csr_r <= 64'd0;
		endcase
	default:	begin
		$display("Unsupported CSR:%h",csrno[10:0]);
		csr_r <= 64'hEEEEEEEEEEEEEEEE;
		end
	endcase
	else
		csr_r <= 64'h0;
end

reg [63:0] alu0_xu = 1'd0, alu1_xu = 1'd0;

`ifdef SUPPORT_BBMS

`else
// This always block didn't work, it left the signals as X's.
// So they are set to zero where the reg declaration is.
// I'm guessing the @* says there's no variables on the right
// hand side, so I'm not going to evaluate it.
always @*
	alu0_xs <= 64'd0;
always @*
	alu1_xs <= 64'd0;
`endif

wire alu_clk = clk;
//BUFH uclka (.I(clk), .O(alu_clk));

//always @*
//	read_csr(alu0_instr[29:18],csr_r,alu0_thrd);
FT64_alu #(.BIG(1'b1),.SUP_VECTOR(SUP_VECTOR)) ualu0 (
	.rst(rst),
	.clk(alu_clk),
	.ld(alu0_ld),
	.abort(alu0_abort),
	.instr(alu0_instr),
	.sz(alu0_sz),
	.store(alu0_store),
	.a(alu0_argA),
	.b(alu0_argB),
	.c(alu0_argC),
	.pc(alu0_pc),
//	.imm(alu0_argI),
	.tgt(alu0_tgt),
	.ven(alu0_ven),
	.vm(vm[alu0_instr[25:23]]),
	.csr(csr_r),
	.o(alu0_out),
	.ob(alu0b_bus),
	.done(alu0_done),
	.idle(alu0_idle),
	.excen(aec[4:0]),
	.exc(alu0_exc),
	.thrd(alu0_thrd),
	.mem(alu0_mem),
	.shift(alu0_shft),	// 48 bit shift inst.
	.ol(ol)
`ifdef SUPPORT_BBMS
	, .pb(dl==2'b00 ? 64'd0 : pb),
	.cbl(cbl),
	.cbu(cbu),
	.ro(ro),
	.dbl(dbl),
	.dbu(dbu),
	.sbl(sbl),
	.sbu(sbu),
	.en(en)
`endif
);
// Optional second ALU, selected by `NUM_ALU.
generate begin : gAluInst
if (`NUM_ALU > 1) begin
FT64_alu #(.BIG(1'b0),.SUP_VECTOR(SUP_VECTOR)) ualu1 (
	.rst(rst),
	.clk(clk),
	.ld(alu1_ld),
	.abort(alu1_abort),
	.instr(alu1_instr),
	.sz(alu1_sz),
	.store(alu1_store),
	.a(alu1_argA),
	.b(alu1_argB),
	.c(alu1_argC),
	.pc(alu1_pc),
	//.imm(alu1_argI),
	.tgt(alu1_tgt),
	.ven(alu1_ven),
	.vm(vm[alu1_instr[25:23]]),
	.csr(64'd0),
	.o(alu1_out),
	.ob(alu1b_bus),
	.done(alu1_done),
	.idle(alu1_idle),
	.excen(aec[4:0]),
	.exc(alu1_exc),
	.thrd(1'b0),
	.mem(alu1_mem),
	.shift(alu1_shft),
	.ol(2'b0)
`ifdef SUPPORT_BBMS
	, .pb(dl==2'b00 ? 64'd0 : pb),
	.cbl(cbl),
	.cbu(cbu),
	.ro(ro),
	.dbl(dbl),
	.dbu(dbu),
	.sbl(sbl),
	.sbu(sbu),
	.en(en)
`endif
);
end
end
endgenerate

wire tlb_done;
wire tlb_idle;
wire [63:0] tlbo;
wire uncached;
`ifdef SUPPORT_TLB
FT64_TLB utlb1 (
	.clk(clk),
	.ld(alu0_ld & alu0_tlb),
	.done(tlb_done),
	.idle(tlb_idle),
	.ol(ol),
	.ASID(ASID),
	.op(alu0_instr[25:22]),
	.regno(alu0_instr[21:18]),
	.dati(alu0_argA),
	.dato(tlbo),
	.uncached(uncached),
	.icl_i(icl_o),
	.cyc_i(cyc),
	.we_i(we),
	.vadr_i(vadr),
	.cyc_o(cyc_o),
	.we_o(we_o),
	.padr_o(adr_o),
	.TLBMiss(tlb_miss),
	.wrv_o(wrv_o),
	.rdv_o(rdv_o),
	.exv_o(exv_o),
	.HTLBVirtPageo()
);
`else
// Without a TLB the bus signals pass straight through and the TLB status
// outputs are tied off.
assign tlb_done = 1'b1;
assign tlb_idle = 1'b1;
assign tlbo = 64'hDEADDEADDEADDEAD;
assign uncached = 1'b0;
assign adr_o = vadr;
assign cyc_o = cyc;
assign we_o = we;
assign tlb_miss = 1'b0;
assign wrv_o = 1'b0;
assign rdv_o = 1'b0;
assign exv_o = 1'b0;
assign exv_i = 1'b0;	// for now
`endif

always @*
begin
	alu0_cmt <= 1'b1;
	alu1_cmt <= 1'b1;
	fpu1_cmt <= 1'b1;
	fpu2_cmt <= 1'b1;
	fcu_cmt <= 1'b1;

	alu0_bus <= alu0_out;
	alu1_bus <= alu1_out;
	fpu1_bus <= fpu1_out;
	fpu2_bus <= fpu2_out;
	fcu_bus <= fcu_out;
end

assign alu0_abort = 1'b0;
assign alu1_abort = 1'b0;

// Floating point units, selected by `NUM_FPU.
generate begin : gFPUInst
if (`NUM_FPU > 0) begin
wire fpu1_clk;
//BUFGCE ufpc1
//(
//	.I(clk_i),
//	.CE(fpu1_available),
//	.O(fpu1_clk)
//);
assign fpu1_clk = clk_i;

fpUnit ufp1
(
	.rst(rst),
	.clk(fpu1_clk),
	.clk4x(clk4x),
	.ce(1'b1),
	.ir(fpu1_instr),
	.ld(fpu1_ld),
	.a(fpu1_argA),
	.b(fpu1_argB),
	.imm(fpu1_argI),
	.o(fpu1_out),
	.csr_i(),
	.status(fpu1_status),
	.exception(),
	.done(fpu1_done)
);
end
if (`NUM_FPU > 1) begin
wire fpu2_clk;
//BUFGCE ufpc2
//(
//	.I(clk_i),
//	.CE(fpu2_available),
//	.O(fpu2_clk)
//);
assign fpu2_clk = clk_i;

// Second FPU. The instance was previously also named ufp1, duplicating the
// name of the first FPU above; it now has its own identifier.
fpUnit ufp2
(
	.rst(rst),
	.clk(fpu2_clk),
	.clk4x(clk4x),
	.ce(1'b1),
	.ir(fpu2_instr),
	.ld(fpu2_ld),
	.a(fpu2_argA),
	.b(fpu2_argB),
	.imm(fpu2_argI),
	.o(fpu2_out),
	.csr_i(),
	.status(fpu2_status),
	.exception(),
	.done(fpu2_done)
);
end
end
endgenerate

assign fpu1_exc = (fpu1_available) ?
					((|fpu1_status[15:0]) ? `FLT_FLT : `FLT_NONE) : `FLT_UNIMP;
assign fpu2_exc = (fpu2_available) ?
					((|fpu2_status[15:0]) ? `FLT_FLT : `FLT_NONE) : `FLT_UNIMP;

assign alu0_v = alu0_dataready,
       alu1_v = alu1_dataready;
assign alu0_id = alu0_sourceid,
       alu1_id = alu1_sourceid;
assign fpu1_v = fpu1_dataready;
assign fpu1_id = fpu1_sourceid;
assign fpu2_v = fpu2_dataready;
assign fpu2_id = fpu2_sourceid;

`ifdef SUPPORT_SMT
wire [1:0] olm = ol[fcu_thrd];
`else
wire [1:0] olm = ol;
`endif

// Per-way maximum sequence number among valid queue entries, less tosub.
reg [`SNBITS] maxsn [0:`WAYS-1];
always @*
begin
	for (j = 0; j < `WAYS; j = j + 1) begin
		maxsn[j] = 8'd0;
		for (n = 0; n < QENTRIES; n = n + 1)
			if (iqentry_sn[n] > maxsn[j] && iqentry_thrd[n]==j && iqentry_v[n])
				maxsn[j] = iqentry_sn[n];
		maxsn[j] = maxsn[j] - tosub;
	end
end

assign fcu_v = fcu_dataready;
assign fcu_id = fcu_sourceid;

wire [4:0] fcmpo;
wire fnanx;
fp_cmp_unit #(64) ufcmp1 (fcu_argA, fcu_argB, fcmpo, fnanx);

wire fcu_takb;

// Exception code raised by the flow control unit for the current instruction.
always @*
begin
	fcu_exc <= `FLT_NONE;
	casez(fcu_instr[`INSTRUCTION_OP])
`ifdef SUPPORT_BBMS
	`LFCS:	fcu_exc <= currentCSSelector != fcu_instr[31:8] ? `FLT_CS : `FLT_NONE;
	`RET:	fcu_exc <= fcu_argB[63:40] != currentCSSelector ? `FLT_RET : `FLT_NONE;
`endif
	`CHK:	begin
				if (fcu_instr[21])
					fcu_exc <= fcu_argA >= fcu_argB && fcu_argA < fcu_argC ? `FLT_NONE : `FLT_CHK;
			end
	`REX:
		case(olm)
		`OL_USER:	fcu_exc <= `FLT_PRIV;
		default:	;
		endcase
// Could have long branches exceptioning and unimplmented in the fetch stage.
//	`BBc:	fcu_exc <= fcu_instr[6] ? `FLT_BRN : `FLT_NONE;
	default:	fcu_exc <= `FLT_NONE;
	endcase
end

FT64_EvalBranch ube1
(
	.instr(fcu_instr),
	.a(fcu_argA),
	.b(fcu_argB),
	.c(fcu_argC),
	.takb(fcu_takb)
);

FT64_FCU_Calc #(.AMSB(AMSB)) ufcuc1
(
	.ol(olm),
	.instr(fcu_instr),
	.tvec(tvec[fcu_instr[14:13]]),
	.a(fcu_argA),
	.pc(fcu_pc),
	.nextpc(fcu_nextpc),
	.im(im),
	.waitctr(waitctr),
	.bus(fcu_out)
);

wire will_clear_branchmiss = branchmiss && ((fetchbuf0_v && fetchbuf0_pc==misspc) || (fetchbuf1_v && fetchbuf1_pc==misspc));

// Address to resume fetching from if the FCU instruction turns out to be a miss.
always @*
begin
	case(fcu_instr[`INSTRUCTION_OP])
	`R2:	fcu_misspc = fcu_epc;		// RTI (we don't bother fully decoding this as it's the only R2)
	`RET:	fcu_misspc = fcu_argB;
	`REX:	fcu_misspc = fcu_bus;
	`BRK:	fcu_misspc = {tvec[0][AMSB:8], 1'b0, olm, 5'h0};
	`JAL:	fcu_misspc = fcu_argA + fcu_argI;
	//`CHK:	fcu_misspc = fcu_nextpc + fcu_argI;	// Handled as an instruction exception
	// Default: branch
	default:	fcu_misspc = fcu_pt ? fcu_nextpc : {fcu_pc[AMSB:32],fcu_pc[31:0] + fcu_brdisp[31:0]};
	endcase
	fcu_misspc[0] = 1'b0;
end

// To avoid false branch mispredicts the branch isn't evaluated until the
// following instruction queues. The address of the next instruction is
// looked at to see if the BTB predicted correctly.

wire fcu_brk_miss = fcu_brk || fcu_rti;
`ifdef FCU_ENH
wire fcu_ret_miss = fcu_ret && (fcu_argB != iqentry_pc[nid]);
wire fcu_jal_miss = fcu_jal && (fcu_argA + fcu_argI != iqentry_pc[nid]);
wire fcu_followed = iqentry_sn[nid] > iqentry_sn[fcu_id[`QBITS]];
`else
wire fcu_ret_miss = fcu_ret;
wire fcu_jal_miss = fcu_jal;
wire fcu_followed = `TRUE;
`endif
always @*
if (fcu_v) begin
	// Break and RTI switch register sets, and so are always treated as a branch miss in order to
	// flush the pipeline. Hardware interrupts also stream break instructions so they need to
	// flushed from the queue so the interrupt is recognized only once.
	// BRK and RTI are handled as excmiss types which are processed during the commit stage.
	if (fcu_brk_miss)
		fcu_branchmiss = TRUE;
	else if (fcu_branch && (fcu_takb ^ fcu_pt))
		fcu_branchmiss = TRUE;
	else
`ifdef SUPPORT_SMT
		if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol[fcu_thrd]))
`else
		if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol))
`endif
		fcu_branchmiss = TRUE;
	else if (fcu_ret_miss)
		fcu_branchmiss = TRUE;
	else if (fcu_jal_miss)
		fcu_branchmiss = TRUE;
	else if (fcu_instr[`INSTRUCTION_OP] == `CHK && ~fcu_takb)
		fcu_branchmiss = TRUE;
	else
		fcu_branchmiss = FALSE;
end
else
	fcu_branchmiss = FALSE;

FT64_RMW_alu urmwalu0 (rmw_instr, rmw_argA, rmw_argB, rmw_argC, rmw_res);

//
// additional DRAM-enqueue logic

assign dram_avail = (dram0 == `DRAMSLOT_AVAIL || dram1 == `DRAMSLOT_AVAIL || dram2 == `DRAMSLOT_AVAIL);

// A memory op has valid operands once it is in the AGEN state; stores
// additionally need argument 2 to be valid.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
	iqentry_memopsvalid[n] <= (iqentry_mem[n] && (iqentry_store[n] ? iqentry_a2_v[n] : 1'b1) && iqentry_state[n]==IQS_AGEN);

always @*
for (n = 0; n < QENTRIES; n = n + 1)
	iqentry_memready[n] <= (iqentry_v[n] & iqentry_iv[n] & iqentry_memopsvalid[n] & ~iqentry_memissue[n] & ~iqentry_stomp[n]);

assign outstanding_stores = (dram0 && dram0_store) ||
                            (dram1 && dram1_store) ||
                            (dram2 && dram2_store);

//
// additional COMMIT logic
//
// NOTE(review): commit2_id and commit2_bus are not assigned when `NUM_CMT > 1,
// and nothing at all is assigned to commit2_* when `NUM_CMT > 2 (the branch
// is empty), so those paths hold their previous value in this combinational
// block - confirm this is intended.
always @*
begin
	commit0_v <= (iqentry_state[heads[0]] == IQS_CMT && ~|panic);
	commit0_id <= {iqentry_mem[heads[0]], heads[0]};	// if a memory op, it has a DRAM-bus id
	commit0_tgt <= iqentry_tgt[heads[0]];
	commit0_we  <= iqentry_we[heads[0]];
	commit0_bus <= iqentry_res[heads[0]];
	if (`NUM_CMT > 1) begin
		commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
		           && iqentry_state[heads[1]] == IQS_CMT
		           && ~|panic);
		commit1_id <= {iqentry_mem[heads[1]], heads[1]};
		commit1_tgt <= iqentry_tgt[heads[1]];
		commit1_we  <= iqentry_we[heads[1]];
		commit1_bus <= iqentry_res[heads[1]];
		// Need to set commit1, and commit2 valid bits for the branch predictor.
		if (`NUM_CMT > 2) begin
		end
		else begin
			commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
			           && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10
			           && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111
			           && iqentry_tgt[heads[2]][4:0]==5'd0 && ~|panic);	// watch out for dbnz and ibne
			commit2_tgt <= 12'h000;
			commit2_we <= 8'h00;
		end
	end
	else begin
		commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
		           && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} == 2'b11
		           && !iqentry_rfw[heads[1]] && ~|panic);	// watch out for dbnz and ibne
		commit1_id <= {iqentry_mem[heads[1]], heads[1]};	// if a memory op, it has a DRAM-bus id
		commit1_tgt <= 12'h000;
		commit1_we <= 8'h00;
		// We don't really need the bus value since nothing is being written.
		commit1_bus <= iqentry_res[heads[1]];
		commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
		           && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10
		           && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111
		           && !iqentry_rfw[heads[2]] && ~|panic);	// watch out for dbnz and ibne
		commit2_id <= {iqentry_mem[heads[2]], heads[2]};	// if a memory op, it has a DRAM-bus id
		commit2_tgt <= 12'h000;
		commit2_we <= 8'h00;
		commit2_bus <= iqentry_res[heads[2]];
	end
end

assign int_commit = (commit0_v && iqentry_irq[heads[0]])
                 || (commit0_v && commit1_v && iqentry_irq[heads[1]] && `NUM_CMT > 1)
                 || (commit0_v && commit1_v && commit2_v && iqentry_irq[heads[2]] && `NUM_CMT > 2);

// Detect if a given register will become valid during the current cycle.
// We want a signal that is active during the current clock cycle for the read
// through register file, which trims a cycle off register access for every
// instruction. But two different kinds of assignment statements can't be
// placed under the same always block, it's a bad practice and may not work.
// So a signal is created here with its own always block.
//
// regIsValid[n] is the combinational "valid this cycle" view of rf_v[n]:
//  - it starts from the registered valid bit rf_v[n];
//  - on a branch miss, registers of the missing thread's half of the file
//    (n >= 128 when branchmiss_thrd is set, n < 128 otherwise) that are not
//    live targets are treated as valid;
//  - a register being committed this cycle is treated as valid when the
//    register file source still matches the committing entry (or the entry
//    belongs to the thread taking the branch miss and is flagged as source).
// Register 0 of every 32-register set is always valid.
reg [AREGS-1:0] regIsValid;
always @*
begin
	for (n = 1; n < AREGS; n = n + 1)
	begin
		regIsValid[n] = rf_v[n];
		if (branchmiss)
			if (~livetarget[n]) begin
				if (branchmiss_thrd) begin
					if (n >= 128)
						regIsValid[n] = `VAL;
				end
				else begin
					if (n < 128)
						regIsValid[n] = `VAL;
				end
			end
		if (commit0_v && n=={commit0_tgt[7:0]})
			regIsValid[n] = regIsValid[n] | ((rf_source[ {commit0_tgt[7:0]} ] == commit0_id)
				|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ]));
		if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1)
			regIsValid[n] = regIsValid[n] | ((rf_source[ {commit1_tgt[7:0]} ] == commit1_id)
				|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
		if (commit2_v && n=={commit2_tgt[7:0]} && `NUM_CMT > 2)
			regIsValid[n] = regIsValid[n] | ((rf_source[ {commit2_tgt[7:0]} ] == commit2_id)
				|| (branchmiss && branchmiss_thrd == iqentry_thrd[commit2_id[`QBITS]] && iqentry_source[ commit2_id[`QBITS] ]));
	end
	regIsValid[0] = `VAL;
	regIsValid[32] = `VAL;
	regIsValid[64] = `VAL;
	regIsValid[96] = `VAL;
	// The upper 128 registers only exist when AREGS is 256, which is selected
	// by SUPPORT_SMT. The guard must use that same macro (it previously tested
	// SMT, so these entries were never forced valid in an SMT build).
`ifdef SUPPORT_SMT
	regIsValid[128] = `VAL;
	regIsValid[160] = `VAL;
	regIsValid[192] = `VAL;
	regIsValid[224] = `VAL;
`endif
end

// Wait until the cycle after Ra becomes valid to give time to read
// the vector element from the register file.
reg rf_vra0, rf_vra1;
/*always @(posedge clk)
    rf_vra0 <= regIsValid[Ra0s];
always @(posedge clk)
    rf_vra1 <= regIsValid[Ra1s];
*/
// Check how many instructions can be queued. This might be fewer than the
// number ready to queue from the fetch stage if queue slots aren't
// available or if there are no more physical registers left for remapping.
// The fetch stage needs to know how many instructions will queue so this
// logic is placed here.
// NOPs are filtered out and do not enter the instruction queue.
// The core will stream NOPs on a cache miss and they would mess up the queue
// order if there are immediate prefixes in the queue.
// For the VEX instruction, the instruction can't queue until register Ra
// is valid, because register Ra is used to specify the vector element to
// read.
wire q2open = iqentry_v[tail0]==`INV && iqentry_v[tail1]==`INV;
wire q3open = iqentry_v[tail0]==`INV && iqentry_v[tail1]==`INV && iqentry_v[(tail1 + 2'd1) % QENTRIES]==`INV;

// Outputs of this block (all default to FALSE each evaluation):
//   canq1/canq2     - the slot at tail0/tail1 may accept an instruction
//   queued1/queued2 - the fetch buffer instruction is considered queued
//   queuedNop       - the fetched instruction(s) were NOPs and are dropped
//   vqueued2        - a second element of a vector instruction may be queued
always @*
begin
	canq1 <= FALSE;
	canq2 <= FALSE;
	queued1 <= FALSE;
	queued2 <= FALSE;
	queuedNop <= FALSE;
	vqueued2 <= FALSE;
	if (!branchmiss) begin
		// Two available
		if (fetchbuf1_v & fetchbuf0_v) begin
			// Is there a pair of NOPs ? (cache miss)
			if ((fetchbuf0_instr[`INSTRUCTION_OP]==`NOP) && (fetchbuf1_instr[`INSTRUCTION_OP]==`NOP))
				queuedNop <= TRUE;
			else begin
				// If it's a predicted branch queue only the first instruction, the second
				// instruction will be stomped on.
				if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin
					if (iqentry_v[tail0]==`INV) begin
						canq1 <= TRUE;
						queued1 <= TRUE;
					end
				end
				// This is where a single NOP is allowed through to simplify the code. A
				// single NOP can't be a cache miss. Otherwise it would be necessary to queue
				// fetchbuf1 on tail0 it would add a nightmare to the enqueue code.
				// Not a branch and there are two instructions fetched, see whether or not
				// both instructions can be queued.
				else begin
					if (iqentry_v[tail0]==`INV) begin
						canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
						queued1 <= (
							((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
						if (iqentry_v[tail1]==`INV) begin
							canq2 <= ((!IsVex(fetchbuf1_instr) || rf_vra1)) || !SUP_VECTOR;
							queued2 <= (
								(!IsVector(fetchbuf1_instr) && (!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
							vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
						end
					end
					// If an irq is active during a vector instruction fetch, claim the vector instruction
					// is finished queueing even though it may not be. It'll pick up where it left off after
					// the exception is processed.
					if (freezePC) begin
						if (IsVector(fetchbuf0_instr) && IsVector(fetchbuf1_instr) && vechain) begin
							queued1 <= TRUE;
							queued2 <= TRUE;
						end
						else if (IsVector(fetchbuf0_instr)) begin
							queued1 <= TRUE;
							if (vqe0 < vl-2)
								queued2 <= TRUE;
							else
								queued2 <= iqentry_v[tail1]==`INV;
						end
					end
				end
			end
		end
		// One available
		else if (fetchbuf0_v) begin
			if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
				if (iqentry_v[tail0]==`INV) begin
					canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
					queued1 <=
						(((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
				end
				if (iqentry_v[tail1]==`INV) begin
					canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
					vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
				end
				if (freezePC) begin
					if (IsVector(fetchbuf0_instr)) begin
						queued1 <= TRUE;
						if (vqe0 < vl-2)
							queued2 <= iqentry_v[tail1]==`INV;
					end
				end
			end
			else
				queuedNop <= TRUE;
		end
		else if (fetchbuf1_v) begin
			if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
				if (iqentry_v[tail0]==`INV) begin
					canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
					queued1 <= (
						((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
				end
				if (iqentry_v[tail1]==`INV) begin
					canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
					vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2;
				end
				if (freezePC) begin
					if (IsVector(fetchbuf1_instr)) begin
						queued1 <= TRUE;
						if (vqe1 < vl-2)
							queued2 <= iqentry_v[tail1]==`INV;
					end
				end
			end
			else
				queuedNop <= TRUE;
		end
		//else no instructions available to queue
	end
	else begin
		// A branch miss is in progress: only an instruction belonging to the
		// other thread may queue.
		// One available
		if (fetchbuf0_v && fetchbuf0_thrd != branchmiss_thrd) begin
			if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
				if (iqentry_v[tail0]==`INV) begin
					canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
					queued1 <= (
						((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
				end
				if (iqentry_v[tail1]==`INV) begin
					canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
					vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
				end
			end
			else
				queuedNop <= TRUE;
		end
		else if (fetchbuf1_v && fetchbuf1_thrd != branchmiss_thrd) begin
			if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
				if (iqentry_v[tail0]==`INV) begin
					canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
					queued1 <= (
						((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
				end
				if (iqentry_v[tail1]==`INV) begin
					canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
					// This arm handles fetchbuf1, so the vector test must look at
					// fetchbuf1_instr / vqe1 (it previously tested fetchbuf0_instr
					// and vqe0, copied from the fetchbuf0 arm above).
					vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && !vechain;
				end
			end
			else
				queuedNop <= TRUE;
		end
		// else
		//	queuedNop <= TRUE;
	end
end

//
// Branchmiss seems to be sticky sometimes during simulation. For instance branch miss
// and cache miss at same time. The branchmiss should clear before the core continues
// so the positive edge is detected to avoid incrementing the sequence number too many
// times.
wire pebm; edge_det uedbm (.rst(rst), .clk(clk), .ce(1'b1), .i(branchmiss), .pe(pebm), .ne(), .ee() ); reg [5:0] ld_time; reg [63:0] wc_time_dat; reg [63:0] wc_times; always @(posedge tm_clk_i) begin if (|ld_time) wc_time <= wc_time_dat; else begin wc_time[31:0] <= wc_time[31:0] + 32'd1; if (wc_time[31:0] >= TM_CLKFREQ-1) begin wc_time[31:0] <= 32'd0; wc_time[63:32] <= wc_time[63:32] + 32'd1; end end end wire writing_wb = (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && wbptr<`WB_DEPTH-1) || (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1 && wbptr<`WB_DEPTH-1) || (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2 && wbptr<`WB_DEPTH-1) ; // Monster clock domain. // Like to move some of this to clocking under different always blocks in order // to help out the toolset's synthesis, but it ain't gonna be easy. // Simulation doesn't like it if things are under separate always blocks. // Synthesis doesn't like it if things are under the same always block. //always @(posedge clk) //begin // branchmiss <= excmiss|fcu_branchmiss; // misspc <= excmiss ? excmisspc : fcu_misspc; // missid <= excmiss ? (|iqentry_exc[heads[0]] ? heads[0] : heads[1]) : fcu_sourceid; // branchmiss_thrd <= excmiss ? excthrd : fcu_thrd; //end wire alu0_done_pe, alu1_done_pe, pe_wait; edge_det uedalu0d (.clk(clk), .ce(1'b1), .i(alu0_done&tlb_done), .pe(alu0_done_pe), .ne(), .ee()); edge_det uedalu1d (.clk(clk), .ce(1'b1), .i(alu1_done), .pe(alu1_done_pe), .ne(), .ee()); edge_det uedwait1 (.clk(clk), .ce(1'b1), .i((waitctr==48'd1) || signal_i[fcu_argA[4:0]|fcu_argI[4:0]]), .pe(pe_wait), .ne(), .ee()); // Bus randomization to mitigate meltdown attacks wire [63:0] ralu0_bus = |alu0_exc ? {4{lfsro}} : alu0_tlb ? tlbo : alu0_bus; wire [63:0] ralu1_bus = |alu1_exc ? {4{lfsro}} : alu1_bus; wire [63:0] rfpu1_bus = |fpu1_exc ? {4{lfsro}} : fpu1_bus; wire [63:0] rfpu2_bus = |fpu2_exc ? {4{lfsro}} : fpu2_bus; wire [63:0] rfcu_bus = |fcu_exc ? 
{4{lfsro}} : fcu_bus; wire [63:0] rdramA_bus = dramA_bus; wire [63:0] rdramB_bus = dramB_bus; wire [63:0] rdramC_bus = dramC_bus; // Hold reset for five seconds reg [31:0] rst_ctr; always @(posedge clk) if (rst) rst_ctr <= 32'd0; else begin if (rst_ctr < 32'd10) rst_ctr <= rst_ctr + 24'd1; end always @(posedge clk) if (rst|(rst_ctr < 32'd10)) begin `ifdef SUPPORT_SMT mstatus[0] <= 64'h4000F; // select register set #16 for thread 0 mstatus[1] <= 64'h4800F; // select register set #18 for thread 1 rs_stack[0] <= 64'd16; brs_stack[0] <= 64'd16; rs_stack[1] <= 64'd18; brs_stack[1] <= 64'd18; `else im_stack <= 32'hFFFFFFFF; mstatus <= 64'h4000F; // select register set #16 for thread 0 rs_stack <= 64'd16; brs_stack <= 64'd16; `endif for (n = 0; n < QENTRIES; n = n + 1) begin iqentry_state[n] <= IQS_INVALID; iqentry_iv[n] <= `INV; iqentry_is[n] <= 3'b00; iqentry_sn[n] <= 4'd0; iqentry_pt[n] <= FALSE; iqentry_bt[n] <= FALSE; iqentry_br[n] <= FALSE; iqentry_aq[n] <= FALSE; iqentry_rl[n] <= FALSE; iqentry_alu0[n] <= FALSE; iqentry_alu[n] <= FALSE; iqentry_fpu[n] <= FALSE; iqentry_fsync[n] <= FALSE; iqentry_fc[n] <= FALSE; iqentry_takb[n] <= FALSE; iqentry_jmp[n] <= FALSE; iqentry_jal[n] <= FALSE; iqentry_ret[n] <= FALSE; iqentry_brk[n] <= FALSE; iqentry_irq[n] <= FALSE; iqentry_rti[n] <= FALSE; iqentry_ldcmp[n] <= FALSE; iqentry_load[n] <= FALSE; iqentry_rtop[n] <= FALSE; iqentry_sei[n] <= FALSE; iqentry_shft[n] <= FALSE; iqentry_sync[n] <= FALSE; iqentry_ven[n] <= 6'd0; iqentry_vl[n] <= 8'd0; iqentry_we[n] <= 8'h00; iqentry_rfw[n] <= FALSE; iqentry_rmw[n] <= FALSE; iqentry_pc[n] <= RSTPC; iqentry_instr[n] <= `NOP_INSN; iqentry_insln[n] <= 3'd4; iqentry_preload[n] <= FALSE; iqentry_mem[n] <= FALSE; iqentry_memndx[n] <= FALSE; iqentry_memissue[n] <= FALSE; iqentry_mem_islot[n] <= 3'd0; iqentry_memdb[n] <= FALSE; iqentry_memsb[n] <= FALSE; iqentry_tgt[n] <= 6'd0; iqentry_imm[n] <= 1'b0; iqentry_ma[n] <= 1'b0; iqentry_a0[n] <= 64'd0; iqentry_a1[n] <= 64'd0; iqentry_a2[n] <= 
64'd0; iqentry_a3[n] <= 64'd0; iqentry_a1_v[n] <= `INV; iqentry_a2_v[n] <= `INV; iqentry_a3_v[n] <= `INV; iqentry_a1_s[n] <= 5'd0; iqentry_a2_s[n] <= 5'd0; iqentry_a3_s[n] <= 5'd0; iqentry_canex[n] <= FALSE; end bwhich <= 2'b00; dram0 <= `DRAMSLOT_AVAIL; dram1 <= `DRAMSLOT_AVAIL; dram2 <= `DRAMSLOT_AVAIL; dram0_instr <= `NOP_INSN; dram1_instr <= `NOP_INSN; dram2_instr <= `NOP_INSN; dram0_addr <= 32'h0; dram1_addr <= 32'h0; dram2_addr <= 32'h0; dram0_id <= 1'b0; dram1_id <= 1'b0; dram2_id <= 1'b0; dram0_store <= 1'b0; dram1_store <= 1'b0; dram2_store <= 1'b0; invic <= FALSE; invicl <= FALSE; tail0 <= 3'd0; tail1 <= 3'd1; for (n = 0; n < QENTRIES; n = n + 1) heads[n] <= n; panic = `PANIC_NONE; alu0_dataready <= 1'b0; alu1_dataready <= 1'b0; alu0_sourceid <= 5'd0; alu1_sourceid <= 5'd0; `define SIM_ `ifdef SIM_ alu0_pc <= RSTPC; alu0_instr <= `NOP_INSN; alu0_argA <= 64'h0; alu0_argB <= 64'h0; alu0_argC <= 64'h0; alu0_argI <= 64'h0; alu0_mem <= 1'b0; alu0_shft <= 1'b0; alu0_thrd <= 1'b0; alu0_tgt <= 6'h00; alu0_ven <= 6'd0; alu1_pc <= RSTPC; alu1_instr <= `NOP_INSN; alu1_argA <= 64'h0; alu1_argB <= 64'h0; alu1_argC <= 64'h0; alu1_argI <= 64'h0; alu1_mem <= 1'b0; alu1_shft <= 1'b0; alu1_thrd <= 1'b0; alu1_tgt <= 6'h00; alu1_ven <= 6'd0; `endif fcu_dataready <= 0; fcu_instr <= `NOP_INSN; fcu_call <= 1'b0; dramA_v <= 0; dramB_v <= 0; dramC_v <= 0; I <= 0; CC <= 0; bstate <= BIDLE; tick <= 64'd0; ol_o <= 2'b0; bte_o <= 2'b00; cti_o <= 3'b000; cyc <= `LOW; stb_o <= `LOW; we <= `LOW; sel_o <= 8'h00; dat_o <= 64'hFFFFFFFFFFFFFFFF; sr_o <= `LOW; cr_o <= `LOW; vadr <= RSTPC; cr0 <= 64'd0; cr0[13:8] <= 6'd0; // select compressed instruction group #0 cr0[30] <= TRUE; // enable data caching cr0[32] <= TRUE; // enable branch predictor cr0[16] <= 1'b0; // disable SMT cr0[17] <= 1'b0; // sequence number reset = 1 cr0[34] <= FALSE; // write buffer merging enable cr0[35] <= TRUE; // load speculation enable pcr <= 32'd0; pcr2 <= 64'd0; for (n = 0; n < PREGS; n = n + 1) begin rf_v[n] <= 
`VAL; rf_source[n] <= {`QBIT{1'b1}}; end fp_rm <= 3'd0; // round nearest even - default rounding mode fpu_csr[37:32] <= 5'd31; // register set #31 waitctr <= 48'd0; for (n = 0; n < 16; n = n + 1) begin badaddr[n] <= 64'd0; bad_instr[n] <= `NOP_INSN; end // Vector vqe0 <= 6'd0; vqet0 <= 6'd0; vqe1 <= 6'd0; vqet1 <= 6'd0; vl <= 7'd62; for (n = 0; n < 8; n = n + 1) vm[n] <= 64'h7FFFFFFFFFFFFFFF; nop_fetchbuf <= 4'h0; fcu_done <= `TRUE; sema <= 64'h0; tvec[0] <= RSTPC; pmr <= 64'hFFFFFFFFFFFFFFFF; pmr[0] <= `ID1_AVAIL; pmr[1] <= `ID2_AVAIL; pmr[2] <= `ID3_AVAIL; pmr[8] <= `ALU0_AVAIL; pmr[9] <= `ALU1_AVAIL; pmr[16] <= `FPU1_AVAIL; pmr[17] <= `FPU2_AVAIL; pmr[24] <= `MEM1_AVAIL; pmr[25] <= `MEM2_AVAIL; pmr[26] <= `MEM3_AVAIL; pmr[32] <= `FCU_AVAIL; for (n = 0; n < `WB_DEPTH; n = n + 1) begin wb_v[n] <= 1'b0; wb_rmw[n] <= 1'b0; wb_id[n] <= {QENTRIES{1'b0}}; wb_ol[n] <= 2'b00; wb_sel[n] <= 8'h00; wb_addr[n] <= 64'd0; wb_data[n] <= 64'd0; end wb_en <= `TRUE; wbo_id <= {QENTRIES{1'b0}}; wbptr <= 2'd0; `ifdef SIM wb_merges <= 32'd0; `endif iq_ctr <= 40'd0; icl_ctr <= 40'd0; bm_ctr <= 40'd0; br_ctr <= 40'd0; irq_ctr <= 40'd0; cmt_timer <= 9'd0; StoreAck1 <= `FALSE; keys <= 64'h0; `ifdef SUPPORT_DBG dbg_ctrl <= 64'h0; `endif /* Initialized with initial begin above `ifdef SUPPORT_BBMS for (n = 0; n < 64; n = n + 1) begin thrd_handle[n] <= 16'h0; prg_base[n] <= 64'h0; cl_barrier[n] <= 64'h0; cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; ro_barrier[n] <= 64'h0; dl_barrier[n] <= 64'h0; du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; sl_barrier[n] <= 64'h0; su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; end `endif */ end else begin if (|fb_panic) panic <= fb_panic; // Only one branchmiss is allowed to be processed at a time. If a second // branchmiss occurs while the first is being processed, it would have // to of occurred as a speculation in the branch shadow of the first. // The second instruction would be stomped on by the first branchmiss so // there is no need to process it. 
// The branchmiss has to be latched, then cleared later as there could
// be a cache miss at the same time meaning the switch to the new pc
// does not take place immediately.
//
// A new miss is only latched while none is pending. An exception miss
// (excmiss) is tested first and therefore takes priority over a miss
// reported by the flow control unit (fcu_branchmiss).
if (!branchmiss) begin
	if (excmiss) begin
		branchmiss <= `TRUE;
		misspc <= excmisspc;
		// Attribute the miss to heads[0] when it carries a non-zero exception
		// code, otherwise to heads[1].
		missid <= (|iqentry_exc[heads[0]] ? heads[0] : heads[1]);
		branchmiss_thrd <= excthrd;
	end
	else if (fcu_branchmiss) begin
		branchmiss <= `TRUE;
		misspc <= fcu_misspc;
		missid <= fcu_sourceid;
		branchmiss_thrd <= fcu_thrd;
	end
end
// Clear a branch miss when target instruction is fetched.
// This nonblocking assignment comes after the latch above, so if both fire in
// the same clock the clear is the one that takes effect.
if (will_clear_branchmiss) begin
	branchmiss <= `FALSE;
end

// The following signals only pulse

// Instruction decode output should only pulse once for a queue entry. We
// want the decode to be invalidated after a clock cycle so that it isn't
// inadvertently used to update the queue at a later point.
// These are per-cycle defaults: statements further down this clocked block
// may assign the same signals again, in which case the later value wins.
dramA_v <= `INV;
dramB_v <= `INV;
dramC_v <= `INV;
id1_vi <= `INV;
if (`NUM_IDU > 1)
	id2_vi <= `INV;
if (`NUM_IDU > 2)
	id3_vi <= `INV;
wb_shift <= FALSE;
// Shift ld_time left by one, filling the low bit with zero.
ld_time <= {ld_time[4:0],1'b0};
wc_times <= wc_time;
// Register the current validity of the two Ra source registers.
rf_vra0 <= regIsValid[Ra0s];
rf_vra1 <= regIsValid[Ra1s];
// Wrap the vector element counters once they reach the vector length.
if (vqe0 >= vl) begin
	vqe0 <= 6'd0;
	vqet0 <= 6'h0;
end
if (vqe1 >= vl) begin
	vqe1 <= 6'd0;
	vqet1 <= 6'h0;
end
// Turn off vector chaining indicator when chained instructions are done.
// Bit 32 of mstatus is cleared once both element counters are either at zero
// or have reached the vector length.
if ((vqe0 >= vl || vqe0==6'd0) && (vqe1 >= vl || vqe1==6'd0))
`ifdef SUPPORT_SMT
	mstatus[0][32] <= 1'b0;
`else
	mstatus[32] <= 1'b0;
`endif

// Per-cycle defaults; later statements in this block may override them.
nop_fetchbuf <= 4'h0;
excmiss <= FALSE;
invic <= FALSE;
// invicl is held until L1_invline is seen.
if (L1_invline)
	invicl <= FALSE;
// Free-running cycle counter.
tick <= tick + 64'd1;
alu0_ld <= FALSE;
alu1_ld <= FALSE;
fpu1_ld <= FALSE;
fpu2_ld <= FALSE;
fcu_ld <= FALSE;
// cr0[17] is forced back to zero every cycle (self-clearing control bit).
cr0[17] <= 1'b0;
// Count the wait counter down to zero and hold it there.
if (waitctr != 48'd0)
	waitctr <= waitctr - 4'd1;

// Advance the flow-control timeout while the queue entry owned by the FCU is
// a flow-control op that is valid, out, and not yet done.
if (iqentry_fc[fcu_id[`QBITS]] && iqentry_v[fcu_id[`QBITS]] && !iqentry_done[fcu_id[`QBITS]] && iqentry_out[fcu_id[`QBITS]])
	fcu_timeout <= fcu_timeout + 8'd1;

// Register status repair while a branch miss is pending.
if (branchmiss) begin
	// Registers that are not a live target are marked valid again, but only
	// in the half of the register file selected by branchmiss_thrd
	// (indices >= 128 when set, < 128 when clear). Index 0 is skipped.
	// NOTE(review): the 128 split is hard-coded rather than derived from
	// PREGS/AREGS - confirm it matches the SMT register layout.
	for (n = 1; n < PREGS; n = n + 1)
		if (~livetarget[n]) begin
			if (branchmiss_thrd) begin
				if (n >= 128)
					rf_v[n] <= `VAL;
			end
			else begin
				if (n < 128)
					rf_v[n] <= `VAL;
			end
		end
	// For every queue entry flagged in iqentry_latestID that belongs to the
	// missed thread, point rf_source of its target register back at that
	// entry: {0, is-memory-op, queue index}.
	for (n = 0; n < QENTRIES; n = n + 1)
		if (|iqentry_latestID[n])
			if (iqentry_thrd[n]==branchmiss_thrd) rf_source[ {iqentry_tgt[n][7:0]} ] <= { 1'b0, iqentry_mem[n], n[`QBITS] };
end

// The source for the register file data might have changed since it was
// placed on the commit bus. So it's needed to check that the source is
// still as expected to validate the register.
// Register validity update for commit ports 0, 1 and 2.
// For each active port whose target register is currently marked invalid,
// rf_v[target] is loaded from regIsValid[target]; when that bit is set,
// rf_source[target] is set to all ones.
// The ports are handled in order with nonblocking assignments, so when
// several ports hit the same register the highest numbered port's write is
// the one that takes effect.
// NOTE(review): the $display calls index regIsValid with tgt[5:0] while the
// logic uses tgt[7:0], and the "v[%d]" field prints a validity bit rather
// than a register number - the trace may not match the update; confirm.
if (commit0_v) begin
	if (!rf_v[ {commit0_tgt[7:0]} ]) begin
//		rf_v[ {commit0_tgt[7:0]} ] <= rf_source[ commit0_tgt[7:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]);
		rf_v[ {commit0_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}];//rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]);
		if (regIsValid[{commit0_tgt[7:0]}])
			rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}};
	end
	if (commit0_tgt[5:0] != 6'd0) $display("r%d <- %h   v[%d]<-%d", commit0_tgt, commit0_bus, regIsValid[commit0_tgt[5:0]],
		rf_source[ {commit0_tgt[7:0]} ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]));
	if (commit0_tgt[5:0]==6'd30 && commit0_bus==64'd0)
		$display("FP <= 0");
end
if (commit1_v && `NUM_CMT > 1) begin
	if (!rf_v[ {commit1_tgt[7:0]} ]) begin
		// Same target as port 0: combine the validity of both.
		if ({commit1_tgt[7:0]}=={commit0_tgt[7:0]}) begin
			rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}];
			if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}])
				rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
			/*
				(rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ])) ||
				(rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]));
			*/
		end
		else begin
			rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]);
			if (regIsValid[{commit1_tgt[7:0]}])
				rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
		end
	end
	if (commit1_tgt[5:0] != 6'd0) $display("r%d <- %h   v[%d]<-%d", commit1_tgt, commit1_bus, regIsValid[commit1_tgt[5:0]],
		rf_source[ {commit1_tgt[7:0]} ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]));
	if (commit1_tgt[5:0]==6'd30 && commit1_bus==64'd0)
		$display("FP <= 0");
end
if (commit2_v && `NUM_CMT > 2) begin
	if (!rf_v[ {commit2_tgt[7:0]} ]) begin
		// All three ports target the same register.
		if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]} && {commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin
			rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
			if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
				rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}};
		end
		// Port 2 matches port 0 only.
		else if ({commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin
			rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
			if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
				rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}};
		end
		// Port 2 matches port 1 only.
		else if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]}) begin
			rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
			if (regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
				rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
		end
		else begin
			rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit2_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]);
			if (regIsValid[{commit2_tgt[7:0]}])
				rf_source[{commit2_tgt[7:0]}] <= {`QBIT{1'b1}};
		end
	end
	if (commit2_tgt[5:0] != 6'd0) $display("r%d <- %h   v[%d]<-%d", commit2_tgt, commit2_bus, regIsValid[commit2_tgt[5:0]],
		rf_source[ {commit2_tgt[7:0]} ] == commit2_id || (branchmiss && iqentry_source[ commit2_id[`QBITS] ]));
	if (commit2_tgt[5:0]==6'd30 && commit2_bus==64'd0)
		$display("FP <= 0");
end
// Register 0 is always marked valid; placed last so it overrides any write
// to rf_v[0] made above in the same clock.
rf_v[0] <= 1;

//
// ENQUEUE
//
// place up to two instructions from the fetch buffer into slots in the IQ.
//   note: they are placed in-order, and they are expected to be executed
// 0, 1, or 2 of the fetch buffers may have valid data
// 0, 1, or 2 slots in the instruction queue may be available.
// if we notice that one of the instructions in the fetch buffer is a predicted branch,
// (set branchback/backpc and delete any instructions after it in fetchbuf)
//
// enqueue fetchbuf0 and fetchbuf1, but only if there is room,
// and ignore fetchbuf1 if fetchbuf0 has a backwards branch in it.
//
// also, do some instruction-decode ... set the operand_valid bits in the IQ
// appropriately so that the DATAINCOMING stage does not have to look at the opcode
//
// Structure: the case below is keyed on which fetch buffers hold a valid
// instruction. Each arm queues one or two entries at tail0/tail1 through the
// enque0/enque1/enque0x tasks and invalidates the target register(s) in
// rf_v/rf_source. Within an arm the "queue one" updates are written first and
// the "queue two" updates afterwards; because nonblocking assignments are
// used, the later assignment to a signal replaces the earlier one.
//
// iq_ctr (instructions queued counter) follows the same idiom: "+1" on the
// first enqueue, "+2" if a second one is queued. It must therefore be a
// nonblocking assignment - with a blocking "=" the two updates accumulate and
// the counter advances by three for two instructions. It is also reset with
// "<=" elsewhere in this always block, so "<=" keeps the assignment style
// consistent for this register.
//
// NOTE(review): when fetchbuf0 and fetchbuf1 belong to different threads the
// sequence number passed to enque1 is maxsn[x]+2 in the two vector paths but
// maxsn[x]+1 in the scalar path below - confirm which is intended.
//
if (!branchmiss) 	// don't bother doing anything if there's been a branch miss

	case ({fetchbuf0_v, fetchbuf1_v})

	2'b00: ; // do nothing

	// Only fetchbuf1 holds an instruction.
	2'b01:
		if (canq1) begin
			if (fetchbuf1_rfw) begin
				rf_source[ Rt1s ] <= { 1'b0, fetchbuf1_mem, tail0 };	// top bit indicates ALU/MEM bus
				rf_v [Rt1s] <= `INV;
			end
			if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin
				// Queue the next vector element of fetchbuf1 at tail0.
				vqe1 <= vqe1 + 4'd1;
				if (IsVCmprss(fetchbuf1_instr)) begin
					if (vm[fetchbuf1_instr[25:23]][vqe1])
						vqet1 <= vqet1 + 4'd1;
				end
				else
					vqet1 <= vqet1 + 4'd1;
				if (vqe1 >= vl-2)
					nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001;
				enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe1);
				iq_ctr <= iq_ctr + 4'd1;
				// Room for a second element: queue it at tail1 as well.
				if (canq2 && vqe1 < vl-2) begin
					vqe1 <= vqe1 + 4'd2;
					if (IsVCmprss(fetchbuf1_instr)) begin
						if (vm[fetchbuf1_instr[25:23]][vqe1+6'd1])
							vqet1 <= vqet1 + 4'd2;
					end
					else
						vqet1 <= vqet1 + 4'd2;
					enque1(tail1, fetchbuf1_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe1 + 6'd1);
					iq_ctr <= iq_ctr + 4'd2;
				end
			end
			else begin
				enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0);
				iq_ctr <= iq_ctr + 4'd1;
			end
		end

	// Only fetchbuf0 holds an instruction.
	2'b10:
		if (canq1) begin
			enque0x();
		end

	// Both fetch buffers hold an instruction.
	2'b11:
		if (canq1) begin
			//
			// if the first instruction is a predicted branch, enqueue it & stomp on all following instructions
			// but only if the following instruction is in the same thread. Otherwise we want to queue two.
			//
			if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin
				enque0x();
			end

			else begin	// fetchbuf0 doesn't contain a predicted branch
				//
				// so -- we can enqueue 1 or 2 instructions, depending on space in the IQ
				// update the rf_v and rf_source bits separately (at end)
				//   the problem is that if we do have two instructions,
				//   they may interact with each other, so we have to be
				//   careful about where things point.
				//
				// enqueue the first instruction ...
				//
				if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin
					vqe0 <= vqe0 + 4'd1;
					if (IsVCmprss(fetchbuf0_instr)) begin
						if (vm[fetchbuf0_instr[25:23]][vqe0])
							vqet0 <= vqet0 + 4'd1;
					end
					else
						vqet0 <= vqet0 + 4'd1;
					if (vqe0 >= vl-2)
						nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
				end
				if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin
					enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0);
					iq_ctr <= iq_ctr + 4'd1;
					//
					// if there is room for a second instruction, enqueue it
					//
					if (canq2) begin
						// Vector chaining: fetchbuf1 is a vector op that does not
						// read the target of fetchbuf0.
						if (vechain && IsVector(fetchbuf1_instr)
						&& Ra1s != Rt0s	// And there is no dependency
						&& Rb1s != Rt0s
						&& Rc1s != Rt0s
						) begin
`ifdef SUPPORT_SMT
							mstatus[0][32] <= 1'b1;
`else
							mstatus[32] <= 1'b1;
`endif
							vqe1 <= vqe1 + 4'd1;
							if (IsVCmprss(fetchbuf1_instr)) begin
								if (vm[fetchbuf1_instr[25:23]][vqe1])
									vqet1 <= vqet1 + 4'd1;
							end
							else
								vqet1 <= vqet1 + 4'd1;
							if (vqe1 >= vl-2)
								nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001;
							enque1(tail1,
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
								fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0);
							iq_ctr <= iq_ctr + 4'd2;

							// SOURCE 1 ...
							a1_vs();

							// SOURCE 2 ...
							a2_vs();

							// SOURCE 3 ...
							a3_vs();

							// if the two instructions enqueued target the same register,
							// make sure only the second writes to rf_v and rf_source.
							// first is allowed to update rf_v and rf_source only if the
							// second has no target
							//
							if (fetchbuf0_rfw) begin
								rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
								rf_v [ Rt0s] <= `INV;
							end
							if (fetchbuf1_rfw) begin
								rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
								rf_v [ Rt1s ] <= `INV;
							end
						end
						// If there was a vector instruction in fetchbuf0, we really
						// want to queue the next vector element, not the next
						// instruction waiting in fetchbuf1.
						else if (IsVector(fetchbuf0_instr) && SUP_VECTOR && vqe0 < vl-1) begin
							vqe0 <= vqe0 + 4'd2;
							if (IsVCmprss(fetchbuf0_instr)) begin
								if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1])
									vqet0 <= vqet0 + 4'd2;
							end
							else
								vqet0 <= vqet0 + 4'd2;
							if (vqe0 >= vl-3)
								nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
							if (vqe0 < vl-1) begin
								enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe0 + 6'd1);
								iq_ctr <= iq_ctr + 4'd2;

								// SOURCE 1 ...
								iqentry_a1_v [tail1] <= regIsValid[Ra0s];
								iqentry_a1_s [tail1] <= rf_source [Ra0s];

								// SOURCE 2 ...
								iqentry_a2_v [tail1] <= regIsValid[Rb0s];
								iqentry_a2_s [tail1] <= rf_source[ Rb0s ];

								// SOURCE 3 ...
								iqentry_a3_v [tail1] <= regIsValid[Rc0s];
								iqentry_a3_s [tail1] <= rf_source[ Rc0s ];

								// if the two instructions enqueued target the same register,
								// make sure only the second writes to rf_v and rf_source.
								// first is allowed to update rf_v and rf_source only if the
								// second has no target (BEQ or SW)
								//
								if (fetchbuf0_rfw) begin
									rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 };
									rf_v [ Rt0s ] <= `INV;
								end
							end
						end
						// fetchbuf1 starts a new vector instruction.
						else if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin
							vqe1 <= 6'd1;
							if (IsVCmprss(fetchbuf1_instr)) begin
								if (vm[fetchbuf1_instr[25:23]][IsVector(fetchbuf0_instr)? 6'd0:vqe1+6'd1])
									vqet1 <= 6'd1;
								else
									vqet1 <= 6'd0;
							end
							else
								vqet1 <= 6'd1;
							if (IsVector(fetchbuf0_instr) && SUP_VECTOR)
								nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
							enque1(tail1,
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
								fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0);
							iq_ctr <= iq_ctr + 4'd2;

							// SOURCE 1 ...
							a1_vs();

							// SOURCE 2 ..
							a2_vs();

							// SOURCE 3 ...
							a3_vs();

							// if the two instructions enqueued target the same register,
							// make sure only the second writes to rf_v and rf_source.
							// first is allowed to update rf_v and rf_source only if the
							// second has no target
							//
							if (fetchbuf0_rfw) begin
								rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
								rf_v [ Rt0s] <= `INV;
							end
							if (fetchbuf1_rfw) begin
								rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
								rf_v [ Rt1s ] <= `INV;
							end
						end
						// Plain (non-vector) second instruction.
						else begin
//							enque1(tail1, seq_num + 5'd1, 6'd0);
							enque1(tail1,
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
								fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
								fetchbuf1_thrd ? maxsn[1] + 4'd1: maxsn[0]+4'd1, 6'd0);
							iq_ctr <= iq_ctr + 4'd2;

							// SOURCE 1 ...
							a1_vs();

							// SOURCE 2 ...
							a2_vs();

							// SOURCE 3 ...
							a3_vs();

							// if the two instructions enqueued target the same register,
							// make sure only the second writes to regIsValid and rf_source.
							// first is allowed to update regIsValid and rf_source only if the
							// second has no target (BEQ or SW)
							//
							if (fetchbuf0_rfw) begin
								rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
								rf_v [ Rt0s] <= `INV;
								$display("r%dx (%d) Invalidated", Rt0s, Rt0s[4:0]);
							end
							else
								$display("No rfw");
							if (fetchbuf1_rfw) begin
								rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
								$display("r%dx (%d) Invalidated", Rt1s, Rt1s[4:0]);
								rf_v [ Rt1s ] <= `INV;
							end
							else
								$display("No rfw");
						end

					end	// ends the "if IQ[tail1] is available" clause
					else begin	// only first instruction was enqueued
						if (fetchbuf0_rfw) begin
							$display("r%dx (%d) Invalidated 1", Rt0s, Rt0s[4:0]);
							rf_source[ Rt0s ] <= {1'b0,fetchbuf0_mem, tail0};
							rf_v [ Rt0s ] <= `INV;
						end
					end
				end

			end	// ends the "else fetchbuf0 doesn't have a backwards branch" clause
		end
	endcase

// Branch-miss event counter; not gated by the branchmiss test above.
if (pebm) begin
	bm_ctr <= bm_ctr + 40'd1;
end

//
// DATAINCOMING
//
// wait for operand/s to appear on alu busses and puts them into
// the iqentry_a1 and iqentry_a2 slots (if appropriate)
// as well as the appropriate iqentry_res slots (and setting valid bits)
//
// put results into the appropriate instruction entries
//
// This chunk of code has to be before the enqueue stage so that the agen bit
// can be reset to zero by enqueue.
// put results into the appropriate instruction entries // if (IsMul(alu0_instr)|IsDivmod(alu0_instr)|alu0_shft|alu0_tlb) begin if (alu0_done_pe) begin alu0_dataready <= TRUE; end end if (alu1_shft) begin if (alu1_done_pe) begin alu1_dataready <= TRUE; end end if (alu0_v) begin iqentry_tgt [ alu0_id[`QBITS] ] <= alu0_tgt; iqentry_res [ alu0_id[`QBITS] ] <= ralu0_bus; iqentry_exc [ alu0_id[`QBITS] ] <= alu0_exc; if (!iqentry_mem[ alu0_id[`QBITS] ] && alu0_done && tlb_done) begin // iqentry_done[ alu0_id[`QBITS] ] <= `TRUE; iqentry_state[alu0_id[`QBITS]] <= IQS_CMT; end // if (alu0_done) // iqentry_cmt [ alu0_id[`QBITS] ] <= `TRUE; // iqentry_out [ alu0_id[`QBITS] ] <= `INV; // iqentry_agen[ alu0_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET if (iqentry_mem[alu0_id[`QBITS]]) iqentry_state[alu0_id[`QBITS]] <= IQS_AGEN; if (iqentry_mem[ alu0_id[`QBITS] ] && !iqentry_agen[ alu0_id[`QBITS] ]) begin iqentry_ma[ alu0_id[`QBITS] ] <= alu0_bus; end if (|alu0_exc) begin // iqentry_done[alu0_id[`QBITS]] <= `VAL; iqentry_store[alu0_id[`QBITS]] <= `INV; iqentry_state[alu0_id[`QBITS]] <= IQS_CMT; end alu0_dataready <= FALSE; end if (alu1_v && `NUM_ALU > 1) begin iqentry_tgt [ alu1_id[`QBITS] ] <= alu1_tgt; iqentry_res [ alu1_id[`QBITS] ] <= ralu1_bus; iqentry_exc [ alu1_id[`QBITS] ] <= alu1_exc; if (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done) begin // iqentry_done[ alu1_id[`QBITS] ] <= `TRUE; iqentry_state[alu1_id[`QBITS]] <= IQS_CMT; end // iqentry_done[ alu1_id[`QBITS] ] <= (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done); // if (alu1_done) // iqentry_cmt [ alu1_id[`QBITS] ] <= `TRUE; // iqentry_out [ alu1_id[`QBITS] ] <= `INV; if (iqentry_mem[alu1_id[`QBITS]]) iqentry_state[alu1_id[`QBITS]] <= IQS_AGEN; // iqentry_agen[ alu1_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET if (iqentry_mem[ alu1_id[`QBITS] ] && !iqentry_agen[ alu1_id[`QBITS] ]) begin iqentry_ma[ alu1_id[`QBITS] ] <= alu1_bus; end if (|alu1_exc) begin // iqentry_done[alu1_id[`QBITS]] <= 
`VAL; iqentry_store[alu1_id[`QBITS]] <= `INV; iqentry_state[alu1_id[`QBITS]] <= IQS_CMT; end alu1_dataready <= FALSE; end if (fpu1_v && `NUM_FPU > 0) begin iqentry_res [ fpu1_id[`QBITS] ] <= rfpu1_bus; iqentry_ares[ fpu1_id[`QBITS] ] <= fpu1_status; iqentry_exc [ fpu1_id[`QBITS] ] <= fpu1_exc; // iqentry_done[ fpu1_id[`QBITS] ] <= fpu1_done; // iqentry_out [ fpu1_id[`QBITS] ] <= `INV; iqentry_state[fpu1_id[`QBITS]] <= IQS_CMT; fpu1_dataready <= FALSE; end if (fpu2_v && `NUM_FPU > 1) begin iqentry_res [ fpu2_id[`QBITS] ] <= rfpu2_bus; iqentry_ares[ fpu2_id[`QBITS] ] <= fpu2_status; iqentry_exc [ fpu2_id[`QBITS] ] <= fpu2_exc; // iqentry_done[ fpu2_id[`QBITS] ] <= fpu2_done; // iqentry_out [ fpu2_id[`QBITS] ] <= `INV; iqentry_state[fpu2_id[`QBITS]] <= IQS_CMT; //iqentry_agen[ fpu_id[`QBITS] ] <= `VAL; // RET fpu2_dataready <= FALSE; end if (IsWait(fcu_instr)) begin if (pe_wait) fcu_dataready <= `TRUE; end // If the return segment is not the same as the current code segment then a // segment load is triggered via the memory unit by setting the iq state to // AGEN. Otherwise the state is set to CMT which will cause a bypass of the // segment load from memory. if (fcu_v) begin fcu_done <= `TRUE; iqentry_ma [ fcu_id[`QBITS] ] <= fcu_misspc; iqentry_res [ fcu_id[`QBITS] ] <= rfcu_bus; iqentry_exc [ fcu_id[`QBITS] ] <= fcu_exc; iqentry_state[fcu_id[`QBITS] ] <= IQS_CMT; // takb is looked at only for branches to update the predictor. Here it is // unconditionally set, the value will be ignored if it's not a branch. 
iqentry_takb[ fcu_id[`QBITS] ] <= fcu_takb; br_ctr <= br_ctr + fcu_branch; fcu_dataready <= `INV; end // dramX_v only set on a load if (dramA_v && iqentry_v[ dramA_id[`QBITS] ]) begin iqentry_res [ dramA_id[`QBITS] ] <= rdramA_bus; // iqentry_done[ dramA_id[`QBITS] ] <= `VAL; // iqentry_out [ dramA_id[`QBITS] ] <= `INV; iqentry_state[dramA_id[`QBITS] ] <= IQS_CMT; iqentry_aq [ dramA_id[`QBITS] ] <= `INV; end if (`NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ]) begin iqentry_res [ dramB_id[`QBITS] ] <= rdramB_bus; iqentry_state[dramB_id[`QBITS] ] <= IQS_CMT; iqentry_aq [ dramB_id[`QBITS] ] <= `INV; end if (`NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ]) begin iqentry_res [ dramC_id[`QBITS] ] <= rdramC_bus; iqentry_state[dramC_id[`QBITS] ] <= IQS_CMT; iqentry_aq [ dramC_id[`QBITS] ] <= `INV; // if (iqentry_lptr[dram2_id[`QBITS]]) // wbrcd[pcr[5:0]] <= 1'b1; end // // see if anybody else wants the results ... look at lots of buses: // - fpu_bus // - alu0_bus // - alu1_bus // - fcu_bus // - dram_bus // - commit0_bus // - commit1_bus // for (n = 0; n < QENTRIES; n = n + 1) begin if (`NUM_FPU > 0) setargs(n,{1'b0,fpu1_id},fpu1_v,rfpu1_bus); if (`NUM_FPU > 1) setargs(n,{1'b0,fpu2_id},fpu2_v,rfpu2_bus); // The memory address generated by the ALU should not be posted to be // recieved into waiting argument registers. The arguments will be waiting // for the result of the memory load, picked up from the dram busses. The // only mem operation requiring the alu result bus is the push operation. 
setargs(n,{1'b0,alu0_id},alu0_v & (~alu0_mem | alu0_push),ralu0_bus); if (`NUM_ALU > 1) setargs(n,{1'b0,alu1_id},alu1_v & (~alu1_mem | alu1_push),ralu1_bus); setargs(n,{1'b0,fcu_id},fcu_v,rfcu_bus); setargs(n,{1'b0,dramA_id},dramA_v,rdramA_bus); if (`NUM_MEM > 1) setargs(n,{1'b0,dramB_id},dramB_v,rdramB_bus); if (`NUM_MEM > 2) setargs(n,{1'b0,dramC_id},dramC_v,rdramC_bus); setargs(n,commit0_id,commit0_v,commit0_bus); if (`NUM_CMT > 1) setargs(n,commit1_id,commit1_v,commit1_bus); if (`NUM_CMT > 2) setargs(n,commit2_id,commit2_v,commit2_bus); `ifndef INLINE_DECODE setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus); if (`NUM_IDU > 1) setinsn(n[`QBITS],id2_ido,id2_available&id2_vo,id2_bus); if (`NUM_IDU > 2) setinsn(n[`QBITS],id3_ido,id3_available&id3_vo,id3_bus); `endif end // // ISSUE // // determines what instructions are ready to go, then places them // in the various ALU queues. // also invalidates instructions following a branch-miss BEQ or any JALR (STOMP logic) // `ifndef INLINE_DECODE for (n = 0; n < QENTRIES; n = n + 1) if (id1_available) begin if (iqentry_id1issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin id1_vi <= `VAL; id1_id <= n[4:0]; id1_instr <= iqentry_rtop[n] ? ( iqentry_a3_v[n] ? iqentry_a3[n] `ifdef FU_BYPASS : (iqentry_a3_s[n] == alu0_id) ? alu0_bus : (iqentry_a3_s[n] == alu1_id) ? alu1_bus `endif : `NOP_INSN) : iqentry_instr[n]; id1_ven <= iqentry_ven[n]; id1_vl <= iqentry_vl[n]; id1_thrd <= iqentry_thrd[n]; id1_Rt <= iqentry_tgt[n][4:0]; id1_pt <= iqentry_pt[n]; end end if (`NUM_IDU > 1) begin for (n = 0; n < QENTRIES; n = n + 1) if (id2_available) begin if (iqentry_id2issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin id2_vi <= `VAL; id2_id <= n[4:0]; id2_instr <= iqentry_rtop[n] ? ( iqentry_a3_v[n] ? iqentry_a3[n] `ifdef FU_BYPASS : (iqentry_a3_s[n] == alu0_id) ? alu0_bus : (iqentry_a3_s[n] == alu1_id) ? 
alu1_bus `endif : `NOP_INSN) : iqentry_instr[n]; id2_ven <= iqentry_ven[n]; id2_vl <= iqentry_vl[n]; id2_thrd <= iqentry_thrd[n]; id2_Rt <= iqentry_tgt[n][4:0]; id2_pt <= iqentry_pt[n]; end end end if (`NUM_IDU > 2) begin for (n = 0; n < QENTRIES; n = n + 1) if (id3_available) begin if (iqentry_id3issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin id3_vi <= `VAL; id3_id <= n[4:0]; id3_instr <= iqentry_rtop[n] ? ( iqentry_a3_v[n] ? iqentry_a3[n] `ifdef FU_BYPASS : (iqentry_a3_s[n] == alu0_id) ? alu0_bus : (iqentry_a3_s[n] == alu1_id) ? alu1_bus `endif : `NOP_INSN) : iqentry_instr[n]; id3_ven <= iqentry_ven[n]; id3_vl <= iqentry_vl[n]; id3_thrd <= iqentry_thrd[n]; id3_Rt <= iqentry_tgt[n][4:0]; id3_pt <= iqentry_pt[n]; end end end `endif // not INLINE_DECODE // X's on unused busses cause problems in SIM. for (n = 0; n < QENTRIES; n = n + 1) if (iqentry_alu0_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin if (alu0_available & alu0_done) begin alu0_sourceid <= {iqentry_push[n],n[`QBITS]}; alu0_instr <= iqentry_rtop[n] ? ( `ifdef FU_BYPASS iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : (iqentry_a3_s[n] == alu1_id) ? ralu1_bus : (iqentry_a3_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : `NOP_INSN) `else iqentry_a3[n]) `endif : iqentry_instr[n]; alu0_sz <= iqentry_sz[n]; alu0_tlb <= iqentry_tlb[n]; alu0_mem <= iqentry_mem[n]; alu0_load <= iqentry_load[n]; alu0_store <= iqentry_store[n]; alu0_push <= iqentry_push[n]; alu0_shft <= iqentry_shft[n]; alu0_pc <= iqentry_pc[n]; alu0_argA <= `ifdef FU_BYPASS iqentry_a1_v[n] ? iqentry_a1[n] : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD; `else iqentry_a1[n]; `endif alu0_argB <= iqentry_imm[n] ? iqentry_a0[n] `ifdef FU_BYPASS : (iqentry_a2_v[n] ? iqentry_a2[n] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus : (iqentry_a2_s[n] == alu1_id) ? 
ralu1_bus : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD); `else : iqentry_a2[n]; `endif alu0_argC <= `ifdef FU_BYPASS iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; `else iqentry_a3[n]; `endif alu0_argI <= iqentry_a0[n]; alu0_tgt <= IsVeins(iqentry_instr[n]) ? {6'h0,1'b1,iqentry_tgt[n][4:0]} | (( iqentry_a2_v[n] ? iqentry_a2[n][5:0] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0] : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0] : {4{16'h0000}})) << 6 : iqentry_tgt[n]; alu0_ven <= iqentry_ven[n]; alu0_thrd <= iqentry_thrd[n]; alu0_dataready <= IsSingleCycle(iqentry_instr[n]); alu0_ld <= TRUE; iqentry_state[n] <= IQS_OUT; end end if (`NUM_ALU > 1) begin for (n = 0; n < QENTRIES; n = n + 1) if (iqentry_alu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin if (alu1_available && alu1_done) begin if (iqentry_alu0[n]) panic <= `PANIC_ALU0ONLY; alu1_sourceid <= {iqentry_push[n],n[`QBITS]}; alu1_instr <= iqentry_instr[n]; alu1_sz <= iqentry_sz[n]; alu1_mem <= iqentry_mem[n]; alu1_load <= iqentry_load[n]; alu1_store <= iqentry_store[n]; alu1_push <= iqentry_push[n]; alu1_shft <= iqentry_shft[n]; alu1_pc <= iqentry_pc[n]; alu1_argA <= `ifdef FU_BYPASS iqentry_a1_v[n] ? iqentry_a1[n] : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD; `else iqentry_a1[n]; `endif alu1_argB <= iqentry_imm[n] ? iqentry_a0[n] `ifdef FU_BYPASS : (iqentry_a2_v[n] ? iqentry_a2[n] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD); `else : iqentry_a2[n]; `endif alu1_argC <= `ifdef FU_BYPASS iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; `else iqentry_a3[n]; `endif alu1_argI <= iqentry_a0[n]; alu1_tgt <= IsVeins(iqentry_instr[n]) ? 
{6'h0,1'b1,iqentry_tgt[n][4:0]} | ((iqentry_a2_v[n] ? iqentry_a2[n][5:0] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0] : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0] : {4{16'h0000}})) << 6 : iqentry_tgt[n]; alu1_ven <= iqentry_ven[n]; alu1_dataready <= IsSingleCycle(iqentry_instr[n]); alu1_ld <= TRUE; iqentry_state[n] <= IQS_OUT; end end end for (n = 0; n < QENTRIES; n = n + 1) if (iqentry_fpu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin if (fpu1_available & fpu1_done) begin fpu1_sourceid <= n[`QBITS]; fpu1_instr <= iqentry_instr[n]; fpu1_pc <= iqentry_pc[n]; fpu1_argA <= `ifdef FU_BYPASS iqentry_a1_v[n] ? iqentry_a1[n] : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD; `else iqentry_a1[n]; `endif fpu1_argB <= `ifdef FU_BYPASS (iqentry_a2_v[n] ? iqentry_a2[n] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD); `else iqentry_a2[n]; `endif fpu1_argC <= `ifdef FU_BYPASS iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; `else iqentry_a3[n]; `endif fpu1_argI <= iqentry_a0[n]; fpu1_dataready <= `VAL; fpu1_ld <= TRUE; iqentry_state[n] <= IQS_OUT; end end for (n = 0; n < QENTRIES; n = n + 1) if (`NUM_FPU > 1 && iqentry_fpu2_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin if (fpu2_available & fpu2_done) begin fpu2_sourceid <= n[`QBITS]; fpu2_instr <= iqentry_instr[n]; fpu2_pc <= iqentry_pc[n]; fpu2_argA <= `ifdef FU_BYPASS iqentry_a1_v[n] ? iqentry_a1[n] : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD; `else iqentry_a1[n]; `endif fpu2_argB <= `ifdef FU_BYPASS (iqentry_a2_v[n] ? iqentry_a2[n] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus : (iqentry_a2_s[n] == alu1_id) ? 
ralu1_bus : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : 64'hDEADDEADDEADDEAD); `else iqentry_a2[n]; `endif fpu2_argC <= `ifdef FU_BYPASS iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; `else iqentry_a3[n]; `endif fpu2_argI <= iqentry_a0[n]; fpu2_dataready <= `VAL; fpu2_ld <= TRUE; iqentry_state[n] <= IQS_OUT; end end for (n = 0; n < QENTRIES; n = n + 1) if (iqentry_fcu_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin if (fcu_done) begin fcu_sourceid <= n[`QBITS]; fcu_prevInstr <= fcu_instr; fcu_instr <= iqentry_instr[n]; fcu_insln <= iqentry_insln[n]; fcu_pc <= iqentry_pc[n]; fcu_nextpc <= iqentry_pc[n] + iqentry_insln[n]; fcu_pt <= iqentry_pt[n]; fcu_brdisp <= iqentry_instr[n][6] ? {{36{iqentry_instr[n][47]}},iqentry_instr[n][47:23],iqentry_instr[n][17:16],1'b0} : {{52{iqentry_instr[n][31]}},iqentry_instr[n][31:23],iqentry_instr[n][17:16],1'b0}; fcu_branch <= iqentry_br[n]; fcu_call <= IsCall(iqentry_instr[n])|iqentry_jal[n]; fcu_jal <= iqentry_jal[n]; fcu_ret <= iqentry_ret[n]; fcu_brk <= iqentry_brk[n]; fcu_rti <= iqentry_rti[n]; fcu_pc <= iqentry_pc[n]; fcu_argA <= iqentry_a1_v[n] ? iqentry_a1[n] : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : ralu1_bus; `ifdef SUPPORT_SMT // fcu_argB <= iqentry_rti[n] ? epc0[iqentry_thrd[n]] fcu_epc <= epc0[iqentry_thrd[n]]; `else fcu_epc <= epc0; // fcu_argB <= iqentry_rti[n] ? epc0 `endif fcu_argB <= (iqentry_a2_v[n] ? iqentry_a2[n] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus : ralu1_bus); // argB waitctr <= (iqentry_a2_v[n] ? iqentry_a2[n][47:0] : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[47:0] : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus[47:0] : ralu1_bus[47:0]); fcu_argC <= iqentry_a3_v[n] ? iqentry_a3[n] : (iqentry_a3_s[n] == alu0_id) ? 
ralu0_bus : ralu1_bus; fcu_argI <= iqentry_a0[n]; fcu_thrd <= iqentry_thrd[n]; fcu_dataready <= !IsWait(iqentry_instr[n]); fcu_clearbm <= `FALSE; fcu_ld <= TRUE; fcu_timeout <= 8'h00; iqentry_state[n] <= IQS_OUT; fcu_done <= `FALSE; end end // // MEMORY // // update the memory queues and put data out on bus if appropriate // // // dram0, dram1, dram2 are the "state machines" that keep track // of three pipelined DRAM requests. if any has the value "000", // then it can accept a request (which bumps it up to the value "001" // at the end of the cycle). once it hits the value "111" the request // is finished and the dram_bus takes the value. if it is a store, the // dram_bus value is not used, but the dram_v value along with the // dram_id value signals the waiting memq entry that the store is // completed and the instruction can commit. // // if (dram0 != `DRAMSLOT_AVAIL) dram0 <= dram0 + 2'd1; // if (dram1 != `DRAMSLOT_AVAIL) dram1 <= dram1 + 2'd1; // if (dram2 != `DRAMSLOT_AVAIL) dram2 <= dram2 + 2'd1; // Flip the ready status to available. Used for loads or stores. if (dram0 == `DRAMREQ_READY) dram0 <= `DRAMSLOT_AVAIL; if (dram1 == `DRAMREQ_READY && `NUM_MEM > 1) dram1 <= `DRAMSLOT_AVAIL; if (dram2 == `DRAMREQ_READY && `NUM_MEM > 2) dram2 <= `DRAMSLOT_AVAIL; // grab requests that have finished and put them on the dram_bus // If stomping on the instruction don't place the value on the argument // bus to be loaded. 
// Post completed loads. When a memory channel is at `DRAMREQ_READY with its
// load flag set, the result bus for that channel (dramA/B/C) receives the
// owning queue id and the read data passed through fnDatiAlign. The valid
// flag is the inverse of the owner's stomp flag, so a stomped load never
// presents a valid result.
// Channels 1 and 2 are additionally gated by `NUM_MEM.
if (dram0 == `DRAMREQ_READY && dram0_load) begin
	dramA_v <= !iqentry_stomp[dram0_id[`QBITS]];
	dramA_id <= dram0_id;
	dramA_bus <= fnDatiAlign(dram0_instr,dram0_addr,rdat0);
end
if (dram1 == `DRAMREQ_READY && dram1_load && `NUM_MEM > 1) begin
	dramB_v <= !iqentry_stomp[dram1_id[`QBITS]];
	dramB_id <= dram1_id;
	dramB_bus <= fnDatiAlign(dram1_instr,dram1_addr,rdat1);
end
if (dram2 == `DRAMREQ_READY && dram2_load && `NUM_MEM > 2) begin
	dramC_v <= !iqentry_stomp[dram2_id[`QBITS]];
	dramC_id <= dram2_id;
	dramC_bus <= fnDatiAlign(dram2_instr,dram2_addr,rdat2);
end

// Simulation trace of completed stores (address and data).
if (dram0 == `DRAMREQ_READY && dram0_store)
	$display("m[%h] <- %h", dram0_addr, dram0_data);
if (dram1 == `DRAMREQ_READY && dram1_store && `NUM_MEM > 1)
	$display("m[%h] <- %h", dram1_addr, dram1_data);
if (dram2 == `DRAMREQ_READY && dram2_store && `NUM_MEM > 2)
	$display("m[%h] <- %h", dram2_addr, dram2_data);

//
// determine if the instructions ready to issue can, in fact, issue.
// "ready" means that the instruction has valid operands but has not gone yet
// The per-entry flag is only ever set here; it is not cleared by this loop.
for (n = 0; n < QENTRIES; n = n + 1)
	if (memissue[n])
		iqentry_memissue[n] <= `VAL;
//iqentry_memissue <= memissue;
missue_count <= issue_count;

// Invalidate every valid queue entry that is being stomped: drop its decode
// valid, memory/load/store flags and return its state to IQS_INVALID.
for (n = 0; n < QENTRIES; n = n + 1)
	if (iqentry_v[n] && iqentry_stomp[n]) begin
		iqentry_iv[n] <= `INV;
		iqentry_mem[n] <= `INV;
		iqentry_load[n] <= `INV;
		iqentry_store[n] <= `INV;
		iqentry_state[n] <= IQS_INVALID;
//		iqentry_agen[n] <= `INV;
//		iqentry_out[n] <= `INV;
//		iqentry_done[n] <= `INV;
//		iqentry_cmt[n] <= `INV;
	end

// A store can't be stomped on, because a store won't issue unless there are
// no instructions that could change the flow of execution before it. Meaning
// stomp would never be true for a store.
// A load could be stomped on, but the memory access is allowed to complete
// to ensure the bus acknowledge doesn't get out of sync.
// Disabled code: early release of a memory channel whose owning queue entry
// is stomped. Kept for reference only.
/*
if (iqentry_stomp[dram0_id[`QBITS]]) begin
	if (dram0==`DRAMSLOT_HASBUS)
		wb_nack();
	dram0_load <= `FALSE;
	dram0_store <= `FALSE;
	dram0_rmw <= `FALSE;
	dram0 <= `DRAMSLOT_AVAIL;
end
if (iqentry_stomp[dram1_id[`QBITS]]) begin
	if (dram1==`DRAMSLOT_HASBUS)
		wb_nack();
	dram1_load <= `FALSE;
	dram1_store <= `FALSE;
	dram1_rmw <= `FALSE;
	dram1 <= `DRAMSLOT_AVAIL;
end
if (iqentry_stomp[dram2_id[`QBITS]]) begin
	if (dram2==`DRAMSLOT_HASBUS)
		wb_nack();
	dram2_load <= `FALSE;
	dram2_store <= `FALSE;
	dram2_rmw <= `FALSE;
	dram2 <= `DRAMSLOT_AVAIL;
end
*/

// Hand the selected queue entries to the three memory channel issue tasks.
// A last_issueN value of QENTRIES or above means nothing is issued on that
// channel this cycle.
if (last_issue0 < QENTRIES)
	tDram0Issue(last_issue0);
if (last_issue1 < QENTRIES)
	tDram1Issue(last_issue1);
if (last_issue2 < QENTRIES)
	tDram2Issue(last_issue2);

//for (n = 0; n < QENTRIES; n = n + 1)
//begin
//	if (!iqentry_v[n])
//		iqentry_done[n] <= FALSE;
//end

// Commit watchdog: count the clocks during which heads[0] equals the value
// it had on the previous clock (ohead[0]); restart the count when it moves.
// NOTE(review): the reset clause clears cmt_timer with a 9-bit literal while
// the comparisons here use 12-bit literals - confirm the register is wide
// enough to ever reach 1000.
if (ohead[0]==heads[0])
	cmt_timer <= cmt_timer + 12'd1;
else
	cmt_timer <= 12'd0;

// After 1000 clocks without movement, and with icstate at IDLE, force the
// head entry to the commit state with a `FLT_CMT exception code and restart
// the timer. Being later in the block, this cmt_timer write overrides the
// increment above.
if (cmt_timer==12'd1000 && icstate==IDLE) begin
	iqentry_state[heads[0]] <= IQS_CMT;
	iqentry_exc[heads[0]] <= `FLT_CMT;
	cmt_timer <= 12'd0;
end

//
// COMMIT PHASE (dequeue only ... not register-file update)
//
// look at heads[0] and heads[1] and let 'em write to the register file if they are ready
//
//    always @(posedge clk) begin: commit_phase
// Remember this cycle's head pointers and commit valids for use next clock.
ohead[0] <= heads[0];
ohead[1] <= heads[1];
ohead[2] <= heads[2];
ocommit0_v <= commit0_v;
ocommit1_v <= commit1_v;
ocommit2_v <= commit2_v;

// Per-port handling of instructions with special commit behaviour
// (oddball_commit is a task defined elsewhere in this file).
oddball_commit(commit0_v, heads[0], 2'd0);
if (`NUM_CMT > 1)
	oddball_commit(commit1_v, heads[1], 2'd1);
if (`NUM_CMT > 2)
	oddball_commit(commit2_v, heads[2], 2'd2);

// Fetch and queue are limited to two instructions per cycle, so we might as
// well limit retiring to two instructions max to conserve logic.
// if (~|panic) casez ({ iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT, iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT, iqentry_v[heads[2]], iqentry_state[heads[2]] == IQS_CMT}) // retire 3 6'b0?_0?_0?: if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0) head_inc(3); else if (heads[0] != tail0 && heads[1] != tail0) head_inc(2); else if (heads[0] != tail0) head_inc(1); 6'b0?_0?_10: if (heads[0] != tail0 && heads[1] != tail0) head_inc(2); else if (heads[0] != tail0) head_inc(1); 6'b0?_0?_11: if (`NUM_CMT > 2 || cmt_head2) // and it's not an oddball? head_inc(3); else head_inc(2); // retire 1 (wait for regfile for heads[1]) 6'b0?_10_??: head_inc(1); // retire 2 6'b0?_11_0?, 6'b0?_11_10: if (`NUM_CMT > 1 || cmt_head1) head_inc(2); else head_inc(1); 6'b0?_11_11: if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) head_inc(3); else if (`NUM_CMT > 1 || cmt_head1) head_inc(2); else head_inc(1); 6'b10_??_??: ; 6'b11_0?_0?: if (heads[1] != tail0 && heads[2] != tail0) head_inc(3); else if (heads[1] != tail0) head_inc(2); else head_inc(1); 6'b11_0?_10: if (heads[1] != tail0) head_inc(2); else head_inc(1); 6'b11_0?_11: if (heads[1] != tail0) begin if (`NUM_CMT > 2 || cmt_head2) head_inc(3); else head_inc(2); end else head_inc(1); 6'b11_10_??: head_inc(1); 6'b11_11_0?: if (`NUM_CMT > 1 && heads[2] != tail0) head_inc(3); else if (cmt_head1 && heads[2] != tail0) head_inc(3); else if (`NUM_CMT > 1 || cmt_head1) head_inc(2); else head_inc(1); 6'b11_11_10: if (`NUM_CMT > 1 || cmt_head1) head_inc(2); else head_inc(1); 6'b11_11_11: if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) head_inc(3); else if (`NUM_CMT > 1 || cmt_head1) head_inc(2); else head_inc(1); default: begin $display("head_inc: Uncoded case %h",{ iqentry_v[heads[0]], iqentry_state[heads[0]], iqentry_v[heads[1]], iqentry_state[heads[1]], iqentry_v[heads[2]], iqentry_state[heads[2]]}); $stop; end endcase rf_source[0] <= 0; // A store will never be stomped on because they aren't issued until 
it's // guarenteed there will be no change of flow. // A load or other long running instruction might be stomped on by a change // of program flow. Stomped on loads already in progress can be aborted early. // In the case of an aborted load, random data is returned and any exceptions // are nullified. if (dram0_load) case(dram0) `DRAMSLOT_AVAIL: ; `DRAMSLOT_BUSY: if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) dram0 <= dram0 + !dram0_unc; else begin dram0 <= `DRAMREQ_READY; dram0_load <= `FALSE; xdati[63:0] <= {4{lfsro}}; end 3'd2,3'd3: if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) dram0 <= dram0 + 3'd1; else begin dram0 <= `DRAMREQ_READY; dram0_load <= `FALSE; xdati[63:0] <= {4{lfsro}}; end 3'd4: if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin if (dhit0) dram0 <= `DRAMREQ_READY; else dram0 <= `DRAMSLOT_REQBUS; end else begin dram0 <= `DRAMREQ_READY; dram0_load <= `FALSE; xdati[63:0] <= {4{lfsro}}; end `DRAMSLOT_REQBUS: if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) ; else begin dram0 <= `DRAMREQ_READY; dram0_load <= `FALSE; xdati[63:0] <= {4{lfsro}}; end `DRAMSLOT_HASBUS: if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) ; else begin dram0 <= `DRAMREQ_READY; dram0_load <= `FALSE; xdati[63:0] <= {4{lfsro}}; end `DRAMREQ_READY: dram0 <= `DRAMSLOT_AVAIL; endcase if (dram1_load && `NUM_MEM > 1) case(dram1) `DRAMSLOT_AVAIL: ; `DRAMSLOT_BUSY: dram1 <= dram1 + !dram1_unc; 3'd2: dram1 <= dram1 + 3'd1; 3'd3: dram1 <= dram1 + 3'd1; 3'd4: // if (iqentry_v[dram1_id[`QBITS]] && !iqentry_stomp[dram1_id[`QBITS]]) begin if (dhit1) dram1 <= `DRAMREQ_READY; else dram1 <= `DRAMSLOT_REQBUS; // end /* else begin dram1 <= `DRAMSLOT_AVAIL; dram1_load <= `FALSE; end*/ `DRAMSLOT_REQBUS: ; `DRAMSLOT_HASBUS: ; `DRAMREQ_READY: dram1 <= `DRAMSLOT_AVAIL; endcase if (dram2_load && `NUM_MEM > 2) case(dram2) `DRAMSLOT_AVAIL: ; `DRAMSLOT_BUSY: dram2 <= dram2 + !dram2_unc; 3'd2: dram2 <= 
dram2 + 3'd1; 3'd3: dram2 <= dram2 + 3'd1; 3'd4: if (dhit2) dram2 <= `DRAMREQ_READY; else dram2 <= `DRAMSLOT_REQBUS; `DRAMSLOT_REQBUS: ; `DRAMSLOT_HASBUS: ; `DRAMREQ_READY: dram2 <= `DRAMSLOT_AVAIL; endcase // Bus Interface Unit (BIU) // Interfaces to the external bus which is WISHBONE compatible. // Stores take precedence over other operations. // Next data cache read misses are serviced. // Uncached data reads are serviced. // Finally L2 instruction cache misses are serviced.// // set the IQ entry == DONE as soon as the SW is let loose to the memory system // `ifndef HAS_WB if (mem1_available && dram0 == `DRAMSLOT_BUSY && dram0_store) begin if ((alu0_v && (dram0_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram0_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; // iqentry_done[ dram0_id[`QBITS] ] <= `VAL; // iqentry_out[ dram0_id[`QBITS] ] <= `INV; iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE; end if (mem2_available && `NUM_MEM > 1 && dram1 == `DRAMSLOT_BUSY && dram1_store) begin if ((alu0_v && (dram1_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram1_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE; end if (mem3_available && `NUM_MEM > 2 && dram2 == `DRAMSLOT_BUSY && dram2_store) begin if ((alu0_v && (dram2_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram2_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE; end `endif `ifdef HAS_WB if (dram0==`DRAMSLOT_BUSY && dram0_store) begin if (wbptr<`WB_DEPTH-1) begin dram0 <= `DRAMSLOT_AVAIL; dram0_instr[`INSTRUCTION_OP] <= `NOP; wb_update( dram0_id, `FALSE, fnSelect(dram0_instr,dram0_addr), dram0_ol, dram0_addr, fnDato(dram0_instr,dram0_data), wbptr ); wbptr <= wbptr + 2'd1; iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE; end end else if (dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin if (wbptr<`WB_DEPTH-1) begin dram1 <= `DRAMSLOT_AVAIL; dram1_instr[`INSTRUCTION_OP] <= `NOP; 
wb_update( dram1_id, `FALSE, fnSelect(dram1_instr,dram1_addr), dram1_ol, dram1_addr, fnDato(dram1_instr,dram1_data), wbptr ); wbptr <= wbptr + 2'd1; iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE; end end else if (dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin if (wbptr<`WB_DEPTH-1) begin dram2 <= `DRAMSLOT_AVAIL; dram2_instr[`INSTRUCTION_OP] <= `NOP; wb_update( dram2_id, `FALSE, fnSelect(dram2_instr,dram2_addr), dram2_ol, dram2_addr, fnDato(dram2_instr,dram2_data), wbptr ); wbptr <= wbptr + 2'd1; iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE; end end `endif case(bstate) BIDLE: begin isCAS <= FALSE; isAMO <= FALSE; isInc <= FALSE; isSpt <= FALSE; isRMW <= FALSE; rdvq <= 1'b0; errq <= 1'b0; exvq <= 1'b0; bwhich <= 2'b00; preload <= FALSE; `ifdef HAS_WB if (wb_v[0] & wb_en & ~acki & ~cyc) begin cyc <= `HIGH; stb_o <= `HIGH; we <= `HIGH; sel_o <= wb_sel[0]; vadr <= wb_addr[0]; dat_o <= wb_data[0]; dcbuf <= {4{wb_data[0]}}; dcsel <= wb_sel[0] << {wb_addr[0][4:3],3'b0}; ol_o <= wb_ol[0]; wbo_id <= wb_id[0]; isStore <= TRUE; bstate <= wb_rmw[0] ? 
B_RMWAck : B_StoreAck; wb_v[0] <= `INV; end if (wb_v[0]==`INV && !writing_wb) begin for (j = 1; j < `WB_DEPTH; j = j + 1) begin wb_v[j-1] <= wb_v[j]; wb_id[j-1] <= wb_id[j]; wb_rmw[j-1] <= wb_rmw[j]; wb_sel[j-1] <= wb_sel[j]; wb_addr[j-1] <= wb_addr[j]; wb_data[j-1] <= wb_data[j]; wb_ol[j-1] <= wb_ol[j]; if (wbptr > 2'd0) wbptr <= wbptr - 2'd1; end wb_v[`WB_DEPTH-1] <= `INV; wb_rmw[`WB_DEPTH-1] <= `FALSE; end `endif if (~|wb_v && dram0==`DRAMSLOT_BUSY && dram0_rmw && !iqentry_stomp[dram0_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_smatch0|dbg_lmatch0) begin dramA_v <= `TRUE; dramA_id <= dram0_id; dramA_bus <= 64'h0; iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG; dram0 <= `DRAMSLOT_AVAIL; end else `endif if (!acki) begin isRMW <= dram0_rmw; isCAS <= IsCAS(dram0_instr); isAMO <= IsAMO(dram0_instr); isInc <= IsInc(dram0_instr); casid <= dram0_id; bwhich <= 2'b00; dram0 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram0_instr,dram0_addr); dcbuf <= {4{fnDato(dram0_instr,dram0_data)}}; dcsel <= fnSelect(dram0_instr,dram0_addr) << {dram0_addr[4:3],3'b0}; vadr <= dram0_addr; dat_o <= fnDato(dram0_instr,dram0_data); ol_o <= dram0_ol; bstate <= B_RMWAck; end end else if (~|wb_v && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1 && !iqentry_stomp[dram1_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_smatch1|dbg_lmatch1) begin dramB_v <= `TRUE; dramB_id <= dram1_id; dramB_bus <= 64'h0; iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG; dram1 <= `DRAMSLOT_AVAIL; end else `endif if (!acki) begin isRMW <= dram1_rmw; isCAS <= IsCAS(dram1_instr); isAMO <= IsAMO(dram1_instr); isInc <= IsInc(dram1_instr); casid <= dram1_id; bwhich <= 2'b01; dram1 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram1_instr,dram1_addr); vadr <= dram1_addr; dat_o <= fnDato(dram1_instr,dram1_data); ol_o <= dram1_ol; dcbuf <= {4{fnDato(dram1_instr,dram1_data)}}; dcsel <= fnSelect(dram1_instr,dram1_addr) << {dram1_addr[4:3],3'b0}; bstate <= B_RMWAck; end end else if 
(~|wb_v && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2 && !iqentry_stomp[dram2_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_smatch2|dbg_lmatch2) begin dramC_v <= `TRUE; dramC_id <= dram2_id; dramC_bus <= 64'h0; iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG; dram2 <= `DRAMSLOT_AVAIL; end else `endif if (!acki) begin isRMW <= dram2_rmw; isCAS <= IsCAS(dram2_instr); isAMO <= IsAMO(dram2_instr); isInc <= IsInc(dram2_instr); casid <= dram2_id; bwhich <= 2'b10; dram2 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram2_instr,dram2_addr); vadr <= dram2_addr; dat_o <= fnDato(dram2_instr,dram2_data); ol_o <= dram2_ol; dcbuf <= {4{fnDato(dram2_instr,dram2_data)}}; dcsel <= fnSelect(dram2_instr,dram2_addr) << {dram2_addr[4:3],3'b0}; bstate <= B_RMWAck; end end `ifndef HAS_WB // Check write buffer enable ? else if (dram0==`DRAMSLOT_BUSY && dram0_store) begin `ifdef SUPPORT_DBG if (dbg_smatch0) begin dramA_v <= `TRUE; dramA_id <= dram0_id; dramA_bus <= 64'h0; iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG; dram0 <= `DRAMSLOT_AVAIL; end else `endif begin bwhich <= 2'b00; if (!acki) begin dram0 <= `DRAMSLOT_HASBUS; dram0_instr[`INSTRUCTION_OP] <= `NOP; cyc <= `HIGH; stb_o <= `HIGH; we <= `HIGH; sel_o <= fnSelect(dram0_instr,dram0_addr); vadr <= dram0_addr; dat_o <= fnDato(dram0_instr,dram0_data); ol_o <= dram0_ol; isStore <= TRUE; bstate <= B_StoreAck; dcbuf <= {4{fnDato(dram0_instr,dram0_data)}}; dcsel <= fnSelect(dram0_instr,dram0_addr) << {dram0_addr[4:3],3'b0}; end // cr_o <= IsSWC(dram0_instr); end end else if (dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin `ifdef SUPPORT_DBG if (dbg_smatch1) begin dramB_v <= `TRUE; dramB_id <= dram1_id; dramB_bus <= 64'h0; iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG; dram1 <= `DRAMSLOT_AVAIL; end else `endif begin bwhich <= 2'b01; if (!acki) begin dram1 <= `DRAMSLOT_HASBUS; dram1_instr[`INSTRUCTION_OP] <= `NOP; cyc <= `HIGH; stb_o <= `HIGH; we <= `HIGH; sel_o <= fnSelect(dram1_instr,dram1_addr); vadr <= 
dram1_addr; dat_o <= fnDato(dram1_instr,dram1_data); ol_o <= dram1_ol; isStore <= TRUE; dcbuf <= {4{fnDato(dram1_instr,dram1_data)}}; dcsel <= fnSelect(dram1_instr,dram1_addr) << {dram1_addr[4:3],3'b0}; bstate <= B_StoreAck; end // cr_o <= IsSWC(dram0_instr); end end else if (dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin `ifdef SUPPORT_DBG if (dbg_smatch2) begin dramC_v <= `TRUE; dramC_id <= dram2_id; dramC_bus <= 64'h0; iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG; dram2 <= `DRAMSLOT_AVAIL; end else `endif begin bwhich <= 2'b10; if (!acki) begin dram2 <= `DRAMSLOT_HASBUS; dram2_instr[`INSTRUCTION_OP] <= `NOP; cyc <= `HIGH; stb_o <= `HIGH; we <= `HIGH; sel_o <= fnSelect(dram2_instr,dram2_addr); vadr <= dram2_addr; dat_o <= fnDato(dram2_instr,dram2_data); ol_o <= dram2_ol; isStore <= TRUE; dcbuf <= {4{fnDato(dram2_instr,dram2_data)}}; dcsel <= fnSelect(dram2_instr,dram2_addr) << {dram2_addr[4:3],3'b0}; bstate <= B_StoreAck; end // cr_o <= IsSWC(dram0_instr); end end `endif // Check for read misses on the data cache else if (~|wb_v && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load && !iqentry_stomp[dram0_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch0) begin dramA_v <= `TRUE; dramA_id <= dram0_id; dramA_bus <= 64'h0; iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG; dram0 <= `DRAMSLOT_AVAIL; end else `endif begin dram0 <= `DRAMSLOT_HASBUS; bwhich <= 2'b00; preload <= dram0_preload; bstate <= B_DCacheLoadStart; end end else if (~|wb_v && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1 && !iqentry_stomp[dram1_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch1) begin dramB_v <= `TRUE; dramB_id <= dram1_id; dramB_bus <= 64'h0; iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG; dram1 <= `DRAMSLOT_AVAIL; end else `endif begin dram1 <= `DRAMSLOT_HASBUS; bwhich <= 2'b01; preload <= dram1_preload; bstate <= B_DCacheLoadStart; end end else if (~|wb_v && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2 && 
!iqentry_stomp[dram2_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch2) begin dramC_v <= `TRUE; dramC_id <= dram2_id; dramC_bus <= 64'h0; iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG; dram2 <= `DRAMSLOT_AVAIL; end else `endif begin dram2 <= `DRAMSLOT_HASBUS; preload <= dram2_preload; bwhich <= 2'b10; bstate <= B_DCacheLoadStart; end end else if (~|wb_v && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load && !iqentry_stomp[dram0_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch0) begin dramA_v <= `TRUE; dramA_id <= dram0_id; dramA_bus <= 64'h0; iqentry_exc[dram0_id[`QBITS]] <= `FLT_DBG; dram0 <= `DRAMSLOT_AVAIL; end else `endif if (!acki) begin bwhich <= 2'b00; dram0 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram0_instr,dram0_addr); vadr <= {dram0_addr[AMSB:3],3'b0}; sr_o <= IsLWR(dram0_instr); ol_o <= dram0_ol; dccnt <= 2'd0; bstate <= B_DLoadAck; end end else if (~|wb_v && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1 && !iqentry_stomp[dram1_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch1) begin dramB_v <= `TRUE; dramB_id <= dram1_id; dramB_bus <= 64'h0; iqentry_exc[dram1_id[`QBITS]] <= `FLT_DBG; dram1 <= `DRAMSLOT_AVAIL; end else `endif if (!acki) begin bwhich <= 2'b01; dram1 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram1_instr,dram1_addr); vadr <= {dram1_addr[AMSB:3],3'b0}; sr_o <= IsLWR(dram1_instr); ol_o <= dram1_ol; dccnt <= 2'd0; bstate <= B_DLoadAck; end end else if (~|wb_v && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2 && !iqentry_stomp[dram2_id[`QBITS]]) begin `ifdef SUPPORT_DBG if (dbg_lmatch2) begin dramC_v <= `TRUE; dramC_id <= dram2_id; dramC_bus <= 64'h0; iqentry_exc[dram2_id[`QBITS]] <= `FLT_DBG; dram2 <= 2'd0; end else `endif if (!acki) begin bwhich <= 2'b10; dram2 <= `DRAMSLOT_HASBUS; cyc <= `HIGH; stb_o <= `HIGH; sel_o <= fnSelect(dram2_instr,dram2_addr); vadr <= {dram2_addr[AMSB:3],3'b0}; sr_o <= IsLWR(dram2_instr); ol_o <= dram2_ol; dccnt <= 
2'd0; bstate <= B_DLoadAck; end end // Check for L2 cache miss else if (~|wb_v && !ihitL2 && !acki) begin bstate <= B_WaitIC; /* cti_o <= 3'b001; bte_o <= 2'b00;//2'b01; // 4 beat burst wrap cyc <= `HIGH; stb_o <= `HIGH; sel_o <= 8'hFF; icl_o <= `HIGH; iccnt <= 3'd0; icack <= 1'b0; // adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0}; // L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0}; vadr <= {L1_adr[AMSB:5],5'h0}; `ifdef SUPPORT_SMT `else ol_o <= ol;//??? `endif L2_adr <= {L1_adr[AMSB:5],5'h0}; L2_xsel <= 1'b0; selL2 <= TRUE; bstate <= B_ICacheAck; */ end end B_WaitIC: begin cti_o <= icti; bte_o <= ibte; cyc <= icyc; stb_o <= istb; sel_o <= isel; vadr <= iadr; we <= 1'b0; if (L2_nxt) bstate <= BIDLE; end // Terminal state for a store operation. // Note that if only a single memory channel is selected, bwhich will be a // constant 0. This should cause the extra code to be removed. B_StoreAck: begin StoreAck1 <= `TRUE; isStore <= `TRUE; if (acki|err_i|tlb_miss|wrv_i) begin wb_nack(); cr_o <= 1'b0; // This isn't a good way of doing things; the state should be propagated // to the commit stage, however since this is a store we know there will // be no change of program flow. So the reservation status bit is set // here. The author wanted to avoid the complexity of propagating the // input signal to the commit stage. It does mean that the SWC // instruction should be surrounded by SYNC's. if (cr_o) sema[0] <= rbi_i; `ifdef HAS_WB for (n = 0; n < QENTRIES; n = n + 1) begin if (wbo_id[n]) begin iqentry_exc[n] <= tlb_miss ? `FLT_TLB : wrv_i ? `FLT_DWF : err_i ? `FLT_IBE : `FLT_NONE; if (err_i|wrv_i) begin wb_v <= 1'b0; // Invalidate write buffer if there is a problem with the store wb_en <= `FALSE; // and disable write buffer end iqentry_state[n] <= IQS_CMT; iqentry_aq[n] <= `INV; end end `else case(bwhich) 2'd0: begin dram0 <= `DRAMSLOT_AVAIL; iqentry_exc[dram0_id[`QBITS]] <= (wrv_i|err_i) ? 
`FLT_DWF : `FLT_NONE; iqentry_state[dram0_id[`QBITS]] <= IQS_CMT; iqentry_aq[ dram0_id[`QBITS] ] <= `INV; //iqentry_out[ dram0_id[`QBITS] ] <= `INV; end 2'd1: if (`NUM_MEM > 1) begin dram1 <= `DRAMSLOT_AVAIL; iqentry_exc[dram1_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE; iqentry_state[dram1_id[`QBITS]] <= IQS_CMT; iqentry_aq[ dram1_id[`QBITS] ] <= `INV; //iqentry_out[ dram1_id[`QBITS] ] <= `INV; end 2'd2: if (`NUM_MEM > 2) begin dram2 <= `DRAMSLOT_AVAIL; iqentry_exc[dram2_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE; iqentry_state[dram2_id[`QBITS]] <= IQS_CMT; iqentry_aq[ dram2_id[`QBITS] ] <= `INV; //iqentry_out[ dram2_id[`QBITS] ] <= `INV; end default: ; endcase `endif bstate <= B_LSNAck; end end B_DCacheLoadStart: if (~acki & ~cyc) begin // check for idle bus - it should be dccnt <= 2'd0; bstate <= B_DCacheLoadAck; cti_o <= 3'b001; // constant address burst bte_o <= 2'b00; // linear burst, non-wrapping cyc <= `HIGH; stb_o <= `HIGH; // Select should be selecting all byte lanes for a cache load sel_o <= 8'hFF; // bwhich should always be one of the three channels. // If single bit upset, continue to select channel zero when // there's only one available. 
// Data-cache line fill: remainder of B_DCacheLoadStart, then B_DCacheLoadAck,
// B_DCacheLoadStb and B_DCacheLoadWait.
//  - The case on bwhich drives vadr with the requesting channel's address with the low
//    five bits cleared, and ol_o with that channel's operating level. If the channel's
//    queue entry is being stomped, wb_nack() is called, the channel is set to
//    `DRAMREQ_READY and bstate returns to BIDLE. When `NUM_MEM excludes the selected
//    channel, a lower-numbered channel is used instead.
//  - B_DCacheLoadAck: on acki/err_i/tlb_miss/rdv_i the 64-bit beat on dat_i is stored
//    into the dcbuf slice selected by dccnt, dccnt and vadr[4:3] are incremented, and
//    cti_o is set to 3'b111 while dccnt is 2. On the beat with dccnt==3, wb_nack() is
//    called, dcwr is raised and the state moves to B_DCacheLoadWait. On that same beat,
//    unless preload is set, iqentry_exc of the requesting entry is written
//    (`FLT_NONE if the entry is stomped). If bok_i is low the strobe is dropped and
//    B_DCacheLoadStb is entered; the later assignment for dccnt==3 takes precedence.
//  - B_DCacheLoadStb re-asserts stb_o and returns to B_DCacheLoadAck, or abandons the
//    fill if the requesting entry has been stomped.
//  - B_DCacheLoadWait clears dcsel and dcwr and decrements dcwait_ctr each clock,
//    moving to B_DCacheLoadResetBusy once bit 3 of the counter is set.
case(bwhich) 2'd1: if (`NUM_MEM > 1) begin vadr <= {dram1_addr[AMSB:5],5'b0}; ol_o <= dram1_ol; if (iqentry_stomp[dram1_id[`QBITS]]) begin wb_nack(); dram1 <= `DRAMREQ_READY; bstate <= BIDLE; end end else begin vadr <= {dram0_addr[AMSB:5],5'b0}; ol_o <= dram0_ol; if (iqentry_stomp[dram0_id[`QBITS]]) begin wb_nack(); dram0 <= `DRAMREQ_READY; bstate <= BIDLE; end end 2'd2: if (`NUM_MEM > 2) begin vadr <= {dram2_addr[AMSB:5],5'b0}; ol_o <= dram2_ol; if (iqentry_stomp[dram2_id[`QBITS]]) begin wb_nack(); dram2 <= `DRAMREQ_READY; bstate <= BIDLE; end end else if (`NUM_MEM > 1) begin vadr <= {dram1_addr[AMSB:5],5'b0}; ol_o <= dram1_ol; if (iqentry_stomp[dram1_id[`QBITS]]) begin wb_nack(); dram1 <= `DRAMREQ_READY; bstate <= BIDLE; end end else begin vadr <= {dram0_addr[AMSB:5],5'b0}; ol_o <= dram0_ol; if (iqentry_stomp[dram0_id[`QBITS]]) begin wb_nack(); dram0 <= `DRAMREQ_READY; bstate <= BIDLE; end end default: begin vadr <= {dram0_addr[AMSB:5],5'b0}; ol_o <= dram0_ol; if (iqentry_stomp[dram0_id[`QBITS]]) begin wb_nack(); dram0 <= `DRAMREQ_READY; bstate <= BIDLE; end end endcase end // Data cache load terminal state B_DCacheLoadAck: begin dcsel <= 32'hFFFFFFFF; if (acki|err_i|tlb_miss|rdv_i) begin if (!bok_i) begin stb_o <= `LOW; bstate <= B_DCacheLoadStb; end errq <= errq | err_i; rdvq <= rdvq | rdv_i; if (!preload) // A preload instruction ignores any error if (dccnt==3'd3) case(bwhich) 2'd0: if (iqentry_stomp[dram0_id[`QBITS]]) iqentry_exc[dram0_id[`QBITS]] <= `FLT_NONE; else iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE; 2'd1: if (iqentry_stomp[dram1_id[`QBITS]]) iqentry_exc[dram1_id[`QBITS]] <= `FLT_NONE; else iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE; 2'd2: if (iqentry_stomp[dram2_id[`QBITS]]) iqentry_exc[dram2_id[`QBITS]] <= `FLT_NONE; else iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? 
`FLT_DRF : `FLT_NONE; default: if (iqentry_stomp[dram0_id[`QBITS]]) iqentry_exc[dram0_id[`QBITS]] <= `FLT_NONE; else iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : rdv_i ? `FLT_DRF : `FLT_NONE; endcase case(dccnt) 2'd0: dcbuf[63:0] <= dat_i; 2'd1: dcbuf[127:64] <= dat_i; 2'd2: dcbuf[191:128] <= dat_i; 2'd3: dcbuf[255:192] <= dat_i; endcase dccnt <= dccnt + 2'd1; vadr[4:3] <= vadr[4:3] + 2'd1; if (dccnt==2'd2) cti_o <= 3'b111; if (dccnt==2'd3) begin wb_nack(); dcwr <= 1'b1; dcwait_ctr <= dcwait; bstate <= B_DCacheLoadWait; end end end B_DCacheLoadStb: begin stb_o <= `HIGH; bstate <= B_DCacheLoadAck; case(bwhich) 2'd0: if (iqentry_stomp[dram0_id[`QBITS]]) begin wb_nack(); dram0 <= `DRAMREQ_READY; bstate <= BIDLE; end 2'd1: if (iqentry_stomp[dram1_id[`QBITS]]) begin wb_nack(); dram1 <= `DRAMREQ_READY; bstate <= BIDLE; end 2'd2: if (iqentry_stomp[dram2_id[`QBITS]]) begin wb_nack(); dram2 <= `DRAMREQ_READY; bstate <= BIDLE; end default: if (iqentry_stomp[dram0_id[`QBITS]]) begin wb_nack(); dram0 <= `DRAMREQ_READY; bstate <= BIDLE; end endcase end B_DCacheLoadWait: begin dcsel <= 32'h0; dcwr <= 1'b0; dcwait_ctr <= dcwait_ctr - 4'd1; if (dcwait_ctr[3]) // detect underflow bstate <= B_DCacheLoadResetBusy; end // There could be more than one memory cycle active. We reset the state // of the other machines to retest for a hit because otherwise sequential // loading of memory will cause successive machines to miss resulting in // multiple dcache loads that aren't needed.
// B_DCacheLoadResetBusy: final state of a data-cache line fill.
// The channel that requested the fill (bwhich) is marked `DRAMREQ_READY. Any other
// channel that is not `DRAMSLOT_AVAIL and whose address matches vadr in bits [AMSB:5]
// (i.e. lies in the line just loaded) is put back to `DRAMSLOT_BUSY so that it tests
// dhit again instead of starting a second fill of the same line.
B_DCacheLoadResetBusy:
	begin
		if (`NUM_MEM > 1)
			case(bwhich)
			2'b01:
				begin
					dram1 <= `DRAMREQ_READY;
					if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5])
						dram0 <= `DRAMSLOT_BUSY;	// causes retest of dhit
					if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5])
						dram2 <= `DRAMSLOT_BUSY;
				end
			2'b10:
				if (`NUM_MEM > 2) begin
					dram2 <= `DRAMREQ_READY;
					if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5])
						dram0 <= `DRAMSLOT_BUSY;	// causes retest of dhit
					if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5])
						dram1 <= `DRAMSLOT_BUSY;
				end
				else begin
					dram0 <= `DRAMREQ_READY;
					if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5])
						dram1 <= `DRAMSLOT_BUSY;
					if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5])
						dram2 <= `DRAMSLOT_BUSY;
				end
			default:
				begin
					dram0 <= `DRAMREQ_READY;
					if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5])
						dram1 <= `DRAMSLOT_BUSY;
					if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5])
						dram2 <= `DRAMSLOT_BUSY;
				end
			endcase
		else begin
			dram0 <= `DRAMREQ_READY;
		end
		bstate <= BIDLE;
	end

// B_RMWAck: read half of a compare-and-swap or read-modify-write cycle.
// Entered from BIDLE with the bus cycle already started; acts when the read is
// acknowledged or faults. casid identifies the queue entry that owns the operation.
B_RMWAck:
	if (acki|err_i|tlb_miss|rdv_i) begin
		if (isCAS) begin
			// The instruction result is the outcome of the comparison.
			iqentry_res	[ casid[`QBITS] ] <= (dat_i == cas);
			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
//			iqentry_done[ casid[`QBITS] ] <= `VAL;
//			iqentry_out [ casid[`QBITS] ] <= `INV;
			iqentry_state [ casid[`QBITS] ] <= IQS_DONE;
			iqentry_instr[ casid[`QBITS]] <= `NOP_INSN;
			if (err_i | rdv_i)
				iqentry_ma[casid[`QBITS]] <= vadr;
			if (dat_i == cas) begin
				// Match: keep the cycle open and switch to the write half.
				// NOTE(review): no B15 item is visible in this case statement - confirm
				// where B15 is decoded.
				stb_o <= `LOW;
				we <= `TRUE;
				bstate <= B15;
				check_abort_load();
			end
			else begin
				// Mismatch: capture the value read, end the bus cycle and release the channel.
				cas <= dat_i;
				cyc <= `LOW;
				stb_o <= `LOW;
				case(bwhich)
				2'b00:	dram0 <= `DRAMREQ_READY;
				2'b01:	dram1 <= `DRAMREQ_READY;
				2'b10:	dram2 <= `DRAMREQ_READY;
				default:	;
				endcase
				bstate <= B_LSNAck;
				check_abort_load();
			end
		end
		else if (isRMW) begin
			// Latch the operands used to form rmw_res, which B20 writes back.
			rmw_instr <= iqentry_instr[casid[`QBITS]];
			rmw_argA <= dat_i;
			if (isSpt) begin
				rmw_argB <= 64'd1 << iqentry_a1[casid[`QBITS]][63:58];
				rmw_argC <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
					iqentry_a3[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58] :
					iqentry_a2[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58];
			end
			else if (isInc) begin
				// 5-bit immediate, sign-extended to 64 bits; its position depends on the format.
				rmw_argB <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
					{{59{iqentry_instr[casid[`QBITS]][22]}},iqentry_instr[casid[`QBITS]][22:18]} :
					{{59{iqentry_instr[casid[`QBITS]][17]}},iqentry_instr[casid[`QBITS]][17:13]};
			end
			else begin // isAMO
				iqentry_res [ casid[`QBITS] ] <= dat_i;
				// When instruction bit 31 is set the operand is the 5-bit field [20:16],
				// sign-extended in the same way as the isInc immediates above. The
				// replication count must apply to the sign bit [20] alone: replicating
				// the whole field [20:16] builds a 300-bit repeating pattern that is
				// truncated on assignment rather than a sign extension.
				rmw_argB <= iqentry_instr[casid[`QBITS]][31] ?
					{{59{iqentry_instr[casid[`QBITS]][20]}},iqentry_instr[casid[`QBITS]][20:16]} :
					iqentry_a2[casid[`QBITS]];
			end
			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
			stb_o <= `LOW;
			bstate <= B20;
			check_abort_load();
		end
	end

// Regular load
// B_DLoadAck: data phase of an uncached load. The beat on dat_i is captured into the
// xdati slice selected by dccnt and the requesting channel is marked `DRAMREQ_READY.
// On channel 0 a hexi-sized access takes two beats: the first beat advances dccnt and
// vadr and goes through B_DLoadNack to fetch the second.
B_DLoadAck:
	if (acki|err_i|tlb_miss|rdv_i) begin
		wb_nack();
		sr_o <= `LOW;
		case(dccnt)
		2'd0:	xdati[63:0] <= dat_i;
		2'd1:	xdati[127:64] <= dat_i;
		endcase
		// Default next state. This must be assigned BEFORE the case below: the last
		// nonblocking assignment to bstate wins, so assigning it afterwards would
		// override the B_DLoadNack transition and the second hexi beat would never run.
		bstate <= B_LSNAck;
		case(bwhich)
		2'b00:
			begin
				if (dram0_memsize==hexi) begin
					if (dccnt==2'd1) begin
						// Second beat: both halves are now available.
						dram0 <= `DRAMREQ_READY;
						iqentry_seg_base[dram0_id[`QBITS]] <= xdati[63:0];
						iqentry_seg_acr[dram0_id[`QBITS]] <= dat_i;
					end
					else begin
						// First beat: keep the cycle and request the next 8 bytes.
						dccnt <= dccnt + 2'd1;
						cyc <= `HIGH;
						sel_o <= 8'hFF;
						vadr <= vadr + 64'd8;
						bstate <= B_DLoadNack;
					end
				end
				else
					dram0 <= `DRAMREQ_READY;
				if (iqentry_stomp[dram0_id[`QBITS]])
					iqentry_exc [dram0_id[`QBITS]] <= `FLT_NONE;
				else
					iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
			end
		2'b01:
			if (`NUM_MEM > 1) begin
				dram1 <= `DRAMREQ_READY;
				if (iqentry_stomp[dram1_id[`QBITS]])
					iqentry_exc [dram1_id[`QBITS]] <= `FLT_NONE;
				else
					iqentry_exc [ dram1_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
			end
		2'b10:
			if (`NUM_MEM > 2) begin
				dram2 <= `DRAMREQ_READY;
				if (iqentry_stomp[dram2_id[`QBITS]])
					iqentry_exc [dram2_id[`QBITS]] <= `FLT_NONE;
				else
					iqentry_exc [ dram2_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
			end
		default:	;
		endcase
		check_abort_load();
	end

// B_DLoadNack: wait for the acknowledge to drop, then strobe the next beat.
B_DLoadNack:
	if (~acki) begin
		stb_o <= `HIGH;
		bstate <= B_DLoadAck;
		check_abort_load();
	end

// Three cycles to determine if there's a cache hit during a store.
// Remaining bus-interface states, end of the bstate case, and instruction-queue tail update.
//  - B16 waits for the dhit signal of the channel selected by bwhich, then marks that
//    channel `DRAMREQ_READY and steps through B17 and B18 to B_LSNAck. An unexpected
//    bwhich value returns directly to BIDLE.
//  - B_LSNAck returns to BIDLE and clears StoreAck1 and isStore.
//  - B20 and B21 wait for acki to be low and re-assert stb_o. B20 also raises we, drives
//    dat_o from rmw_res and continues in B_StoreAck; B21 continues in B_RMWAck.
//  - Every state above also calls check_abort_load(). Any undecoded state returns to BIDLE.
//  - Tail update: when there is no branch miss, tail0/tail1 advance modulo QENTRIES by
//    one or two entries depending on the fetch buffer valid bits, canq1/canq2, a
//    predicted-taken branch in fetchbuf0 and the vector element test. On a branch miss
//    with thread_en clear, the tails are moved to the stomped entry whose predecessor
//    (modulo QENTRIES) is not stomped.
B16: begin case(bwhich) 2'd0: if (dhit0) begin dram0 <= `DRAMREQ_READY; bstate <= B17; end 2'd1: if (dhit1) begin dram1 <= `DRAMREQ_READY; bstate <= B17; end 2'd2: if (dhit2) begin dram2 <= `DRAMREQ_READY; bstate <= B17; end default: bstate <= BIDLE; endcase check_abort_load(); end B17: begin bstate <= B18; check_abort_load(); end B18: begin bstate <= B_LSNAck; check_abort_load(); end B_LSNAck: begin bstate <= BIDLE; StoreAck1 <= `FALSE; isStore <= `FALSE; check_abort_load(); end B20: if (~acki) begin stb_o <= `HIGH; we <= `HIGH; dat_o <= fnDato(rmw_instr,rmw_res); bstate <= B_StoreAck; check_abort_load(); end B21: if (~acki) begin stb_o <= `HIGH; bstate <= B_RMWAck; check_abort_load(); end default: bstate <= BIDLE; endcase if (!branchmiss) begin case({fetchbuf0_v, fetchbuf1_v}) 2'b00: ; 2'b01: if (canq1) begin tail0 <= (tail0+2'd1) % QENTRIES; tail1 <= (tail1+2'd1) % QENTRIES; end 2'b10: if (canq1) begin tail0 <= (tail0+2'd1) % QENTRIES; tail1 <= (tail1+2'd1) % QENTRIES; end 2'b11: if (canq1) begin if (IsBranch(fetchbuf0_instr) && predict_taken0 && fetchbuf0_thrd==fetchbuf1_thrd) begin tail0 <= (tail0+2'd1) % QENTRIES; tail1 <= (tail1+2'd1) % QENTRIES; end else begin if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin if (canq2) begin tail0 <= (tail0 + 3'd2) % QENTRIES; tail1 <= (tail1 + 3'd2) % QENTRIES; end else begin // queued1 will be true tail0 <= (tail0+2'd1) % QENTRIES; tail1 <= (tail1+2'd1) % QENTRIES; end end end end endcase end else if (!thread_en) begin // if branchmiss for (n = QENTRIES-1; n >= 0; n = n - 1) // (QENTRIES-1) is needed to ensure that n increments forwards so that the modulus is // a positive number. if (iqentry_stomp[n] & ~iqentry_stomp[(n+(QENTRIES-1))%QENTRIES]) begin tail0 <= n; tail1 <= (n + 1) % QENTRIES; end // otherwise, it is the last instruction in the queue that has been mispredicted ...
do nothing end // #5 rf[0] = 0; rf_v[0] = 1; rf_source[0] = 0; `ifdef SIM $display("\n\n\n\n\n\n\n\n"); $display("TIME %0d", $time); $display("%h #", pc0); `ifdef SUPPORT_SMT $display ("Regfile: %d", rgs[0]); for (n=0; n < 32; n=n+4) begin $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", n[4:0]+0, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], n[4:0]+1, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], n[4:0]+2, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], n[4:0]+3, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] ); end $display ("Regfile: %d", rgs[1]); for (n=128; n < 160; n=n+4) begin $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", n[4:0]+0, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], n[4:0]+1, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], n[4:0]+2, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], n[4:0]+3, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] ); end `else $display ("Regfile: %d", rgs); for (n=0; n < 32; n=n+4) begin $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", n[4:0]+0, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], n[4:0]+1, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], n[4:0]+2, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], n[4:0]+3, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] ); end `endif `ifdef FCU_ENH $display("Call Stack:"); for (n = 0; n < 16; n = n + 4) $display("%c%d: %h %c%d: %h %c%d: %h %c%d: %h", gFetchbufInst.gb1.ufb1.ursb1.rasp==n+0 ?">" : " ", n[4:0]+0, gFetchbufInst.gb1.ufb1.ursb1.ras[n+0], gFetchbufInst.gb1.ufb1.ursb1.rasp==n+1 ?">" : " ", n[4:0]+1, 
gFetchbufInst.gb1.ufb1.ursb1.ras[n+1],
		gFetchbufInst.gb1.ufb1.ursb1.rasp==n+2 ?">" : " ", n[4:0]+2, gFetchbufInst.gb1.ufb1.ursb1.ras[n+2],
		gFetchbufInst.gb1.ufb1.ursb1.rasp==n+3 ?">" : " ", n[4:0]+3, gFetchbufInst.gb1.ufb1.ursb1.ras[n+3]
	);
	$display("\n");
`endif
//	$display("Return address stack:");
//	for (n = 0; n < 16; n = n + 1)
//		$display("%d %h", rasp+n[3:0], ras[rasp+n[3:0]]);
	$display("TakeBr:%d #", take_branch);//, backpc);
	$display("Insn%d: %h", 0, insn0);
	// Fetch buffer dump. The two marker characters are ASCII 45 ('-') and 62 ('>');
	// which one is printed depends on fetchbuf.
	if (`WAYS==1) begin
		$display("%c%c A: %d %h %h #", 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc);
		$display("%c%c B: %d %h %h #", 45, fetchbuf?62:45, fetchbufB_v, fetchbufB_instr, fetchbufB_pc);
	end
	else if (`WAYS > 1) begin
		$display("Insn%d: %h", 1, insn1);
		$display("%c%c A: %d %h %h #", 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc);
		$display("%c%c B: %d %h %h #", 45, fetchbuf?45:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc);
		// The C/D dump used to be a trailing "else if (`WAYS > 2)" after the
		// "`WAYS > 1" test, which made it unreachable. It is nested here so that it
		// runs under the condition it was written with.
		// NOTE(review): A/B share one marker and C/D the opposite one, which suggests
		// C/D may be meant to print for any `WAYS > 1 - confirm and relax the test if so.
		if (`WAYS > 2) begin
			$display("%c%c C: %d %h %h #", 45, fetchbuf?62:45, fetchbufC_v, fetchbufC_instr, fetchbufC_pc);
			$display("%c%c D: %d %h %h #", 45, fetchbuf?62:45, fetchbufD_v, fetchbufD_instr, fetchbufD_pc);
		end
	end
	// Instruction queue dump, one line per entry.
	for (i=0; i<QENTRIES; i=i+1)
		$display("%c%c %d: %c%c%c%c %d %d %c%c %c %c%h %d %o %h %h %h %d %o %h %d %o %h %d %o %d:%h %h %d#",
		(i[`QBITS]==heads[0])?"C":".",
		(i[`QBITS]==tail0)?"Q":".",
		i[`QBITS],
		iqentry_state[i]==IQS_INVALID ? "-" :
		iqentry_state[i]==IQS_QUEUED ? "Q" :
		iqentry_state[i]==IQS_OUT ? "O" :
		iqentry_state[i]==IQS_AGEN ? "A" :
		iqentry_state[i]==IQS_MEM ? "M" :
		iqentry_state[i]==IQS_DONE ? "D" :
		iqentry_state[i]==IQS_CMT ? "C" : "?",
//		iqentry_v[i] ? "v" : "-",
		iqentry_iv[i] ? "I" : "-",
		iqentry_done[i]?"d":"-",
		iqentry_out[i]?"o":"-",
		iqentry_bt[i],
		iqentry_memissue[i],
		iqentry_agen[i] ? "a": "-",
		iqentry_alu0_issue[i]?"0":iqentry_alu1_issue[i]?"1":"-",
		iqentry_stomp[i]?"s":"-",
		iqentry_fc[i] ? "F" : iqentry_mem[i] ? "M" : (iqentry_alu[i]==1'b1) ? "a" : (iqentry_alu[i]==1'bx) ? "X" : iqentry_fpu[i] ?
"f" : "O", iqentry_instr[i], iqentry_tgt[i][4:0], iqentry_exc[i], iqentry_res[i], iqentry_a0[i], iqentry_a1[i], iqentry_a1_v[i], iqentry_a1_s[i], iqentry_a2[i], iqentry_a2_v[i], iqentry_a2_s[i], iqentry_a3[i], iqentry_a3_v[i], iqentry_a3_s[i], iqentry_thrd[i], iqentry_pc[i], iqentry_sn[i], iqentry_ven[i] ); $display("DRAM"); $display("%d %h %h %c%h %o #", dram0, dram0_addr, dram0_data, (IsFlowCtrl(dram0_instr) ? 98 : (IsMem(dram0_instr)) ? 109 : 97), dram0_instr, dram0_id); if (`NUM_MEM > 1) $display("%d %h %h %c%h %o #", dram1, dram1_addr, dram1_data, (IsFlowCtrl(dram1_instr) ? 98 : (IsMem(dram1_instr)) ? 109 : 97), dram1_instr, dram1_id); if (`NUM_MEM > 2) $display("%d %h %h %c%h %o #", dram2, dram2_addr, dram2_data, (IsFlowCtrl(dram2_instr) ? 98 : (IsMem(dram2_instr)) ? 109 : 97), dram2_instr, dram2_id); $display("%d %h %o #", dramA_v, dramA_bus, dramA_id); if (`NUM_MEM > 1) $display("%d %h %o #", dramB_v, dramB_bus, dramB_id); if (`NUM_MEM > 2) $display("%d %h %o #", dramC_v, dramC_bus, dramC_id); $display("ALU"); $display("%d %h %h %h %c%h %o %h #", alu0_dataready, alu0_argI, alu0_argA, alu0_argB, (IsFlowCtrl(alu0_instr) ? 98 : IsMem(alu0_instr) ? 109 : 97), alu0_instr, alu0_sourceid, alu0_pc); $display("%d %h %o 0 #", alu0_v, alu0_bus, alu0_id); if (`NUM_ALU > 1) begin $display("%d %h %h %h %c%h %o %h #", alu1_dataready, alu1_argI, alu1_argA, alu1_argB, (IsFlowCtrl(alu1_instr) ? 98 : IsMem(alu1_instr) ? 
109 : 97), alu1_instr, alu1_sourceid, alu1_pc); $display("%d %h %o 0 #", alu1_v, alu1_bus, alu1_id); end $display("FCU"); $display("%d %h %h %h %h %c%c #", fcu_v, fcu_bus, fcu_argI, fcu_argA, fcu_argB, fcu_takb?"T":"-", fcu_pt?"T":"-"); $display("%c %h %h %h %h #", fcu_branchmiss?"m":" ", fcu_sourceid, fcu_misspc, fcu_nextpc, fcu_brdisp); $display("Commit"); $display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]); $display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]); $display("instructions committed: %d valid committed: %d ticks: %d ", CC, I, tick); $display("Write Buffer:"); for (n = `WB_DEPTH-1; n >= 0; n = n - 1) $display("%c adr: %h dat: %h", wb_v[n]?" ":"*", wb_addr[n], wb_data[n]); $display("Write merges: %d", wb_merges); `endif // SIM // // $display("\n\n\n\n\n\n\n\n"); // $display("TIME %0d", $time); // $display(" pc0=%h", pc0); // $display(" pc1=%h", pc1); // $display(" reg0=%h, v=%d, src=%o", rf[0], rf_v[0], rf_source[0]); // $display(" reg1=%h, v=%d, src=%o", rf[1], rf_v[1], rf_source[1]); // $display(" reg2=%h, v=%d, src=%o", rf[2], rf_v[2], rf_source[2]); // $display(" reg3=%h, v=%d, src=%o", rf[3], rf_v[3], rf_source[3]); // $display(" reg4=%h, v=%d, src=%o", rf[4], rf_v[4], rf_source[4]); // $display(" reg5=%h, v=%d, src=%o", rf[5], rf_v[5], rf_source[5]); // $display(" reg6=%h, v=%d, src=%o", rf[6], rf_v[6], rf_source[6]); // $display(" reg7=%h, v=%d, src=%o", rf[7], rf_v[7], rf_source[7]); // $display("Fetch Buffers:"); // $display(" %c%c fbA: v=%d instr=%h pc=%h %c%c fbC: v=%d instr=%h pc=%h", // fetchbuf?32:45, fetchbuf?32:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc, // fetchbuf?45:32, fetchbuf?62:32, fetchbufC_v, fetchbufC_instr, fetchbufC_pc); // $display(" %c%c fbB: v=%d instr=%h pc=%h %c%c fbD: v=%d instr=%h pc=%h", // fetchbuf?32:45, fetchbuf?32:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc, // fetchbuf?45:32, fetchbuf?62:32, fetchbufD_v, fetchbufD_instr, 
fetchbufD_pc); // $display(" branchback=%d backpc=%h", branchback, backpc); // $display("Instruction Queue:"); // for (i=0; i<8; i=i+1) // $display(" %c%c%d: v=%d done=%d out=%d agen=%d res=%h op=%d bt=%d tgt=%d a1=%h (v=%d/s=%o) a2=%h (v=%d/s=%o) im=%h pc=%h exc=%h", // (i[`QBITS]==heads[0])?72:32, (i[`QBITS]==tail0)?84:32, i, // iqentry_v[i], iqentry_done[i], iqentry_out[i], iqentry_agen[i], iqentry_res[i], iqentry_op[i], // iqentry_bt[i], iqentry_tgt[i], iqentry_a1[i], iqentry_a1_v[i], iqentry_a1_s[i], iqentry_a2[i], iqentry_a2_v[i], // iqentry_a2_s[i], iqentry_a0[i], iqentry_pc[i], iqentry_exc[i]); // $display("Scheduling Status:"); // $display(" iqentry0 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_0_issue, iqentry_0_islot, iqentry_stomp[0], iqentry_source[0], iqentry_memready[0], iqentry_memissue[0]); // $display(" iqentry1 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_1_issue, iqentry_1_islot, iqentry_stomp[1], iqentry_source[1], iqentry_memready[1], iqentry_memissue[1]); // $display(" iqentry2 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_2_issue, iqentry_2_islot, iqentry_stomp[2], iqentry_source[2], iqentry_memready[2], iqentry_memissue[2]); // $display(" iqentry3 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_3_issue, iqentry_3_islot, iqentry_stomp[3], iqentry_source[3], iqentry_memready[3], iqentry_memissue[3]); // $display(" iqentry4 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_4_issue, iqentry_4_islot, iqentry_stomp[4], iqentry_source[4], iqentry_memready[4], iqentry_memissue[4]); // $display(" iqentry5 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_5_issue, iqentry_5_islot, iqentry_stomp[5], iqentry_source[5], iqentry_memready[5], iqentry_memissue[5]); // $display(" iqentry6 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_6_issue, iqentry_6_islot, 
iqentry_stomp[6], iqentry_source[6], iqentry_memready[6], iqentry_memissue[6]); // $display(" iqentry7 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", // iqentry_7_issue, iqentry_7_islot, iqentry_stomp[7], iqentry_source[7], iqentry_memready[7], iqentry_memissue[7]); // $display("ALU Inputs:"); // $display(" 0: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d", // alu0_available, alu0_dataready, alu0_sourceid, alu0_op, alu0_argA, // alu0_argB, alu0_argI, alu0_bt); // $display(" 1: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d", // alu1_available, alu1_dataready, alu1_sourceid, alu1_op, alu1_argA, // alu1_argB, alu1_argI, alu1_bt); // $display("ALU Outputs:"); // $display(" 0: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o", // alu0_v, alu0_bus, alu0_id, alu0_branchmiss, alu0_misspc, alu0_sourceid); // $display(" 1: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o", // alu1_v, alu1_bus, alu1_id, alu1_branchmiss, alu1_misspc, alu1_sourceid); // $display("DRAM Status:"); // $display(" OUT: v=%d data=%h tgt=%d id=%o", dram_v, dram_bus, dram_tgt, dram_id); // $display(" dram0: status=%h addr=%h data=%h op=%d tgt=%d id=%o", // dram0, dram0_addr, dram0_data, dram0_op, dram0_tgt, dram0_id); // $display(" dram1: status=%h addr=%h data=%h op=%d tgt=%d id=%o", // dram1, dram1_addr, dram1_data, dram1_op, dram1_tgt, dram1_id); // $display(" dram2: status=%h addr=%h data=%h op=%d tgt=%d id=%o", // dram2, dram2_addr, dram2_data, dram2_op, dram2_tgt, dram2_id); // $display("Commit Buses:"); // $display(" 0: v=%d id=%o data=%h", commit0_v, commit0_id, commit0_bus); // $display(" 1: v=%d id=%o data=%h", commit1_v, commit1_id, commit1_bus); // // $display("Memory Contents:"); // for (j=0; j<64; j=j+16) // $display(" %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", // m[j+0], m[j+1], m[j+2], m[j+3], m[j+4], m[j+5], m[j+6], m[j+7], // m[j+8], m[j+9], m[j+10], m[j+11], m[j+12], m[j+13], m[j+14], m[j+15]); $display(""); if (|panic) begin $display(""); 
$display("-----------------------------------------------------------------"); $display("-----------------------------------------------------------------"); $display("--------------- PANIC:%s -----------------", message[panic]); $display("-----------------------------------------------------------------"); $display("-----------------------------------------------------------------"); $display(""); $display("instructions committed: %d", I); $display("total execution cycles: %d", $time / 10); $display(""); end if (|panic && ~outstanding_stores) begin $finish; end /* for (n = 0; n < QENTRIES; n = n + 1) if (branchmiss) begin if (!setpred[n]) begin iqentry_instr[n][`INSTRUCTION_OP] <= `NOP; iqentry_done[n] <= iqentry_v[n]; iqentry_cmt[n] <= iqentry_v[n]; end end */ rf_source[ 0] <= {`QBIT{1'b1}}; rf_source[32] <= {`QBIT{1'b1}}; rf_source[64] <= {`QBIT{1'b1}}; rf_source[96] <= {`QBIT{1'b1}}; `ifdef SUPPORTSMT rf_source[128] <= {`QBIT{1'b1}}; rf_source[160] <= {`QBIT{1'b1}}; rf_source[192] <= {`QBIT{1'b1}}; rf_source[224] <= {`QBIT{1'b1}}; `endif end // clock domain /* always @(posedge clk) if (rst) begin tail0 <= 3'd0; tail1 <= 3'd1; end else begin if (!branchmiss) begin case({fetchbuf0_v, fetchbuf1_v}) 2'b00: ; 2'b01: if (canq1) begin tail0 <= idp1(tail0); tail1 <= idp1(tail1); end 2'b10: if (canq1) begin tail0 <= idp1(tail0); tail1 <= idp1(tail1); end 2'b11: if (canq1) begin if (IsBranch(fetchbuf0_instr) && predict_taken0) begin tail0 <= idp1(tail0); tail1 <= idp1(tail1); end else begin if (vqe < vl || !IsVector(fetchbuf0_instr)) begin if (canq2) begin tail0 <= idp2(tail0); tail1 <= idp2(tail1); end else begin // queued1 will be true tail0 <= idp1(tail0); tail1 <= idp1(tail1); end end end end endcase end else begin // if branchmiss if (iqentry_stomp[0] & ~iqentry_stomp[7]) begin tail0 <= 3'd0; tail1 <= 3'd1; end else if (iqentry_stomp[1] & ~iqentry_stomp[0]) begin tail0 <= 3'd1; tail1 <= 3'd2; end else if (iqentry_stomp[2] & ~iqentry_stomp[1]) begin tail0 <= 3'd2; 
tail1 <= 3'd3;
		end
		else if (iqentry_stomp[3] & ~iqentry_stomp[2]) begin
			tail0 <= 3'd3;
			tail1 <= 3'd4;
		end
		else if (iqentry_stomp[4] & ~iqentry_stomp[3]) begin
			tail0 <= 3'd4;
			tail1 <= 3'd5;
		end
		else if (iqentry_stomp[5] & ~iqentry_stomp[4]) begin
			tail0 <= 3'd5;
			tail1 <= 3'd6;
		end
		else if (iqentry_stomp[6] & ~iqentry_stomp[5]) begin
			tail0 <= 3'd6;
			tail1 <= 3'd7;
		end
		else if (iqentry_stomp[7] & ~iqentry_stomp[6]) begin
			tail0 <= 3'd7;
			tail1 <= 3'd0;
		end
		// otherwise, it is the last instruction in the queue that has been mispredicted ... do nothing
	end
end
*/

// Exception code of the instruction at the head of the queue (low 8 bits).
assign exc_o = iqentry_exc[heads[0]][7:0];

// Abort the load currently selected by bwhich if its queue entry has been
// stomped: the bus state machine returns to BIDLE and the memory channel is
// put back in the DRAMREQ_READY state. Any other bwhich value is treated
// like channel 0.
task check_abort_load;
begin
	case(bwhich)
	2'd0:
		if (iqentry_stomp[dram0_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram0 <= `DRAMREQ_READY;
		end
	2'd1:
		if (iqentry_stomp[dram1_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram1 <= `DRAMREQ_READY;
		end
	2'd2:
		if (iqentry_stomp[dram2_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram2 <= `DRAMREQ_READY;
		end
	default:
		if (iqentry_stomp[dram0_id[`QBITS]]) begin
			bstate <= BIDLE;
			dram0 <= `DRAMREQ_READY;
		end
	endcase
end
endtask

// Update the write buffer.
//
// id     queue entry issuing the store (recorded as a one-hot bit in wb_id)
// rmw    read-modify-write flag
// sel    byte lane selects
// ol     operating level of the store
// addr   store address (stored octa-aligned)
// data   store data
// wbptr  write buffer slot to fill
//
// When merging is enabled (wbm) and the previous slot holds a valid entry
// for the same octa with matching ol/rmw, the store is merged byte-wise into
// that slot instead of taking a new one.
task wb_update;
input [`QBITS] id;
input rmw;
input [7:0] sel;
input [1:0] ol;
input [`ABITS] addr;
input [63:0] data;
input [2:0] wbptr;
begin
	if (wbm && wbptr > 1 && wb_addr[wbptr-1][AMSB:3]==addr[AMSB:3]
	 && wb_ol[wbptr-1]==ol && wb_rmw[wbptr-1]==rmw && wb_v[wbptr-1]) begin
		// The write buffer is always shifted during the bus IDLE state. That means
		// the data is out of place by a slot. The slot the data is moved from is
		// invalidated.
		wb_v[wbptr-2] <= `INV;
		wb_v[wbptr-1] <= wb_en;
		wb_id[wbptr-1] <= wb_id[wbptr-1] | (16'd1 << id);
		wb_rmw[wbptr-1] <= rmw;
		wb_ol[wbptr-1] <= ol;
		wb_sel[wbptr-1] <= wb_sel[wbptr-1] | sel;
		wb_addr[wbptr-1] <= wb_addr[wbptr-1];
		// Keep the existing data, then overlay only the selected byte lanes.
		wb_data[wbptr-1] <= wb_data[wbptr-1];
		if (sel[0]) wb_data[wbptr-1][ 7: 0] <= data[ 7: 0];
		if (sel[1]) wb_data[wbptr-1][15: 8] <= data[15: 8];
		if (sel[2]) wb_data[wbptr-1][23:16] <= data[23:16];
		if (sel[3]) wb_data[wbptr-1][31:24] <= data[31:24];
		if (sel[4]) wb_data[wbptr-1][39:32] <= data[39:32];
		if (sel[5]) wb_data[wbptr-1][47:40] <= data[47:40];
		if (sel[6]) wb_data[wbptr-1][55:48] <= data[55:48];
		if (sel[7]) wb_data[wbptr-1][63:56] <= data[63:56];
		wb_merges <= wb_merges + 32'd1;
	end
	else begin
		wb_v[wbptr] <= wb_en;
		wb_id[wbptr] <= (16'd1 << id);
		wb_rmw[wbptr] <= rmw;
		wb_ol[wbptr] <= ol;
		wb_sel[wbptr] <= sel;
		wb_addr[wbptr] <= {addr[AMSB:3],3'b0};
		wb_data[wbptr] <= data;
	end
end
endtask

// Increment the head pointers
// Also increments the instruction counter
// Used when instructions are committed.
// Also clear any outstanding state bits that foul things up.
//
// amt  number of queue entries being retired (1, 2 or 3 are handled).
//
// For each retired entry the state is set to IQS_INVALID and the mem / iv /
// alu flags are cleared. The sequence numbers of all valid entries are then
// rebased by the sequence number of the last valid entry being retired.
task head_inc;
input [`QBITS] amt;
begin
	for (n = 0; n < QENTRIES; n = n + 1)
		heads[n] <= (heads[n] + amt) % QENTRIES;
	CC <= CC + amt;
	if (amt==3'd3) begin
		I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]] + iqentry_v[heads[2]];
		iqentry_state[heads[0]] <= IQS_INVALID;
		iqentry_state[heads[1]] <= IQS_INVALID;
		iqentry_state[heads[2]] <= IQS_INVALID;
		iqentry_mem[heads[0]] <= `FALSE;
		iqentry_mem[heads[1]] <= `FALSE;
		iqentry_mem[heads[2]] <= `FALSE;
		iqentry_iv[heads[0]] <= `INV;
		iqentry_iv[heads[1]] <= `INV;
		iqentry_iv[heads[2]] <= `INV;
		iqentry_alu[heads[0]] <= `FALSE;
		iqentry_alu[heads[1]] <= `FALSE;
		iqentry_alu[heads[2]] <= `FALSE;
		for (n = 0; n < QENTRIES; n = n + 1)
			if (iqentry_v[n])
				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[2]] ? iqentry_sn[heads[2]]
											 : iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
											 : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
											 : 4'b0);
	end
	else if (amt==3'd2) begin
		I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]];
		iqentry_state[heads[0]] <= IQS_INVALID;
		iqentry_state[heads[1]] <= IQS_INVALID;
		iqentry_mem[heads[0]] <= `FALSE;
		iqentry_mem[heads[1]] <= `FALSE;
		iqentry_iv[heads[0]] <= `INV;
		iqentry_iv[heads[1]] <= `INV;
		iqentry_alu[heads[0]] <= `FALSE;
		iqentry_alu[heads[1]] <= `FALSE;
		for (n = 0; n < QENTRIES; n = n + 1)
			if (iqentry_v[n])
				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
											 : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
											 : 4'b0);
	end
	else if (amt==3'd1) begin
		I = I + iqentry_v[heads[0]];
		iqentry_state[heads[0]] <= IQS_INVALID;
		iqentry_mem[heads[0]] <= `FALSE;
		iqentry_iv[heads[0]] <= `INV;
		iqentry_alu[heads[0]] <= `FALSE;
		for (n = 0; n < QENTRIES; n = n + 1)
			if (iqentry_v[n])
				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
											 : 4'b0);
	end
end
endtask

// Capture a result bus into queue entry nn.
//
// nn   queue entry to update
// id   source tag of the bus
// v    bus valid
// bus  64-bit result value
//
// Each of the three operands is loaded from the bus when the operand is
// still invalid, its source tag equals id, the entry is valid and the bus
// is valid.
task setargs;
input [`QBITS] nn;
input [`QBITSP1] id;
input v;
input [63:0] bus;
begin
	if (iqentry_a1_v[nn] == `INV && iqentry_a1_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
		iqentry_a1[nn] <= bus;
		iqentry_a1_v[nn] <= `VAL;
	end
	if (iqentry_a2_v[nn] == `INV && iqentry_a2_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
		iqentry_a2[nn] <= bus;
		iqentry_a2_v[nn] <= `VAL;
	end
	if (iqentry_a3_v[nn] == `INV && iqentry_a3_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
		iqentry_a3[nn] <= bus;
		iqentry_a3_v[nn] <= `VAL;
	end
end
endtask

// Unconditionally load the decoded instruction fields for queue entry nn
// from a 144-bit decode bus and mark the entry's decode as valid.
// When INLINE_DECODE is not defined, the instruction length carried on the
// bus is checked against the length already stored in the entry and the
// simulation is stopped on a mismatch.
task setinsn1;
input [`QBITS] nn;
input [143:0] bus;
begin
	iqentry_iv   [nn]  <= `VAL;
//	iqentry_Rt   [nn]  <= bus[`IB_RT];
//	iqentry_Rc   [nn]  <= bus[`IB_RC];
//	iqentry_Ra   [nn]  <= bus[`IB_RA];
	iqentry_a0   [nn]  <= bus[`IB_CONST];
	iqentry_imm  [nn]  <= bus[`IB_IMM];
//	iqentry_insln[nn]  <= bus[`IB_LN];
`ifndef INLINE_DECODE
	if (iqentry_insln[nn] != bus[`IB_LN]) begin
		$display("Insn length mismatch.");
		$stop;
	end
`endif
	iqentry_cmp  [nn]  <= bus[`IB_CMP];
	iqentry_tlb  [nn]  <= bus[`IB_TLB];
	iqentry_sz   [nn]  <= bus[`IB_SZ];
	iqentry_jal  [nn]  <= bus[`IB_JAL];
	iqentry_ret  [nn]  <= bus[`IB_RET];
	iqentry_irq  [nn]  <= bus[`IB_IRQ];
	iqentry_brk  [nn]  <= bus[`IB_BRK];
	iqentry_rti  [nn]  <= bus[`IB_RTI];
	iqentry_bt   [nn]  <= bus[`IB_BT];
	iqentry_alu  [nn]  <= bus[`IB_ALU];
	iqentry_alu0 [nn]  <= bus[`IB_ALU0];
	iqentry_fpu  [nn]  <= bus[`IB_FPU];
	iqentry_fc   [nn]  <= bus[`IB_FC];
	iqentry_canex[nn]  <= bus[`IB_CANEX];
	iqentry_loadv[nn]  <= bus[`IB_LOADV];
	iqentry_load [nn]  <= bus[`IB_LOAD];
	iqentry_loadseg[nn]<= bus[`IB_LOADSEG];
	iqentry_preload[nn]<= bus[`IB_PRELOAD];
	iqentry_store[nn]  <= bus[`IB_STORE];
	iqentry_push [nn]  <= bus[`IB_PUSH];
	iqentry_oddball[nn] <= bus[`IB_ODDBALL];
	iqentry_memsz[nn]  <= bus[`IB_MEMSZ];
	iqentry_mem  [nn]  <= bus[`IB_MEM];
	iqentry_memndx[nn] <= bus[`IB_MEMNDX];
	iqentry_rmw  [nn]  <= bus[`IB_RMW];
	iqentry_memdb[nn]  <= bus[`IB_MEMDB];
	iqentry_memsb[nn]  <= bus[`IB_MEMSB];
	iqentry_shft [nn]  <= bus[`IB_SHFT];	// 48 bit shift instructions
	iqentry_sei  [nn]  <= bus[`IB_SEI];
	iqentry_aq   [nn]  <= bus[`IB_AQ];
	iqentry_rl   [nn]  <= bus[`IB_RL];
	iqentry_jmp  [nn]  <= bus[`IB_JMP];
	iqentry_br   [nn]  <= bus[`IB_BR];
	iqentry_sync [nn]  <= bus[`IB_SYNC];
	iqentry_fsync[nn]  <= bus[`IB_FSYNC];
	iqentry_rfw  [nn]  <= bus[`IB_RFW];
	iqentry_we   [nn]  <= bus[`IB_WE];
end
endtask

// Conditionally load decoded fields into queue entry nn: only when the entry
// is valid, its decode is not yet valid, its decode source tag (iqentry_is)
// equals id and the decode bus is valid.
task setinsn;
input [`QBITS] nn;
input [4:0] id;
input v;
input [143:0] bus;
begin
	if (iqentry_iv[nn] == `INV && iqentry_is[nn] == id && iqentry_v[nn] == `VAL && v == `VAL)
		setinsn1(nn,bus);
end
endtask

// Set operand 1 valid/source for the entry at tail1 (second instruction of a
// pair). If fetchbuf0 writes the register this operand reads, the operand is
// marked invalid and sourced from the entry at tail0.
task a1_vs;
begin
	// if there is not an overlapping write to the register file.
	if (Ra1s != Rt0s || !fetchbuf0_rfw) begin
		iqentry_a1_v [tail1] <= regIsValid[Ra1s];
		iqentry_a1_s [tail1] <= rf_source [Ra1s];
	end
	else begin
		iqentry_a1_v [tail1] <= `INV;
		iqentry_a1_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
	end
end
endtask

// Same as a1_vs, for operand 2.
task a2_vs;
begin
	// if there is not an overlapping write to the register file.
	if (Rb1s != Rt0s || !fetchbuf0_rfw) begin
		iqentry_a2_v [tail1] <= regIsValid[Rb1s];
		iqentry_a2_s [tail1] <= rf_source [Rb1s];
	end
	else begin
		iqentry_a2_v [tail1] <= `INV;
		iqentry_a2_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
	end
end
endtask

// Same as a1_vs, for operand 3.
task a3_vs;
begin
	// if there is not an overlapping write to the register file.
	if (Rc1s != Rt0s || !fetchbuf0_rfw) begin
		iqentry_a3_v [tail1] <= regIsValid[Rc1s];
		iqentry_a3_s [tail1] <= rf_source [Rc1s];
	end
	else begin
		iqentry_a3_v [tail1] <= `INV;
		iqentry_a3_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
	end
end
endtask

// Queue fetchbuf0, handling vector instructions when SUP_VECTOR is set.
// A vector instruction is queued once at tail0 and, when canq2 allows and
// more elements remain, a second time at tail1 with the next element number.
// A non-vector instruction is queued once at tail0 with element number 0.
// In every case the target register's source is redirected to the queue
// entry and the register is marked invalid when fetchbuf0 writes a register.
task enque0x;
begin
	if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin
		vqe0 <= vqe0 + 4'd1;
		if (IsVCmprss(fetchbuf0_instr)) begin
			if (vm[fetchbuf0_instr[25:23]][vqe0])
				vqet0 <= vqet0 + 4'd1;
		end
		else
			vqet0 <= vqet0 + 4'd1;
		if (vqe0 >= vl-2)
			nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
		enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0);
		iq_ctr = iq_ctr + 4'd1;
		if (fetchbuf0_rfw) begin
			rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 };	// top bit indicates ALU/MEM bus
			rf_v[Rt0s] <= `INV;
		end
		if (canq2) begin
			if (vqe0 < vl-2) begin
				// These non-blocking updates supersede the +1 updates above.
				vqe0 <= vqe0 + 4'd2;
				if (IsVCmprss(fetchbuf0_instr)) begin
					if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1])
						vqet0 <= vqet0 + 4'd2;
				end
				else
					vqet0 <= vqet0 + 4'd2;
				enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0]+4'd2, vqe0 + 6'd1);
				// NOTE(review): iq_ctr uses blocking assignments, so this adds 2 on
				// top of the +1 above (net +3 for two queued entries) - confirm intent.
				iq_ctr = iq_ctr + 4'd2;
				if (fetchbuf0_rfw) begin
					rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 };	// top bit indicates ALU/MEM bus
					rf_v[Rt0s] <= `INV;
				end
			end
		end
	end
	else begin
		enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0);
		iq_ctr = iq_ctr + 4'd1;
		if (fetchbuf0_rfw) begin
			rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 };	// top bit indicates ALU/MEM bus
			rf_v[Rt0s] <= `INV;
		end
	end
end
endtask

// Enqueue fetchbuf0 onto the tail of the instruction queue
//
// tail    queue slot to fill
// seqnum  sequence number to give the entry
// venno   vector element number
task enque0;
input [`QBITS] tail;
input [`SNBITS] seqnum;
input [5:0] venno;
begin
	// Default is "no exception". The debug overrides must follow the default:
	// with non-blocking assignments the last one executed wins, and the
	// original placed an unconditional `EXC_NONE assignment after the debug
	// checks, which discarded FLT_DBG / FLT_SSM.
	iqentry_exc  [tail]    <= `EXC_NONE;
`ifdef SUPPORT_DBG
	if (dbg_imatchA)
		iqentry_exc[tail] <= `FLT_DBG;
	else if (dbg_ctrl[63])
		iqentry_exc[tail] <= `FLT_SSM;
`endif
	iqentry_state[tail]    <= IQS_QUEUED;
	iqentry_sn   [tail]    <= seqnum;
	iqentry_iv   [tail]    <= `INV;
	iqentry_is   [tail]    <= tail;
	iqentry_thrd [tail]    <= fetchbuf0_thrd;
	iqentry_res  [tail]    <= `ZERO;
	iqentry_instr[tail]    <= IsVLS(fetchbuf0_instr) ? (vm[fnM2(fetchbuf0_instr)] ? fetchbuf0_instr : `NOP_INSN) : fetchbuf0_instr;
	iqentry_insln[tail]    <= fetchbuf0_insln;
	iqentry_fc   [tail]    <= `INV;
	iqentry_mem  [tail]    <= `INV;
	iqentry_memissue[tail] <= `INV;
	iqentry_alu  [tail]    <= `INV;
	iqentry_fpu  [tail]    <= `INV;
	iqentry_load [tail]    <= `INV;
	iqentry_pt   [tail]    <= predict_taken0;
	// If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt
	// inherit the previous pc.
	//if (IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15] &&
	//	(IsBrk(iqentry_instr[idm1(tail)]) && !iqentry_instr[idm1(tail1)][15] && iqentry_v[idm1(tail)]))
	//	iqentry_pc   [tail]    <= iqentry_pc[idm1(tail)];
	//else
	iqentry_pc   [tail]    <= fetchbuf0_pc;
	iqentry_rtop [tail]    <= IsRtop(fetchbuf0_instr);
	iqentry_tgt  [tail]    <= Rt0;
	iqentry_Ra   [tail]    <= Ra0;
	iqentry_Rb   [tail]    <= Rb0;
	iqentry_Rc   [tail]    <= Rc0;
	iqentry_vl   [tail]    <= vl;
	iqentry_ven  [tail]    <= venno;
	iqentry_a1   [tail]    <= rfoa0;
	iqentry_a1_v [tail]    <= Source1Valid(fetchbuf0_instr) | regIsValid[Ra0s];
	iqentry_a1_s [tail]    <= rf_source[Ra0s];
	iqentry_a2   [tail]    <= rfob0;
	iqentry_a2_v [tail]    <= Source2Valid(fetchbuf0_instr) | regIsValid[Rb0s];
	iqentry_a2_s [tail]    <= rf_source[Rb0s];
	iqentry_a3   [tail]    <= rfoc0;
	iqentry_a3_v [tail]    <= Source3Valid(fetchbuf0_instr) | regIsValid[Rc0s];
	iqentry_a3_s [tail]    <= rf_source[Rc0s];
`ifdef INLINE_DECODE
	/* This decoding cannot be done here because it'll introduce a 1 cycle delay
	id1_Rt <= Rt0[4:0];
	id1_vl <= vl;
	id1_ven <= venno;
	id1_id <= tail;
	id1_pt <= predict_taken0;
	id1_thrd <= fetchbuf0_thrd;
	*/
	setinsn1(tail,id1_bus);
`endif
end
endtask

// Enque fetchbuf1. Fetchbuf1 might be the second instruction to queue so some
// of this code checks to see which tail it is being queued on.
//
// tail    queue slot to fill
// seqnum  sequence number to give the entry
// venno   vector element number
task enque1;
input [`QBITS] tail;
input [`SNBITS] seqnum;
input [5:0] venno;
begin
	// Default is "no exception". The debug overrides must follow the default:
	// with non-blocking assignments the last one executed wins, and the
	// original placed an unconditional `EXC_NONE assignment after the debug
	// checks, which discarded FLT_DBG / FLT_SSM.
	iqentry_exc  [tail]    <= `EXC_NONE;
`ifdef SUPPORT_DBG
	if (dbg_imatchB)
		iqentry_exc[tail] <= `FLT_DBG;
	else if (dbg_ctrl[63])
		iqentry_exc[tail] <= `FLT_SSM;
`endif
	iqentry_state[tail]    <= IQS_QUEUED;
	iqentry_sn   [tail]    <= seqnum;
	iqentry_iv   [tail]    <= `INV;
	iqentry_is   [tail]    <= tail;
	iqentry_thrd [tail]    <= fetchbuf1_thrd;
	iqentry_res  [tail]    <= `ZERO;
	iqentry_instr[tail]    <= IsVLS(fetchbuf1_instr) ? (vm[fnM2(fetchbuf1_instr)] ? fetchbuf1_instr : `NOP_INSN) : fetchbuf1_instr;
	iqentry_insln[tail]    <= fetchbuf1_insln;
	iqentry_fc   [tail]    <= `INV;
	iqentry_mem  [tail]    <= `INV;
	iqentry_memissue[tail] <= `INV;
	iqentry_alu  [tail]    <= `INV;
	iqentry_fpu  [tail]    <= `INV;
	iqentry_load [tail]    <= `INV;
	iqentry_pt   [tail]    <= predict_taken1;
	// If queuing 2nd instruction must read from first
	if (tail==tail1) begin
		// If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt
		// inherit the previous pc.
//		if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] &&
//			IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15])
//			iqentry_pc   [tail]    <= fetchbuf0_pc;
//		else
		iqentry_pc   [tail] <= fetchbuf1_pc;
	end
	else begin
		// If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt
		// inherit the previous pc.
//		if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] &&
//			(IsBrk(iqentry_instr[idp7(tail)]) && !iqentry_instr[idm1(tail)][15] && iqentry_v[idm1(tail)]))
//			iqentry_pc   [tail]    <= iqentry_pc[idm1(tail)];
//		else
		iqentry_pc   [tail] <= fetchbuf1_pc;
	end
	iqentry_rtop [tail]    <= IsRtop(fetchbuf1_instr);
	iqentry_tgt  [tail]    <= Rt1;
	iqentry_Ra   [tail]    <= Ra1;
	iqentry_Rb   [tail]    <= Rb1;
	iqentry_Rc   [tail]    <= Rc1;
	iqentry_vl   [tail]    <= vl;
	iqentry_ven  [tail]    <= venno;
	iqentry_a1   [tail]    <= rfoa1;
	iqentry_a1_v [tail]    <= Source1Valid(fetchbuf1_instr) | regIsValid[Ra1s];
	iqentry_a1_s [tail]    <= rf_source[Ra1s];
	iqentry_a2   [tail]    <= rfob1;
	iqentry_a2_v [tail]    <= Source2Valid(fetchbuf1_instr) | regIsValid[Rb1s];
	iqentry_a2_s [tail]    <= rf_source[Rb1s];
	iqentry_a3   [tail]    <= rfoc1;
	iqentry_a3_v [tail]    <= Source3Valid(fetchbuf1_instr) | regIsValid[Rc1s];
	iqentry_a3_s [tail]    <= rf_source[Rc1s];
`ifdef INLINE_DECODE
	/* This decoding cannot be done here because it'll introduce a 1 cycle delay
	id2_Rt <= Rt1[4:0];
	id2_vl <= vl;
	id2_ven <= venno;
	id2_id <= tail;
	id2_pt <= predict_taken1;
	id2_thrd <= fetchbuf1_thrd;
	*/
	setinsn1(tail,id2_bus);
`endif
end
endtask

// Take an exception for the instruction in queue entry head.
//
// head     queue entry that raised the exception
// thread   thread the entry belongs to
// causecd  8-bit cause code
//
// Flags an exception miss, redirects to the vector built from tvec[0] and
// the current operating level, records the faulting address/instruction and
// cause, and pushes the interrupt-mask, operating-level, debug-level,
// privilege-level, register-set and exception-pc stacks.
task exc;
input [`QBITS] head;
input thread;
input [7:0] causecd;
begin
	excmiss <= TRUE;
	badaddr[{thread,2'd0}] <= iqentry_ma[head];
	bad_instr[{thread,2'd0}] <= iqentry_instr[head];
	im_stack <= {im_stack[27:0],4'hF};
`ifdef SUPPORT_SMT
	excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
	excthrd <= iqentry_thrd[head];
	ol_stack[thread] <= {ol_stack[thread][13:0],2'b00};
	dl_stack[thread] <= {dl_stack[thread][13:0],2'b00};
	epc0[thread] <= iqentry_pc[head];
	epc1[thread] <= epc0[thread];
	epc2[thread] <= epc1[thread];
	epc3[thread] <= epc2[thread];
	epc4[thread] <= epc3[thread];
	epc5[thread] <= epc4[thread];
	epc6[thread] <= epc5[thread];
	epc7[thread] <= epc6[thread];
	epc8[thread] <= epc7[thread];
	pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
	rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS};
	brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS};
	cause[{thread,2'd0}] <= {8'd0,causecd};
	mstatus[thread][5:4] <= 2'd0;
	mstatus[thread][13:6] <= 8'h00;
	mstatus[thread][19:14] <= `EXC_RGS;
`else
	// Without SMT, ol is used as a plain vector elsewhere in this file (see the
	// BRK handling in oddball_commit), so it must not be indexed by thread
	// here: that would select a single bit and shorten the vector address.
	excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
	excthrd <= 1'b0;
	ol_stack <= {ol_stack[13:0],2'b00};
	dl_stack <= {dl_stack[13:0],2'b00};
	epc0 <= iqentry_pc[head];
	epc1 <= epc0;
	epc2 <= epc1;
	epc3 <= epc2;
	epc4 <= epc3;
	epc5 <= epc4;
	epc6 <= epc5;
	epc7 <= epc6;
	epc8 <= epc7;
	pl_stack <= {pl_stack[55:0],cpl};
	rs_stack <= {rs_stack[59:0],`EXC_RGS};
	// Push onto brs_stack's own history (was shifting in rs_stack's contents).
	brs_stack <= {brs_stack[59:0],`EXC_RGS};
	cause[3'd0] <= {8'd0,causecd};
	mstatus[5:4] <= 2'd0;
	mstatus[13:6] <= 8'h00;
	mstatus[19:14] <= `EXC_RGS;
`endif
	wb_en <= `TRUE;
	sema[0] <= 1'b0;
	// Save the vector element counters.
	ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
`ifdef SUPPORT_DBG
	dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
	dbg_ctrl[63] <= FALSE;
`endif
end
endtask

// This task takes care of commits for things other than the register file.
// ----------------------------------------------------------------------------
// oddball_commit
//
// Applies the architectural side effects of an "oddball" instruction when the
// queue entry at `head` commits: exceptions, BRK/RTI stack handling, vector
// mask/length updates, cache invalidate requests, CSR writes, REX and the
// floating point status/control updates.
//
// Parameters:
//   v     - commit valid; when low the task does nothing.
//   head  - index of the issue queue entry being committed.
//   which - not referenced inside this task.
//
// All state updates use non-blocking assignments, so this task is meant to be
// called from a clocked block. The SUPPORT_SMT build indexes the per-thread
// state with `thread` (taken from iqentry_thrd[head]); the non-SMT build uses
// the single copy of each register.
// ----------------------------------------------------------------------------
task oddball_commit;
input v;
input [`QBITS] head;
input [1:0] which;
reg thread;
begin
	thread = iqentry_thrd[head];
	if (v) begin
		if (|iqentry_exc[head]) begin
			// An exception recorded against the entry takes priority over
			// whatever the instruction itself would have done.
			exc(head,thread,iqentry_exc[head]);
		end
		else
			case(iqentry_instr[head][`INSTRUCTION_OP])
			`BRK:
				// BRK is treated as a nop unless it's a software interrupt or a
				// hardware interrupt at a higher priority than the current priority.
				if ((|iqentry_instr[head][25:21]) || iqentry_instr[head][20:17] > im) begin
					excmiss <= TRUE;
`ifdef SUPPORT_SMT
					// The interrupt mask stack is per-thread in the SMT build (see
					// the RTI case below and the CSR tasks), so push onto the stack
					// of the thread that is taking the break.
					im_stack[thread] <= {im_stack[thread][27:0],4'hF};
					ol_stack[thread] <= {ol_stack[thread][13:0],2'b00};
					dl_stack[thread] <= {dl_stack[thread][13:0],2'b00};
					excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
					excthrd <= iqentry_thrd[head];
					// Push the return address and shift the exception PC stack down.
					epc0[thread] <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
					epc1[thread] <= epc0[thread];
					epc2[thread] <= epc1[thread];
					epc3[thread] <= epc2[thread];
					epc4[thread] <= epc3[thread];
					epc5[thread] <= epc4[thread];
					epc6[thread] <= epc5[thread];
					epc7[thread] <= epc6[thread];
					epc8[thread] <= epc7[thread];
					pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
					rs_stack[thread] <= {rs_stack[thread][59:0],`BRK_RGS};
					brs_stack[thread] <= {brs_stack[thread][59:0],`BRK_RGS};
					cause[{thread,2'd0}] <= iqentry_res[head][7:0];
					mstatus[thread][5:4] <= 2'd0;
					mstatus[thread][13:6] <= 8'h00;
					// For hardware interrupts only, set a new mask level. Setting a
					// new mask level will effectively prevent subsequent brks that
					// are streaming from an interrupt from being processed.
					// Select register set according to interrupt level
					if (iqentry_instr[head][25:21]==5'd0) begin
						mstatus[thread][ 3: 0] <= iqentry_instr[head][20:17];
						mstatus[thread][31:28] <= iqentry_instr[head][20:17];
						mstatus[thread][19:14] <= {2'b0,iqentry_instr[head][20:17]};
						rs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]};
						brs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]};
					end
					else begin
						mstatus[thread][19:14] <= `BRK_RGS;
						rs_stack[thread][5:0] <= `BRK_RGS;
						brs_stack[thread][5:0] <= `BRK_RGS;
					end
`else
					im_stack <= {im_stack[27:0],4'hF};
					ol_stack <= {ol_stack[13:0],2'b00};
					dl_stack <= {dl_stack[13:0],2'b00};
					excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
					excthrd <= 1'b0;
					// Push the return address and shift the exception PC stack down.
					epc0 <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
					epc1 <= epc0;
					epc2 <= epc1;
					epc3 <= epc2;
					epc4 <= epc3;
					epc5 <= epc4;
					epc6 <= epc5;
					epc7 <= epc6;
					epc8 <= epc7;
					pl_stack <= {pl_stack[55:0],cpl};
					rs_stack <= {rs_stack[59:0],`BRK_RGS};
					brs_stack <= {brs_stack[59:0],`BRK_RGS};
					cause[3'd0] <= iqentry_res[head][7:0];
					mstatus[5:4] <= 2'd0;
					mstatus[13:6] <= 8'h00;
					// For hardware interrupts only, set a new mask level. Setting a
					// new mask level will effectively prevent subsequent brks that
					// are streaming from an interrupt from being processed.
					// Select register set according to interrupt level
					if (iqentry_instr[head][25:21]==5'd0) begin
						mstatus[ 3: 0] <= iqentry_instr[head][20:17];
						mstatus[31:28] <= iqentry_instr[head][20:17];
						mstatus[19:14] <= {2'b0,iqentry_instr[head][20:17]};
						rs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]};
						brs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]};
					end
					else begin
						mstatus[19:14] <= `BRK_RGS;
						rs_stack[5:0] <= `BRK_RGS;
						brs_stack[5:0] <= `BRK_RGS;
					end
`endif
					sema[0] <= 1'b0;
					// Save the vector queue element state; RTI restores it from ve_hold.
					ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
`ifdef SUPPORT_DBG
					dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
					dbg_ctrl[63] <= FALSE;
`endif
				end
			`IVECTOR:
				casez(iqentry_tgt[head])
				8'b00100???:	vm[iqentry_tgt[head][2:0]] <= iqentry_res[head];
				8'b00101111:	vl <= iqentry_res[head];
				default:	;
				endcase
			`R2:
				case(iqentry_instr[head][`INSTRUCTION_S2])
				`R1:
					case(iqentry_instr[head][20:16])
					`CHAIN_OFF:	cr0[18] <= 1'b0;
					`CHAIN_ON:	cr0[18] <= 1'b1;
					//`SETWB:	wbrcd[pcr[5:0]] <= 1'b1;
					default:	;
					endcase
				`VMOV:
					casez(iqentry_tgt[head])
					12'b1111111_00???:	vm[iqentry_tgt[head][2:0]] <= iqentry_res[head];
					12'b1111111_01111:	vl <= iqentry_res[head];
					default:	;
					endcase
`ifdef SUPPORT_SMT
				`SEI:	mstatus[thread][3:0] <= iqentry_res[head][3:0];	// S1
`else
				`SEI:	mstatus[3:0] <= iqentry_res[head][3:0];	// S1
`endif
				`RTI:
					begin
						excmiss <= TRUE;
						excthrd <= thread;
						excmisspc <= iqentry_ma[head];
`ifdef SUPPORT_SMT
//						excmisspc <= epc0[thread];
						// Restore status fields from the tops of the stacks, then pop.
						mstatus[thread][3:0] <= im_stack[thread][3:0];
						mstatus[thread][5:4] <= ol_stack[thread][1:0];
						mstatus[thread][21:20] <= dl_stack[thread][1:0];
						mstatus[thread][13:6] <= pl_stack[thread][7:0];
						mstatus[thread][19:14] <= rs_stack[thread][5:0];
						im_stack[thread] <= {4'd15,im_stack[thread][31:4]};
						ol_stack[thread] <= {2'd0,ol_stack[thread][15:2]};
						dl_stack[thread] <= {2'd0,dl_stack[thread][15:2]};
						pl_stack[thread] <= {8'h00,pl_stack[thread][63:8]};
						rs_stack[thread] <= {6'h00,rs_stack[thread][59:6]};
						brs_stack[thread] <= {6'h00,brs_stack[thread][59:6]};
						epc0[thread] <= epc1[thread];
						epc1[thread] <= epc2[thread];
						epc2[thread] <= epc3[thread];
						epc3[thread] <= epc4[thread];
						epc4[thread] <= epc5[thread];
						epc5[thread] <= epc6[thread];
						epc6[thread] <= epc7[thread];
						epc7[thread] <= epc8[thread];
						epc8[thread] <= {tvec[0][AMSB:8], 1'b0, ol[thread], 5'h0};
`else
//						excmisspc <= epc0;
						// Restore status fields from the tops of the stacks, then pop.
						mstatus[3:0] <= im_stack[3:0];
						mstatus[5:4] <= ol_stack[1:0];
						mstatus[21:20] <= dl_stack[1:0];
						mstatus[13:6] <= pl_stack[7:0];
						mstatus[19:14] <= rs_stack[5:0];
						im_stack <= {4'd15,im_stack[31:4]};
						ol_stack <= {2'd0,ol_stack[15:2]};
						dl_stack <= {2'd0,dl_stack[15:2]};
						pl_stack <= {8'h00,pl_stack[63:8]};
						rs_stack <= {6'h00,rs_stack[59:6]};
						brs_stack <= {6'h00,brs_stack[59:6]};
						epc0 <= epc1;
						epc1 <= epc2;
						epc2 <= epc3;
						epc3 <= epc4;
						epc4 <= epc5;
						epc5 <= epc6;
						epc6 <= epc7;
						epc7 <= epc8;
						epc8 <= {tvec[0][AMSB:8], 1'b0, ol, 5'h0};
`endif
						sema[0] <= 1'b0;
						sema[iqentry_res[head][5:0]] <= 1'b0;
						// Restore the vector queue element state saved by BRK.
						vqe0  <= ve_hold[ 5: 0];
						vqet0 <= ve_hold[21:16];
						vqe1  <= ve_hold[37:32];
						vqet1 <= ve_hold[53:48];
`ifdef SUPPORT_DBG
						dbg_ctrl[62:55] <= {FALSE,dbg_ctrl[62:56]};
						dbg_ctrl[63] <= dbg_ctrl[55];
`endif
					end
				default:	;
				endcase
			`MEMNDX:
				case(iqentry_instr[head][`INSTRUCTION_S2])
				`CACHEX:
					case(iqentry_instr[head][22:18])
					5'h02:	begin invicl <= TRUE; invlineAddr <= {ASID,iqentry_res[head]}; end
					5'h03:	invic <= TRUE;
					5'h10:	cr0[30] <= FALSE;
					5'h11:	cr0[30] <= TRUE;
					default:	;
					endcase
				default:	;
				endcase
			`CSRRW:
				begin
					write_csr(iqentry_instr[head][31:18],iqentry_a1[head],thread);
				end
			`REX:
`ifdef SUPPORT_SMT
				// Can only redirect to a lower level
				if (ol[thread] < iqentry_instr[head][14:13]) begin
					mstatus[thread][5:4] <= iqentry_instr[head][14:13];
					badaddr[{thread,iqentry_instr[head][14:13]}] <= badaddr[{thread,ol[thread]}];
					bad_instr[{thread,iqentry_instr[head][14:13]}] <= bad_instr[{thread,ol[thread]}];
					cause[{thread,iqentry_instr[head][14:13]}] <= cause[{thread,ol[thread]}];
					mstatus[thread][13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0];
				end
`else
				// Can only redirect to a lower level
				if (ol < iqentry_instr[head][14:13]) begin
					mstatus[5:4] <= iqentry_instr[head][14:13];
					badaddr[{1'b0,iqentry_instr[head][14:13]}] <= badaddr[{1'b0,ol}];
					bad_instr[{1'b0,iqentry_instr[head][14:13]}] <= bad_instr[{1'b0,ol}];
					cause[{1'b0,iqentry_instr[head][14:13]}] <= cause[{1'b0,ol}];
					mstatus[13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0];
				end
`endif
			`CACHE:
				case(iqentry_instr[head][17:13])
				5'h02:	begin invicl <= TRUE; invlineAddr <= {ASID,iqentry_res[head]}; end
				5'h03:	invic <= TRUE;
				5'h10:	cr0[30] <= FALSE;
				5'h11:	cr0[30] <= TRUE;
				default:	;
				endcase
			`FLOAT:
				case(iqentry_instr[head][`INSTRUCTION_S2])
				`FRM:
					begin
						fp_rm <= iqentry_res[head][2:0];
					end
				`FCX:
					// Clear the exception flags selected by the result bits.
					begin
						fp_sx <= fp_sx & ~iqentry_res[head][5];
						fp_inex <= fp_inex & ~iqentry_res[head][4];
						fp_dbzx <= fp_dbzx & ~(iqentry_res[head][3]|iqentry_res[head][0]);
						fp_underx <= fp_underx & ~iqentry_res[head][2];
						fp_overx <= fp_overx & ~iqentry_res[head][1];
						fp_giopx <= fp_giopx & ~iqentry_res[head][0];
						fp_infdivx <= fp_infdivx & ~iqentry_res[head][0];
						fp_zerozerox <= fp_zerozerox & ~iqentry_res[head][0];
						fp_subinfx <= fp_subinfx & ~iqentry_res[head][0];
						fp_infzerox <= fp_infzerox & ~iqentry_res[head][0];
						fp_NaNCmpx <= fp_NaNCmpx & ~iqentry_res[head][0];
						fp_swtx <= 1'b0;
					end
				`FDX:
					// Clear the exception enables selected by the result bits.
					begin
						fp_inexe <= fp_inexe & ~iqentry_res[head][4];
						fp_dbzxe <= fp_dbzxe & ~iqentry_res[head][3];
						fp_underxe <= fp_underxe & ~iqentry_res[head][2];
						fp_overxe <= fp_overxe & ~iqentry_res[head][1];
						fp_invopxe <= fp_invopxe & ~iqentry_res[head][0];
					end
				`FEX:
					// Set the exception enables selected by the result bits.
					begin
						fp_inexe <= fp_inexe | iqentry_res[head][4];
						fp_dbzxe <= fp_dbzxe | iqentry_res[head][3];
						fp_underxe <= fp_underxe | iqentry_res[head][2];
						fp_overxe <= fp_overxe | iqentry_res[head][1];
						fp_invopxe <= fp_invopxe | iqentry_res[head][0];
					end
				default:
					begin
						// 31 to 29 is rounding mode
						// 28 to 24 are exception enables
						// 23 is nsfp
						// 22 is a fractie
						fp_fractie <= iqentry_ares[head][22];
						fp_raz <= iqentry_ares[head][21];
						// 20 is a 0
						fp_neg <= iqentry_ares[head][19];
						fp_pos <= iqentry_ares[head][18];
						fp_zero <= iqentry_ares[head][17];
						fp_inf <= iqentry_ares[head][16];
						// 15 swtx
						// 14
						fp_inex <= fp_inex | (fp_inexe & iqentry_ares[head][14]);
						fp_dbzx <= fp_dbzx | (fp_dbzxe & iqentry_ares[head][13]);
						fp_underx <= fp_underx | (fp_underxe & iqentry_ares[head][12]);
						fp_overx <= fp_overx | (fp_overxe & iqentry_ares[head][11]);
						//fp_giopx <= fp_giopx | (fp_giopxe & iqentry_res2[head][10]);
						//fp_invopx <= fp_invopx | (fp_invopxe & iqentry_res2[head][24]);
						//
						fp_cvtx <= fp_cvtx | (fp_giopxe & iqentry_ares[head][7]);
						fp_sqrtx <= fp_sqrtx | (fp_giopxe & iqentry_ares[head][6]);
						fp_NaNCmpx <= fp_NaNCmpx | (fp_giopxe & iqentry_ares[head][5]);
						fp_infzerox <= fp_infzerox | (fp_giopxe & iqentry_ares[head][4]);
						fp_zerozerox <= fp_zerozerox | (fp_giopxe & iqentry_ares[head][3]);
						fp_infdivx <= fp_infdivx | (fp_giopxe & iqentry_ares[head][2]);
						fp_subinfx <= fp_subinfx | (fp_giopxe & iqentry_ares[head][1]);
						fp_snanx <= fp_snanx | (fp_giopxe & iqentry_ares[head][0]);
					end
				endcase
			default:	;
			endcase
		// Once the flow control instruction commits, NOP it out to allow
		// pending stores to be issued.
		iqentry_instr[head][5:0] <= `NOP;
	end
end
endtask

// CSR access tasks
// This task does not work. Possibly because the always block @* doesn't
// evaluate into the task to see which signals are changing. The following
// code is simply included as an always block above.
// ----------------------------------------------------------------------------
// read_csr
//
// Returns the value of the control and status register selected by `csrno`.
//
// Parameters:
//   csrno  - [11:10] is compared against the current operating level; the
//            read is only performed when csrno[11:10] >= ol, otherwise zero is
//            returned. [9:0] selects the register.
//   dat    - output, the 64-bit value read.
//   thread - selects the per-thread copy of a register in the SUPPORT_SMT
//            build and forms part of the badaddr/bad_instr/cause index.
//
// `dat` is assigned with blocking assignments. A task output is copied to the
// caller's argument when the task returns, so a non-blocking assignment would
// only update `dat` after that copy had already been made and the caller would
// see the value left over from the previous call.
// ----------------------------------------------------------------------------
task read_csr;
input [11:0] csrno;
output [63:0] dat;
input thread;
begin
`ifdef SUPPORT_SMT
	if (csrno[11:10] >= ol[thread])
`else
	if (csrno[11:10] >= ol)
`endif
	casez(csrno[9:0])
	`CSR_CR0:		dat = cr0;
	`CSR_HARTID:	dat = hartid;
	`CSR_TICK:		dat = tick;
	`CSR_PCR:		dat = pcr;
	`CSR_PCR2:		dat = pcr2;
	`CSR_PMR:		dat = pmr;
	`CSR_WBRCD:		dat = wbrcd;
	`CSR_SEMA:		dat = sema;
	`CSR_KEYS:		dat = keys;
	`CSR_TCB:		dat = tcb;
	`CSR_FSTAT:		dat = {fp_rgs,fp_status};
`ifdef SUPPORT_DBG
	`CSR_DBAD0:		dat = dbg_adr0;
	`CSR_DBAD1:		dat = dbg_adr1;
	`CSR_DBAD2:		dat = dbg_adr2;
	`CSR_DBAD3:		dat = dbg_adr3;
	`CSR_DBCTRL:	dat = dbg_ctrl;
	`CSR_DBSTAT:	dat = dbg_stat;
`endif
	`CSR_CAS:		dat = cas;
	`CSR_TVEC:		dat = tvec[csrno[2:0]];
	`CSR_BADADR:	dat = badaddr[{thread,csrno[11:10]}];
	`CSR_BADINSTR:	dat = bad_instr[{thread,csrno[11:10]}];
	`CSR_CAUSE:		dat = {48'd0,cause[{thread,csrno[11:10]}]};
`ifdef SUPPORT_SMT
	`CSR_IM_STACK:	dat = im_stack[thread];
	// Same CSR name as the non-SMT branch and as write_csr; the value packs
	// dl_stack into [47:32] and ol_stack into [15:0].
	`CSR_ODL_STACK:	dat = {16'h0,dl_stack[thread],16'h0,ol_stack[thread]};
	`CSR_PL_STACK:	dat = pl_stack[thread];
	`CSR_RS_STACK:	dat = rs_stack[thread];
	`CSR_STATUS:	dat = mstatus[thread][63:0];
	`CSR_EPC0:		dat = epc0[thread];
	`CSR_EPC1:		dat = epc1[thread];
	`CSR_EPC2:		dat = epc2[thread];
	`CSR_EPC3:		dat = epc3[thread];
	`CSR_EPC4:		dat = epc4[thread];
	`CSR_EPC5:		dat = epc5[thread];
	`CSR_EPC6:		dat = epc6[thread];
	`CSR_EPC7:		dat = epc7[thread];
`else
	`CSR_IM_STACK:	dat = im_stack;
	`CSR_ODL_STACK:	dat = {16'h0,dl_stack,16'h0,ol_stack};
	`CSR_PL_STACK:	dat = pl_stack;
	`CSR_RS_STACK:	dat = rs_stack;
	`CSR_STATUS:	dat = mstatus[63:0];
	`CSR_EPC0:		dat = epc0;
	`CSR_EPC1:		dat = epc1;
	`CSR_EPC2:		dat = epc2;
	`CSR_EPC3:		dat = epc3;
	`CSR_EPC4:		dat = epc4;
	`CSR_EPC5:		dat = epc5;
	`CSR_EPC6:		dat = epc6;
	`CSR_EPC7:		dat = epc7;
`endif
	`CSR_CODEBUF:	dat = codebuf[csrno[5:0]];
`ifdef SUPPORT_BBMS
	`CSR_TB:		dat = tb;
	`CSR_CBL:		dat = cbl;
	`CSR_CBU:		dat = cbu;
	`CSR_RO:		dat = ro;
	`CSR_DBL:		dat = dbl;
	`CSR_DBU:		dat = dbu;
	`CSR_SBL:		dat = sbl;
	`CSR_SBU:		dat = sbu;
	`CSR_ENU:		dat = en;
`endif
	`CSR_Q_CTR:		dat = iq_ctr;
	`CSR_BM_CTR:	dat = bm_ctr;
	`CSR_ICL_CTR:	dat = icl_ctr;
	`CSR_IRQ_CTR:	dat = irq_ctr;
	`CSR_TIME:		dat = wc_times;
	`CSR_INFO:
		case(csrno[3:0])
		4'd0:	dat = "Finitron";	// manufacturer
		4'd1:	dat = " ";
		4'd2:	dat = "64 bit ";	// CPU class
		4'd3:	dat = " ";
		4'd4:	dat = "FT64 ";		// Name
		4'd5:	dat = " ";
		4'd6:	dat = 64'd1;		// model #
		4'd7:	dat = 64'd1;		// serial number
		4'd8:	dat = {32'd16384,32'd16384};	// cache sizes instruction,data
		4'd9:	dat = 64'd0;
		default:	dat = 64'd0;
		endcase
	default:
		begin
			$display("Unsupported CSR:%h",csrno[10:0]);
			dat = 64'hEEEEEEEEEEEEEEEE;
		end
	endcase
	else
		// Insufficient operating level: reads return zero.
		dat = 64'h0;
end
endtask

// ----------------------------------------------------------------------------
// write_csr
//
// Updates the control and status register selected by `csrno`.
//
// Parameters:
//   csrno  - [13:12] selects the operation: 1 = write (CSRRW), 2 = set bits
//            (CSRRS), 3 = clear bits (CSRRC); any other value does nothing.
//            [11:10] is compared against the current operating level; nothing
//            is written unless csrno[11:10] >= ol. [9:0] selects the register.
//   dat    - value to write, or the mask of bits to set / clear.
//   thread - selects the per-thread copy of a register in the SUPPORT_SMT
//            build and forms part of the badaddr/bad_instr/cause index.
// ----------------------------------------------------------------------------
task write_csr;
input [13:0] csrno;
input [63:0] dat;
input thread;
begin
`ifdef SUPPORT_SMT
	if (csrno[11:10] >= ol[thread])
`else
	if (csrno[11:10] >= ol)
`endif
	case(csrno[13:12])
	2'd1:	// CSRRW
		casez(csrno[9:0])
		`CSR_CR0:		cr0 <= dat;
		`CSR_PCR:		pcr <= dat[31:0];
		`CSR_PCR2:		pcr2 <= dat;
		`CSR_PMR:
			// The low pmr bits are never allowed to be written as all zero;
			// how many bits are guarded depends on `NUM_IDU.
			case(`NUM_IDU)
			0,1:	pmr[0] <= 1'b1;
			2:
				begin
					if (dat[1:0]==2'b00)
						pmr[1:0] <= 2'b01;
					else
						pmr[1:0] <= dat[1:0];
					pmr[63:2] <= dat[63:2];
				end
			3:
				begin
					if (dat[2:0]==3'b000)
						pmr[2:0] <= 3'b001;
					else
						pmr[2:0] <= dat[2:0];
					pmr[63:3] <= dat[63:3];
				end
			default:	pmr[0] <= 1'b1;
			endcase
		`CSR_WBRCD:		wbrcd <= dat;
		`CSR_SEMA:		sema <= dat;
		`CSR_KEYS:		keys <= dat;
		`CSR_TCB:		tcb <= dat;
		`CSR_FSTAT:		fpu_csr[37:32] <= dat[37:32];
		`CSR_BADADR:	badaddr[{thread,csrno[11:10]}] <= dat;
		`CSR_BADINSTR:	bad_instr[{thread,csrno[11:10]}] <= dat;
		`CSR_CAUSE:		cause[{thread,csrno[11:10]}] <= dat[15:0];
`ifdef SUPPORT_DBG
		`CSR_DBAD0:		dbg_adr0 <= dat[AMSB:0];
		`CSR_DBAD1:		dbg_adr1 <= dat[AMSB:0];
		`CSR_DBAD2:		dbg_adr2 <= dat[AMSB:0];
		`CSR_DBAD3:		dbg_adr3 <= dat[AMSB:0];
		`CSR_DBCTRL:	dbg_ctrl <= dat;
`endif
		`CSR_CAS:		cas <= dat;
		`CSR_TVEC:		tvec[csrno[2:0]] <= dat[31:0];
`ifdef SUPPORT_SMT
		`CSR_IM_STACK:	im_stack[thread] <= dat[31:0];
		`CSR_ODL_STACK:
			begin
				ol_stack[thread] <= dat[15:0];
				// dl_stack occupies bits [47:32] of this CSR, matching the
				// layout returned by read_csr and the non-SMT write below.
				dl_stack[thread] <= dat[47:32];
			end
		`CSR_PL_STACK:	pl_stack[thread] <= dat;
		`CSR_RS_STACK:	rs_stack[thread] <= dat;
		`CSR_STATUS:	mstatus[thread][63:0] <= dat;
		`CSR_EPC0:		epc0[thread] <= dat;
		`CSR_EPC1:		epc1[thread] <= dat;
		`CSR_EPC2:		epc2[thread] <= dat;
		`CSR_EPC3:		epc3[thread] <= dat;
		`CSR_EPC4:		epc4[thread] <= dat;
		`CSR_EPC5:		epc5[thread] <= dat;
		`CSR_EPC6:		epc6[thread] <= dat;
		`CSR_EPC7:		epc7[thread] <= dat;
`else
		`CSR_IM_STACK:	im_stack <= dat[31:0];
		`CSR_ODL_STACK:
			begin
				ol_stack <= dat[15:0];
				dl_stack <= dat[47:32];
			end
		`CSR_PL_STACK:	pl_stack <= dat;
		`CSR_RS_STACK:	rs_stack <= dat;
		`CSR_STATUS:	mstatus[63:0] <= dat;
		`CSR_EPC0:		epc0 <= dat;
		`CSR_EPC1:		epc1 <= dat;
		`CSR_EPC2:		epc2 <= dat;
		`CSR_EPC3:		epc3 <= dat;
		`CSR_EPC4:		epc4 <= dat;
		`CSR_EPC5:		epc5 <= dat;
		`CSR_EPC6:		epc6 <= dat;
		`CSR_EPC7:		epc7 <= dat;
`endif
`ifdef SUPPORT_BBMS
		`CSR_TB:		prg_base[brgs] <= dat;
		`CSR_CBL:		cl_barrier[brgs] <= dat;
		`CSR_CBU:		cu_barrier[brgs] <= dat;
		`CSR_RO:		ro_barrier[brgs] <= dat;
		`CSR_DBL:		dl_barrier[brgs] <= dat;
		`CSR_DBU:		du_barrier[brgs] <= dat;
		`CSR_SBL:		sl_barrier[brgs] <= dat;
		`CSR_SBU:		su_barrier[brgs] <= dat;
		`CSR_ENU:		en_barrier[brgs] <= dat;
`endif
		`CSR_TIME:
			begin
				ld_time <= 6'h3f;
				wc_time_dat <= dat;
			end
		`CSR_CODEBUF:	codebuf[csrno[5:0]] <= dat;
		default:	;
		endcase
	2'd2:	// CSRRS
		case(csrno[9:0])
		`CSR_CR0:		cr0 <= cr0 | dat;
		`CSR_PCR:		pcr[31:0] <= pcr[31:0] | dat[31:0];
		`CSR_PCR2:		pcr2 <= pcr2 | dat;
		`CSR_PMR:		pmr <= pmr | dat;
		`CSR_WBRCD:		wbrcd <= wbrcd | dat;
`ifdef SUPPORT_DBG
		`CSR_DBCTRL:	dbg_ctrl <= dbg_ctrl | dat;
`endif
		`CSR_SEMA:		sema <= sema | dat;
`ifdef SUPPORT_SMT
		`CSR_STATUS:	mstatus[thread][63:0] <= mstatus[thread][63:0] | dat;
`else
		`CSR_STATUS:	mstatus[63:0] <= mstatus[63:0] | dat;
`endif
		default:	;
		endcase
	2'd3:	// CSRRC
		case(csrno[9:0])
		`CSR_CR0:		cr0 <= cr0 & ~dat;
		`CSR_PCR:		pcr <= pcr & ~dat;
		`CSR_PCR2:		pcr2 <= pcr2 & ~dat;
		`CSR_PMR:
			begin
				// Clearing both low bits at once is redirected to 2'b01.
				if (dat[1:0]==2'b11)
					pmr[1:0] <= 2'b01;
				else
					pmr[1:0] <= pmr[1:0] & ~dat[1:0];
				pmr[63:2] <= pmr[63:2] & ~dat[63:2];
			end
		`CSR_WBRCD:		wbrcd <= wbrcd & ~dat;
`ifdef SUPPORT_DBG
		`CSR_DBCTRL:	dbg_ctrl <= dbg_ctrl & ~dat;
`endif
		`CSR_SEMA:		sema <= sema & ~dat;
`ifdef SUPPORT_SMT
		`CSR_STATUS:	mstatus[thread][63:0] <= mstatus[thread][63:0] & ~dat;
`else
		`CSR_STATUS:	mstatus[63:0] <= mstatus[63:0] & ~dat;
`endif
		default:	;
		endcase
	default:	;
	endcase
end
endtask

// ----------------------------------------------------------------------------
// tDram0Issue
//
// Loads memory channel 0 from issue queue entry `n` and moves the entry to
// IQS_MEM. Nothing happens unless the entry is currently in IQS_AGEN.
// ----------------------------------------------------------------------------
task tDram0Issue;
input [`QBITSP1] n;
begin
	if (iqentry_state[n]==IQS_AGEN) begin
//		dramA_v <= `INV;
		dram0 		<= `DRAMSLOT_BUSY;
		dram0_id 	<= { 1'b1, n[`QBITS] };
		dram0_instr <= iqentry_instr[n];
		dram0_rmw  <= iqentry_rmw[n];
		dram0_preload <= iqentry_preload[n];
		dram0_tgt 	<= iqentry_tgt[n];
		// The data operand comes from a0 when both imm and push are set,
		// otherwise from a2.
		if (iqentry_imm[n] & iqentry_push[n])
			dram0_data <= iqentry_a0[n];
		else
			dram0_data <= iqentry_a2[n];
		dram0_addr	<= iqentry_ma[n];
		// Marked uncached for addresses with [31:20]==12'hFFD, when dce is
		// low, or when the entry has loadv set.
		dram0_unc   <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
		dram0_memsize <= iqentry_memsz[n];
		dram0_load <= iqentry_load[n];
		dram0_loadseg <= iqentry_loadseg[n];
		dram0_store <= iqentry_store[n];
		// Accesses based on register 30 or 31 use ol; all others use dl.
`ifdef SUPPORT_SMT
		dram0_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
`else
		dram0_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl;
`endif
		// Once the memory op is issued reset the a1_v flag.
		// This will cause the a1 bus to look for new data from memory (a1_s is pointed to a memory bus)
		// This is used for the load and compare instructions.
		// must reset the a1 source too.
		//iqentry_a1_v[n] <= `INV;
		iqentry_state[n] <= IQS_MEM;
	end
end
endtask

// ----------------------------------------------------------------------------
// tDram1Issue
//
// Loads memory channel 1 from issue queue entry `n` and moves the entry to
// IQS_MEM. Nothing happens unless the entry is currently in IQS_AGEN.
// ----------------------------------------------------------------------------
task tDram1Issue;
input [`QBITSP1] n;
begin
	if (iqentry_state[n]==IQS_AGEN) begin
//		dramB_v <= `INV;
		dram1 		<= `DRAMSLOT_BUSY;
		dram1_id 	<= { 1'b1, n[`QBITS] };
		dram1_instr <= iqentry_instr[n];
		dram1_rmw  <= iqentry_rmw[n];
		dram1_preload <= iqentry_preload[n];
		dram1_tgt 	<= iqentry_tgt[n];
		// The data operand comes from a0 when both imm and push are set,
		// otherwise from a2.
		if (iqentry_imm[n] & iqentry_push[n])
			dram1_data <= iqentry_a0[n];
		else
			dram1_data <= iqentry_a2[n];
		dram1_addr	<= iqentry_ma[n];
//		if (ol[iqentry_thrd[n]]==`OL_USER)
//			dram1_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
//		else
		dram1_unc   <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
		dram1_memsize <= iqentry_memsz[n];
		dram1_load <= iqentry_load[n];
		dram1_loadseg <= iqentry_loadseg[n];
		dram1_store <= iqentry_store[n];
		// Accesses based on register 30 or 31 use ol; all others use dl.
`ifdef SUPPORT_SMT
		dram1_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
`else
		dram1_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl;
`endif
		//iqentry_a1_v[n] <= `INV;
		iqentry_state[n] <= IQS_MEM;
	end
end
endtask

// ----------------------------------------------------------------------------
// tDram2Issue
//
// Loads memory channel 2 from issue queue entry `n` and moves the entry to
// IQS_MEM. Nothing happens unless the entry is currently in IQS_AGEN.
// ----------------------------------------------------------------------------
task tDram2Issue;
input [`QBITSP1] n;
begin
	if (iqentry_state[n]==IQS_AGEN) begin
//		dramC_v <= `INV;
		dram2 		<= `DRAMSLOT_BUSY;
		dram2_id 	<= { 1'b1, n[`QBITS] };
		dram2_instr <= iqentry_instr[n];
		dram2_rmw  <= iqentry_rmw[n];
		dram2_preload <= iqentry_preload[n];
		dram2_tgt 	<= iqentry_tgt[n];
		// The data operand comes from a0 when both imm and push are set,
		// otherwise from a2.
		if (iqentry_imm[n] & iqentry_push[n])
			dram2_data <= iqentry_a0[n];
		else
			dram2_data <= iqentry_a2[n];
		dram2_addr	<= iqentry_ma[n];
//		if (ol[iqentry_thrd[n]]==`OL_USER)
//			dram2_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0};
//		else
		dram2_unc   <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n];
		dram2_memsize <= iqentry_memsz[n];
		dram2_load <= iqentry_load[n];
		dram2_loadseg <= iqentry_loadseg[n];
		dram2_store <= iqentry_store[n];
		// Accesses based on register 30 or 31 use ol; all others use dl.
`ifdef SUPPORT_SMT
		dram2_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]];
`else
		dram2_ol   <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl;
`endif
		//iqentry_a1_v[n] <= `INV;
		iqentry_state[n] <= IQS_MEM;
	end
end
endtask

// ----------------------------------------------------------------------------
// wb_nack
//
// Drives the bus control signals to their inactive values: cycle type and
// burst type cleared, cyc/stb/we low and all byte selects deasserted.
// ----------------------------------------------------------------------------
task wb_nack;
begin
	cti_o <= 3'b000;
	bte_o <= 2'b00;
	cyc <= `LOW;
	stb_o <= `LOW;
	we <= `LOW;
	sel_o <= 8'h00;
//	vadr <= 32'hCCCCCCCC;
end
endtask

endmodule

// ----------------------------------------------------------------------------
// decoder5
//
// 5-to-31 one-hot decoder. out[k] is set when num == k for k = 1..31; num == 0
// produces all zeros (there is no out[0]). Written as a shift plus slice, the
// same form as decoder6/7/8 below, which gives the identical truth table to a
// 32-entry case statement without needing a procedural block.
// ----------------------------------------------------------------------------
module decoder5 (num, out);
input [4:0] num;
output [31:1] out;

wire [31:0] out1;

assign out1 = 32'd1 << num;
assign out = out1[31:1];

endmodule

// 6-to-63 one-hot decoder; num == 0 produces all zeros.
module decoder6 (num, out);
input [5:0] num;
output [63:1] out;

wire [63:0] out1;

assign out1 = 64'd1 << num;
assign out = out1[63:1];

endmodule

// 7-to-127 one-hot decoder; num == 0 produces all zeros.
module decoder7 (num, out);
input [6:0] num;
output [127:1] out;

wire [127:0] out1;

assign out1 = 128'd1 << num;
assign out = out1[127:1];

endmodule

// 8-to-255 one-hot decoder; num == 0 produces all zeros.
module decoder8 (num, out);
input [7:0] num;
output [255:1] out;

wire [255:0] out1;

assign out1 = 256'd1 << num;
assign out = out1[255:1];

endmodule