// Source: https://opencores.org/ocsvn/riscv_vhdl/riscv_vhdl/trunk
// Subversion repository: riscv_vhdl
// Path: /riscv_vhdl/trunk/rtl/patches/rocket.scala (Rev 5)
// See LICENSE for license details.

package rocket

import Chisel._
import junctions._
import uncore._
import Util._
import cde.{Parameters, Field}

// Configuration keys for the core. Most are looked up through `Parameters`
// in `HasCoreParameters` below.
case object UseFPU extends Field[Boolean]
case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
case object UsePerfCounters extends Field[Boolean]
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object FastMulDiv extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
case object MtvecInit extends Field[BigInt]

/** Core-wide constants derived from the implicit `Parameters` instance `p`.
  *
  * Mixed into `CoreModule` and `CoreBundle` so that modules and bundles share
  * the same derived values.
  */
trait HasCoreParameters extends HasAddrMapParameters {
  implicit val p: Parameters
  val xLen = p(XLen)
  val usingVM = p(UseVM)
  val usingFPU = p(UseFPU)
  val usingFDivSqrt = p(FDivSqrt)
  val usingRoCC = !p(BuildRoCC).isEmpty
  val usingFastMulDiv = p(FastMulDiv)
  val fastLoadWord = p(FastLoadWord)
  val fastLoadByte = p(FastLoadByte)

  val retireWidth = p(RetireWidth)
  val fetchWidth = p(FetchWidth)
  val coreInstBits = p(CoreInstBits)
  val coreInstBytes = coreInstBits/8
  val coreDataBits = xLen
  val coreDataBytes = coreDataBits/8
  val coreDCacheReqTagBits = 7 + (2 + (if(!usingRoCC) 0 else 1))
  val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits
  val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt
  val mmioBase = p(MMIOBase)
  val nCustomMrwCsrs = p(NCustomMRWCSRs)
  val roccCsrs = if (p(BuildRoCC).isEmpty) Nil else p(BuildRoCC).flatMap(_.csrs)
  val nRoccCsrs = p(RoccNCSRs)
  val nCores = p(HtifKey).nCores
  val mtvecInit = p(MtvecInit)
  val startAddr = mtvecInit + 0x100

  // Print out log of committed instructions and their writeback values.
  // Requires post-processing due to out-of-order writebacks.
  val enableCommitLog = false
  val usingPerfCounters = p(UsePerfCounters)

  // Fast byte loads are only allowed when fast word loads are enabled too.
  if (fastLoadByte) require(fastLoadWord)
}

/** Base class for modules that need the derived core parameters. */
abstract class CoreModule(implicit val p: Parameters) extends Module
  with HasCoreParameters

/** Base class for bundles that need the derived core parameters. */
abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasCoreParameters

/** Register file backed by a `Mem` of `n` entries, each `w` bits wide.
  *
  * All `read` calls must be made before the first `write` call: `write`
  * clears `canRead`, and `read` requires it. Each `write` also forwards its
  * data onto every previously created read wire whose address matches.
  *
  * @param n    number of memory entries
  * @param w    width of each entry in bits
  * @param zero when true, reads of address 0 return 0 instead of the memory contents
  */
class RegFile(n: Int, w: Int, zero: Boolean = false) {
  private val rf = Mem(n, UInt(width = w))
  // The memory is indexed with the bitwise complement of the low address bits.
  private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0))
  // (address, data wire) pairs of every read port created so far.
  private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
  private var canRead = true

  /** Creates a read port for `addr` and returns its data wire. */
  def read(addr: UInt) = {
    require(canRead)
    reads += addr -> Wire(UInt())
    reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr))
    reads.last._2
  }

  /** Writes `data` to `addr` (ignored for address 0) and bypasses it to matching read ports. */
  def write(addr: UInt, data: UInt) = {
    canRead = false
    when (addr =/= UInt(0)) {
      access(addr) := data
      for ((raddr, rdata) <- reads)
        when (addr === raddr) { rdata := data }
    }
  }
}

/** Immediate generator: assembles a signed immediate from instruction bits. */
object ImmGen {
  /** Builds the immediate selected by `sel` (one of the `IMM_*` constants) from `inst`.
    *
    * The result is the concatenation of the per-field pieces below, as an `SInt`.
    */
  def apply(sel: UInt, inst: UInt) = {
    val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt)
    val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign)
    val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).toSInt)
    val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
              Mux(sel === IMM_UJ, inst(20).toSInt,
              Mux(sel === IMM_SB, inst(7).toSInt, sign)))
    val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
    val b4_1 = Mux(sel === IMM_U, Bits(0),
               Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
               Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
    val b0 = Mux(sel === IMM_S, inst(7),
             Mux(sel === IMM_I, inst(20),
             Mux(sel === IMM_Z, inst(15), Bits(0))))

    Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt
  }
}

/** The Rocket integer pipeline: decode, execute, memory and writeback stages.
  *
  * Connects to the host interface, instruction frontend, data cache, page
  * table walker, FPU and RoCC interface through `io`.
  *
  * The sections marked `//SH` differ from the plain register-file write path
  * that is kept below as commented-out code: when a long-latency write
  * (`ll_wen`) and a writeback-stage write (`wb_wen`) occur together, the
  * writeback-stage address/data are latched in `reg_ll_*_postponed` and decode
  * is stalled while a postponed write is pending.
  */
class Rocket(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val host = new HtifIO
    val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
    val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
    val ptw = new DatapathPTWIO().flip
    val fpu = new FPUIO().flip
    val rocc = new RoCCInterface().flip
  }

  // Decode table is extended with the optional FPU / FDivSqrt / RoCC tables.
  var decode_table = XDecode.table
  if (usingFPU) decode_table ++= FDecode.table
  if (usingFPU && usingFDivSqrt) decode_table ++= FDivSqrtDecode.table
  if (usingRoCC) decode_table ++= RoCCDecode.table

  val ex_ctrl = Reg(new IntCtrlSigs)
  val mem_ctrl = Reg(new IntCtrlSigs)
  val wb_ctrl = Reg(new IntCtrlSigs)

  val ex_reg_xcpt_interrupt = Reg(Bool())
  val ex_reg_valid = Reg(Bool())
  val ex_reg_btb_hit = Reg(Bool())
  val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
  val ex_reg_xcpt = Reg(Bool())
  val ex_reg_flush_pipe = Reg(Bool())
  val ex_reg_load_use = Reg(Bool())
  val ex_reg_cause = Reg(UInt())
  val ex_reg_pc = Reg(UInt())
  val ex_reg_inst = Reg(Bits())

  val mem_reg_xcpt_interrupt = Reg(Bool())
  val mem_reg_valid = Reg(Bool())
  val mem_reg_btb_hit = Reg(Bool())
  val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
  val mem_reg_xcpt = Reg(Bool())
  val mem_reg_replay = Reg(Bool())
  val mem_reg_flush_pipe = Reg(Bool())
  val mem_reg_cause = Reg(UInt())
  val mem_reg_slow_bypass = Reg(Bool())
  val mem_reg_pc = Reg(UInt())
  val mem_reg_inst = Reg(Bits())
  val mem_reg_wdata = Reg(Bits())
  val mem_reg_rs2 = Reg(Bits())
  val take_pc_mem = Wire(Bool())

  val wb_reg_valid = Reg(Bool())
  val wb_reg_xcpt = Reg(Bool())
  val wb_reg_replay = Reg(Bool())
  val wb_reg_cause = Reg(UInt())
  val wb_reg_rocc_pending = Reg(init=Bool(false))
  val wb_reg_pc = Reg(UInt())
  val wb_reg_inst = Reg(Bits())
  val wb_reg_wdata = Reg(Bits())
  val wb_reg_rs2 = Reg(Bits())
  val take_pc_wb = Wire(Bool())

  //SH
  // Holding registers for a writeback-stage write that collided with a
  // long-latency write (see stall_wen below).
  val reg_ll_wdata_postponed = Reg(Bits())
  val reg_ll_waddr_postponed = Reg(Bits())
  val reg_ll_wen_postponed = Reg(init = Bool(false))

  val take_pc_mem_wb = take_pc_wb || take_pc_mem
  val take_pc = take_pc_mem_wb

  // decode stage
  val id_pc = io.imem.resp.bits.pc
  val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1)
  val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
  val id_raddr3 = id_inst(31,27)
  val id_raddr2 = id_inst(24,20)
  val id_raddr1 = id_inst(19,15)
  val id_waddr = id_inst(11,7)
  val id_load_use = Wire(Bool())
  val id_reg_fence = Reg(init=Bool(false))
  val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
  val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
  val rf = new RegFile(31, xLen)
  val id_rs = id_raddr.map(rf.read _)
  val ctrl_killd = Wire(Bool())

  val csr = Module(new CSRFile)
  val id_csr_en = id_ctrl.csr =/= CSR.N
  val id_system_insn = id_ctrl.csr === CSR.I
  val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
  val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
  val id_csr_addr = id_inst(31,20)
  // this is overly conservative
  val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
  val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
  val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))

  val id_illegal_insn = !id_ctrl.legal ||
    id_ctrl.fp && !csr.io.status.fs.orR ||
    id_ctrl.rocc && !csr.io.status.xs.orR
  // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
  val id_amo_aq = id_inst(26)
  val id_amo_rl = id_inst(25)
  val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
  val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
  val id_rocc_busy = Bool(usingRoCC) &&
    (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
     mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
  id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
  val id_do_fence = id_rocc_busy && id_ctrl.fence ||
    id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)

  val (id_xcpt, id_cause) = checkExceptions(List(
    (csr.io.interrupt, csr.io.interrupt_cause),
    (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
    (id_illegal_insn, UInt(Causes.illegal_instruction))))

  val dcache_bypass_data =
    if (fastLoadByte) io.dmem.resp.bits.data
    else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
    else wb_reg_wdata

  // detect bypass opportunities
  val ex_waddr = ex_reg_inst(11,7)
  val mem_waddr = mem_reg_inst(11,7)
  val wb_waddr = wb_reg_inst(11,7)
  val bypass_sources = IndexedSeq(
    (Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
    (ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
    (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
    (mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
  val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))

  // execute stage
  val bypass_mux = Vec(bypass_sources.map(_._3))
  val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
  val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt()))
  val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
  // When bypassing, ex_reg_rs_lsb holds the bypass source index; otherwise
  // lsb/msb together hold the register value read in decode.
  val ex_rs = for (i <- 0 until id_raddr.size) yield
    Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
  val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
  val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
    A1_RS1 -> ex_rs(0).toSInt,
    A1_PC -> ex_reg_pc.toSInt))
  val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
    A2_RS2 -> ex_rs(1).toSInt,
    A2_IMM -> ex_imm,
    A2_FOUR -> SInt(4)))

  val alu = Module(new ALU)
  alu.io.dw := ex_ctrl.alu_dw
  alu.io.fn := ex_ctrl.alu_fn
  alu.io.in2 := ex_op2.toUInt
  alu.io.in1 := ex_op1.toUInt

  // multiplier and divider
  val div = Module(new MulDiv(width = xLen,
                              unroll = if(usingFastMulDiv) 8 else 1,
                              earlyOut = usingFastMulDiv))
  div.io.req.valid := ex_reg_valid && ex_ctrl.div
  div.io.req.bits.dw := ex_ctrl.alu_dw
  div.io.req.bits.fn := ex_ctrl.alu_fn
  div.io.req.bits.in1 := ex_rs(0)
  div.io.req.bits.in2 := ex_rs(1)
  div.io.req.bits.tag := ex_waddr

  ex_reg_valid := !ctrl_killd
  ex_reg_xcpt := !ctrl_killd && id_xcpt
  ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid
  when (id_xcpt) { ex_reg_cause := id_cause }

  when (!ctrl_killd) {
    ex_ctrl := id_ctrl
    ex_ctrl.csr := id_csr
    ex_reg_btb_hit := io.imem.btb_resp.valid
    when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
    ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
    ex_reg_load_use := id_load_use

    for (i <- 0 until id_raddr.size) {
      val do_bypass = id_bypass_src(i).reduce(_||_)
      val bypass_src = PriorityEncoder(id_bypass_src(i))
      ex_reg_rs_bypass(i) := do_bypass
      ex_reg_rs_lsb(i) := bypass_src
      when (id_ren(i) && !do_bypass) {
        ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
        ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
      }
    }
  }
  when (!ctrl_killd || csr.io.interrupt) {
    ex_reg_inst := id_inst
    ex_reg_pc := id_pc
  }

  // replay inst in ex stage?
  val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
  val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
                             ex_ctrl.div && !div.io.req.ready
  val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
  val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
  val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
  // detect 2-cycle load-use delay for LB/LH/SC
  val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)

  val (ex_xcpt, ex_cause) = checkExceptions(List(
    (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
    (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))

  // memory stage
  val mem_br_taken = mem_reg_wdata(0)
  val mem_br_target = mem_reg_pc.toSInt +
    Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
    Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4)))
  val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
  val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt
  val mem_wrong_npc = mem_npc =/= ex_reg_pc || !ex_reg_valid
  val mem_npc_misaligned = mem_npc(1)
  val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal)
  val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
  take_pc_mem := want_take_pc_mem && !mem_npc_misaligned

  mem_reg_valid := !ctrl_killx
  mem_reg_replay := !take_pc_mem_wb && replay_ex
  mem_reg_xcpt := !ctrl_killx && ex_xcpt
  mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
  when (ex_xcpt) { mem_reg_cause := ex_cause }

  when (ex_reg_valid || ex_reg_xcpt_interrupt) {
    mem_ctrl := ex_ctrl
    mem_reg_btb_hit := ex_reg_btb_hit
    when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
    mem_reg_flush_pipe := ex_reg_flush_pipe
    mem_reg_slow_bypass := ex_slow_bypass

    mem_reg_inst := ex_reg_inst
    mem_reg_pc := ex_reg_pc
    mem_reg_wdata := alu.io.out
    when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
      mem_reg_rs2 := ex_rs(1)
    }
  }

  val (mem_xcpt, mem_cause) = checkExceptions(List(
    (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
    (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
    (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
    (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
    (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
    (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))

  val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port
  val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
  val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
  val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
  div.io.kill := killm_common && Reg(next = div.io.req.fire())
  val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem

  // writeback stage
  wb_reg_valid := !ctrl_killm
  wb_reg_replay := replay_mem && !take_pc_wb
  wb_reg_xcpt := mem_xcpt && !take_pc_wb
  when (mem_xcpt) { wb_reg_cause := mem_cause }
  when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
    wb_ctrl := mem_ctrl
    wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
    when (mem_ctrl.rocc) {
      wb_reg_rs2 := mem_reg_rs2
    }
    wb_reg_inst := mem_reg_inst
    wb_reg_pc := mem_reg_pc
  }

  val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
  val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay
  val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
  val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
  val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
  take_pc_wb := replay_wb || wb_xcpt || csr.io.eret

  when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready }
  when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) }

  // writeback arbitration
  val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
  val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
  val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1)
  val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
  val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data

  // Long-latency (ll_*) write port: defaults to the divider response and is
  // overridden by a RoCC response and then by a replayed integer D$ response.
  div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
  val ll_wdata = Wire(init = div.io.resp.bits.data)
  val ll_waddr = Wire(init = div.io.resp.bits.tag)
  val ll_wen = Wire(init = div.io.resp.fire())
  if (usingRoCC) {
    io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
    when (io.rocc.resp.fire()) {
      div.io.resp.ready := Bool(false)
      ll_wdata := io.rocc.resp.bits.data
      ll_waddr := io.rocc.resp.bits.rd
      ll_wen := Bool(true)
    }
  }
  when (dmem_resp_replay && dmem_resp_xpu) {
    div.io.resp.ready := Bool(false)
    if (usingRoCC)
      io.rocc.resp.ready := Bool(false)
    ll_waddr := dmem_resp_waddr
    ll_wen := Bool(true)
  }

  val wb_valid = wb_reg_valid && !replay_wb && !csr.io.csr_xcpt
  val wb_wen = wb_valid && wb_ctrl.wxd

  //SH
  // A long-latency write and a writeback-stage write in the same cycle:
  // latch the writeback-stage address/data so it can be written later.
  val stall_wen = ll_wen && wb_wen// && (wb_waddr === UInt(0x1))
  when (stall_wen) {
    reg_ll_wen_postponed := Bool(true)
    reg_ll_waddr_postponed := wb_waddr
    reg_ll_wdata_postponed := wb_reg_wdata
  }
  // Declared after the latch above, so this clear wins when both conditions hold.
  when (!wb_wen || (!ll_wen && wb_wen && wb_waddr === reg_ll_waddr_postponed)) {
    reg_ll_wen_postponed := Bool(false)
    reg_ll_waddr_postponed := UInt(0)
    reg_ll_wdata_postponed := UInt(0)
  }

  val rf_wen = wb_wen || ll_wen || reg_ll_wen_postponed
  val rf_waddr = Mux(ll_wen, ll_waddr,
                 Mux(wb_wen, wb_waddr,
                 reg_ll_waddr_postponed))
  val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
                 Mux(ll_wen, ll_wdata,
                 Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
                 Mux(wb_wen, wb_reg_wdata,
                 reg_ll_wdata_postponed))))
  //val rf_wen = wb_wen || ll_wen
  //val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
  //val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
  //               Mux(ll_wen, ll_wdata,
  //               Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata,
  //               wb_reg_wdata)))
  when (rf_wen) { rf.write(rf_waddr, rf_wdata) }

  // hook up control/status regfile
  csr.io.exception := wb_reg_xcpt
  csr.io.cause := wb_reg_cause
  csr.io.retire := wb_valid
  io.host <> csr.io.host
  io.fpu.fcsr_rm := csr.io.fcsr_rm
  csr.io.fcsr_flags := io.fpu.fcsr_flags
  csr.io.rocc <> io.rocc
  csr.io.pc := wb_reg_pc
  csr.io.uarch_counters.foreach(_ := Bool(false))
  io.ptw.ptbr := csr.io.ptbr
  io.ptw.invalidate := csr.io.fatc
  io.ptw.status := csr.io.status
  csr.io.rw.addr := wb_reg_inst(31,20)
  csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
  csr.io.rw.wdata := wb_reg_wdata

  val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
                           (id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
                           (id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
  val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
                              (io.fpu.dec.ren2, id_raddr2),
                              (io.fpu.dec.ren3, id_raddr3),
                              (io.fpu.dec.wen, id_waddr))

  val sboard = new Scoreboard(32)
  sboard.clear(ll_wen, ll_waddr)
  val id_sboard_hazard = checkHazards(hazard_targets, sboard.readBypassed _)
  sboard.set(wb_set_sboard && wb_wen, wb_waddr)

  // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
  val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
  val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
  val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
  val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)

  // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
  val mem_mem_cmd_bh =
    if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
    else Bool(true)
  val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
  val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
  val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
  val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
  id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem

  // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
  val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
  val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
  val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)

  val id_stall_fpu = if (usingFPU) {
    val fp_sboard = new Scoreboard(32)
    fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
    fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
    fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)

    id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
  } else Bool(false)

  val ctrl_stalld =
    id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
    id_ctrl.fp && id_stall_fpu ||
    id_ctrl.mem && !io.dmem.req.ready ||
    Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
    id_do_fence ||
    csr.io.csr_stall ||
    stall_wen || reg_ll_wen_postponed //SH
  ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt

  io.imem.req.valid := take_pc
  io.imem.req.bits.pc :=
    Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
    Mux(replay_wb, wb_reg_pc, // replay
    mem_npc)).toUInt // mispredicted branch
  io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i
  io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt

  io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb
  io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
  io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
  io.imem.btb_update.bits.pc := mem_reg_pc
  io.imem.btb_update.bits.target := io.imem.req.bits.pc
  io.imem.btb_update.bits.br_pc := mem_reg_pc
  io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
  io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp

  io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
  io.imem.bht_update.bits.pc := mem_reg_pc
  io.imem.bht_update.bits.taken := mem_br_taken
  io.imem.bht_update.bits.mispredict := mem_wrong_npc
  io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction

  io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
  io.imem.ras_update.bits.returnAddr := mem_int_wdata
  io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
  io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
  io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction

  io.fpu.valid := !ctrl_killd && id_ctrl.fp
  io.fpu.killx := ctrl_killx
  io.fpu.killm := killm_common
  io.fpu.inst := id_inst
  io.fpu.fromint_data := ex_rs(0)
  io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
  io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
  io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
  io.fpu.dmem_resp_tag := dmem_resp_waddr

  io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
  io.dmem.req.bits.kill := killm_common || mem_xcpt
  io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
  io.dmem.req.bits.typ := ex_ctrl.mem_type
  io.dmem.req.bits.phys := Bool(false)
  io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
  // Tag layout: {destination register, fp flag}; decoded again in dmem_resp_* above.
  io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
  io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
  require(coreDCacheReqTagBits >= 6)
  io.dmem.invalidate_lr := wb_xcpt

  io.rocc.cmd.valid := wb_rocc_val
  io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
  io.rocc.s := csr.io.status.prv.orR // should we just pass all of mstatus?
  io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
  io.rocc.cmd.bits.rs1 := wb_reg_wdata
  io.rocc.cmd.bits.rs2 := wb_reg_rs2

  if (enableCommitLog) {
    val pc = Wire(SInt(width=64))
    pc := wb_reg_pc
    val inst = wb_reg_inst
    val rd = RegNext(RegNext(RegNext(id_waddr)))
    val wfd = wb_ctrl.wfd
    val wxd = wb_ctrl.wxd
    val has_data = wb_wen && !wb_set_sboard
    val priv = csr.io.status.prv

    when (wb_valid) {
      when (wfd) {
        printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
      } .elsewhen (wxd && rd =/= UInt(0) && has_data) {
        printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
      } .elsewhen (wxd && rd =/= UInt(0) && !has_data) {
        printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
      } .otherwise {
        printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
      }
    }

    when (ll_wen && rf_waddr =/= UInt(0)) {
      printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
    }
  } else {
    printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
         io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc,
         Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
         wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
         wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
         wb_reg_inst, wb_reg_inst)
  }

  /** Returns (any condition set, cause chosen by `PriorityMux` over the list). */
  def checkExceptions(x: Seq[(Bool, UInt)]) =
    (x.map(_._1).reduce(_||_), PriorityMux(x))

  /** True when any enabled target in `targets` satisfies `cond`. */
  def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
    targets.map(h => h._1 && cond(h._2)).reduce(_||_)

  def vaSign(a0: UInt, ea: UInt) = {
    // efficient means to compress 64-bit VA into vaddrBits+1 bits
    // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
    val a = a0 >> vaddrBits-1
    val e = ea(vaddrBits,vaddrBits-1)
    Mux(a === UInt(0) || a === UInt(1), e =/= UInt(0),
    Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e.toSInt === SInt(-1),
      e(0)))
  }

  /** Bit-vector scoreboard with `n` entries held in a register.
    *
    * `set`/`clear` calls are chained through `_next` in call order, so
    * `readBypassed` observes the updates made by calls that precede it, while
    * `read` observes only the registered value.
    */
  class Scoreboard(n: Int) {
    def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
    def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
    def read(addr: UInt): Bool = r(addr)
    def readBypassed(addr: UInt): Bool = _next(addr)

    private val r = Reg(init=Bits(0, n))
    private var _next = r
    private var ens = Bool(false)
    private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
    private def update(en: Bool, update: UInt) = {
      _next = update
      ens = ens || en
      when (ens) { r := _next }
    }
  }
}
