OpenCores
URL https://opencores.org/ocsvn/raptor64/raptor64/trunk

Subversion Repositories raptor64

[/] [raptor64/] [trunk/] [rtl/] [verilog/] [Raptor64sc.v] - Rev 50

Go to most recent revision | Compare with Previous | Blame | View Log

`timescale 1ns / 1ps
// ============================================================================
//        __
//   \\__/ o\    (C) 2011-2013  Robert Finch, Stratford
//    \  __ /    All rights reserved.
//     \/_//     robfinch<remove>@opencores.org
//       ||
//
// Raptor64sc.v
//  - 64 bit CPU
//
// This source file is free software: you can redistribute it and/or modify 
// it under the terms of the GNU Lesser General Public License as published 
// by the Free Software Foundation, either version 3 of the License, or     
// (at your option) any later version.                                      
//                                                                          
// This source file is distributed in the hope that it will be useful,      
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
// GNU General Public License for more details.                             
//                                                                          
// You should have received a copy of the GNU General Public License        
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
//                                                                          
// 15848 LUT's / 3591 ff's / 48.215 MHz
// 29 Block RAMs
// ============================================================================
//
// Optional feature switches. Uncomment a line to compile the corresponding
// `ifdef-guarded logic further down in this file.
// NOTE(review): ADDRESS_RESERVATION is not tested in the part of the file
// reviewed here - confirm where it is used.
//`define ADDRESS_RESERVATION	1
//`define FLOATING_POINT		1
//`define BTB					1
//`define TLB		1
//`define SIMD		1
`define SEGMENTATION	1
 
// 64-bit reset address constant (presumably the PC value loaded on reset - confirm).
`define RESET_VECTOR	64'hFFFF_FFFF_FFFF_FFF0
 
// 9-bit exception type codes (same width as the *extype pipeline registers).
`define EX_NON			9'd000
`define EX_TRAP			9'd32	// Trap exception
`define EX_IRQ			9'd448	// base IRQ interrupt
`define EX_DBZ			9'd488	// divide by zero
`define EX_OFL			9'd489	// overflow
`define EX_UNIMP_INSN	9'd495	// unimplemented instruction
`define EX_PRIV			9'd496	// privilege violation
`define EX_TLBD			9'd506	// TLB exception - data
`define EX_TLBI			9'd507	// TLB exception - ifetch
`define EX_DBERR		9'd508	// Bus Error - load or store or I/O
`define EX_IBERR		9'd509	// Bus Error - instruction fetch
`define EX_NMI			9'd510	// non-maskable interrupt
`define EX_RST			9'd511	// Reset
 
`include "Raptor64_opcodes.v"
 
// Raptor64sc: port list, parameter constants and port declarations.
// The bus signals use WISHBONE-style names; the original comments mark
// bl_o and rsv_o as non-WISHBONE extensions.
module Raptor64sc(rst_i, clk_i, nmi_i, irq_i, irq_no, bte_o, cti_o, bl_o, iocyc_o,
	cyc_o, stb_o, ack_i, err_i, we_o, sel_o, rsv_o, adr_o, dat_i, dat_o, sys_adv, sys_adr,
	advanceI, advanceR, advanceX, advanceM1, advanceM2, advanceW, advanceT
);
// 5-bit constants; presumably values of the cache state register cstate - TODO confirm.
parameter IDLE = 5'd1;
parameter ICACT = 5'd2;
parameter ICACT1 = 5'd4;
parameter ICACT2 = 5'd5;
parameter DCIDLE = 5'd20;
parameter DCACT = 5'd21;
// NOTE(review): AMSB is not referenced in the part of the file reviewed here.
parameter AMSB = 31;
// 4-bit constants; presumably values of the processor state register - TODO confirm.
parameter RESET = 4'd0;
parameter RUN = 4'd1;
input rst_i;
input clk_i;
input nmi_i;
input irq_i;
input [8:0] irq_no;
 
output [1:0] bte_o;		// burst type
reg [1:0] bte_o;
output [2:0] cti_o;		// cycle type
reg [2:0] cti_o;
output [4:0] bl_o;		// burst length (non-WISHBONE)
reg [4:0] bl_o;
output iocyc_o;			// I/O cycle is valid
reg iocyc_o;
output cyc_o;			// cycle is valid
reg cyc_o;
output stb_o;			// data strobe
reg stb_o;
input ack_i;			// data transfer acknowledge
input err_i;			// bus error
output we_o;			// write enable
reg we_o;
output [7:0] sel_o;		// byte lane selects
reg [7:0] sel_o;
output rsv_o;			// reserve the address (non-WISHBONE)
reg rsv_o;
output [63:0] adr_o;	// address
reg [63:0] adr_o;
input [63:0] dat_i;		// data input
output [63:0] dat_o;	// data output
reg [63:0] dat_o;
 
// NOTE(review): sys_adv / sys_adr are not used in the part of the file reviewed here.
input sys_adv;
input [63:5] sys_adr;
 
// Pipeline advance signals, exported as outputs.
output advanceI;
output advanceR;
output advanceX;
output advanceM1;
output advanceM2;
output advanceW;
output advanceT;
 
// Internal state, pipeline registers and instruction field decodes.
// Register name prefixes follow the pipeline stage they belong to
// (d, d1, x, m1, m2, w, t), matching the advance* signals.
wire clk;	// CPU clock, driven by the BUFGCE instance u20
reg [3:0] state;
reg [5:0] fltctr;
wire fltdone = fltctr==6'd0;
reg bu_im;			// interrupt mask
reg im1;			// temporary interrupt mask for LM/SM
reg [7:0] ie_fuse;	// interrupt enable fuse
wire im = ~ie_fuse[7];
reg [1:0] rm;		// fp rounding mode
reg FXE;			// fp exception enable
wire KernelMode;
// 32-bit status word: bit 31 = bu_im, bit 15 = im, bit 13 = KernelMode, bit 12 = FXE.
wire [31:0] sr = {bu_im,15'd0,im,1'b0,KernelMode,FXE,2'b00,10'b0};
reg [31:0] dIR,d1IR,xIR,m1IR,m2IR,wIR;
reg [31:0] ndIR;		// next dIR
reg [63:0] pc;			// ipc
wire [63:0] pchistoric;
reg pccap;				// flag 1=capture PC history
reg [63:0] ErrorEPC;
reg [63:0] EPC [0:15];	// Exception return address
reg [63:0] IPC [0:15];	// Interrupt return address
`ifdef SEGMENTATION
// Segment bases hold address bits [63:16] only; one entry per context (16 contexts).
reg [63:16] CS [0:15];	// Code segment
reg [63:16] DS [0:15];	// Data segment
reg [63:16] SS [0:15];	// Stack segment
reg [63:16] ES [0:15];	// BSS segment
`endif
reg dStatusHWI,xStatusHWI,m1StatusHWI,m2StatusHWI;
reg dIm,xIm,m1Im,m2Im;
reg dNmi,xNmi,m1Nmi,m2Nmi,wNmi;
reg [15:0] StatusEXL;	// 1= context in exception state
reg [63:0] dpc,d1pc,xpc,m1pc,m2pc,wpc;		// PC's associated with instruction in pipeline
wire [63:0] rfoa,rfob,rfoc;		// register file outputs
wire [8:0] dRa,dRb,dRc;
reg [8:0] xRt,wRt,m1Rt,m2Rt,tRt;	// target register
reg [63:0] ea;			// effective data address
reg [4:0] cstate;		// cache state
reg dbranch_taken,xbranch_taken;	// flag: 1=branch taken
reg [63:0] mutex_gate;
reg [63:0] TBA;			// Trap Base Address
reg [8:0] dextype,d1extype,xextype,m1extype,m2extype,wextype,textype;
reg [3:0] epat [0:255];
reg [7:0] eptr;
reg [3:0] dAXC,d1AXC,xAXC,m1AXC,m2AXC,wAXC;	// context active per pipeline stage
// Current context: 0 when eptr is zero, otherwise the epat entry selected by eptr.
wire [3:0] AXC = (eptr==8'h00) ? 4'h0 : epat[eptr];
reg dtinit;			// 1=data cache tags are being initialized
reg dcache_on;		// 1= data cache is enabled
wire [63:0] cdat;	// data cache output
reg [63:32] nonICacheSeg;
reg [1:0] FPC_rm;	// fp: rounding mode
reg FPC_SL;			// result is negative (and non-zero)
reg FPC_SE;			// result is zero
reg FPC_SG;			// result is positive (and non-zero)
reg FPC_SI;			// result is infinite or NaN
reg FPC_overx;
reg fp_iop;
reg fp_ovr;
reg fp_uf;
// 32-bit FP control/status word: rounding mode in [31:30], SL/SG/SE/SI flags in [19:16].
wire [31:0] FPC = {FPC_rm,1'b0,
			9'd0,
			FPC_SL,
			FPC_SG,
			FPC_SE,
			FPC_SI,
			16'd0
			};
reg [63:0] wr_addr;
reg [31:0] insn;
reg clk_en;
reg cpu_clk_en;
reg StatusERL;		// 1= in error processing
//reg StatusEXL;		// 1= in exception processing
reg StatusHWI;		// 1= in interrupt processing
reg StatusUM;		// 1= user mode
reg [7:0] ASID;		// address space identifier (process ID)
integer n;			// shared loop index (initial blocks and the ALU MUX loop)
reg [63:13] BadVAddr;
reg [63:13] PageTableAddr;
reg [63:0] errorAddress;
 
// Instruction field decodes per stage: opcode = bits [31:25], function = bits [6:0].
wire [6:0] iOpcode = insn[31:25];
wire [6:0] iFunc = insn[6:0];
wire [5:0] iFunc6 = insn[5:0];
wire [6:0] dOpcode = dIR[31:25];
wire [6:0] dFunc = dIR[6:0];
wire [5:0] dFunc6 = dIR[5:0];
wire [6:0] xOpcode = xIR[31:25];
wire [6:0] xFunc = xIR[6:0];
wire [5:0] xFunc6 = xIR[5:0];
wire [4:0] xFunc5 = xIR[4:0];
wire [6:0] m1Opcode,m2Opcode,wOpcode;
assign m1Opcode = m1IR[31:25];
assign m2Opcode = m2IR[31:25];
assign wOpcode = wIR[31:25];
wire [6:0] m1Func,m2Func,wFunc;
assign m1Func = m1IR[6:0];
assign m2Func = m2IR[6:0];
assign wFunc = wIR[6:0];
wire [5:0] m1Func6 = m1Func[5:0];
wire [5:0] m2Func6 = m2Func[5:0];
wire [5:0] wFunc6 = wIR[5:0];
reg [63:0] m1Data,m2Data,wData,tData;
reg [63:0] m2Addr;
reg [63:0] tick;
reg [63:0] a,b,c,imm,m1b;	// execute-stage operands
wire [1:0] scale = xIR[9:8];	// shift applied to b for indexed addressing
wire [1:0] offset2 = xIR[7:6];	// small offset added for indexed addressing
reg rsf;					// reserved address flag
reg [63:5] resv_address;	// reserved address
reg dirqf,rirqf,m1irqf,m2irqf,wirqf,tirqf;
reg xirqf;
wire advanceX_edge;
wire takb;
wire advanceI,advanceR,advanceR1,advanceX,advanceM1,advanceW,advanceT;	// Pipeline advance signals
reg m1clkoff,m2clkoff,m3clkoff,m4clkoff,wclkoff;
reg dFip,d1Fip,xFip,m1Fip,m2Fip,m3Fip,m4Fip,wFip;
reg cyc1;
reg LoadNOPs;
reg m1IsLoad,m1IsStore;
reg m2IsLoad,m2IsStore;
reg wIsStore;
reg m1IsOut,m1IsIn;
 
// Returns the address four bytes past fpc, i.e. the next sequential
// 32-bit instruction. The addition wraps modulo 2^64.
function [63:0] fnIncPC;
input [63:0] fpc;
	fnIncPC = 64'd4 + fpc;
endfunction
 
// Byte-lane select mask for a memory or I/O access.
//   opcode - instruction opcode; it determines the access size
//   addr   - low three address bits, locating the datum in the 64-bit word
// Returns an 8-bit mask with one bit per byte lane. The mask for each size
// is a run of 1, 2, 4 or 8 ones shifted up to the naturally aligned
// position. Opcodes not listed leave the result unassigned.
function [7:0] fnSelect;
input [6:0] opcode;
input [2:0] addr;
case(opcode)
// 8-bit accesses: one lane, selected by addr[2:0]
`LBU,`LB,`SB,`INB,`INBU,`OUTB:
	fnSelect = 8'b00000001 << addr;
// 16-bit accesses: two lanes, selected by addr[2:1]
`LC,`LCU,`SC,`INCH,`INCU,`OUTC:
	fnSelect = 8'b00000011 << {addr[2:1],1'b0};
// 32-bit accesses: four lanes, selected by addr[2]
`LHU,`LH,`SH,`LSH,`LF,`LFP,`SF,`SFP,`SSH,`INH,`INHU,`OUTH:
	fnSelect = 8'b00001111 << {addr[2],2'b00};
// 64-bit accesses: all eight lanes
`LW,`LWR,`LM,`LFD,`LSW,`LP,`LFDP,
`SW,`SM,`SFD,`SSW,`SWC,`SP,`SFDP,`INW,`OUTW:
	fnSelect = 8'b11111111;
endcase
endfunction
 
// Sub-word views of the incoming bus data, chosen by the active byte-lane
// selects in sel_o. When sel_o does not match a pattern of the given size,
// the view takes a filler value (zero for data8, 0xDEAD patterns for
// data16 and data32).
reg [7:0] data8;
reg [15:0] data16;
reg [31:0] data32;
reg [63:0] data64;

always @(sel_o or dat_i)
begin
	// single byte lane
	case(sel_o)
	8'b00000001:	data8 <= #1 dat_i[ 7: 0];
	8'b00000010:	data8 <= #1 dat_i[15: 8];
	8'b00000100:	data8 <= #1 dat_i[23:16];
	8'b00001000:	data8 <= #1 dat_i[31:24];
	8'b00010000:	data8 <= #1 dat_i[39:32];
	8'b00100000:	data8 <= #1 dat_i[47:40];
	8'b01000000:	data8 <= #1 dat_i[55:48];
	8'b10000000:	data8 <= #1 dat_i[63:56];
	default:	data8 <= 8'h00;
	endcase

	// aligned pair of byte lanes
	case(sel_o)
	8'b00000011:	data16 <= #1 dat_i[15: 0];
	8'b00001100:	data16 <= #1 dat_i[31:16];
	8'b00110000:	data16 <= #1 dat_i[47:32];
	8'b11000000:	data16 <= #1 dat_i[63:48];
	default:	data16 <= #1 16'hDEAD;
	endcase

	// lower or upper half of the bus
	case(sel_o)
	8'b00001111:	data32 <= #1 dat_i[31: 0];
	8'b11110000:	data32 <= #1 dat_i[63:32];
	default:	data32 <= #1 32'hDEADDEAD;
	endcase

	// full width, independent of sel_o
	data64 <= #1 dat_i;
end
 
// Kernel mode is active while a hardware interrupt is being processed or
// while the execute-stage context (xAXC) is in the exception state.
assign KernelMode = StatusHWI | StatusEXL[xAXC];
 
//wire iIsLSPair = iOpcode==`SP || iOpcode==`LP || iOpcode==`SFP || iOpcode==`LFP || iOpcode==`SFDP || iOpcode==`LFDP || 
//				(iOpcode==`MEMNDX && (iFunc6==`SPX || iFunc6==`LPX || iFunc6==`SFPX || iFunc6==`LFPX || iFunc6==`SFDPX || iFunc6==`LFDPX));
//wire dIsLSPair = dOpcode==`SP || dOpcode==`LP || dOpcode==`SFP || dOpcode==`LFP || dOpcode==`SFDP || dOpcode==`LFDP ||
//				(dOpcode==`MEMNDX && (dFunc6==`SPX || dFunc6==`LPX || dFunc6==`SFPX || dFunc6==`LFPX || dFunc6==`SFDPX || dFunc6==`LFDPX));
//wire xIsLSPair = xOpcode==`SP || xOpcode==`LP || xOpcode==`SFP || xOpcode==`LFP || xOpcode==`SFDP || xOpcode==`LFDP ||
//				 (xOpcode==`MEMNDX && (xFunc6==`SPX || xFunc6==`LPX || xFunc6==`SFPX || xFunc6==`LFPX || xFunc6==`SFDPX || xFunc6==`LFDPX));
 
 
//-----------------------------------------------------------------------------
// Segmentation
//
// If the upper nybble of the address is 'F' then segmentation is not applied.
// This allows for bootstrapping and operating system use. Also when in kernel
// mode the lowest 64k of memory is unsegmented to allow easier access to 
// operating system variables.
//
// Otherwise: the CS register is always in use for code addresses.
// Which segment is used for data addresses depends on the upper nybble of
// the address.
//-----------------------------------------------------------------------------
`ifdef SEGMENTATION
wire [63:0] spc;		// segmented PC
reg [63:0] sea;			// segmented effective address

// Code addresses: an upper nybble of 'F' bypasses segmentation, otherwise
// the code segment base of the current context is added to address bits [59:16].
assign spc = pc[63:60]==4'hF ? pc : {CS[AXC][63:16] + pc[59:16],pc[15:0]};

// Data addresses. This is combinational logic, so it uses an implicit
// sensitivity list and blocking assignments. The previous explicit list
// (ea, KernelMode) omitted xAXC and the segment registers, so in simulation
// sea was not re-evaluated when the context or a segment base changed.
always @*
if (KernelMode && ea[63:16]==48'h0)
	sea = ea;			// kernel mode: the lowest 64k is unsegmented
else
	case(ea[63:60])
	4'hF:	sea = ea;	// unsegmented region
	4'hE:	sea = {SS[xAXC][63:16] + ea[59:16],ea[15:0]};	// stack segment
	4'hD:	sea = {ES[xAXC][63:16] + ea[59:16],ea[15:0]};	// ES segment
	default:
			sea = {DS[xAXC][63:16] + ea[59:16],ea[15:0]};	// data segment
	endcase
`else
// Segmentation disabled: addresses pass through unchanged.
wire [63:0] spc = pc;
wire [63:0] sea = ea;
`endif
 
//-----------------------------------------------------------------------------
// TLB
// The TLB contains 64 entries, that are 8 way set associative.
// The TLB is dual ported and shared between the instruction and data streams.
//-----------------------------------------------------------------------------
// Address translation stage.
//   ppc / pea - addresses used downstream for the cache tag compares
//   tlbo      - TLB register read data (returned by MFSPR for the TLB registers)
// When TLB is not defined, ppc and pea are simply the segmented addresses.
wire [63:0] ppc;
wire [63:0] pea;
wire [63:0] tlbo;
`ifdef TLB
wire [63:0] TLBVirtPage;
// TLB operations decoded in the W stage, qualified by advanceW
wire wTlbp = advanceW && wOpcode==`MISC && wFunc==`TLBP;
wire wTlbrd = advanceW && wOpcode==`MISC && wFunc==`TLBR;
wire wTlbwr = advanceW && wOpcode==`MISC && wFunc==`TLBWR;
wire wTlbwi = advanceW && wOpcode==`MISC && wFunc==`TLBWI;
wire wMtspr = advanceW && wOpcode==`R && wFunc==`MTSPR;
// The same operations decoded in the X stage, qualified by advanceX
wire xTlbrd = advanceX && xOpcode==`MISC && xFunc==`TLBR;
wire xTlbwr = advanceX && xOpcode==`MISC && xFunc==`TLBWR;
wire xTlbwi = advanceX && xOpcode==`MISC && xFunc==`TLBWI;
wire ITLBMiss,DTLBMiss;
 
// NOTE(review): the port semantics of Raptor64_TLB are defined in another
// file; the connections below are documented only as far as they are visible here.
Raptor64_TLB u26
(
	.rst(rst_i),
	.clk(clk),
	.pc(spc),
	.ea(sea),
	.ppc(ppc),
	.pea(pea),
	.m1IsStore(advanceM1 && m1IsStore),
	.ASID(ASID),
	.wTlbp(wTlbp),
	.wTlbrd(wTlbrd),
	.wTlbwr(wTlbwr),
	.wTlbwi(wTlbwi),
	.xTlbrd(xTlbrd),
	.xTlbwr(xTlbwr),
	.xTlbwi(xTlbwi),
	.wr(wMtspr),
	.wregno(wIR[11:6]),
	.dati(wData),
	.xregno(xIR[11:6]),
	.dato(tlbo),
	.ITLBMiss(ITLBMiss),
	.DTLBMiss(DTLBMiss),
	.HTLBVirtPage(TLBVirtPage)
);
 
`else
assign ppc = spc;
assign pea = sea;
`endif
 
//-----------------------------------------------------------------------------
// Clock control
// - reset or NMI reenables the clock
// - this circuit must be under the clk_i domain
//-----------------------------------------------------------------------------
//
// Gated CPU clock: clk follows clk_i while cpu_clk_en is high.
BUFGCE u20 (.CE(cpu_clk_en), .I(clk_i), .O(clk) );

// The enable is registered in the ungated clk_i domain. Reset or NMI forces
// the clock on; otherwise the enable tracks the clk_en request.
always @(posedge clk_i)
	if (rst_i || nmi_i)
		cpu_clk_en <= 1'b1;
	else
		cpu_clk_en <= clk_en;
//assign clk = clk_i;
 
//-----------------------------------------------------------------------------
// Random number register:
//
// Uses George Marsaglia's multiply method.
//-----------------------------------------------------------------------------
// Two multiply-with-carry generators (m_z, m_w). For each, the low 32 bits
// are multiplied by a constant and the upper 32 bits are added as the carry.
reg [63:0] m_z;
reg [63:0] m_w;
reg [63:0] next_m_z;
reg [63:0] next_m_w;

// Combinational next-state values (blocking assignments, implicit sensitivity).
always @*
begin
	next_m_z = (36'd3696936969 * m_z[31:0]) + m_z[63:32];
	next_m_w = (36'd1800018000 * m_w[31:0]) + m_w[63:32];
end

// 64-bit integer random value built from the two generators
wire [63:0] rand = {m_z[31:0],32'd0} + m_w;

// Double-precision random value: {sign[63], exponent[62:52], fraction[51:0]}.
// The fields previously added up to 65 bits (10-bit exponent source plus a
// 53-bit mantissa), so assigning them to the 64-bit randfd dropped the sign
// bit and shifted the exponent one place left. The exponent and fraction are
// now taken from the bit positions they occupy in the double format.
wire [10:0] bias = 11'h3FF;				// bias amount (eg 127)
wire [10:0] xl = rand[62:52];					// raw 11-bit exponent field
wire sgn = 1'b0;								// floating point: always generate a positive number
wire [10:0] exp = xl > bias-1 ? bias-1 : xl;	// 2^-1 otherwise number could be over 1
wire [51:0] man = rand[51:0];					// a leading '1' will be assumed
wire [63:0] randfd = {sgn,exp,man};
reg [63:0] rando;
 
//-----------------------------------------------------------------------------
// Instruction Cache / Instruction buffer
// 
// On a bus error, the instruction cache / buffer is loaded with a SYSCALL 509
// instruction, which is a call to the bus error handler.
// Line size is 16 half-words (64 bytes). Total cache size is 16kB.
// 
//-----------------------------------------------------------------------------
//reg lfdir;
reg icaccess;			// gates writes to the instruction cache RAM
reg ICacheOn;
wire ibufrdy;
wire [31:0] insnbundle;	// instruction read from the cache RAM
reg [31:0] insnbuf;		// instruction buffer, used when the cache is bypassed
reg [63:0] ibufadr;
// Addresses whose upper 32 bits equal nonICacheSeg are not cached.
wire isICached = ppc[63:32]!=nonICacheSeg;
//wire isEncrypted = ppc[63:32]==encryptedArea;
wire ICacheAct = ICacheOn & isICached;
reg [31:0] insn1;
reg [31:0] insnkey;
reg [63:0] icadr;

// SYSCALL 509
// The wire must be 32 bits wide. It was previously declared as a 1-bit wire,
// which truncated the constant to its LSB and made bevect equal 64'd3
// instead of two copies of the SYSCALL 509 instruction.
wire [31:0] syscall509 = 32'b0000000_00000_0000_11111110_10010111;
wire [63:0] bevect = {syscall509,syscall509};

// Cache RAM: written on ack_i or err_i while icaccess is set. On a bus error
// the bus-error vector is written in place of the bus data. The read port is
// clocked on the opposite clock edge.
Raptor64_icache_ram u1
(
	.wclk(clk),
	.we(icaccess & (ack_i|err_i)),
	.adr(icadr[13:0]),
	.d(err_i ? bevect : dat_i),
	.rclk(~clk),
	.pc(pc[13:0]),
	.insn(insnbundle)
);
 
// Instruction source: the cache output when the cache is active for this
// address, otherwise the instruction buffer.
always @(insnbundle or ICacheAct or insnbuf)
	case(ICacheAct)
	1'b1:	insn1 <= insnbundle;
	1'b0:	insn1 <= insnbuf;
	endcase

// Decrypt the instruction: XOR the fetched word with the instruction key.
always @(insn1 or insnkey)
	insn <= insnkey ^ insn1;
 
// Instruction cache tags: 256 entries indexed by pc[13:6], each holding
// address bits [63:14] plus a separate valid bit.
reg [63:14] tmem [255:0];
reg [255:0] tvalid;

// Start with all tags cleared and invalid.
initial begin
	for (n=0; n < 256; n = n + 1) begin
		tmem[n] = 0;
		tvalid[n] = 0;
	end
end

// Hit when the selected entry is valid and its tag equals ppc[63:14].
// NOTE(review): ihit has no declaration in the part of the file reviewed
// here; it appears to rely on an implicit 1-bit net - confirm.
wire [64:14] tgout;
assign tgout = {tvalid[pc[13:6]],tmem[pc[13:6]]};
assign ihit = (tgout=={1'b1,ppc[63:14]});
// The instruction buffer is ready when it holds the word addressed by ppc.
assign ibufrdy = ibufadr[63:2]==ppc[63:2];
 
//-----------------------------------------------------------------------------
// Data Cache
// No-allocate on write
// Line size is 8 words (64 bytes). Total cache size is 32kB
//-----------------------------------------------------------------------------
reg dcaccess;			// selects the line-fill write path below
wire dhit;
wire [64:15] dtgout;	// {valid, tag[63:15]} read from the tag RAM
reg wrhit;
reg wr_dcache;
reg [14:0] dcadr;
 
// cache RAM 32Kb
// Write port: while dcaccess is set, all eight byte lanes are written from
// dat_i at dcadr whenever ack_i is high. Otherwise, when wrhit is set, the
// lanes in sel_o are written from dat_o at adr_o. The read port is addressed
// by pea and clocked on the opposite clock edge.
Raptor64_dcache_ram u10
(
	.wclk(clk),
	.wr(1'b1),
	.sel(dcaccess ? {8{ack_i}} : wrhit ? sel_o : 8'h00),
	.wadr(dcaccess ? dcadr[14:3] : adr_o[14:3]),
	.i(dcaccess ? dat_i : dat_o),
	.rclk(~clk),
	.radr(pea[14:3]),
	.o(cdat)
);
 
// tag RAM 512 b
// Written during tag initialization (dtinit), or when the last word of a
// line (dcadr[5:3]==3'b111) is acknowledged during a dcaccess fill. The
// stored valid bit is ~dtinit, so initialization writes invalid entries.
Raptor64_dcache_tagram u11
(
	.wclk(clk),
	.we(dtinit | (dcaccess && ack_i && dcadr[5:3]==3'b111)),
	.adr(dcadr[14:6]),
	.d({~dtinit,adr_o[63:15]}),
 
	.rclk(~clk),
	.ea(pea[14:6]),
	.tago(dtgout)
);
 
// Hit when the tag entry is valid and matches the upper bits of pea.
assign dhit = (dtgout=={1'b1,pea[63:15]});
 
// Sub-word views of the data cache output word, chosen by the low bits of
// the address pea.
reg [ 7:0] cdata8;
reg [15:0] cdata16;
reg [31:0] cdata32;
reg [63:0] cdata64;

always @(pea or cdat)
begin
	// byte selected by pea[2:0]
	case(pea[2:0])
	3'b000:	cdata8 <= cdat[ 7: 0];
	3'b001:	cdata8 <= cdat[15: 8];
	3'b010:	cdata8 <= cdat[23:16];
	3'b011:	cdata8 <= cdat[31:24];
	3'b100:	cdata8 <= cdat[39:32];
	3'b101:	cdata8 <= cdat[47:40];
	3'b110:	cdata8 <= cdat[55:48];
	3'b111:	cdata8 <= cdat[63:56];
	endcase

	// 16-bit unit selected by pea[2:1]
	case(pea[2:1])
	2'b00:	cdata16 <= cdat[15: 0];
	2'b01:	cdata16 <= cdat[31:16];
	2'b10:	cdata16 <= cdat[47:32];
	2'b11:	cdata16 <= cdat[63:48];
	endcase

	// 32-bit unit selected by pea[2]
	case(pea[2])
	1'b0:	cdata32 <= cdat[31: 0];
	1'b1:	cdata32 <= cdat[63:32];
	endcase

	// whole 64-bit word
	cdata64 <= cdat;
end
 
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
 
reg [64:0] xData;		// execute-stage result (one bit wider than the datapath)
// An access may use the data cache only when the cache is enabled, the upper
// twelve bits of xData are neither 0xFFD nor 0xFFF, and the instruction is
// not a load-word-and-reserve (which is never cached).
wire xisCacheElement = dcache_on &&
						xData[63:52] != 12'hFFD && xData[63:52] != 12'hFFF &&
						xOpcode != `LWR && !(xOpcode==`MEMNDX && xFunc6==`LWRX);
reg m1IsCacheElement;
 
 
// Multi-cycle integer units: square root, multiply and divide/modulus.
// Each unit is loaded from the execute-stage decode below and reports
// completion on its *_done output.
wire [127:0] mult_out;
wire [63:0] sqrt_out;
wire [63:0] div_q;		// quotient
wire [63:0] div_r;		// remainder
wire sqrt_done,mult_done,div_done;
wire isSqrt =xOpcode==`R && xFunc==`SQRT;
 
isqrt #(64) u14
(
	.rst(rst_i),
	.clk(clk),
	.ce(1'b1),
	.ld(isSqrt),
	.a(a),
	.o(sqrt_out),
	.done(sqrt_done)
);
 
// Execute-stage decode of multiply / divide / modulus operations.
// NOTE(review): isMulu and isDivu are not used in the part of the file reviewed here.
wire isMulu = xOpcode==`RR && xFunc==`MULU;
wire isMuls = ((xOpcode==`RR && xFunc==`MULS) || xOpcode==`MULSI);
wire isMuli = (xOpcode==`MULSI || xOpcode==`MULUI);
wire isMult = (xOpcode==`MULSI || xOpcode==`MULUI || (xOpcode==`RR && (xFunc==`MULS || xFunc==`MULU)));
wire isDivu = (xOpcode==`RR && xFunc==`DIVU);
wire isDivs = ((xOpcode==`RR && xFunc==`DIVS) || xOpcode==`DIVSI);
wire isDivi = (xOpcode==`DIVSI || xOpcode==`DIVUI);
wire isDiv = (xOpcode==`DIVSI || xOpcode==`DIVUI || (xOpcode==`RR && (xFunc==`DIVS || xFunc==`DIVU)));
wire isModu = (xOpcode==`RR && xFunc==`MODU);
wire isMods = (xOpcode==`RR && xFunc==`MODS);
wire isMod = isModu|isMods;
 
Raptor64Mult u18
(
	.rst(rst_i),
	.clk(clk),
	.ld(isMult),
	.sgn(isMuls),
	.isMuli(isMuli),
	.a(a),
	.b(b),
	.imm(imm),
	.o(mult_out),
	.done(mult_done)
);
 
// The divider also serves the modulus instructions (remainder output ro).
// Its divide-by-zero output is left unconnected here.
Raptor64Div u19
(
	.rst(rst_i),
	.clk(clk),
	.ld(isDiv|isMod),
	.sgn(isDivs|isMods),
	.isDivi(isDivi),
	.a(a),
	.b(b),
	.imm(imm),
	.qo(div_q),
	.ro(div_r),
	.dvByZr(),
	.done(div_done)
);
 
//-----------------------------------------------------------------------------
// Floating point
//-----------------------------------------------------------------------------
 
// NOTE(review): with the two units below commented out, fpZLOut, fpLooOut
// and fpLooDone have no driver in the part of the file reviewed here.
wire [63:0] fpZLOut;
wire [63:0] fpLooOut;
wire fpLooDone;
 
 
/*
fpZLUnit #(64) u30 
(
	.op(xFunc[5:0]),
	.a(a),
	.b(b),	// for fcmp
	.o(fpZLOut),
	.nanx()
);
 
fpLOOUnit #(64) u31
(
	.clk(clk),
	.ce(1'b1),
	.rm(rm),
	.op(xFunc[5:0]),
	.a(a),
	.o(fpLooOut),
	.done(fpLooDone)
);
 
*/
// Result and status wires for the FP / SIMD units instantiated under
// `ifdef SIMD and `ifdef FLOATING_POINT.
wire dcmp_result;
wire [63:0] daddsub_result;
wire [63:0] ddiv_result;
wire [63:0] dmul_result;
wire [63:0] i2f_result;
wire [63:0] f2i_result;
wire [63:0] f2d_result;
wire [63:0] d2f_result;
 
// Per-unit status: invalid operation (iop), overflow (ovr), underflow (uf)
wire f2i_iop,fpmul_iop,fpdiv_iop,fpaddsub_iop,fpcmp_iop;
wire f2i_ovr,fpmul_ovr,fpdiv_ovr,fpaddsub_ovr;
wire fpmul_uf,fpaddsub_uf,fpdiv_uf;
wire [11:0] fcmp_result;	// three 4-bit SIMD compare results
 
`ifdef SIMD
// SIMD floating point: three 21-bit lanes packed into operand bits [20:0],
// [41:21] and [62:42]. Bit 63 of the result buses is not driven by these units.
// NOTE(review): instance names u61..u65 are also used in the FLOATING_POINT
// section, and both sections drive daddsub_result / dmul_result /
// ddiv_result, so SIMD and FLOATING_POINT appear mutually exclusive - confirm.
 
// Add/subtract lanes; the operation is selected by xFunc6
Raptor64_fpAdd21 u61
(
	.a(a[20:0]), 
	.b(b[20:0]),
	.operation(xFunc6),
	.clk(clk),
	.result(daddsub_result[20:0]) 
);
 
Raptor64_fpAdd21 u62
(
	.a(a[41:21]),
	.b(b[41:21]),
	.operation(xFunc6), 
	.clk(clk),
	.result(daddsub_result[41:21])
);
 
Raptor64_fpAdd21 u63
(
	.a(a[62:42]),
	.b(b[62:42]),
	.operation(xFunc6),
	.clk(clk),
	.result(daddsub_result[62:42])
);
 
// Multiply lanes
Raptor64_fpMul21 u64
(
	.a(a[20:0]),
	.b(b[20:0]),
	.clk(clk),
	.result(dmul_result[20:0])
);
 
Raptor64_fpMul21 u65
(
	.a(a[41:21]),
	.b(b[41:21]),
	.clk(clk),
	.result(dmul_result[41:21])
);
 
Raptor64_fpMul21 u66
(
	.a(a[62:42]),
	.b(b[62:42]),
	.clk(clk),
	.result(dmul_result[62:42])
);
 
// Divide lanes
Raptor64_fpDiv21 u67
(
	.a(a[20:0]),
	.b(b[20:0]),
	.clk(clk),
	.result(ddiv_result[20:0])
);
 
Raptor64_fpDiv21 u68
(
	.a(a[41:21]),
	.b(b[41:21]),
	.clk(clk),
	.result(ddiv_result[41:21])
);
 
Raptor64_fpDiv21 u69
(
	.a(a[62:42]),
	.b(b[62:42]),
	.clk(clk),
	.result(ddiv_result[62:42])
);
 
// Compare lanes: a 4-bit result per lane
Raptor64_fCmp21 u70
(
	.a(a[20:0]),
	.b(b[20:0]),
	.clk(clk),
	.result(fcmp_result[3:0])
);
 
Raptor64_fCmp21 u71
(
	.a(a[41:21]),
	.b(b[41:21]),
	.clk(clk),
	.result(fcmp_result[7:4])
);
 
Raptor64_fCmp21 u72
(
	.a(a[62:42]),
	.b(b[62:42]),
	.clk(clk),
	.result(fcmp_result[11:8])
);
`endif
 
`ifdef FLOATING_POINT
// Xilinx Core Generator Components
// 64-bit floating point cores operating on the execute-stage operands a and b.
// The cores are clocked; the cycle count allowed for each operation is set
// by the fltctr counter elsewhere in this file.
 
Raptor64_fpCmp u60
(
	.a(a), // input [63 : 0] a
	.b(b), // input [63 : 0] b
	.operation(xFunc6), // input [5 : 0] operation
	.clk(clk), // input clk
	.result(dcmp_result), // output [0 : 0] result
	.invalid_op(fpcmp_iop)
); // output invalid_op
 
Raptor64_fpAddsub u61
(
	.a(a), // input [63 : 0] a
	.b(b), // input [63 : 0] b
	.operation(xFunc6), // input [5 : 0] operation
	.clk(clk), // input clk
	.result(daddsub_result), // output [63 : 0] result
	.underflow(fpaddsub_uf), // output underflow
	.overflow(fpaddsub_ovr), // output overflow
	.invalid_op(fpaddsub_iop)
); // output invalid_op
 
// The divide_by_zero output is left unconnected.
Raptor64_fpDiv u62
(
	.a(a), // input [63 : 0] a
	.b(b), // input [63 : 0] b
	.clk(clk), // input clk
	.result(ddiv_result), // output [63 : 0] result
	.underflow(fpdiv_uf), // output underflow
	.overflow(fpdiv_ovr), // output overflow
	.invalid_op(fpdiv_iop), // output invalid_op
	.divide_by_zero()
); // output divide_by_zero
 
Raptor64_fpMul u63
(
	.a(a), // input [63 : 0] a
	.b(b), // input [63 : 0] b
	.clk(clk), // input clk
	.result(dmul_result), // output [63 : 0] result
	.underflow(fpmul_uf), // output underflow
	.overflow(fpmul_ovr), // output overflow
	.invalid_op(fpmul_iop)
); // output invalid_op
 
Raptor64_fpItoF u64
(
	.a(a), // input [63 : 0] a
	.clk(clk), // input clk
	.result(i2f_result)
); // output [63 : 0] result
 
Raptor64_fpFtoI u65
(
	.a(a), // input [63 : 0] a
	.clk(clk), // input clk
	.result(f2i_result), // output [63 : 0] result
	.overflow(f2i_ovr), // output overflow
	.invalid_op(f2i_iop)
); // output invalid_op
 
`endif
 
// Floating point wait counter.
// While advanceX is high, fltctr is loaded with a count chosen by the
// execute-stage SIMD / FP function (presumably the latency of the
// corresponding unit - confirm). While advanceX is low it counts down to
// zero. fltdone (fltctr==0) latches fp_ovr into FPC_overx.
always @(posedge clk)
if (rst_i)
	fltctr <= 6'd0;
else begin
	if (fltdone)
		FPC_overx <= fp_ovr;

	if (advanceX) begin
`ifdef SIMD
		if (xOpcode==`SIMD) begin
			case(xFunc6)
			`SIMD_ADD,
			`SIMD_SUB:	fltctr <= 6'd10;
			`SIMD_MUL:	fltctr <= 6'd7;
			`SIMD_DIV:	fltctr <= 6'd19;
			`SIMD_CMP:	fltctr <= 6'd2;
			default:	fltctr <= 6'd1;
			endcase
		end
		else
`endif
		if (xOpcode==`FP) begin
			// Items are listed in the priority order of the original if-chain.
			case(xFunc6)
			`FDADD,
			`FDSUB,
			`FDMUL,
			`FDDIV:		fltctr <= 6'd12;
			6'b000100,				// unordered
			6'b001100,				// less than
			6'b010100,				// equal
			6'b011100,				// less than or equal
			6'b100100,				// greater than
			6'b101100,				// not equal
			6'b110100:	fltctr <= 6'd2;	// greater than or equal
			`FDI2F:		fltctr <= 6'd7;	// ItoFD
			6'b000110:	fltctr <= 6'd6;	// FFtoI
			6'b000111,				// FtoD
			6'b001000:	fltctr <= 6'd2;	// DtoF
			default:	fltctr <= 6'd0;
			endcase
		end
	end
	else begin
		if (fltctr > 6'd0)
			fltctr <= fltctr - 6'd1;
	end
end
 
// Population count of a 6-bit value: returns the number of set bits (0..6).
function [2:0] popcnt6;
input [5:0] a;
begin
	// The sum is evaluated at the 3-bit width of the result, which holds 0..6.
	popcnt6 = a[0] + a[1] + a[2] + a[3] + a[4] + a[5];
end
endfunction
 
// Population count of a 36-bit value (0..36), accumulated from six 6-bit
// groups counted with popcnt6.
function [5:0] popcnt36;
input [35:0] a;
integer grp;
begin
	popcnt36 = 6'd0;
	for (grp = 0; grp < 36; grp = grp + 6)
		popcnt36 = popcnt36 + popcnt6(a[grp +: 6]);
end
endfunction
 
// Jump target: the upper 37 bits of the current pc, followed by the 25-bit
// field insn[24:0] of the fetched instruction and two zero bits (word aligned).
wire [63:0] jmp_tgt = {pc[63:27],insn[24:0],2'b00};
 
//-----------------------------------------------------------------------------
// Stack for return address predictor
//-----------------------------------------------------------------------------
reg [63:0] ras [63:0];	// return address stack, return predictions
reg [5:0] ras_sp;		// stack pointer
// Clear all 64 stack entries at start-up. ras_sp is not initialized here.
initial begin
	for (n = 0; n < 64; n = n + 1)
		ras[n] = 0;
end
`ifdef BTB
reg [63:0] btb [63:0];	// branch target buffer
`endif
 
//-----------------------------------------------------------------------------
// Branch history table.
// The history table is updated by the EX stage and read in
// both the EX and IF stages.
//-----------------------------------------------------------------------------
wire predict_taken;		// prediction output of the branch history unit
 
// NOTE(review): Raptor64_BranchHistory is defined in another file. Per the
// section comment it is read with pc (IF) and xpc (EX) and updated from the
// EX-stage outcome takb - confirm against that module.
Raptor64_BranchHistory u6
(
	.rst(rst_i),
	.clk(clk),
	.advanceX(advanceX),
	.xIR(xIR),
	.pc(pc),
	.xpc(xpc),
	.takb(takb),
	.predict_taken(predict_taken)
);
 
//-----------------------------------------------------------------------------
// Evaluate branch conditions.
//-----------------------------------------------------------------------------
 
// Produces takb from the execute-stage instruction (xIR), its operands
// (a, b, imm) and the address reservation flag (rsf).
Raptor64_EvaluateBranch u4
(
	.ir(xIR),
	.a(a),
	.b(b),
	.imm(imm),
	.rsf(rsf),
	.takb(takb)
);
 
//-----------------------------------------------------------------------------
// Datapath (ALU) operations.
//-----------------------------------------------------------------------------
// xData1 is the result of the ALU multiplexer below. The x*o wires are the
// results of the separate functional units. All of them are ORed together
// into xData, so a unit is presumably expected to output zero when its
// operation is not selected - confirm in the unit modules.
reg [63:0] xData1;
wire [63:0] xBitfieldo,xSeto,xLogico,xShifto,xAddsubo;
 
// Count leading zeros / leading ones of operand a (clocked units)
wire [6:0] cntlzo,cntloo;
cntlz64 u12 (.clk(clk), .i(a),  .o(cntlzo) );
cntlo64 u13 (.clk(clk), .i(a),  .o(cntloo) );
//cntlz64 u12 (.i(a),  .o(cntlzo) );
//cntlo64 u13 (.i(a),  .o(cntloo) );
 
// NOTE(review): shftop, shfto and bfextd have no driver or reader other than
// the ALU sensitivity list in the part of the file reviewed here.
reg [1:0] shftop;
wire [63:0] shfto;
wire [63:0] masko;
reg [63:0] bfextd;
wire [63:0] rolo;
wire [15:0] bcdmulo;
 
// Functional units. Ports are connected by position, so the argument order
// must match the port order of each module definition.
Raptor64_addsub u21 (xIR,a,b,imm,xAddsubo);
Raptor64_logic   u9 (xIR,a,b,imm,xLogico);
Raptor64_set    u15 (xIR,a,b,imm,xSeto);
Raptor64_bitfield u16(xIR, a, b, xBitfieldo, masko);
Raptor64_shift  u17 (xIR, a, b, masko, xShifto, rolo);
BCDMul2 u22 (a[7:0],b[7:0],bcdmulo);
 
// Operand comparisons shared by the compare / min / max operations
wire aeqz = a==64'd0;
wire eq = a==b;
wire eqi = a==imm;
wire lt = $signed(a) < $signed(b);
wire lti = $signed(a) < $signed(imm);
wire ltu = a < b;
wire ltui = a < imm;
 
// Execute-stage result multiplexer: selects xData1 according to the opcode
// and function fields of xIR. Operations handled by the separate addsub /
// logic / set / bitfield / shift units fall to the defaults and produce zero
// here, because xData is the OR of xData1 with those units' outputs.
//
// An implicit sensitivity list is used. The previous explicit list omitted
// many signals read below (EPC, IPC, the segment registers, epat,
// mutex_gate, the FPC fields, the FP/SIMD result buses, offset2), so in
// simulation xData1 could hold a stale value when only those changed.
always @*
casex(xOpcode)
`MISC:
	case(xFunc)
	`SYSCALL:
		// xIR[16] selects whether the following or the current instruction
		// address is produced
		if (xIR[16])
			xData1 = fnIncPC(xpc);
		else
			xData1 = xpc;
	default:	xData1 = 64'd0;
	endcase
`R:
	casex(xFunc6)
	`COM:	xData1 = ~a;
	`NOT:	xData1 = ~|a;
	`NEG:	xData1 = -a;
	`ABS:	xData1 = a[63] ? -a : a;
	`SGN:	xData1 = a[63] ? 64'hFFFFFFFF_FFFFFFFF : aeqz ? 64'd0 : 64'd1;
	`MOV:	xData1 = a;
	`SQRT:	xData1 = sqrt_out;
	`SWAP:	xData1 = {a[31:0],a[63:32]};
	`RBO:	xData1 = {a[7:0],a[15:8],a[23:16],a[31:24],a[39:32],a[47:40],a[55:48],a[63:56]};
 
	`REDOR:		xData1 = |a;
	`REDAND:	xData1 = &a;
 
	`CTLZ:	xData1 = cntlzo;
	`CTLO:	xData1 = cntloo;
	// 64-bit population count: ten 6-bit groups plus the top 4 bits
	`CTPOP:	xData1 = {4'd0,popcnt6(a[5:0])} +
					{4'd0,popcnt6(a[11:6])} +
					{4'd0,popcnt6(a[17:12])} +
					{4'd0,popcnt6(a[23:18])} +
					{4'd0,popcnt6(a[29:24])} +
					{4'd0,popcnt6(a[35:30])} +
					{4'd0,popcnt6(a[41:36])} +
					{4'd0,popcnt6(a[47:42])} +
					{4'd0,popcnt6(a[53:48])} +
					{4'd0,popcnt6(a[59:54])} +
					{4'd0,popcnt6(a[63:60])}
					;
	`SEXT8:		xData1 = {{56{a[7]}},a[7:0]};	
	`SEXT16:	xData1 = {{48{a[15]}},a[15:0]};
	`SEXT32:	xData1 = {{32{a[31]}},a[31:0]};
 
	`MTSPR:		xData1 = a;
	// Special purpose register read; the register number is xIR[11:6]
	`MFSPR:
		case(xIR[11:6])
`ifdef TLB
		`TLBWired:		xData1 = tlbo;
		`TLBIndex:		xData1 = tlbo;
		`TLBRandom:		xData1 = tlbo;
		`TLBPhysPage0:	xData1 = tlbo;
		`TLBPhysPage1:	xData1 = tlbo;
		`TLBVirtPage:	xData1 = tlbo;
		`TLBPageMask:	xData1 = tlbo;
		`TLBASID:	begin
					xData1 = 64'd0;
					xData1[0] = tlbo[0];
					xData1[1] = tlbo[1];
					xData1[2] = tlbo[2];
					xData1[15:8] = tlbo[15:8];
					end
		`PageTableAddr:	xData1 = {PageTableAddr,13'd0};
		`BadVAddr:		xData1 = {BadVAddr,13'd0};
`endif
`ifdef SEGMENTATION
		`CS:			xData1 = {CS[xAXC],16'h0};
		`DS:			xData1 = {DS[xAXC],16'h0};
		`ES:			xData1 = {ES[xAXC],16'b0};
		`SS:			xData1 = {SS[xAXC],16'h0};
`endif
		`ASID:			xData1 = ASID;
		`Tick:			xData1 = tick;
		`EPC:			xData1 = EPC[xAXC];
		`IPC:			xData1 = IPC[xAXC];
		`TBA:			xData1 = TBA;
		`ERRADR:		xData1 = errorAddress;
		`AXC:			xData1 = xAXC;
		`NON_ICACHE_SEG:	xData1 = nonICacheSeg;
		`FPCR:			xData1 = FPC;
		`RAND:			xData1 = rando;
		`SRAND1:		xData1 = m_z;
		`SRAND2:		xData1 = m_w;
		`INSNKEY:		xData1 = insnkey;
		`PCHISTORIC:	xData1 = pchistoric;
		default:	xData1 = 64'd0;
		endcase
	// Mutex gate operations return the current state of the addressed gate bit
	`OMG:		xData1 = mutex_gate[a[5:0]];
	`CMG:		xData1 = mutex_gate[a[5:0]];
	`OMGI:		begin
				xData1 = mutex_gate[xIR[11:6]];
				$display("mutex_gate[%d]=%d",xIR[11:6],mutex_gate[xIR[11:6]]);
				end
	`CMGI:		xData1 = mutex_gate[xIR[11:6]];
	default:	xData1 = 64'd0;
	endcase
`RR:
	case(xFunc6)
	`CMP:	xData1 = lt ? 64'hFFFFFFFFFFFFFFFF : eq ? 64'd0 : 64'd1;
	`CMPU:	xData1 = ltu ? 64'hFFFFFFFFFFFFFFFF : eq ? 64'd0 : 64'd1;
	`MIN:	xData1 = lt ? a : b;
	`MAX:	xData1 = lt ? b : a;
	`MOVZ:	xData1 = b;
	`MOVNZ:	xData1 = b;
	`MOVPL:	xData1 = b;
	`MOVMI:	xData1 = b;
	`MULS:	xData1 = mult_out[63:0];
	`MULU:	xData1 = mult_out[63:0];
	`DIVS:	xData1 = div_q;
	`DIVU:	xData1 = div_q;
	`MODU:	xData1 = div_r;
	`MODS:	xData1 = div_r;
	`BCD_MUL:	xData1 = bcdmulo;
	`MFEP:	xData1 = epat[a[7:0]];
 	default:	xData1 = 64'd0;
	endcase
`ifdef SIMD
`SIMD:
	case(xFunc6)
	`SIMD_ADD:	xData1 = daddsub_result;
	`SIMD_SUB:	xData1 = daddsub_result;
	`SIMD_MUL:	xData1 = dmul_result;
	`SIMD_DIV:	xData1 = ddiv_result;
	`SIMD_CMP:	xData1 = {fcmp_result[11:8],17'd0,fcmp_result[7:4],17'd0,fcmp_result[3:0]};
	default:	xData1 = 64'd0;
	endcase
`endif
`ifdef ISIMD
// Integer SIMD on four 16-bit lanes. Blocking assignments are used, like the
// rest of this combinational block, and a default is provided so that an
// unlisted function does not leave xData1 holding its previous value.
`SIMD:
	case(xFunc6)
	`SIMD_ADD:
		begin
			xData1[15: 0] = a[15: 0] + b[15: 0];
			xData1[31:16] = a[31:16] + b[31:16];
			xData1[47:32] = a[47:32] + b[47:32];
			xData1[63:48] = a[63:48] + b[63:48];
		end
	`SIMD_SUB:
		begin
			xData1[15: 0] = a[15: 0] - b[15: 0];
			xData1[31:16] = a[31:16] - b[31:16];
			xData1[47:32] = a[47:32] - b[47:32];
			xData1[63:48] = a[63:48] - b[63:48];
		end
	`SIMD_MUL:
		begin
			xData1[15: 0] = a[15: 0] * b[15: 0];
			xData1[31:16] = a[31:16] * b[31:16];
			xData1[47:32] = a[47:32] * b[47:32];
			xData1[63:48] = a[63:48] * b[63:48];
		end
	`SIMD_AND:
		begin
			xData1[15: 0] = a[15: 0] & b[15: 0];
			xData1[31:16] = a[31:16] & b[31:16];
			xData1[47:32] = a[47:32] & b[47:32];
			xData1[63:48] = a[63:48] & b[63:48];
		end
	`SIMD_OR:
		begin
			xData1[15: 0] = a[15: 0] | b[15: 0];
			xData1[31:16] = a[31:16] | b[31:16];
			xData1[47:32] = a[47:32] | b[47:32];
			xData1[63:48] = a[63:48] | b[63:48];
		end
	`SIMD_XOR:
		begin
			xData1[15: 0] = a[15: 0] ^ b[15: 0];
			xData1[31:16] = a[31:16] ^ b[31:16];
			xData1[47:32] = a[47:32] ^ b[47:32];
			xData1[63:48] = a[63:48] ^ b[63:48];
		end
	default:	xData1 = 64'd0;
	endcase
`endif
`BTRR:
	case(xFunc5)
	`LOOP:	xData1 = b - 64'd1;
	default:	xData1 = 64'd0;
	endcase
// Bitwise multiplexer: each result bit is taken from b where c is 1, else from a
`MUX:
	begin
		for (n = 0; n < 64; n = n + 1)
			xData1[n] = c[n] ? b[n] : a[n];
	end
// Constant building: SETLO sign-extends a 22-bit immediate; SETMID and SETHI
// place immediate bits above the retained low bits of a
`SETLO:		xData1 = {{42{xIR[21]}},xIR[21:0]};
`SETMID:	xData1 = {{20{xIR[21]}},xIR[21:0],a[21:0]};
`SETHI:		xData1 = {xIR[19:0],a[43:0]};
`CMPI:	xData1 = lti ? 64'hFFFFFFFFFFFFFFFF : eqi ? 64'd0 : 64'd1;
`CMPUI:	xData1 = ltui ? 64'hFFFFFFFFFFFFFFFF : eqi ? 64'd0 : 64'd1;
`MULSI:	xData1 = mult_out[63:0];
`MULUI:	xData1 = mult_out[63:0];
`DIVSI:	xData1 = div_q;
`DIVUI:	xData1 = div_q;
`ifdef FLOATING_POINT
`LFP,`LFDP:	xData1 = a + imm + xIR[15];
`SFP,`SFDP:	xData1 = a + imm + xIR[15];
`endif
//`LP:	xData1 = a + imm + xIR[15];
//`SP:	xData1 = a + imm + xIR[15];
// Indexed addressing: base + (index << scale) + small offset
`MEMNDX:
		case(xFunc6)
//		`LPX,`LFPX,`LFDPX,`SPX,`SFPX,`SFDPX:
//			xData1 = a + (b << scale) + offset2 + xIR[15];
		default:
			xData1 = a + (b << scale) + offset2;
		endcase
// These produce the address of the following instruction
`TRAPcc:	xData1 = fnIncPC(xpc);
`TRAPcci:	xData1 = fnIncPC(xpc);
`CALL:		xData1 = fnIncPC(xpc);
`JAL:		xData1 = fnIncPC(xpc);//???xpc + {xIR[19:15],2'b00};
`RET:	xData1 = a + imm;
`FPLOO:	xData1 = fpLooOut;
`FPZL:	xData1 = fpZLOut;
`ifdef FLOATING_POINT
`FP:
	case(xFunc6)
	`FDADD:	xData1 = daddsub_result;
	`FDSUB:	xData1 = daddsub_result;
	`FDMUL:	xData1 = dmul_result;
	`FDDIV:	xData1 = ddiv_result;
	`FDI2F:	xData1 = i2f_result;
	`FDF2I:	xData1 = f2i_result;
	`FDCUN:	xData1 = dcmp_result;
	`FDCEQ:	xData1 = dcmp_result;
	`FDCNE:	xData1 = dcmp_result;
	`FDCLT:	xData1 = dcmp_result;
	`FDCLE:	xData1 = dcmp_result;
	`FDCGT:	xData1 = dcmp_result;
	`FDCGE:	xData1 = dcmp_result;
	default:	xData1 = 64'd0;
	endcase
`endif
default:	xData1 = 64'd0;
endcase
 
// Execute-stage result bus: the outputs of the individual function units are
// OR-ed together into xData.
// NOTE(review): this only acts as a selector if every unit that is not the
// source of the current result drives zero; that convention is established
// outside this view -- confirm for each x*o bus.
always @(xAddsubo,xSeto,xShifto,xLogico,xBitfieldo,xData1)
	xData = xData1
		  | xAddsubo
		  | xLogico
		  | xShifto
		  | xSeto
		  | xBitfieldo;
 
// Two instances of the overflow module (defined elsewhere), each fed with the
// sign bits of the two operands and of the add/subtract result.
//   u2 -> v_ri : second operand is the immediate; .op is high for SUBI
//   u3 -> v_rr : second operand is register b;    .op is high for RR/SUB
// NOTE(review): u3 decodes the function with xFunc while ovr_error (which
// consumes v_rr) decodes with xFunc6 -- confirm the two agree for SUB.
wire v_ri;
wire v_rr;

overflow u2
(
	.a(a[63]),
	.b(imm[63]),
	.s(xAddsubo[63]),
	.op(xOpcode==`SUBI),
	.v(v_ri)
);

overflow u3
(
	.a(a[63]),
	.b(b[63]),
	.s(xAddsubo[63]),
	.op(xOpcode==`RR && xFunc==`SUB),
	.v(v_rr)
);
 
// Divide-by-zero: an immediate divide whose immediate is zero, or a
// register-register divide whose b operand is zero.
// NOTE(review): the RR modulus functions (MODS/MODU) are treated as divides by
// xIsDiv but are not checked here -- confirm whether that is intentional.
wire dbz_error =
	((xOpcode==`DIVSI || xOpcode==`DIVUI) && imm==64'd0) ||
	(xOpcode==`RR && (xFunc6==`DIVS || xFunc6==`DIVU) && b==64'd0);

// Arithmetic overflow: the matching overflow detector fired for an
// immediate (ADDI/SUBI) or register-register (ADD/SUB) add or subtract.
wire ovr_error =
	((xOpcode==`ADDI || xOpcode==`SUBI) && v_ri) ||
	(xOpcode==`RR && (xFunc6==`SUB || xFunc6==`ADD) && v_rr);
// Privilege violation: one of the listed MISC functions is in the execute
// stage while the core is not in kernel mode.
// ToDo: add more priv violations
wire priv_violation = !KernelMode && (xOpcode==`MISC && (
		xFunc==`IRET  ||
		xFunc==`ERET  ||
		xFunc==`CLI   ||
		xFunc==`SEI   ||
		xFunc==`TLBP  ||
		xFunc==`TLBR  ||
		xFunc==`TLBWR ||
		xFunc==`TLBWI ||
		xFunc==`IEPP
	));
// Illegal instruction: the major opcode in the execute stage is one of the
// listed values (presumably the unassigned opcode slots -- the opcode map is
// not visible here). Opcode 20 is only listed when SIMD support is not
// compiled in.
// ToDo: detect illegal instructions in the hives (sub-opcodes)
wire illegal_insn = (
		xOpcode==7'd19 ||
`ifndef SIMD
		xOpcode==7'd20 ||
`endif
		xOpcode==7'd28  || xOpcode==7'd29  || xOpcode==7'd30  || xOpcode==7'd31  ||
		xOpcode==7'd47  || xOpcode==7'd55  || xOpcode==7'd63  ||
		xOpcode==7'd90  || xOpcode==7'd91  || xOpcode==7'd92  || xOpcode==7'd93  ||
		xOpcode==7'd106 || xOpcode==7'd107 ||
		xOpcode==7'd124 || xOpcode==7'd125 || xOpcode==7'd126 || xOpcode==7'd127
		);
 
//-----------------------------------------------------------------------------
// For performance and core size reasons, the following should really decode
// the opcodes in the decode stage, then pass the decoding information forward
// using regs. However the core is trickier to get working that way; decoding
// in multiple stages is simpler.
//-----------------------------------------------------------------------------
//wire dIsFlowCtrl =
//	dOpcode==`JAL || dOpcode==`RET ||
//	dOpcode==`BTRI || dOpcode==`BTRR || dOpcode==`TRAPcci || dOpcode==`TRAPcc ||
//	dOpcode==`BEQI || dOpcode==`BNEI ||
//	dOpcode==`BLTI || dOpcode==`BLEI || dOpcode==`BGTI || dOpcode==`BGEI ||
//	dOpcode==`BLTUI || dOpcode==`BLEUI || dOpcode==`BGTUI || dOpcode==`BGEUI ||
//	(dOpcode==`MISC && (dFunc==`SYSCALL || dFunc==`IRET || dFunc==`ERET))
//	;
//wire xIsFlowCtrl =
//	xOpcode==`JAL || xOpcode==`RET ||
//	xOpcode==`BTRI || xOpcode==`BTRR || xOpcode==`TRAPcci || xOpcode==`TRAPcc ||
//	xOpcode==`BEQI || xOpcode==`BNEI ||
//	xOpcode==`BLTI || xOpcode==`BLEI || xOpcode==`BGTI || xOpcode==`BGEI ||
//	xOpcode==`BLTUI || xOpcode==`BLEUI || xOpcode==`BGTUI || xOpcode==`BGEUI ||
//	(xOpcode==`MISC && (xFunc==`SYSCALL || xFunc==`IRET || xFunc==`ERET))
//	;
//wire m1IsFlowCtrl = 
//	(m1Opcode==`MISC && m1Func==`SYSCALL)
//	;
//wire m2IsFlowCtrl = 
//	(m2Opcode==`MISC && m2Func==`SYSCALL)
//	;
//	
//	
//wire dIsLoad = dIRvalid && (
//	dOpcode==`LW || dOpcode==`LH || dOpcode==`LB || dOpcode==`LWR ||
//	dOpcode==`LHU || dOpcode==`LBU ||
//	dOpcode==`LC || dOpcode==`LCU || dOpcode==`LM ||
//	dOpcode==`LF || dOpcode==`LFD || dOpcode==`LP || dOpcode==`LFP || dOpcode==`LFDP ||
//	dOpcode==`LSH || dOpcode==`LSW ||
//	(dOpcode==`MEMNDX && (
//		dFunc6==`LWX || dFunc6==`LHX || dFunc6==`LBX || dFunc6==`LWRX ||
//		dFunc6==`LHUX || dFunc6==`LBUX ||
//		dFunc6==`LCX || dFunc6==`LCUX ||
//		dFunc6==`LFX || dFunc6==`LFDX || dFunc6==`LPX ||
//		dFunc6==`LSHX || dFunc6==`LSWX
//	)) ||
//	(dOpcode==`MISC && (dFunc==`SYSJMP || dFunc==`SYSCALL || dFunc==`SYSINT)))
//	;
//wire dIsStore = dIRvalid && (
//	dOpcode==`SW || dOpcode==`SH || dOpcode==`SB || dOpcode==`SC || dOpcode==`SWC || dOpcode==`SM ||
//	dOpcode==`SF || dOpcode==`SFD || dOpcode==`SP || dOpcode==`SFP || dOpcode==`SFDP ||
//	dOpcode==`SSH || dOpcode==`SSW ||
//	(dOpcode==`MEMNDX && (
//		dFunc6==`SWX || dFunc6==`SHX || dFunc6==`SBX || dFunc6==`SCX || dFunc6==`SWCX ||
//		dFunc6==`SFX || dFunc6==`SFDX || dFunc6==`SPX ||
//		dFunc6==`SSHX || dFunc6==`SSWX
//	)))
//	;
//wire dIsIn = dIRvalid && (
//	dOpcode==`INW || dOpcode==`INH || dOpcode==`INCH || dOpcode==`INB ||
//	dOpcode==`INHU || dOpcode==`INCU || dOpcode==`INBU ||
//	(dOpcode==`MEMNDX && (
//		dFunc6==`INWX || dFunc6==`INHX || dFunc6==`INCX || dFunc6==`INBX ||
//		dFunc6==`INHUX || dFunc6==`INCUX || dFunc6==`INBUX
//	)))
//	;
//wire dIsOut = dIRvalid && (dOpcode==`OUTW || dOpcode==`OUTH || dOpcode==`OUTC || dOpcode==`OUTB ||
//	(dOpcode==`MEMNDX && (
//		dFunc6==`OUTWX || dFunc6==`OUTHX || dFunc6==`OUTCX || dFunc6==`OUTBX
//	)))
//	;
 
 
//-----------------------------------------------------------------------------
// Pipeline advance and stall logic
//
// Execute-stage decodes for the multi-cycle units. xIsSqrt, xIsMult and xIsDiv
// hold the execute stage (via advanceX) until the matching done flag is set;
// xIsCnt marks the bit-count operations, which take part in the StallR
// forwarding interlock.
//-----------------------------------------------------------------------------
wire xIsSqrt = (xOpcode==`R) && (xFunc6==`SQRT);

wire xIsMult =
	xOpcode==`MULSI || xOpcode==`MULUI ||
	(xOpcode==`RR && (xFunc6==`MULS || xFunc6==`MULU));

wire xIsDiv =
	xOpcode==`DIVSI || xOpcode==`DIVUI ||
	(xOpcode==`RR && (xFunc6==`DIVS || xFunc6==`DIVU ||
					  xFunc6==`MODS || xFunc6==`MODU));

wire xIsCnt =
	xOpcode==`R && (xFunc6==`CTLZ || xFunc6==`CTLO || xFunc6==`CTPOP);

// Count-operation flag carried into the M1 and M2 stages.
reg m1IsCnt;
reg m2IsCnt;
reg m2IsCacheElement;
 
// Execute-stage "is a memory load" decode.
// The flag is derived directly from xOpcode/xFunc6 so that it drops to false
// when xIR is replaced by a NOP (the same holds for xIsStore).
// A MISC/SYSCALL is also counted as a load -- presumably because the vector is
// fetched from memory; confirm against the M1 stage.
wire xIsLoad =
	// direct-addressed integer loads
	xOpcode==`LB  || xOpcode==`LBU ||
	xOpcode==`LC  || xOpcode==`LCU ||
	xOpcode==`LH  || xOpcode==`LHU ||
	xOpcode==`LW  || xOpcode==`LWR || xOpcode==`LM ||
	// floating point / pair loads
	xOpcode==`LF  || xOpcode==`LFD ||
	xOpcode==`LP  || xOpcode==`LFP || xOpcode==`LFDP ||
	// special register loads
	xOpcode==`LSH || xOpcode==`LSW ||
	// indexed forms
	(xOpcode==`MEMNDX && (
		xFunc6==`LBX  || xFunc6==`LBUX ||
		xFunc6==`LCX  || xFunc6==`LCUX ||
		xFunc6==`LHX  || xFunc6==`LHUX ||
		xFunc6==`LWX  || xFunc6==`LWRX ||
		xFunc6==`LFX  || xFunc6==`LFDX || xFunc6==`LPX ||
		xFunc6==`LSHX || xFunc6==`LSWX
	)) ||
	// system call
	(xOpcode==`MISC && xFunc==`SYSCALL);
// Execute-stage "is a memory store" decode, covering the direct-addressed
// store opcodes and the indexed (MEMNDX) store functions.
// NOTE(review): SFPX/SFDPX are not listed among the indexed forms although the
// register-fetch stage zeroes xRt for them -- confirm whether they are live.
wire xIsStore =
	// direct-addressed integer stores
	xOpcode==`SB  || xOpcode==`SC  || xOpcode==`SH || xOpcode==`SW ||
	xOpcode==`SWC || xOpcode==`SM  || xOpcode==`STBC ||
	// floating point / pair stores
	xOpcode==`SF  || xOpcode==`SFD ||
	xOpcode==`SP  || xOpcode==`SFP || xOpcode==`SFDP ||
	// special register stores
	xOpcode==`SSH || xOpcode==`SSW ||
	// indexed forms
	(xOpcode==`MEMNDX && (
		xFunc6==`SBX  || xFunc6==`SCX  || xFunc6==`SHX || xFunc6==`SWX ||
		xFunc6==`SWCX ||
		xFunc6==`SFX  || xFunc6==`SFDX || xFunc6==`SPX ||
		xFunc6==`SSHX || xFunc6==`SSWX
	));

// SWC on its own: it forces a register-fetch stall (see StallR).
wire xIsSWC = (xOpcode==`SWC);
// Execute-stage "is an I/O input" decode: direct IN* opcodes plus the indexed
// IN*X functions of MEMNDX.
wire xIsIn =
	xOpcode==`INB  || xOpcode==`INBU ||
	xOpcode==`INCH || xOpcode==`INCU ||
	xOpcode==`INH  || xOpcode==`INHU ||
	xOpcode==`INW  ||
	(xOpcode==`MEMNDX && (
		xFunc6==`INBX || xFunc6==`INBUX ||
		xFunc6==`INCX || xFunc6==`INCUX ||
		xFunc6==`INHX || xFunc6==`INHUX ||
		xFunc6==`INWX
	));
// Execute-stage "is an I/O output" decode: direct OUT* opcodes plus the
// indexed OUT*X functions of MEMNDX.
wire xIsOut =
	xOpcode==`OUTB || xOpcode==`OUTC || xOpcode==`OUTH || xOpcode==`OUTW ||
	(xOpcode==`MEMNDX && (
		xFunc6==`OUTBX || xFunc6==`OUTCX || xFunc6==`OUTHX || xFunc6==`OUTWX
	));
//wire mIsSWC = mOpcode==`SWC;
//reg m1IsIn;
 
// Miscellaneous per-stage decodes.
wire m2IsInW   = (m2Opcode==`INW);
wire xIsSetmid = (xOpcode==`SETMID);
wire xIsFPLoo  = (xOpcode==`FPLOO);
wire xIsFP     = (xOpcode==`FP);
wire xIsSIMD   = (xOpcode==`SIMD);

// I/O activity in the execute and M1 stages.
wire xIsIO  = xIsIn  || xIsOut;
wire m1IsIO = m1IsIn || m1IsOut;

// Bus demand per stage, used by the stall equations:
//   EX : I/O only
//   M1 : loads, stores and I/O
//   M2 : loads and stores
wire xneedBus  = xIsIO;
//wire m1needBus = (m1IsLoad & !m1IsCacheElement) || m1IsStore || m1IsIO;
wire m1needBus = m1IsLoad || m1IsStore || m1IsIO;
wire m2needBus = m2IsLoad || m2IsStore;
 
// "Target is register zero" flags, one per pipeline stage. Only the low five
// bits of the target specifier are examined; the upper bits are ignored.
wire xRtz  = ~|xRt[4:0];
wire m1Rtz = ~|m1Rt[4:0];
wire m2Rtz = ~|m2Rt[4:0];
wire wRtz  = ~|wRt[4:0];
wire tRtz  = ~|tRt[4:0];
 
//wire StallI = dIsLSPair & ~dIR[15];
// A hardware interrupt is waiting to be taken: either a latched NMI edge, or
// an unmasked IRQ, and in both cases no hardware interrupt is being serviced.
wire intPending =
	(nmi_edge & ~StatusHWI) ||
	(irq_i & ~im & ~StatusHWI);	// || ITLBMiss

// Forwarding checks: a stage is "forwarding active" when its target register
// is not register zero and matches one of the three source registers of the
// instruction in the register-fetch stage. Knowing this lets the pipeline
// drain when no forwarded result is actually needed.
wire tForwardingActive  = !tRtz  & (tRt==dRa  || tRt==dRb  || tRt==dRc);
wire wForwardingActive  = !wRtz  & (wRt==dRa  || wRt==dRb  || wRt==dRc);
wire m2ForwardingActive = !m2Rtz & (m2Rt==dRa || m2Rt==dRb || m2Rt==dRc);
wire m1ForwardingActive = !m1Rtz & (m1Rt==dRa || m1Rt==dRb || m1Rt==dRc);
wire xForwardingActive  = !xRtz  & (xRt==dRa  || xRt==dRb  || xRt==dRc);

// A bus cycle (I/O or memory) has been started and is not yet acknowledged or
// terminated with an error.
wire memCycleActive =
	(iocyc_o & !(ack_i|err_i)) ||
	(cyc_o   & !(ack_i|err_i));

// The instruction-fetch stage has no stall source of its own at present.
wire StallI = 1'b0;
 
// Register-fetch stall: a load, input or count operation in EX/M1 (load or
// count in M2) whose target is needed by the instruction being fetched has no
// result to forward yet.
// An SWC in EX always stalls so that the rsf flag can be loaded for the
// following instruction.
// Could check for dRa,dRb,dRc==0, for non-stalling
wire StallR =
	xIsSWC ||
	((xIsLoad  || xIsIn  || xIsCnt)  && xForwardingActive)  ||
	((m1IsLoad || m1IsIn || m1IsCnt) && m1ForwardingActive) ||
	((m2IsLoad || m2IsCnt)           && m2ForwardingActive);

// Execute stall: EX wants the bus while M1/M2 need it or an instruction cache
// access is under way.
wire StallX =
	((xneedBus||xIsLoad||xIsStore) & (m1needBus|m2needBus|icaccess));

// M1 stall: the bus is wanted by M2 or the icache, a cached load must wait
// for a preceding store to complete, or a bus cycle is still outstanding.
wire StallM1 =
	(m1needBus & (m2needBus|icaccess)) ||
	(m1IsLoad & m1IsCacheElement & (m2IsStore|wIsStore)) ||
	memCycleActive;

// The stages *after* the memory load must be held as well, otherwise the
// results they are forwarding would be lost while a data cache load (or any
// outstanding bus cycle) is in progress.
wire StallM2 =
	(m2needBus & icaccess) ||
	(m2ForwardingActive &&
		((m1IsLoad & m1IsCacheElement & !dhit) || memCycleActive));

wire StallW =
	(wForwardingActive &&
		((m1IsLoad & m1IsCacheElement & !dhit) || memCycleActive));

wire StallT =
	(tForwardingActive &&
		((m1IsLoad & m1IsCacheElement & !dhit) || memCycleActive)) ||
	dcaccess;
 
// Pipeline advance chain. Each stage may advance only if the stage after it
// advances, so the enables are built from the back of the pipeline (T)
// towards the front (I).

// T advances whenever the core is running and T is not stalled.
assign advanceT = !StallT && (state==RUN);

assign advanceW = advanceT & !StallW;

// M2 additionally waits for an outstanding memory cycle to be acknowledged
// (or to end in error).
assign advanceM2 =
	advanceW &&
	(cyc_o ? (ack_i|err_i) : 1'b1) &&
	!StallM2;

// M1 additionally waits for an outstanding I/O cycle and, for a cached load,
// for a data cache hit.
assign advanceM1 =
	advanceM2 &
	(iocyc_o ? (ack_i|err_i) : 1'b1) &
	((m1IsLoad & m1IsCacheElement) ? dhit : 1'b1) &
	!StallM1;

// EX additionally waits for the multi-cycle unit selected by the current
// instruction, if any, to signal completion.
assign advanceX =
	!StallX &
	advanceM1 &
	(
		xIsSqrt ? sqrt_done :
		xIsMult ? mult_done :
		xIsDiv ? div_done :
`ifdef FLOATING_POINT
		xIsFPLoo ? fpLooDone :
		xIsFP ? fltdone :
`endif
`ifdef SIMD
		xIsSIMD ? fltdone :
`endif
		1'b1
	);

assign advanceR = advanceX & !StallR;

// Instruction fetch needs an icache hit when the icache is active, otherwise
// a ready instruction buffer.
assign advanceI = advanceR & (ICacheAct ? ihit : ibufrdy) & !StallI;
 
//-----------------------------------------------------------------------------
// Cache loading control
//
// Instruction loading has two triggers, one for when the icache is active and
// one for when it is not (the instruction buffer is used instead).
//
// Instruction side: the load is held off until no memory or I/O operation is
// left in the pipeline. An icache load is expensive anyway (probably at least
// a dozen cycles: 8 memory clocks plus 3 of latency), so waiting costs little.
//
// Data side: the missing load itself sits in the M1 stage, so no other memory
// operation can be starting; it is enough to wait for the previous operation
// to leave the M2 stage.
//
// Either way the aim is to keep a memory operation from colliding with a
// cache access. Testing stb_o alone might work, but it gets complex.
//-----------------------------------------------------------------------------

// Decode, execute, M1 and M2 all hold NOPs: the pipeline has been flushed.
wire pipelineEmpty =
	(dOpcode==`NOPI) &&
	(xOpcode==`NOPI) &&
	(m1Opcode==`NOPI) &&
	(m2Opcode==`NOPI);

wire triggerDCacheLoad =
	(m1IsLoad & m1IsCacheElement & !dhit) &&	// cached load missed
	!(icaccess | dcaccess) &&					// neither cache is busy
	(m2Opcode==`NOPI);							// M2 holds no memory-op

// icache active and the fetch missed
wire triggerICacheLoad1 =
	ICacheAct && !ihit &&
	!triggerDCacheLoad &&
	!(icaccess | dcaccess) &&					// neither cache is busy
	pipelineEmpty;

// icache inactive and the instruction buffer is not ready
wire triggerICacheLoad2 =
	(!ICacheAct && !ibufrdy) &&
	!triggerDCacheLoad &&
	!(icaccess | dcaccess) &&					// neither cache is busy
	pipelineEmpty;

wire triggerICacheLoad = triggerICacheLoad1 | triggerICacheLoad2;
 
// An exception is being raised by the instruction in the execute stage:
// overflow, divide by zero, privilege violation, or either trap opcode.
// NOTE(review): illegal_insn is not part of this term -- confirm intent.
wire EXexception_pending =
	ovr_error ||
	dbz_error ||
	priv_violation ||
	xOpcode==`TRAPcci ||
	xOpcode==`TRAPcc;

// An M1-stage exception only exists when the TLB is compiled in: a data TLB
// miss on a load or store that is about to advance.
`ifdef TLB
wire M1exception_pending = advanceM1 & (m1IsLoad|m1IsStore) & DTLBMiss;
`else
wire M1exception_pending = 1'b0;
`endif

wire exception_pending = EXexception_pending | M1exception_pending;

// NMI edge detector state: previous sample of nmi_i and the latched edge.
// NOTE(review): nmi_edge is already referenced by intPending earlier in the
// file, ahead of this declaration; some tools reject use-before-declaration.
reg prev_nmi;
reg nmi_edge;
 
//-----------------------------------------------------------------------------
// Register file.
//-----------------------------------------------------------------------------
 
// Operand values (after bypassing) and register specifiers for the next
// instruction entering the execute stage.
wire [63:0] nxt_a;
wire [63:0] nxt_b;
wire [63:0] nxt_c;
wire [8:0] nxt_Ra;
wire [8:0] nxt_Rb;
wire [8:0] nxt_Rc;

// Source-register selection (module defined elsewhere). It is connected to
// the fetched instruction, the execute-stage instruction, the context fields
// and the I/R/X advance enables, together with dRa/dRb/dRc and nxt_Ra/Rb/Rc
// -- presumably the latter six are its outputs; confirm in the module.
Raptor64_SetOperandRegs u7
(
	.rst(rst_i),
	.clk(clk),
	.advanceI(advanceI),
	.advanceR(advanceR),
	.advanceX(advanceX),
	.insn(insn),
	.xIR(xIR),
	.AXC(AXC),
	.xAXC(xAXC),
	.b(b),
	.dRa(dRa),
	.dRb(dRb),
	.dRc(dRc),
	.nxt_Ra(nxt_Ra),
	.nxt_Rb(nxt_Rb),
	.nxt_Rc(nxt_Rc)
);
 
// Register file storage: one write port and three read ports.
//  - The write port is permanently enabled (wce and we tied high) and writes
//    wData at address wRt on clk.
//  - The three read ports are clocked with the inverted clock, enabled by
//    advanceR, and addressed by dRa / dRb / dRc.
syncRam512x64_1rw3r u5
(
	// write port
	.wclk(clk),
	.wrst(1'b0),
	.wce(1'b1),		// advanceW
	.we(1'b1),
	.wadr(wRt),
	.i(wData),
	.wo(),

	// read port A
	.rclka(~clk), .rrsta(1'b0), .rcea(advanceR), .radra(dRa), .roa(rfoa),

	// read port B
	.rclkb(~clk), .rrstb(1'b0), .rceb(advanceR), .radrb(dRb), .rob(rfob),

	// read port C
	.rclkc(~clk), .rrstc(1'b0), .rcec(advanceR), .radrc(dRc), .roc(rfoc)
);
 
// Operand A bypass: produces nxt_a for source register dRa from the register
// file output rfoa, the decode-stage pc, and the target/data pairs of the
// x, m1, m2, w and t stages. The selection rules are inside
// Raptor64_BypassMux (defined elsewhere).
Raptor64_BypassMux u8
(
	.dRn(dRa),
	.rfo(rfoa),
	.dpc(dpc),
	.xRt(xRt),   .xData(xData),
	.m1Rt(m1Rt), .m1Data(m1Data),
	.m2Rt(m2Rt), .m2Data(m2Data),
	.wRt(wRt),   .wData(wData),
	.tRt(tRt),   .tData(tData),
	.nxt(nxt_a)
);
 
// Operand B bypass: produces nxt_b for source register dRb from the register
// file output rfob, the decode-stage pc, and the target/data pairs of the
// x, m1, m2, w and t stages. The selection rules are inside
// Raptor64_BypassMux (defined elsewhere).
Raptor64_BypassMux u25
(
	.dRn(dRb),
	.rfo(rfob),
	.dpc(dpc),
	.xRt(xRt),   .xData(xData),
	.m1Rt(m1Rt), .m1Data(m1Data),
	.m2Rt(m2Rt), .m2Data(m2Data),
	.wRt(wRt),   .wData(wData),
	.tRt(tRt),   .tData(tData),
	.nxt(nxt_b)
);
 
// Operand C bypass: produces nxt_c for source register dRc from the register
// file output rfoc, the decode-stage pc, and the target/data pairs of the
// x, m1, m2, w and t stages. The selection rules are inside
// Raptor64_BypassMux (defined elsewhere).
Raptor64_BypassMux u24
(
	.dRn(dRc),
	.rfo(rfoc),
	.dpc(dpc),
	.xRt(xRt),   .xData(xData),
	.m1Rt(m1Rt), .m1Data(m1Data),
	.m2Rt(m2Rt), .m2Data(m2Data),
	.wRt(wRt),   .wData(wData),
	.tRt(tRt),   .tData(tData),
	.nxt(nxt_c)
);
 
// We need to zero out xRt because it'll match in the operand bypass multiplexers if it isn't zeroed out.
//Raptor64_SetTargetRegister u8
//(
//	.rst(rst_i),
//	.clk(clk),
//	.advanceR(advanceR),
//	.advanceX(advanceX),
//	.dIRvalid(dIRvalid),
//	.dIR(dIR),
//	.dAXC(dAXC),
//	.xRt(xRt)
//);
 
// Program counter history. The pc is shifted into u23 each time the fetch
// stage advances while capture (pccap) is enabled; pchi selects which stored
// entry appears on pchistoric. pchi is written by MTSPR to the PCHI register.
// NOTE(review): vtdl is presumably a variable-tap delay line with 64-bit
// entries and 64 taps -- confirm the meaning of the two parameters.
reg [5:0] pchi;

vtdl #(64,64) u23
(
	.clk(clk),
	.ce(advanceI & pccap),
	.d(pc),
	.a(pchi),
	.q(pchistoric)
);
 
// Classification of the vector number carried in xIR[15:7] (execute stage)
// and of the exception type in the decode stage. The IRQ vectors occupy the
// 32 entries starting at EX_IRQ.

// Execute stage holds an IRQ- or NMI-vectored instruction with xIR[16] set
// (the EX stage treats this as a software-issued IRQ SYSCALL).
wire isxIRQ =
	xIR[16] &&
	((xIR[15:7]>=`EX_IRQ && xIR[15:7] < `EX_IRQ+32) || xIR[15:7]==`EX_NMI);

// Decode stage holds an NMI or IRQ exception.
wire isPipeIRQ =
	dextype==`EX_NMI ||
	(dextype>=`EX_IRQ && dextype < `EX_IRQ+32);

// Execute-stage vector is none of: NMI, IRQ, instruction TLB miss, data TLB
// miss.
wire isxNonHWI =
	xIR[15:7]!=`EX_NMI &&
	!(xIR[15:7]>=`EX_IRQ && xIR[15:7] < `EX_IRQ+32) &&
	xIR[15:7]!=`EX_TLBI &&
	xIR[15:7]!=`EX_TLBD;

// An interrupt is already in the decode stage or is about to be taken.
wire IRQinPipe = intPending || isPipeIRQ;
 
always @(posedge clk)
if (rst_i) begin
	bte_o <= 2'b00;
	cti_o <= 3'b000;
	iocyc_o <= 1'b0;
	cyc_o <= 1'b0;
	stb_o <= 1'b0;
	we_o <= 1'b0;
	sel_o <= 8'h00;
	adr_o <= 64'd0;
	dat_o <= 64'd0;
 
	state <= RESET;
	cstate <= IDLE;
	pccap <= 1'b1;
	nonICacheSeg <= 32'hFFFF_FFFD;
	TBA <= 64'd0;
	pc <= `RESET_VECTOR;
	dIR <= `NOP_INSN;
	xIR <= `NOP_INSN;
	m1IR <= `NOP_INSN;
	m2IR <= `NOP_INSN;
	wIR <= `NOP_INSN;
	m1IsLoad <= 1'b0;
	m1IsStore <= 1'b0;
	m2IsLoad <= 1'b0;
	m2IsStore <= 1'b0;
	wIsStore <= 1'b0;
	m1IsOut <= 1'b0;
	m1IsIn <= 1'b0;
	tRt <= 9'd0;
	wRt <= 9'd0;
	m1Rt <= 9'd0;
	m2Rt <= 9'd0;
	tData <= 64'd0;
	wData <= 64'd0;
	m1Data <= 64'd0;
	m2Data <= 64'd0;
	icaccess <= 1'b0;
	dcaccess <= 1'b0;
	wFip <= 1'b0;
	m2Fip <= 1'b0;
	m1Fip <= 1'b0;
	xFip <= 1'b0;
	dFip <= 1'b0;
	dirqf <= 1'b0;
	dNmi <= 1'b0;
	xNmi <= 1'b0;
	m1Nmi <= 1'b0;
	m2Nmi <= 1'b0;
	tick <= 64'd0;
	cstate <= IDLE;
	dAXC <= 4'd0;
	xAXC <= 4'd0;
	m1AXC <= 4'd0;
	m2AXC <= 4'd0;
	wAXC <= 4'd0;
	xirqf <= 1'b0;
	dextype <= 9'h00;
	xextype <= 9'h00;
	m1extype <= 9'h00;
	m2extype <= 9'h00;
	wextype <= 9'h00;
	textype <= 9'h00;
	xpc <= 64'd0;
	a <= 64'd0;
	b <= 64'd0;
	imm <= 64'd0;
	clk_en <= 1'b1;
	StatusEXL <= 16'hFFFF;
	StatusHWI <= 1'b0;
	mutex_gate <= 64'h0;
	dcache_on <= 1'b0;
	ICacheOn <= 1'b0;
	ibufadr <= 64'h0;
	m1IsCacheElement <= 1'b0;
	dtinit <= 1'b1;
	ras_sp <= 6'd63;
	im1 <= 1'b1;
// These must be non-zero in order to produce random numbers
// We set them here in case the user doesn't bother to set them.
	m_z <= 64'h0123456789ABCDEF;
	m_w <= 64'h8888888877777777;
	insnkey <= 32'd0;
	LoadNOPs <= 1'b0;
	eptr <= 8'h00;
	ie_fuse <= 8'h00;
end
else begin
 
//---------------------------------------------------------
// Initialize program counters
// Initialize data tags to zero.
// Initialize execution pattern register to zero.
//---------------------------------------------------------
case(state)
RESET:
	begin
		pc <= `RESET_VECTOR;
		adr_o[14:6] <= adr_o[14:6]+9'd1;
		if (adr_o[14:6]==9'h1FF) begin
			dtinit <= 1'b0;
			state <= RUN;
		end
		epat[a[7:0]] <= b[3:0];		/// b=0, to make this line the same as MTEP
		a[7:0] <= a[7:0] + 8'h1;
	end
RUN:
begin
 
ie_fuse <= {ie_fuse[6:0],ie_fuse[0]};		// shift counter
 
tick <= tick + 64'd1;
 
prev_nmi <= nmi_i;
if (!prev_nmi & nmi_i)
	nmi_edge <= 1'b1;
 
 
`ifdef ADDRESS_RESERVATION
// A store by any device in the system to a reserved address block
// clears the reservation.
 
if (sys_adv && sys_adr[63:5]==resv_address)
	resv_address <= 59'd0;
`endif
 
wrhit <= 1'b0;
 
//---------------------------------------------------------
// IFETCH:
// - check for external hardware interrupt
// - fetch instruction
// - increment PC
// - set special register defaults for some instructions
// Outputs:
// - d???? signals
//---------------------------------------------------------
if (advanceI) begin
	dAXC <= AXC;
	dextype <= `EX_NON;
	// record instruction and associated pc value
	dIR <= insn;
	dpc <= pc;
	dIm <= im;
	dStatusHWI <= StatusHWI;
	// Interrupt: stomp on the incoming instruction and replace it with
	// a system call.
	if (nmi_edge & !StatusHWI) begin
		$display("*****************");
		$display("NMI edge detected");
		$display("*****************");
		dextype <= `EX_NMI;
		dNmi <= 1'b1;
		dIR <= {`MISC,9'd0,`EX_NMI,`SYSCALL};
	end
	else if (irq_i & !im & !StatusHWI) begin
		$display("*****************");
		$display("IRQ %d detected", irq_no);
		$display("*****************");
		dIR <= {`MISC,9'd0,irq_no,`SYSCALL};
		$display("setting dIR=%h", {`MISC,9'd0,irq_no,`SYSCALL});
		dextype <= irq_no;
	end
`ifdef TLB
	// A TLB miss is treated like a hardware interrupt.
	else if (ITLBMiss) begin
		$display("TLB miss on instruction fetch.");
		dextype <= `EX_TLBI;
		dIR <= {`MISC,9'd0,`EX_TLBI,`SYSCALL};
		BadVAddr <= pc[63:13];
	end
`endif
	// Are we filling the pipeline with NOP's as a result of a previous
	// hardware interrupt ?
	else if (|dFip|LoadNOPs) begin
		dIR <= `NOP_INSN;
	end
	else begin
`include "insn_dumpsc.v"
	end
	begin
		dbranch_taken <= 1'b0;
		pc <= fnIncPC(pc);
		case(iOpcode)
		// We predict the return address by storing it in a return address stack
		// during a call instruction, then popping it off the stack in a return
		// instruction. The prediction will not always be correct, if it's wrong
		// it's corrected by the EX stage branching to the right address.
		`CALL:
			begin
				ras[ras_sp] <= fnIncPC(pc);
				ras_sp <= ras_sp - 6'd1;
				pc <= jmp_tgt;
			end
		`RET:
			begin
				pc <= ras[ras_sp + 6'd1];
				ras_sp <= ras_sp + 6'd1;
			end
		`JMP:
			begin
				pc <= jmp_tgt;
			end
		`BTRR:
			case(insn[4:0])
			`BEQ,`BNE,`BLT,`BLE,`BGT,`BGE,`BLTU,`BLEU,`BGTU,`BGEU,`BAND,`BOR,`BRA,`BNR,`BRN,`LOOP:
				if (predict_taken) begin
//					$display("Taking predicted branch: %h",{pc[63:4] + {{42{insn[24]}},insn[24:7]},insn[6:5],2'b00});
					dbranch_taken <= 1'b1;
					pc <= pc + {{52{insn[14]}},insn[14:5],2'b00};
				end
			default:	;
			endcase
 
		// If doing a JAL that stores a return address in the link register, save off the return address
		// in the return address predictor stack.
		`JAL:
			if (insn[19:15]==5'd31) begin
				ras[ras_sp] <= fnIncPC(pc);
				ras_sp <= ras_sp - 6'd1;
			end
`ifdef BTB
		`JAL:	pc <= btb[pc[7:2]];
		`BTRI:
			if (predict_taken) begin
				dbranch_taken <= 1'b1;
				pc <= btb[pc[7:2]];
			end
`endif
		`BEQI,`BNEI,`BLTI,`BLEI,`BGTI,`BGEI,`BLTUI,`BLEUI,`BGTUI,`BGEUI:
			begin
				if (predict_taken) begin
					dbranch_taken <= 1'b1;
					pc <= pc + {{50{insn[19]}},insn[19:8],2'b00};
				end
			end
		default:	;
		endcase
	end
end
// Stage tail
// Pipeline annul for when a bubble in the pipeline occurs.
else if (advanceR) begin
	dextype <= #1 `EX_NON;
	dIR <= #1 `NOP_INSN;
end
 
//-----------------------------------------------------------------------------
// RFETCH:
// Register fetch stage
//
// Inputs:
// - d???? signals
// Outputs:
// - x???? signals to EX stage
//-----------------------------------------------------------------------------
//
if (advanceR) begin
	xIm <= dIm;
	xNmi <= dNmi;
	xStatusHWI <= dStatusHWI;
	xAXC <= dAXC;
	xFip <= dFip;
	xextype <= dextype;
	xpc <= dpc;
	xbranch_taken <= dbranch_taken;
	if (dOpcode==`R && dFunc==`MYST)
		xIR <= nxt_c[31:0];
	else
		xIR <= dIR;
	a <= nxt_a;
	b <= nxt_b;
	if (dOpcode==`SHFTI)
		b <= {58'd0,dIR[14:9]};
	c <= nxt_c;
 
	case(dOpcode)
	`BTRI:
		imm <= {{53{dIR[7]}},dIR[10:0]};
	`BEQI,`BNEI,`BLTI,`BLEI,`BGTI,`BGEI,`BLTUI,`BLEUI,`BGTUI,`BGEUI:
		imm <= {{56{dIR[7]}},dIR[7:0]};
	`MEMNDX:
		imm <= dIR[7:6];
	default:
		imm <= {{49{dIR[14]}},dIR[14:0]};
	endcase
 
	casex(dOpcode)
	`MISC:
		case(dFunc)
		`SYSCALL:	xRt <= 9'd0;
		default:	xRt <= 9'd0;
		endcase
	`R:
		case(dFunc)
		`MTSPR,`CMG,`CMGI,`EXEC:
					xRt <= 9'd0;
		default:	xRt <= {dAXC,dIR[19:15]};
		endcase
	`MYST,`MUX:	xRt <= {dAXC,dIR[ 9: 5]};
	`SETLO:		xRt <= {dAXC,dIR[26:22]};
	`SETMID:	xRt <= {dAXC,dIR[26:22]};
	`SETHI:		xRt <= {dAXC,dIR[26:22]};
	`RR,`FP:	xRt <= {dAXC,dIR[14:10]};
	`BTRI:		xRt <= 9'd0;
	`BTRR:
		case(dIR[4:0])
		`LOOP:	xRt <= {dAXC,dIR[19:15]};
		default: xRt <= 9'd0;
		endcase
	`TRAPcc:	xRt <= 9'd0;
	`TRAPcci:	xRt <= 9'd0;
	`JMP:		xRt <= 9'd00;
	`CALL:		xRt <= {dAXC,5'd31};
	`RET:		xRt <= {dAXC,5'd30};
	`MEMNDX:
		case(dFunc[5:0])
		`LSHX,`LSWX,
		`SWX,`SHX,`SCX,`SBX,`SFX,`SFDX,`SPX,`SFPX,`SFDPX,`SSHX,`SSWX,
		`OUTWX,`OUTHX,`OUTCX,`OUTBX:
				xRt <= 9'd0;
		default:	xRt <= {dAXC,dIR[14:10]};
		endcase
	`LSH,`LSW,
	`SW,`SH,`SC,`SB,`SF,`SFD,`SSH,`SSW,`SP,`SFP,`SFDP,	// but not SWC!
	`OUTW,`OUTH,`OUTC,`OUTB:
				xRt <= 9'd0;
	`NOPI:		xRt <= 9'd0;
	`BEQI,`BNEI,`BLTI,`BLEI,`BGTI,`BGEI,`BLTUI,`BLEUI,`BGTUI,`BGEUI:
				xRt <= 9'd0;
	default:	xRt <= {dAXC,dIR[19:15]};
	endcase
 
//	if (dIsLSPair & ~dIR[15])
//		dIR <= dIR|32'h8000;
end
// Stage tail
// Pipeline annul for when a bubble in the pipeline occurs.
else if (advanceX) begin
	xRt <= #1 9'd0;
	xextype <= #1 `EX_NON;
	xIR <= #1 `NOP_INSN;
	xFip <= #1 1'b0;
end
 
//---------------------------------------------------------
// EXECUTE:
// - perform datapath operation
// - perform virtual to physical address translation.
// Outputs:
// - m1???? signals to M1 stage
//---------------------------------------------------------
if (advanceX) begin
	m1StatusHWI <= xStatusHWI;
	m1Im <= xIm;
	m1Nmi <= xNmi;
	m1extype <= xextype;
	m1Fip <= xFip;
	m1pc <= xpc;
	m1IR <= xIR;
	m1IsCnt <= xIsCnt;
	m1IsLoad <= xIsLoad;
	m1IsStore <= xIsStore;
	m1IsOut <= xIsOut;
	m1IsIn <= xIsIn;
	m1Rt <= xRt;
	m1Data <= xData;
	m1IsCacheElement <= xisCacheElement;
	m1AXC <= xAXC;
	if (xOpcode==`RR) begin
		if (xFunc6==`MOVZ && !aeqz) begin
			m1Rt <= 9'd0;
			m1Data <= 64'd0;
		end
		if (xFunc6==`MOVNZ && aeqz) begin
			m1Rt <= 9'd0;
			m1Data <= 64'd0;
		end
		if (xFunc6==`MOVPL && a[63]) begin
			m1Rt <= 9'd0;
			m1Data <= 64'd0;
		end
		if (xFunc6==`MOVMI && !a[63]) begin
			m1Rt <= 9'd0;
			m1Data <= 64'd0;
		end
	end
 
	begin
		case(xOpcode)
		`MISC:
			case(xFunc)
			`SEI:	begin ie_fuse <= 8'h00; end
			`CLI:	begin ie_fuse[0] <= 1'b1; end
			`WAIT:	m1clkoff <= 1'b1;
			`ICACHE_ON:		ICacheOn <= 1'b1;
			`ICACHE_OFF:	ICacheOn <= 1'b0;
			`DCACHE_ON:		dcache_on <= 1'b1;
			`DCACHE_OFF:	dcache_on <= 1'b0;
			`FIP:	begin
					// In case we stomped on an interrupt, we have to re-enable
					// interrupts which were disabled in the I-Stage. We go backwards
					// in time and set the interrupt status to what it used to be
					// when this instruction is executed.
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
					dFip <= 1'b1;
					xFip <= 1'b1;
					m1Fip <= 1'b1;
					end
			`IEPP:	begin
					eptr <= eptr + 8'd1;
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
					dFip <= 1'b1;
					xFip <= 1'b1;
					m1Fip <= 1'b1;
					end
			`GRAN:	begin
					rando <= rand;
					m_z <= next_m_z;
					m_w <= next_m_w;
					end
			`GRAFD:	begin
					rando <= randfd;
					m_z <= next_m_z;
					m_w <= next_m_w;
					end
			`IRET:
				if (StatusHWI) begin
					StatusHWI <= 1'b0;
					ie_fuse[0] <= 1'b1;
					pc <= IPC[xAXC];	//a;
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			`ERET:
				if (StatusEXL[xAXC]) begin
					StatusEXL[xAXC]	<= 1'b0;
					pc <= EPC[xAXC];
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			// Note: we can't mask off the interrupts in the I-stage because this
			// instruction might not be valid. Eg. a branch could occur causing
			// the instruction to not be executed. But we don't want to allow
			// nested interrupts. We would need a stack of return addresses to 
			// implement nested interrupts. We don't want a real IRQ that's following this
			// instruction in the pipeline to interfere with its operation. So...
			// we check the pipeline and if the IRQ SYSCALL is being followed by
			// a real IRQ, then we merge the two IRQ's into a single one by aborting
			// the IRQ SYSCALL. If nested interrupts were happening, the IRET at the
			// end of the real IRQ routine would re-enable interrupts too soon.
			`SYSCALL:
				begin
					if (isxIRQ && 	// Is this a software IRQ SYSCALL ?
						IRQinPipe) begin		// Is there an interrupt in the pipeline ? OR about to happen
						m1IR <= `NOP_INSN;								// Then turn this into a NOP
						m1Rt <= 9'd0;
					end
					else begin
						if (isxNonHWI)
							StatusEXL[xAXC] <= 1'b1;
						else begin
							StatusHWI <= 1'b1;
							ie_fuse <= 8'h00;
							if (xNmi)
								nmi_edge <= 1'b0;
						end
						if (!xNmi&!dNmi) begin
							dIR <= `NOP_INSN;
							xIR <= `NOP_INSN;
						end
						xRt <= 9'd0;
						ea <= {TBA[63:12],xIR[15:7],3'b000};
						LoadNOPs <= 1'b1;
						$display("EX SYSCALL thru %h",{TBA[63:12],xIR[15:7],3'b000});
					end
				end
`ifdef TLB
			`TLBP:	ea <= TLBVirtPage;
`endif
			default:	;
			endcase
		`R:
			case(xFunc6)
			`EXEC:
				begin
					pc <= fnIncPC(xpc);
					dIR <= b;
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			`MTSPR:
				begin
				case(xIR[11:6])
`ifdef TLB
				`PageTableAddr:	PageTableAddr <= a[63:13];
				`BadVAddr:		BadVAddr <= a[63:13];
`endif
				`ASID:			ASID <= a[7:0];
				`TBA:			TBA <= {a[63:12],12'h000};
				`NON_ICACHE_SEG:	nonICacheSeg <= a[63:32];
				`FPCR:			rm <= a[31:30];
				`SRAND1:		begin
								m_z <= a;
								end
				`SRAND2:		begin
								m_w <= a;
								end
				`INSNKEY:		insnkey <= a[31:0];
				`PCHI:			pchi <= a[5:0];
				default:	;
				endcase
				end
			`OMG:	mutex_gate[a[5:0]] <= 1'b1;
			`CMG:	mutex_gate[a[5:0]] <= 1'b0;
			`OMGI:	mutex_gate[xIR[11:6]] <= 1'b1;
			`CMGI:	mutex_gate[xIR[11:6]] <= 1'b0;
			default:	;
			endcase
		`RR:
			case(xFunc6)
			`MTEP:	epat[a[7:0]] <= b[3:0];
			default:	;
			endcase
		// JMP and CALL change the program counter immediately in the IF stage.
		// There's no work to do here. The pipeline does not need to be cleared.
		`JMP:	;
		`CALL:	;//m1Data <= fnIncPC(xpc);
 
		`JAL:
`ifdef BTB
			if (dpc[63:2] != a[63:2] + imm[63:2]) begin
				pc[63:2] <= a[63:2] + imm[63:2];
				btb[xpc[7:2]] <= {a[63:2] + imm[63:2],2'b00};
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
			end
`else
			begin
				pc[63:2] <= a[63:2] + imm[63:2];
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
			end
`endif
		// Check the pc of the instruction after the RET instruction (the dpc), to
		// see if it's equal to the RET target. If it's the same as the target then
		// we predicted the RET return correctly, so there's nothing to do. Otherwise
		// we need to branch to the RET location.
		`RET:
			if (dpc[63:2]!=b[63:2]) begin
				pc[63:2] <= b[63:2];
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
			end
		`BTRR:
			case(xFunc5)
		// BEQ r1,r2,label
			`BEQ,`BNE,`BLT,`BLE,`BGT,`BGE,`BLTU,`BLEU,`BGTU,`BGEU,`BAND,`BOR,`BNR,`LOOP,`BRA,`BRN:
				if (!takb & xbranch_taken) begin
					$display("Taking mispredicted branch %h",fnIncPC(xpc));
					pc <= fnIncPC(xpc);
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
				else if (takb & !xbranch_taken) begin
					$display("Taking branch %h",{xpc[63:2] + {{52{xIR[14]}},xIR[14:5]},2'b00});
					pc[63:2] <= xpc[63:2] + {{52{xIR[14]}},xIR[14:5]};
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
		// BEQ r1,r2,r10
			`BEQR,`BNER,`BLTR,`BLER,`BGTR,`BGER,`BLTUR,`BLEUR,`BGTUR,`BGEUR://,`BANDR,`BORR,`BNRR:
				if (takb) begin
					pc[63:2] <= c[63:2];
					pc[1:0] <= 2'b00;
`ifdef BTB
					btb[xpc[7:2]] <= c;
`endif
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			default:	;
			endcase
		// BEQ r1,#3,r10
		`BTRI:
`ifdef BTB
			if (takb) begin
				if ((xbranch_taken && b[63:2]!=dpc[63:2]) ||	// took branch, but not to right target
					!xbranch_taken) begin					// didn't take branch, and were supposed to
					pc[63:2] <= b[63:2];
					pc[1:0] <= 2'b00;
					btb[xpc[7:2]] <= b;
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			end
			else if (xbranch_taken)	begin	// took the branch, and weren't supposed to
				pc <= fnIncPC(xpc);
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
			end
`else
			if (takb) begin
				pc[63:2] <= b[63:2];
				pc[1:0] <= 2'b00;
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
			end
`endif
		// BEQI r1,#3,label
		`BEQI,`BNEI,`BLTI,`BLEI,`BGTI,`BGEI,`BLTUI,`BLEUI,`BGTUI,`BGEUI:
			if (takb) begin
				if (!xbranch_taken) begin
					pc[63:2] <= xpc[63:2] + {{50{xIR[19]}},xIR[19:8]};
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			end
			else begin
				if (xbranch_taken) begin
					$display("Taking mispredicted branch %h",fnIncPC(xpc));
					pc <= fnIncPC(xpc);
					if (!xNmi&!dNmi) begin
						dIR <= `NOP_INSN;
						xIR <= `NOP_INSN;
					end
					xRt <= 9'd0;
				end
			end
		`TRAPcc,`TRAPcci:
			if (takb) begin
				StatusEXL[xAXC] <= 1'b1;
				xextype <= `EX_TRAP;
				if (!xNmi&!dNmi) begin
					dIR <= `NOP_INSN;
					xIR <= `NOP_INSN;
				end
				xRt <= 9'd0;
				LoadNOPs <= 1'b1;
			end
 
		`INW,`INH,`INHU,`INCH,`INCU,`INB,`INBU:
				begin
				iocyc_o <= 1'b1;
				stb_o <= 1'b1;
				sel_o <= fnSelect(xOpcode,xData[2:0]);
				adr_o <= xData;
				end
		`OUTW:
				begin
				iocyc_o <= 1'b1;
				stb_o <= 1'b1;
				we_o <= 1'b1;
				sel_o <= fnSelect(xOpcode,xData[2:0]);
				adr_o <= xData;
				dat_o <= b;
				end
		`OUTH:
				begin
				iocyc_o <= 1'b1;
				stb_o <= 1'b1;
				we_o <= 1'b1;
				sel_o <= fnSelect(xOpcode,xData[2:0]);
				adr_o <= xData;
				dat_o <= {2{b[31:0]}};
				end
		`OUTC:
				begin
				iocyc_o <= 1'b1;
				stb_o <= 1'b1;
				we_o <= 1'b1;
				sel_o <= fnSelect(xOpcode,xData[2:0]);
				adr_o <= xData;
				dat_o <= {4{b[15:0]}};
				end
		`OUTB:
				begin
				iocyc_o <= 1'b1;
				stb_o <= 1'b1;
				we_o <= 1'b1;
				sel_o <= fnSelect(xOpcode,xData[2:0]);
				adr_o <= xData;
				dat_o <= {8{b[7:0]}};
				end
	//	`OUTBC:
	//			begin
	//			iocyc_o <= 1'b1;
	//			stb_o <= 1'b1;
	//			we_o <= 1'b1;
	//			case(xData1[2:0])
	//			3'b000:	sel_o <= 8'b00000001;
	//			3'b001:	sel_o <= 8'b00000010;
	//			3'b010:	sel_o <= 8'b00000100;
	//			3'b011:	sel_o <= 8'b00001000;
	//			3'b100:	sel_o <= 8'b00010000;
	//			3'b101:	sel_o <= 8'b00100000;
	//			3'b110:	sel_o <= 8'b01000000;
	//			3'b111:	sel_o <= 8'b10000000;
	//			endcase
	//			adr_o <= xData1;
	//			dat_o <= {8{xIR[19:12]}};
	//			end
		`LEA:	begin
				$display("LEA %h", xData);
				m1Data <= xData;
				end
		`LB,`LBU,`LC,`LCU,`LH,`LHU,`LW,`LWR,`LF,`LFD,`LM,`LSH,`LSW,`LP,`LFP,`LFDP,
		`SW,`SH,`SC,`SB,`SWC,`SF,`SFD,`SM,`SSW,`SP,`SFP,`SFDP:
				begin
				m1Data <= b;
				ea <= xData;
				$display("EX MEMOP %h", xData);
				end
	//	`STBC:
	//			begin
	//			m1Data <= {8{xIR[19:12]}};
	//			ea <= xData1;
	//			end
	//	`SSH:	begin
	//			case(xRt)
	//			`SR:	m1Data <= {2{sr}};
	//			default:	m1Data <= 64'd0;
	//			endcase
	//			ea <= xData1;
	//			end
		`CACHE:
				begin
				m1Data <= b;
				ea <= xData;
				case(xIR[19:15])
				`INVIL:		;		// handled in M1 stage
				`INVIALL:	tvalid <= 256'd0;
				`ICACHEON:	ICacheOn <= 1'b1;
				`ICACHEOFF:	ICacheOn <= 1'b0;
				`DCACHEON:	dcache_on <= 1'b1;
				`DCACHEOFF:	dcache_on <= 1'b0;
				default:	;
				endcase
				end
		`MEMNDX:
				begin
				m1IR[31:25] <= 7'd32+xFunc6;
				case(xFunc6)
				`LEAX:
					begin
					$display("LEAX %h", xData);
					m1Data <= xData;
					end
				`INWX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						sel_o <= 8'hFF;
						adr_o <= xData;
						end
				`INHX,`INHUX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						sel_o <= xData[2] ? 8'b11110000 : 8'b00001111;
						adr_o <= xData;
						end
				`INCX,`INCUX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						case(xData[2:1])
						2'b00:	sel_o <= 8'b00000011;
						2'b01:	sel_o <= 8'b00001100;
						2'b10:	sel_o <= 8'b00110000;
						2'b11:	sel_o <= 8'b11000000;
						endcase
						adr_o <= xData;
						end
				`INBX,`INBUX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						case(xData[2:0])
						3'b000:	sel_o <= 8'b00000001;
						3'b001:	sel_o <= 8'b00000010;
						3'b010:	sel_o <= 8'b00000100;
						3'b011:	sel_o <= 8'b00001000;
						3'b100:	sel_o <= 8'b00010000;
						3'b101:	sel_o <= 8'b00100000;
						3'b110:	sel_o <= 8'b01000000;
						3'b111:	sel_o <= 8'b10000000;
						endcase
						adr_o <= xData;
						end
				`OUTWX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						we_o <= 1'b1;
						sel_o <= 8'hFF;
						adr_o <= xData;
						dat_o <= c;
						end
				`OUTHX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						we_o <= 1'b1;
						sel_o <= xData[2] ? 8'b11110000 : 8'b00001111;
						adr_o <= xData;
						dat_o <= {2{c[31:0]}};
						end
				`OUTCX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						we_o <= 1'b1;
						case(xData[2:1])
						2'b00:	sel_o <= 8'b00000011;
						2'b01:	sel_o <= 8'b00001100;
						2'b10:	sel_o <= 8'b00110000;
						2'b11:	sel_o <= 8'b11000000;
						endcase
						adr_o <= xData;
						dat_o <= {4{c[15:0]}};
						end
				`OUTBX:
						begin
						iocyc_o <= 1'b1;
						stb_o <= 1'b1;
						we_o <= 1'b1;
						case(xData[2:0])
						3'b000:	sel_o <= 8'b00000001;
						3'b001:	sel_o <= 8'b00000010;
						3'b010:	sel_o <= 8'b00000100;
						3'b011:	sel_o <= 8'b00001000;
						3'b100:	sel_o <= 8'b00010000;
						3'b101:	sel_o <= 8'b00100000;
						3'b110:	sel_o <= 8'b01000000;
						3'b111:	sel_o <= 8'b10000000;
						endcase
						adr_o <= xData;
						dat_o <= {8{c[7:0]}};
						end
				default:
					begin
					m1Data <= c;
					ea <= xData;
					end
				endcase
				end
		default:	;
		endcase
	end
`ifdef FLOATING_POINT
	if (xOpcode==`FP) begin
		case (xFunc6)
		`FDADD,`FDSUB:	
				begin
				fp_uf <= fpaddsub_uf;
				fp_ovr <= fpaddsub_ovr;
				fp_iop <= fpaddsub_iop;
				FPC_SL <= xData[63] && xData[62:0]!=63'd0;
				FPC_SG <= !xData[63] && xData[62:0]!=63'd0;
				FPC_SE <= xData[62:0]==63'd0;
				end
		`FPMUL:
				begin
				fp_uf <= fpmul_uf;
				fp_ovr <= fpmul_ovr;
				fp_iop <= fpmul_iop;
				FPC_SL <= xData[63] && xData[62:0]!=63'd0;
				FPC_SG <= !xData[63] && xData[62:0]!=63'd0;
				FPC_SE <= xData[62:0]==63'd0;
				end
		`FPDIV:
				begin
				fp_uf <= fpdiv_uf;
				fp_ovr <= fpdiv_ovr;
				fp_iop <= fpdiv_iop;
				FPC_SL <= xData[63] && xData[62:0]!=63'd0;
				FPC_SG <= !xData[63] && xData[62:0]!=63'd0;
				FPC_SE <= xData[62:0]==63'd0;
				end
		`FDF2I:
				begin
				fp_ovr <= f2i_ovr;
				fp_iop <= f2i_iop;
				end
		`FDCLT,`FDCLE,`FDCEQ,`FDCNE,`FDCGT,`FDCGE,`FDCUN:
				begin
				fp_iop <= fpcmp_iop;
				end
		default:	;
		endcase
	end
`endif
	if (dbz_error) begin
		$display("Divide by zero error");
		LoadNOPs <= #1 1'b1;
		// Squash a pending IRQ, but not an NMI
		m1extype <= #1 `EX_DBZ;
		m1IR <= #1 `NOP_INSN;
		if (!xNmi&!dNmi) begin
			dIR <= `NOP_INSN;
			xIR <= `NOP_INSN;
		end
		xRt <= #1 9'd0;
	end
	else if (ovr_error) begin
		$display("Overflow error");
		LoadNOPs <= 1'b1;
		m1extype <= `EX_OFL;
		m1IR <= #1 `NOP_INSN;
		if (!xNmi&!dNmi) begin
			dIR <= `NOP_INSN;
			xIR <= `NOP_INSN;
		end
		xRt <= #1 9'd0;
	end
//	else if (priv_violation) begin
//		$display("Priviledge violation");
//		m1IR <= #1 `NOP_INSN;
//		LoadNOPs <= 1'b1;
//		if (!xNmi&!dNmi) begin
//			m1extype <= `EX_PRIV;
//		end
//		dIR <= #1 `NOP_INSN;
//		xIR <= #1 `NOP_INSN;
//		xRt <= #1 9'd0;
//	end
	else if (illegal_insn) begin
		$display("Unimplemented Instruction");
		LoadNOPs <= 1'b1;
		m1extype <= `EX_UNIMP_INSN;
		m1IR <= #1 `NOP_INSN;
		if (!xNmi&!dNmi) begin
			dIR <= `NOP_INSN;
			xIR <= `NOP_INSN;
		end
		xRt <= #1 9'd0;
	end
end
// EX stage tail.
// Taken when the preceding branch did not fire but M1 is advancing: the M1
// pipeline registers are loaded with an empty slot (bubble).
else if (advanceM1) begin
	// Instruction word, target register field and exception code are neutralised.
	m1IR <= #1 `NOP_INSN;
	m1Rt <= #1 9'd0;
	m1extype <= #1 `EX_NON;
	// No load/store or I/O activity is attached to the bubble.
	m1IsStore <= #1 1'b0;
	m1IsLoad <= #1 1'b0;
	m1IsIn <= #1 1'b0;
	m1IsOut <= #1 1'b0;
	m1IsCacheElement <= #1 1'b0;
	// Remaining per-instruction flags are cleared as well.
	m1IsCnt <= #1 1'b0;
	m1Fip <= #1 1'b0;
	m1clkoff <= #1 1'b0;
end
 
 
//-----------------------------------------------------------------------------
// MEMORY:
// - I/O instructions are finished
// - store instructions are started
// - missed loads are started
// On a data cache hit for a load, the load is essentially
// finished in this stage. We switch the opcode to 'NOPI'
// so that the pipeline advances as if a NOP were
// present.
//
// Inputs:
// - m1???? signals
// Outputs:
// - m2???? signals to M2 stage
//-----------------------------------------------------------------------------
if (advanceM1) begin
	// Default M1 -> M2 transfer: copy every M1 pipeline register into its M2
	// counterpart. Statements further down in this stage may override some of
	// these (later non-blocking assignments to the same register win).
	m2StatusHWI <= m1StatusHWI;
	m2Im <= m1Im;
	m2Nmi <= m1Nmi;
	m2extype <= m1extype;
	m2Addr <= pea;
	m2Data <= m1Data;
	m2Fip <= m1Fip;
	m2pc <= m1pc;
	m2IR <= m1IR;
	m2IsCnt <= m1IsCnt;
	m2Rt <= m1Rt;
	m2clkoff <= m1clkoff;
	m2AXC <= m1AXC;
	m2IsCacheElement <= m1IsCacheElement;
	m2IsLoad <= m1IsLoad;
	// Fix: this used to read "m2IsStore <= m2IsStore", a self-assignment that
	// never propagated the store flag out of M1. The M2 stage reads m2IsStore
	// for its bus-error check and forwards it to wIsStore, so it must be
	// loaded from the M1 copy here.
	m2IsStore <= m1IsStore;
 
	// An error response on an I/O cycle: flag a data bus error, record the
	// address that was on the bus, and replace the instruction with a NOP.
	if (m1IsIO & err_i) begin
		m2extype <= `EX_DBERR;
		errorAddress <= adr_o;
		m2IR <= #1 `NOP_INSN;
	end
 
	// Per-opcode M1 work:
	//  - I/O instructions: terminate the bus cycle; inputs also capture the data
	//  - loads: either start a bus read (not a cache element) or take the value
	//    from the cache data and turn the instruction into a NOP
	//  - stores: start a bus write
	// data64/data32/data16/data8 and cdata64/cdata32/cdata16/cdata8 are defined
	// outside this section (presumably the size-selected bus and cache read data).
	case(m1Opcode)
	`MISC:
		case(m1Func)
		`SYSCALL:
			// Fetch the handler vector: from the bus if it is not a cache
			// element, otherwise directly from the cache data (cdat).
			if (!m1IsCacheElement) begin
				cyc_o <= 1'b1;
				stb_o <= 1'b1;
				sel_o <= 8'hFF;
				adr_o <= pea;
				m2Addr <= pea;
			end
			else begin	// dhit must be true
				$display("fetched vector: %h", {cdat[63:2],2'b00});
				m2IR <= `NOP_INSN;
				m2IsLoad <= 1'b0;
				pc <= {cdat[63:2],2'b00};	// low two bits of the vector are forced to zero
				LoadNOPs <= 1'b0;
			end
		endcase
	// I/O inputs: drop the I/O cycle and latch the (sign- or zero-extended) data.
	`INW:
		begin
			iocyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			m2Data <= data64;
		end
	`INH:
		begin
			iocyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			m2Data <= {{32{data32[31]}},data32};	// sign extend
		end
	`INHU:
		begin
			iocyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			m2Data <= data32;
		end
	`INCH:
		begin
			iocyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			m2Data <= {{48{data16[15]}},data16};	// sign extend
		end
	`INCU:
		begin
			iocyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			m2Data <= #1 data16;
		end
	`INB:
		begin
			iocyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			m2Data <= #1 {{56{data8[7]}},data8};	// sign extend
		end
	`INBU:
		begin
			iocyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			m2Data <= #1 data8;
		end
	// I/O outputs: just terminate the write cycle.
	`OUTW,`OUTH,`OUTC,`OUTB,`OUTBC:
		begin
			iocyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			we_o <= #1 1'b0;
			sel_o <= #1 8'h00;
		end
	`CACHE:
		case(m1IR[19:15])
		// Clear the valid bit of the line selected by pea[13:6].
		`INVIL:	tvalid[pea[13:6]] <= 1'b0;
		default:	;
		endcase
 
	// Loads. In each group: if the address is not a cache element a bus read
	// is started; otherwise the load completes here from the cache data and
	// the instruction continues down the pipeline as a NOP.
	`LW,`LM,`LFD,`LSW,`LP,`LFDP:
		if (!m1IsCacheElement) begin
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= cdata64;
		end
`ifdef ADDRESS_RESERVATION
	// Load with reservation: always goes to the bus and records the
	// reserved address (pea[63:5]).
	`LWR:
		begin
			rsv_o <= 1'b1;
			resv_address <= pea[63:5];
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
`endif
	`LH,`LF,`LFP:
		if (!m1IsCacheElement) begin
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= {{32{cdata32[31]}},cdata32};	// sign extend
		end
 
	// NOTE(review): LSH is zero-extended here on a cache hit, but the M2 stage
	// groups LSH with the sign-extending loads - confirm which is intended.
	`LHU,`LSH:
		if (!m1IsCacheElement) begin
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= cdata32;
		end
 
	`LC:
		if (!m1IsCacheElement) begin
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			$display("dhit=1, cdat=%h",cdat);
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= {{48{cdata16[15]}},cdata16};	// sign extend
		end
 
	`LCU:
		if (!m1IsCacheElement) begin
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= cdata16;
		end
 
	`LB:
		if (!m1IsCacheElement) begin
			$display("Load byte:");
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= {{56{cdata8[7]}},cdata8};	// sign extend
		end
	`LBU:
		if (!m1IsCacheElement) begin
			$display("Load unsigned byte:");
			cyc_o <= 1'b1;
			stb_o <= 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			m2Addr <= pea;
		end
		else begin
			$display("m2IsLoad <= 0");
			m2IsLoad <= 1'b0;
			m2IR <= `NOP_INSN;
			m2Data <= cdata8;
		end
 
	// Stores. Each group starts a bus write, records the current dhit value in
	// wrhit, and replicates the store data across the 64-bit bus so that the
	// byte lanes chosen by fnSelect carry the value. With ADDRESS_RESERVATION,
	// a store to the reserved address clears the reservation.
	`SW,`SM,`SFD,`SSW,`SP,`SFDP:
		begin
			$display("%d SW/SM %h",tick,{pea[63:3],3'b000});
			m2Addr <= pea;
			wrhit <= #1 dhit;
`ifdef ADDRESS_RESERVATION
			if (resv_address==pea[63:5])
				resv_address <= #1 59'd0;
`endif
			cyc_o <= #1 1'b1;
			stb_o <= #1 1'b1;
			we_o <= #1 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			dat_o <= #1 m1Data;
		end
 
	`SH,`SF,`SSH,`SFP:
		begin
			wrhit <= #1 dhit;
			m2Addr <= pea;
`ifdef ADDRESS_RESERVATION
			if (resv_address==pea[63:5])
				resv_address <= #1 59'd0;
`endif
			cyc_o <= #1 1'b1;
			stb_o <= #1 1'b1;
			we_o <= #1 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			dat_o <= #1 {2{m1Data[31:0]}};
		end
 
	`SC:
		begin
			$display("Storing char to %h, ea=%h",pea,ea);
			wrhit <= #1 dhit;
			m2Addr <= pea;
`ifdef ADDRESS_RESERVATION
			if (resv_address==pea[63:5])
				resv_address <= #1 59'd0;
`endif
			cyc_o <= #1 1'b1;
			stb_o <= #1 1'b1;
			we_o <= #1 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			dat_o <= #1 {4{m1Data[15:0]}};
		end
 
	`SB,`STBC:
		begin
			wrhit <= #1 dhit;
			m2Addr <= pea;
`ifdef ADDRESS_RESERVATION
			if (resv_address==pea[63:5])
				resv_address <= #1 59'd0;
`endif
			cyc_o <= #1 1'b1;
			stb_o <= #1 1'b1;
			we_o <= #1 1'b1;
			sel_o <= fnSelect(m1Opcode,pea[2:0]);
			adr_o <= pea;
			dat_o <= #1 {8{m1Data[7:0]}};
		end
 
`ifdef ADDRESS_RESERVATION
	// Store conditional: the write is performed (and rsf set) only when the
	// reservation still matches; otherwise the instruction becomes a NOP and
	// rsf stays clear.
	`SWC:
		begin
			rsf <= #1 1'b0;
			if (resv_address==pea[63:5]) begin
				wrhit <= #1 dhit;
				m2Addr <= pea;
				cyc_o <= #1 1'b1;
				stb_o <= #1 1'b1;
				we_o <= #1 1'b1;
				sel_o <= fnSelect(m1Opcode,pea[2:0]);
				adr_o <= pea;
				dat_o <= #1 m1Data;
				resv_address <= #1 59'd0;
				rsf <= #1 1'b1;
			end
			else
				m2IR <= `NOP_INSN;
		end
`endif
	endcase
 
//---------------------------------------------------------
// Check for a TLB miss.
// On a prefetch load, just switch the opcode to a NOP
// instruction and ignore the error. Otherwise set the
// exception type.
// A prefetch is recognised here as a load whose target
// register field m1Rt[4:0] is zero.
// NOTE(review): this section writes the m1 registers (m1IR,
// m1extype, m1Rt) rather than the m2 copies loaded earlier in
// this stage - confirm that is the intended target.
//---------------------------------------------------------
`ifdef TLB
if (m1IsLoad && m1Rt[4:0]==5'd0 && DTLBMiss) begin
	m1IR <= `NOP_INSN;
end
if ((m1IsLoad&&m1Rt[4:0]!=5'd0)|m1IsStore) begin
	if (DTLBMiss) begin
		$display("DTLB miss on address: %h",ea);
		// Cancel any bus cycle started by the opcode case above.
		cyc_o <= 1'b0;
		stb_o <= 1'b0;
		we_o <= 1'b0;
		sel_o <= 8'h00;
		m1extype <= `EX_TLBD;
		StatusHWI <= 1'b1;
		BadVAddr <= ea[63:13];	// record bits 63:13 of the faulting effective address
		// Squash the following instructions unless an NMI is in those stages.
		if (!xNmi&!dNmi) begin
			dIR <= `NOP_INSN;
			xIR <= `NOP_INSN;
		end
		m1IR <= `NOP_INSN;
		m1Rt <= 9'd0;
		xRt <= #1 9'd0;
		LoadNOPs <= 1'b1;
	end
	end
`endif
end
// M1 stage tail.
// Taken when M1 is not advancing but M2 is: the M2 pipeline registers are
// loaded with an empty slot (bubble).
else if (advanceM2) begin
	// Instruction word, target register field and exception code are neutralised.
	m2IR <= #1 `NOP_INSN;
	m2Rt <= #1 9'd0;
	m2extype <= #1 `EX_NON;
	// No memory operation is attached to the bubble; address and data are zeroed.
	m2IsStore <= #1 1'b0;
	m2IsLoad <= #1 1'b0;
	m2IsCacheElement <= 1'b0;
	m2Data <= #1 64'd0;
	m2Addr <= 64'd0;
	// Remaining per-instruction flags are cleared as well.
	m2IsCnt <= #1 1'b0;
	m2Fip <= #1 1'b0;
	m2clkoff <= #1 1'b0;
end
 
 
//-----------------------------------------------------------------------------
// MEMORY:
// - complete the memory cycle
// - merge load data into pipeline
// Inputs:
// - m2???? type signals
// Outputs:
// - w???? signals to WB stage
//-----------------------------------------------------------------------------
// M2 stage: when it advances, the M2 pipeline registers are copied to the WB
// stage registers, any bus cycle belonging to the instruction is terminated,
// and for loads the bus data replaces the default wData value.
if (advanceM2) begin
	// Default M2 -> WB transfer; some of these are overridden below.
	wextype <= #1 m2extype;
	wpc <= #1 m2pc;
	wFip <= #1 m2Fip;
	wIR <= #1 m2IR;
	wIsStore <= #1 m2IsStore;
	wData <= #1 m2Data;
	wRt <= #1 m2Rt;
	wclkoff <= #1 m2clkoff;
	wAXC <= #1 m2AXC;
 
	// A bus error is not reported if a prefetch is taking place (m2Rt[4:0]=0).
	if (((m2IsLoad&&m2Rt[4:0]!=5'd0)|m2IsStore) & err_i) begin
		wextype <= #1 `EX_DBERR;
		errorAddress <= #1 adr_o;
	end
 
	case(m2Opcode)
	`MISC:
		// Vector fetch from the bus has completed: jump to the fetched
		// address with its low two bits forced to zero.
		if (m2Func==`SYSCALL)
			begin
				cyc_o <= #1 1'b0;
				stb_o <= #1 1'b0;
				sel_o <= #1 8'h00;
				pc <= #1 {data64[63:2],2'b00};
				LoadNOPs <= 1'b0;
				$display("M2 Fetched vector: %h",{data64[63:2],2'b00});
			end
	// Stores: terminate the write cycle.
	`SH,`SC,`SB,`SW,`SWC,`SF,`SFD,`SSH,`SSW,`SP,`SFP,`SFDP:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			we_o <= #1 1'b0;
			sel_o <= #1 8'h00;
		end
	// Loads: terminate the read cycle and capture the data, sign- or
	// zero-extended according to the opcode.
	`LH,`LF,`LSH,`LFP:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			wData <= #1 {{32{data32[31]}},data32};
		end
	`LW,`LWR,`LFD,`LSW,`LP,`LFDP:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			wData <= #1 data64;
		end
	`LHU:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			wData <= #1 data32;
		end
	`LC:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			wData <= #1 {{48{data16[15]}},data16};
		end
	`LCU:
		begin
			cyc_o <= #1 1'b0;
			stb_o <= #1 1'b0;
			sel_o <= #1 8'h00;
			wData <= #1 data16;
		end
	`LB:
		begin
			cyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			wData <= {{56{data8[7]}},data8};
		end
	`LBU:
		begin
			cyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			wData <= data8;
		end
	default:	;
	endcase
	// Force stack pointer to word alignment
	// (target register number 30: the low three bits of the result are cleared)
	if (m2Rt[4:0]==5'b11110)
		wData[2:0] <= 3'b000;
end
// M2 stage tail.
// Taken when M2 is not advancing but WB is: the WB pipeline registers are
// loaded with an empty slot (bubble).
else if (advanceW) begin
	// Instruction word, target register field and exception code are neutralised.
	wIR <= #1 `NOP_INSN;
	wRt <= 9'd0;
	wextype <= `EX_NON;
	// No result data and no store are attached to the bubble.
	wData <= 64'd0;
	wIsStore <= 1'b0;
	// Remaining per-instruction flags are cleared as well.
	wFip <= 1'b0;
	wclkoff <= 1'b0;
end
 
 
//-----------------------------------------------------------------------------
// WRITEBACK:
// - update the register file with results
// - record exception address and type
// - jump to exception handler routine (below)
// Inputs:
// - w???? type signals
// Outputs:
// - t???? signals
//-----------------------------------------------------------------------------
//
// WB stage: latches the last register result, applies side effects of a few
// opcodes to special registers, updates the clock enable, handles the FIP
// restart, and finally dispatches on the exception type carried with the
// instruction.
if (advanceW) begin
	// Hold onto the last register update
	// (skipped when the target register number is 0 or 29)
	if (wRt[4:0]!=5'd0 && wRt[4:0]!=5'd29) begin
		tRt <= wRt;
		tData <= wData;
	end
	if (wRt!=5'd0) begin
		$display("Writing regfile[%d:%d] with %h", wRt[8:5],wRt[4:0], wData);
	end
	case(wOpcode)
	// LSH targeting SR: selected bits of the loaded value update status state.
	`LSH:
		case (wRt)
		`SR:	begin
				bu_im <= wData[31];
				if (wData[15])
					ie_fuse <= 8'h00;
				else
					ie_fuse[0] <= 1'b1;
				FXE <= wData[12];
				end
		default:	;
		endcase
	`MISC:
		case(wFunc)
		// SYSCALL: wData is saved in IPC for NMI, the 32 IRQ codes and the
		// two TLB exception codes, and in EPC for every other code.
		`SYSCALL:
			if (wIR[15:7]==`EX_NMI || (wIR[15:7]>=`EX_IRQ && wIR[15:7]<`EX_IRQ+32) || wIR[15:7]==`EX_TLBI || wIR[15:7]==`EX_TLBD)
				IPC[wAXC] <= wData;
			else
				EPC[wAXC] <= wData;
		default:	;
		endcase
	`R:
		case(wFunc6)
		// MTSPR: write the special register selected by wIR[11:6].
		`MTSPR:
			case(wIR[11:6])
`ifdef SEGMENTATION
			// Only bits 63:16 of a segment register are written.
			`CS:	CS[wAXC][63:16] <= wData[63:16];
			`DS:	DS[wAXC][63:16] <= wData[63:16];
			`ES:	ES[wAXC][63:16] <= wData[63:16];
			`SS:	SS[wAXC][63:16] <= wData[63:16];
`endif
			`IPC:	begin
					$display("mtspr IPC[%d]=%h",wAXC,wData);
					IPC[wAXC] <= wData;
					end
			`EPC:	EPC[wAXC] <= wData;
			default:	;
			endcase
		endcase
	endcase
	// clk_en follows the inverse of the instruction's clock-off flag.
	if (wclkoff)
		clk_en <= 1'b0;
	else
		clk_en <= 1'b1;
	// FIP/IEPP:
	// Jump back to the instruction following the FIP/IEPP
	// and clear the Fip flag in every pipeline stage.
	if (wFip) begin
		wFip <= 1'b0;
		m2Fip <= 1'b0;
		m1Fip <= 1'b0;
		xFip <= 1'b0;
		dFip <= 1'b0;
		pc <= fnIncPC(wpc);
	end
	//---------------------------------------------------------
	// WRITEBACK (WB') - part two:
	// - vector to exception handler address
	// In the case of a hardware interrupt (NMI/IRQ) we know
	// the pipeline following the interrupt is filled with
	// NOP instructions. This means there is no need to 
	// invalidate the pipeline.
	// 		Also, we have to wait until the WB stage before
	// vectoring so that the pc setting doesn't get trashed
	// by a branch or other exception.
	// 		Tricky because we have to find the first valid
	// PC to record in the IPC register. The interrupt might
	// have occurred in a branch shadow, in which case the
	// current PC isn't valid.
	//---------------------------------------------------------
	case(wextype)
	`EX_NON:	;
	`EX_RST:
		begin
		pc <= `RESET_VECTOR;
		end
	// Hardware exceptions
	// Only the per-stage NMI flags are cleared here; the SYSCALL stuffing
	// and PC selection below are currently commented out.
	`EX_NMI,`EX_IRQ,`EX_TLBI,`EX_TLBD,
	`EX_IRQ+1,`EX_IRQ+2,`EX_IRQ+3,`EX_IRQ+4,`EX_IRQ+5,`EX_IRQ+6,`EX_IRQ+7,
	`EX_IRQ+8,`EX_IRQ+9,`EX_IRQ+10,`EX_IRQ+11,`EX_IRQ+12,`EX_IRQ+13,`EX_IRQ+14,
	`EX_IRQ+15,`EX_IRQ+16,`EX_IRQ+17,`EX_IRQ+18,`EX_IRQ+19,`EX_IRQ+20,`EX_IRQ+21,
	`EX_IRQ+22,`EX_IRQ+23,`EX_IRQ+24,`EX_IRQ+25,`EX_IRQ+26,`EX_IRQ+27,`EX_IRQ+28,
	`EX_IRQ+29,`EX_IRQ+30,`EX_IRQ+31:
		begin
		dNmi <= 1'b0;
		xNmi <= 1'b0;
		m1Nmi <= 1'b0;
		m2Nmi <= 1'b0;
//		$display("Stuffing SYSCALL %d",wextype);
//		dIR <= {`MISC,9'd0,wextype,`SYSCALL};
		// One of the following pc's MUST be valid.
		// wpc will be valid if the interrupt occurred outside of a branch
		// shadow. m1pc or m2pc (the branch target address) will be valid
		// depending on where in the branch shadow the interrupt falls.
		// Syscall has a larger shadow than a branch because it loads the
		// vector from memory. xpc or dpc should be valid depending on
		// whether or not the vector is cached. Eventually syscall flags
		// the pc valid. If none of the PC's are valid, then there is a
		// hardware problem.
//		dpc <= wpc;
//		case(1'b1)
//		wpcv:	dpc <= wpc;
//		m2pcv:	dpc <= m2pc;
//		m1pcv: 	dpc <= m1pc;
//		xpcv:	dpc <= xpc;
//		dpcv:	dpc <= dpc;
//		ipcv:	dpc <= pc;
//		default:	dpc <= `RESET_VECTOR;	// Can't happen
//		endcase
//		dpcv <= 1'b1;
		end
	// Software exceptions
	// We probably want to return to the excepting instruction.
	// A SYSCALL instruction carrying the exception code is inserted into the
	// decode stage, with dpc set to the pc of the excepting instruction.
	`EX_DBERR:
		begin
		pccap <= 1'b0;
		dIR <= {`MISC,9'd0,wextype,`SYSCALL};
		dpc <= wpc;
		end
	// Every other exception code is handled identically to EX_DBERR.
	default:
		begin
		pccap <= 1'b0;
		dIR <= {`MISC,9'd0,wextype,`SYSCALL};
		dpc <= wpc;
		end
	endcase
end
 
// Hold onto the last register update
//begin
//	if (tRt[4:0]!=5'd0 && tRt[4:0]!=5'd29) begin
//		uRt <= tRt;
//		uData <= tData;
//	end
//end
 
//=============================================================================
// Cache loader
//=============================================================================
// Cache loader state machine.
//   IDLE   - start a data cache line load, or an instruction fetch/line load
//            (a data cache load request is checked first)
//   ICACT1 - instruction cache line fill (burst of eight acknowledged transfers)
//   ICACT2 - single instruction fetch into insnbuf when ICacheAct is false
//   DCACT  - data cache line fill (burst of eight acknowledged transfers)
// NOTE(review): cti_o is driven with 3'b001, labelled "constant address
// burst" below, while icadr/dcadr[5:3] count up on each ack - confirm against
// the WISHBONE cycle-type encoding that this is the intended burst type.
case(cstate)
IDLE:
	if (triggerDCacheLoad) begin
		dcaccess <= 1'b1;
		bte_o <= 2'b00;			// linear burst
		cti_o <= 3'b001;		// constant address burst access
		bl_o <= 5'd7;
		cyc_o <= 1'b1;
		stb_o <= 1'b1;
		sel_o <= 8'hFF;
		adr_o <= {pea[63:6],6'h00};	// start of the 64-byte line containing pea
		dcadr <= {pea[14:6],6'h00};
		cstate <= DCACT;
	end
	else if (triggerICacheLoad) begin
		icaccess <= 1'b1;
		bte_o <= 2'b00;			// linear burst
		cyc_o <= 1'b1;
		stb_o <= 1'b1;
		sel_o <= 8'hFF;
		if (ICacheAct) begin
			cti_o <= 3'b001;		// constant address burst access
			bl_o <= 5'd7;
			adr_o <= {ppc[63:6],6'h00};	// start of the 64-byte line containing ppc
			icadr <= {ppc[63:6],6'h00};
			cstate <= ICACT1;
		end
		else begin
			// Single (non-burst) fetch of the word containing ppc.
			cti_o <= 3'b000;
			bl_o <= 5'd0;
			adr_o <= {ppc[63:2],2'b00};
			cstate <= ICACT2;
		end
	end
// WISHBONE burst accesses
//
ICACT1:
	// Each ack (or error) advances the word index within the line.
	if (ack_i|err_i) begin
		icadr[5:3] <= icadr[5:3] + 3'd1;
		if (icadr[5:3]==3'd6)
			cti_o <= 3'b111;	// Last cycle ahead
		if (icadr[5:3]==3'd7) begin
			cti_o <= 3'b000;	// back to non-burst mode
			cyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			tmem[adr_o[13:6]] <= adr_o[63:14];	// This will cause ihit to go high
			tvalid[adr_o[13:6]] <= 1'b1;
			icaccess <= 1'b0;
			cstate <= IDLE;
		end
	end
//SYSCALL 509:	00000000_00000000_11111110_10010111
ICACT2:
	begin
		if (ack_i|err_i) begin
			ibufadr <= adr_o;
			// On a bus error the buffer is loaded with syscall509 instead
			// of bus data; otherwise adr_o[2] selects the 32-bit half.
			if (err_i)
				insnbuf <= syscall509;
			else
				insnbuf <= adr_o[2] ? dat_i[63:32] : dat_i[31:0];
			cti_o <= 3'b000;	// back to non-burst mode
			cyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			icaccess <= 1'b0;
			cstate <= IDLE;
		end
	end
 
DCACT:
	// Each ack (or error) advances the word index within the line.
	if (ack_i|err_i) begin
		dcadr[5:3] <= dcadr[5:3] + 3'd1;
		if (dcadr[5:3]==3'd6)
			cti_o <= 3'b111;	// Last cycle ahead
		if (dcadr[5:3]==3'h7) begin
			cti_o <= 3'b000;	// back to non-burst mode
			cyc_o <= 1'b0;
			stb_o <= 1'b0;
			sel_o <= 8'h00;
			dcaccess <= 1'b0;
			cstate <= IDLE;
		end
	end
 
endcase	// cstate
end		// RUN
endcase
end
 
endmodule
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.