// OpenCores
// URL https://opencores.org/ocsvn/rf6809/rf6809/trunk
//
// Subversion Repositories rf6809
//
// [/] [rf6809/] [trunk/] [rtl/] [cpu/] [rf6809.sv] - Rev 22
//
// Compare with Previous | Blame | View Log

// ============================================================================
//        __
//   \\__/ o\    (C) 2022  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch<remove>@finitron.ca
//       ||
//
//      rf6809.sv
//
//
// BSD 3-Clause License
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//                                                                          
// ============================================================================

import rf6809_pkg::*;

module rf6809(id, rst_i, clk_i, halt_i, nmi_i, irq_i, firq_i, vec_i, ba_o, bs_o, lic_o, tsc_i,
        rty_i, bte_o, cti_o, bl_o, lock_o, cyc_o, stb_o, we_o, ack_i, aack_i, atag_i,
        adr_o, dat_i, dat_o, state);
// Encodings for the 6-bit `state` register.
// NOTE(review): these are declared as `parameter` (overridable at instantiation)
// rather than `localparam`; presumably they are never overridden - confirm
// before changing the declaration kind.
// The encoding is sparse: values 14-15, 19 and 21-30 are not assigned here.
parameter RESET = 6'd0;
parameter IFETCH = 6'd1;
// The decode-flag registers (isStore, isPULU, ...) are captured while state==DECODE.
parameter DECODE = 6'd2;
parameter CALC = 6'd3;
// Stack pull / push states.
parameter PULL1 = 6'd4;
parameter PUSH1 = 6'd5;
parameter PUSH2 = 6'd6;
// Memory load / store states.
parameter LOAD1 = 6'd7;
parameter LOAD2 = 6'd8;
parameter STORE1 = 6'd9;
parameter STORE1a = 6'd10;
parameter STORE2 = 6'd11;
parameter OUTER_INDEXING = 6'd12;
parameter OUTER_INDEXING2 = 6'd13;
// Presumably multi-cycle divide / multiply states - TODO confirm against the state machine.
parameter DIV1 = 6'd16;
parameter DIV2 = 6'd17;
parameter DIV3 = 6'd18;
parameter MUL2 = 6'd20;
// Presumably instruction-cache handling states - TODO confirm.
parameter ICACHE1 = 6'd31;
parameter ICACHE2 = 6'd32;
parameter ICACHE3 = 6'd33;
parameter ICACHE4 = 6'd34;
parameter ICACHE5 = 6'd35;
parameter ICACHE6 = 6'd36;
parameter ICACHE7 = 6'd37;
parameter ICACHE8 = 6'd38;
parameter ICACHE9 = 6'd39;
// Presumably instruction-buffer handling states - TODO confirm.
parameter IBUF1 = 6'd40;
parameter IBUF2 = 6'd41;
parameter IBUF3 = 6'd42;
parameter IBUF4 = 6'd43;
parameter IBUF5 = 6'd44;
parameter IBUF6 = 6'd45;
// Port declarations (non-ANSI style: the port order is fixed by the module header).
// NOTE(review): the bus signal names (cyc_o, stb_o, we_o, ack_i, bte_o, cti_o)
// look like a Wishbone-style interface - confirm against the bus logic.
input [5:0] id;
input rst_i;
// Clock; the decode-flag registers are updated on its rising edge.
input clk_i;
input halt_i;
input nmi_i;
input irq_i;
input firq_i;
input [`TRPBYTE] vec_i;
output reg ba_o;
output reg bs_o;
// Driven by the continuous assignment to lic_o further down the module.
output lic_o;
// OR'd with tsc_latched to form the internal `tsc` signal.
input tsc_i;
input rty_i;
output reg [1:0] bte_o;
output reg [2:0] cti_o;
output reg [5:0] bl_o;
output reg cyc_o;
output reg stb_o;
output reg we_o;
output reg lock_o;
input ack_i;
input aack_i;
input [3:0] atag_i;
output reg [`TRPBYTE] adr_o;
// Byte-wide data input; copied combinationally into `dati`.
input [`LOBYTE] dat_i;
output reg [`LOBYTE] dat_o;
// Current state-machine state, exported for observation.
output [5:0] state;

// ---------------------------------------------------------------------------
// Internal state and datapath declarations.
// ---------------------------------------------------------------------------

// State register (also exported on the `state` port).
reg [5:0] state;
// Selectors compared against the `LW_* / `SW_* codes by the lic_o and lock_bus logic.
reg [5:0] load_what,store_what,load_what2;
reg [`TRPBYTE] pc;
// pc plus 2 and pc plus 16.
wire [`TRPBYTE] pcp2 = pc + 4'd2;
wire [`TRPBYTE] pcp16 = pc + 5'd16;
// Declared here; not driven in this part of the file.
wire [`HEXBYTE] insn;
// Tied to constant 1.
wire icacheOn = 1'b1;
reg [`TRPBYTE] ibufadr, icwa;
reg [191:0] ibuf;
// Asserted when ibufadr equals the current pc.
wire ibufhit = ibufadr==pc;
reg natMd,firqMd,iplMd,dbz,iop;
reg md32;
wire [`DBLBYTE] mask = 24'hFFFFFF;
reg [1:0] ipg;
reg isFar;
// Driven combinationally from ndxbyte when TWELVEBIT is defined.
reg isOuterIndexed;
reg [`HEXBYTE] ir;
// ir12 is the opcode value used for all instruction comparisons.
// 8-bit build: {ipg, low byte of ir}.  12-bit build: low byte of ir.
`ifdef EIGHTBIT
wire [9:0] ir12 = {ipg,ir[`LOBYTE]};
`endif
`ifdef TWELVEBIT
wire [`LOBYTE] ir12 = ir[`LOBYTE];
`endif
reg [`LOBYTE] dpr;              // direct page register
reg [`DBLBYTE] usppg;   // user stack pointer page

Address [3:0] brkad;    // breakpoint addresses
brkCtrl [3:0] brkctrl;

// Index postbyte; assigned from ir[`HIBYTE] further down.
wire [`LOBYTE] ndxbyte;
// Condition-code flag registers.
reg cf,vf,zf,nf,hf,ef;
// Carry flag zero-extended to byte width and to 24 bits.
wire [`LOBYTE] cfx8 = cf;
wire [`DBLBYTE] cfx24 = {23'b0,cf};
reg im,im1,firqim;
reg dm; // decimal mode
reg sync_state,wait_state;
// Condition-code register image assembled from the individual flags. The 12-bit
// layout adds im1 and dm above the eight flags used in the 8-bit layout.
wire [`LOBYTE] ccr = bitsPerByte==12 ? {2'b00,im1,dm,ef,firqim,hf,im,nf,zf,vf,cf} : {ef,firqim,hf,im,nf,zf,vf,cf};
reg [`LOBYTE] acca,accb;
`ifdef SUPPORT_6309
reg [`LOBYTE] acce,accf;
`endif
reg [`DBLBYTE] accd;
`ifdef SUPPORT_6309
reg [`DBLBYTE] accw;
`endif
reg [`DBLBYTE] xr,yr,usp,ssp;
// Product of the A and B accumulators.
wire [`DBLBYTE] prod = acca * accb;
// The low nibble of vect is decoded into the interrupt-type wires (isRST, isNMI, ...).
reg [`DBLBYTE] vect;
reg [`DBLBYTEP1] res;
reg [`LOBYTEP1] res12;
// Flag taps on the byte-wide result res12: bit BPBM1, all-zero test of the low
// byte, and bit `bitsPerByte` (the bit above the low byte).
wire res12n = res12[BPBM1];
wire res12z = res12[`LOBYTE]==12'h000;
wire res12c = res12[bitsPerByte];
// The same taps on the double-byte result res.
wire res24n = res[BPBX2M1];
wire res24z = res[`DBLBYTE]==24'h000000;
wire res24c = res[BPB*2];
reg [`TRPBYTE] ia;
reg ic_invalidate;
reg first_ifetch;
reg tsc_latched;
// tsc_i OR'd with its latched copy.
wire tsc = tsc_i|tsc_latched;
reg [`LOBYTE] chkpoint;
reg [15:0] icgot;
reg [23:0] btocnt;
reg bto;                                                        // bus timed out

reg [`DBLBYTE] a,b;
// Low byte of operand b.
wire [`LOBYTE] b12 = b[`LOBYTE];
reg [`TRPBYTE] radr,wadr;
reg [`DBLBYTE] wdat;

reg nmi1,nmi_edge;
reg nmi_armed;

// Decode flags, registered while state==DECODE.
reg isStore;
reg isPULU,isPULS;
reg isPSHS,isPSHU;
reg isRTS,isRTI,isRTF;
reg isLEA;
reg isRMW;

// Signed overflow of an addition, computed from three sign bits.
//   a, b : sign bits of the two addends
//   r    : sign bit of the sum
// Returns 1 when the addends share a sign and the sum's sign differs from it.
function fnAddOverflow;
input a;
input b;
input r;
begin
        // (a ~^ b) is 1 when the signs match; (b ^ r) is 1 when the result sign flipped.
        fnAddOverflow = (a ~^ b) & (b ^ r);
end
endfunction

// Signed overflow of a subtraction, computed from three sign bits.
//   a, b : the two operand sign bits
//   r    : sign bit of the result
// Returns 1 when a and b differ and r equals b.
function fnSubOverflow;
input a;
input b;
input r;
begin
        // (a ^ b) is 1 when the operand signs differ; (r ~^ b) is 1 when r matches b.
        fnSubOverflow = (a ^ b) & (r ~^ b);
end
endfunction

// Data input path. At present this is a straight combinational copy of the
// dat_i port into the internal `dati` signal.
reg [bitsPerByte-1:0] dati;
always_comb
begin
        dati = dat_i;
end

genvar g;

// Evaluate the branch conditional.
// takb is 1 when the branch selected by the opcode in ir12 should be taken,
// based on the carry (cf), zero (zf), negative (nf) and overflow (vf) flags.
// Short and long forms of each branch share one condition. Any opcode that is
// not a listed branch yields takb = 1.
// Blocking assignments are used because this is purely combinational logic.
reg takb;
always_comb
        case(ir12)
        `BRA,`LBRA:             takb = 1'b1;
        `BRN,`LBRN:             takb = 1'b0;
        `BHI,`LBHI:             takb = !cf & !zf;
        `BLS,`LBLS:             takb =  cf | zf;
        `BLO,`LBLO:             takb =  cf;
        `BHS,`LBHS:             takb = !cf;
        `BNE,`LBNE:             takb = !zf;
        `BEQ,`LBEQ:             takb =  zf;
        `BMI,`LBMI:             takb =  nf;
        `BPL,`LBPL:             takb = !nf;
        `BVS,`LBVS:             takb =  vf;
        `BVC,`LBVC:             takb = !vf;
        // Signed comparisons: nf == vf means "greater or equal".
        `BGT,`LBGT:             takb = (nf & vf & !zf) | (!nf & !vf & !zf);
        `BGE,`LBGE:             takb = (nf & vf) | (!nf & !vf);
        `BLE,`LBLE:             takb = zf | (nf & !vf) | (!nf & vf);
        `BLT,`LBLT:             takb = (nf & !vf) | (!nf & vf);
        default:        takb = 1'b1;
        endcase

// This chunk of code takes care of calculating the number of bytes stacked
// by a push or pull operation.
//
// Each of the eight bits of the second instruction byte (ir[bitsPerByte] and up)
// adds a fixed byte count when set: 1 for each of the low four bits, 2 for each of
// the next three, and 2 (or 3 when isFar) for the eighth.
// Presumably the bits select CC, A, B, DP, X, Y, U/S and PC in that order, as in
// the 6809 PSHS/PULS postbyte - TODO confirm.
// With SUPPORT_6309 two further bits add one byte each.
//
reg [4:0] cnt;
always_comb
begin
        cnt =   (ir[bitsPerByte] ? 5'd1 : 5'd0) +
                        (ir[bitsPerByte+1] ? 5'd1 : 5'd0) +
                        (ir[bitsPerByte+2] ? 5'd1 : 5'd0) +
                        (ir[bitsPerByte+3] ? 5'd1 : 5'd0) +
                        (ir[bitsPerByte+4] ? 5'd2 : 5'd0) +
                        (ir[bitsPerByte+5] ? 5'd2 : 5'd0) +
                        (ir[bitsPerByte+6] ? 5'd2 : 5'd0) +
                        (ir[bitsPerByte+7] ? (isFar ? 5'd3 : 5'd2) : 5'd0)
`ifdef SUPPORT_6309
                        // The original had a doubled "+ +" here (parsed as a unary plus);
                        // the stray operator is removed, the value is unchanged.
                        + (ir[bitsPerByte+8] ? 5'd1 : 5'd0)
                        + (ir[bitsPerByte+9] ? 5'd1 : 5'd0)
`endif
                        ;
// Disabled earlier formulation, kept for reference only:
//  cnt = 0;
//      if (ir[8]) cnt = cnt + 5'd1;    // CC
//      if (ir[9]) cnt = cnt + md32 ? 5'd4 : 5'd1;      // A
//      if (ir[10]) cnt = cnt + md32 ? 5'd4 : 5'd1;     // B
//      if (ir[BPBM1]) cnt = cnt + 5'd1;        // DP
//      if (ir[12]) cnt = cnt + md32 ? 5'd4 : 5'd2;     // X
//      if (ir[bitsPerByte+1]) cnt = cnt + md32 ? 5'd4 : 5'd2;  // Y
//      if (ir[bitsPerByte+2]) cnt = cnt + md32 ? 5'd4 : 5'd2;  // U/S
//      if (ir[bitsPerByte+3]) cnt = cnt + 5'd4;        // PC
end

// Read-modify-write opcodes common to both builds: the direct-page, indexed and
// extended forms of NEG, COM, LSR, ROR, ASR, ASL, ROL, DEC and INC.
wire isRMW6809 =
        ir12==`NEG_DP || ir12==`NEG_NDX || ir12==`NEG_EXT ||
        ir12==`COM_DP || ir12==`COM_NDX || ir12==`COM_EXT ||
        ir12==`LSR_DP || ir12==`LSR_NDX || ir12==`LSR_EXT ||
        ir12==`ROR_DP || ir12==`ROR_NDX || ir12==`ROR_EXT ||
        ir12==`ASR_DP || ir12==`ASR_NDX || ir12==`ASR_EXT ||
        ir12==`ASL_DP || ir12==`ASL_NDX || ir12==`ASL_EXT ||
        ir12==`ROL_DP || ir12==`ROL_NDX || ir12==`ROL_EXT ||
        ir12==`DEC_DP || ir12==`DEC_NDX || ir12==`DEC_EXT ||
        ir12==`INC_DP || ir12==`INC_NDX || ir12==`INC_EXT
        ;

`ifdef SUPPORT_6309
// AIM / EIM / OIM / TIM opcodes in their direct-page, indexed and extended forms.
wire isInMem =
        ir12==`AIM_DP || ir12==`AIM_NDX || ir12==`AIM_EXT ||
        ir12==`EIM_DP || ir12==`EIM_NDX || ir12==`EIM_EXT ||
        ir12==`OIM_DP || ir12==`OIM_NDX || ir12==`OIM_EXT ||
        ir12==`TIM_DP || ir12==`TIM_NDX || ir12==`TIM_EXT
        ;
// AIM, EIM and OIM are added to the base read-modify-write set; TIM is not.
wire isRMW1 =
        isRMW6809 ||
        ir12==`AIM_DP || ir12==`AIM_NDX || ir12==`AIM_EXT ||
        ir12==`EIM_DP || ir12==`EIM_NDX || ir12==`EIM_EXT ||
        ir12==`OIM_DP || ir12==`OIM_NDX || ir12==`OIM_EXT
        ;
`else
wire isInMem = 1'b0;
wire isRMW1 = isRMW6809;
`endif

// Asserted when opcode bits [7:4] equal 6, A or E, or when the opcode is one of
// the LEA instructions.
// NOTE(review): only bits [7:4] of ir12 are compared in the first three terms, so
// any higher-order bits of ir12 (the ipg prefix in the 8-bit build, the upper
// nibble in the 12-bit build) are ignored there - confirm that this is intended.
wire isIndexed =
        ir12[7:4]==4'h6 || ir12[7:4]==4'hA || ir12[7:4]==4'hE ||
        ir12==`LEAX_NDX || ir12==`LEAY_NDX || ir12==`LEAS_NDX || ir12==`LEAU_NDX
        ;
reg isDblIndirect;
// Asserted when both the top bit of the index postbyte and the bit four places
// below the top (bit bitsPerByte-4) are set.
wire isIndirect = ndxbyte[bitsPerByte-4] & ndxbyte[bitsPerByte-1];
// 12-bit build only: asserted when the top bit and bit bitsPerByte-5 of the
// postbyte are both set. Nothing in this part of the file drives isOuterIndexed
// when TWELVEBIT is undefined.
`ifdef TWELVEBIT
always_comb
        isOuterIndexed = ndxbyte[bitsPerByte-5] & ndxbyte[bitsPerByte-1];
`endif

// The index postbyte is the `HIBYTE field of the instruction register.
assign ndxbyte = ir[`HIBYTE];

// Interrupt classification.
// isINT follows the opcode; every other flag decodes the low nibble of the
// vector register `vect`.
wire isINT;
wire isRST, isNMI, isSWI, isIRQ, isFIRQ, isSWI2, isSWI3;

assign isINT  = (ir12 == `INT);
assign isRST  = (vect[3:0] == 4'hE);
assign isNMI  = (vect[3:0] == 4'hC);
assign isSWI  = (vect[3:0] == 4'hA);
assign isIRQ  = (vect[3:0] == 4'h8);
assign isFIRQ = (vect[3:0] == 4'h6);
assign isSWI2 = (vect[3:0] == 4'h4);
assign isSWI3 = (vect[3:0] == 4'h2);

// Address and offset operands extracted from the instruction register `ir`.
// far_address concatenates three instruction bytes, address two; ex_address picks
// between them according to isFar.
wire [`TRPBYTE] far_address = {ir[`HIBYTE],ir[`BYTE3],ir[`BYTE4]};
wire [`TRPBYTE] address = {ir[`HIBYTE],ir[`BYTE3]};
// Direct-page address: dpr supplies the upper part, the instruction byte the lower.
wire [`TRPBYTE] dp_address = {dpr,ir[`HIBYTE]};
wire [`TRPBYTE] ex_address = isFar ? far_address : address;
// One-byte offset with bit ir[bitsPerByte*3-1] replicated bitsPerByte times above it.
// NOTE(review): the concatenation is two bytes wide; if `TRPBYTE is three bytes
// wide the top byte is zero-filled rather than sign-extended (offset24 likewise)
// - confirm that this is intended.
wire [`TRPBYTE] offset12 = {{bitsPerByte{ir[bitsPerByte*3-1]}},ir[`BYTE3]};
wire [`TRPBYTE] offset24 = {ir[`BYTE3],ir[`BYTE4]};
wire [`TRPBYTE] offset36 = {ir[`BYTE3],ir[`BYTE4],ir[`BYTE5]};

// Choose the indexing register.
// A two-bit field of the index postbyte selects the base register:
//   00 = X, 01 = Y, 10 = user stack pointer (offset by usppg shifted left 8 bits),
//   11 = system stack pointer.
// The field sits at bits [6:5] of an 8-bit postbyte and bits [10:9] of a 12-bit one.
// Blocking assignments are used because this is purely combinational logic.
reg [`TRPBYTE] ndxreg;
always_comb
        if (bitsPerByte==8)
                case(ndxbyte[6:5])
                2'b00:  ndxreg = xr;
                2'b01:  ndxreg = yr;
                2'b10:  ndxreg = {usppg,8'h00} + usp;
                2'b11:  ndxreg = ssp;
                endcase
        else if (bitsPerByte==12)
                case(ndxbyte[10:9])
                2'b00:  ndxreg = xr;
                2'b01:  ndxreg = yr;
                2'b10:  ndxreg = {usppg,8'h00} + usp;
                2'b11:  ndxreg = ssp;
                endcase
        
// Indexed-mode effective address.
// The case selector is {isOuterIndexed, ndxbyte}. Below the outer-index flag, the
// top bit of the postbyte separates the short constant-offset form (0) from the
// extended forms (1). In the extended forms the low bits choose the offset source:
// none, auto inc/dec, accumulator, immediate offset, pc-relative or absolute.
// When isOuterIndexed is set the base register is left out of the sum.
// Postbytes that match no listed pattern yield 24'hFFFFFF.
// Blocking assignments are used because this is purely combinational logic.
// NOTE(review): in the 12-bit table isOuterIndexed is derived from ndxbyte bit 7
// (with TWELVEBIT defined), yet the 13'b11... patterns require bit 7 to be 0 -
// confirm whether those rows are reachable.
reg [`TRPBYTE] NdxAddr;
always_comb
        if (bitsPerByte==8)
                casez({isOuterIndexed,ndxbyte})
                9'b00???????:   NdxAddr = ndxreg + {{19{ndxbyte[BPB-4]}},ndxbyte[BPB-4:0]};
                9'b01???0000:   NdxAddr = ndxreg;
                9'b01???0001:   NdxAddr = ndxreg;
                9'b01???0010:   NdxAddr = ndxreg - 2'd1;
                9'b01???0011:   NdxAddr = ndxreg - 2'd2;
                9'b01???0100:   NdxAddr = ndxreg;
                9'b01???0101:   NdxAddr = ndxreg + {{BPB*2{accb[BPBM1]}},accb};
                9'b01???0110:   NdxAddr = ndxreg + {{BPB*2{acca[BPBM1]}},acca};
                9'b01???1000:   NdxAddr = ndxreg + offset12;
                9'b01???1001:   NdxAddr = ndxreg + offset24;
                9'b01???1010:   NdxAddr = ndxreg + offset36;
                9'b01???1011:   NdxAddr = ndxreg + {acca,accb};
                9'b01???1100:   NdxAddr = pc + offset12 + 3'd3;
                9'b01???1101:   NdxAddr = pc + offset24 + 3'd4;
                9'b01???1110:   NdxAddr = pc + offset36 + 3'd5;
                9'b01??01111:   NdxAddr = isFar ? offset36 : offset24;
                9'b01??11111:   NdxAddr = offset24;
                9'b10???????:   NdxAddr = {{11{ndxbyte[BPB-4]}},ndxbyte[BPB-4:0]};
                9'b11???0000:   NdxAddr = 24'd0;
                9'b11???0001:   NdxAddr = 24'd0;
                9'b11???0010:   NdxAddr = 24'd0;
                9'b11???0011:   NdxAddr = 24'd0;
                9'b11???0100:   NdxAddr = 24'd0;
                9'b11???0101:   NdxAddr = {{BPB*2{accb[BPBM1]}},accb};
                9'b11???0110:   NdxAddr = {{BPB*2{acca[BPBM1]}},acca};
                9'b11???1000:   NdxAddr = offset12;
                9'b11???1001:   NdxAddr = offset24;
                9'b11???1010:   NdxAddr = offset36;
                9'b11???1011:   NdxAddr = {acca,accb};
                9'b11???1100:   NdxAddr = pc + offset12 + 3'd3;
                9'b11???1101:   NdxAddr = pc + offset24 + 3'd4;
                9'b11???1110:   NdxAddr = pc + offset36 + 3'd5;
                9'b11??01111:   NdxAddr = isFar ? offset36 : offset24;
                9'b11??11111:   NdxAddr = offset24;
                default:                NdxAddr = 24'hFFFFFF;
                endcase
        else if (bitsPerByte==12)
                casez({isOuterIndexed,ndxbyte})
                13'b00???????????:      NdxAddr = ndxreg + {{27{ndxbyte[BPB-4]}},ndxbyte[BPB-4:0]};
                13'b01???00000000:      NdxAddr = ndxreg;
                13'b01???00000001:      NdxAddr = ndxreg;
                13'b01???00000010:      NdxAddr = ndxreg - 2'd1;
                13'b01???00010010:      NdxAddr = ndxreg - 2'd2;
                13'b01???00100010:      NdxAddr = ndxreg - 2'd3;
                13'b01???00000011:      NdxAddr = ndxreg - 2'd2;
                13'b01???00000100:      NdxAddr = ndxreg;
                13'b01???00000101:      NdxAddr = ndxreg + {{BPB*2{accb[BPBM1]}},accb};
                13'b01???00000110:      NdxAddr = ndxreg + {{BPB*2{acca[BPBM1]}},acca};
`ifdef SUPPORT_6309
                13'b01???00010101:      NdxAddr = ndxreg + {{BPB*2{accf[BPBM1]}},accf};
                13'b01???00010110:      NdxAddr = ndxreg + {{BPB*2{acce[BPBM1]}},acce};
                13'b01???00011011:      NdxAddr = ndxreg + {acce,accf};
`endif
                13'b01???00001000:      NdxAddr = ndxreg + offset12;
                13'b01???00001001:      NdxAddr = ndxreg + offset24;
                13'b01???00001010:      NdxAddr = ndxreg + offset36;
                13'b01???00001011:      NdxAddr = ndxreg + {acca,accb};
                13'b01???00001100:      NdxAddr = pc + offset12 + 3'd3;
                13'b01???00001101:      NdxAddr = pc + offset24 + 3'd4;
                13'b01???00001110:      NdxAddr = pc + offset36 + 3'd5;
                13'b01??000001111:      NdxAddr = isFar ? offset36 : offset24;
                13'b01??100001111:      NdxAddr = offset24;
                13'b01???10000000:      NdxAddr = 24'd0;
                13'b01???10000001:      NdxAddr = 24'd0;
                13'b01???10000010:      NdxAddr = 24'd0;
                13'b01???10000011:      NdxAddr = 24'd0;
                13'b01???10000100:      NdxAddr = 24'd0;
                13'b01???10000101:      NdxAddr = {{BPB*2{accb[BPBM1]}},accb};
                13'b01???10000110:      NdxAddr = {{BPB*2{acca[BPBM1]}},acca};
`ifdef SUPPORT_6309
                13'b01???10010101:      NdxAddr = {{BPB*2{accf[BPBM1]}},accf};
                13'b01???10010110:      NdxAddr = {{BPB*2{acce[BPBM1]}},acce};
                13'b01???10011011:      NdxAddr = {acce,accf};
`endif          
                13'b01???10001000:      NdxAddr = offset12;
                13'b01???10001001:      NdxAddr = offset24;
                13'b01???10001010:      NdxAddr = offset36;
                13'b01???10001011:      NdxAddr = {acca,accb};
                13'b01???10001100:      NdxAddr = pc + offset12 + 3'd3;
                13'b01???10001101:      NdxAddr = pc + offset24 + 3'd4;
                13'b01???10001110:      NdxAddr = pc + offset36 + 3'd5;
                13'b01??010001111:      NdxAddr = isFar ? offset36 : offset24;
                13'b01??110001111:      NdxAddr = offset24;
                13'b10???????????:      NdxAddr = {{15{ndxbyte[BPB-4]}},ndxbyte[BPB-4:0]};
                13'b11???00000000:      NdxAddr = 24'd0;
                13'b11???00000001:      NdxAddr = 24'd0;
                13'b11???00000010:      NdxAddr = 24'd0;
                13'b11???00000011:      NdxAddr = 24'd0;
                13'b11???00000100:      NdxAddr = 24'd0;
                13'b11???00000101:      NdxAddr = {{BPB*2{accb[BPBM1]}},accb};
                13'b11???00000110:      NdxAddr = {{BPB*2{acca[BPBM1]}},acca};
                13'b11???00001000:      NdxAddr = offset12;
                13'b11???00001001:      NdxAddr = offset24;
                13'b11???00001010:      NdxAddr = offset36;
                13'b11???00001011:      NdxAddr = {acca,accb};
                13'b11???00001100:      NdxAddr = pc + offset12 + 3'd3;
                13'b11???00001101:      NdxAddr = pc + offset24 + 3'd4;
                13'b11???00001110:      NdxAddr = pc + offset36 + 3'd5;
                13'b11??000001111:      NdxAddr = isFar ? offset36 : offset24;
                // Indirect flag is postbyte bit 8 in the 12-bit layout, matching the
                // 13'b01??100001111 row above (the original pattern set bit 4 instead).
                13'b11??100001111:      NdxAddr = offset24;
                default:                NdxAddr = 24'hFFFFFF;
                endcase
        
// Compute instruction length depending on indexing byte.
// insnsz is 2 for the forms that carry no extra offset bytes, and 3, 4 or 5
// when one, two or three offset bytes follow the postbyte. The absolute form
// (low bits 1111) is 5 when isFar and not indirect, otherwise 4.
// Literals are sized to the 3-bit register (the original assigned 4-bit literals),
// and blocking assignments are used because this is purely combinational logic.
reg [2:0] insnsz;
always_comb
        if (bitsPerByte==8)
                casez(ndxbyte)
                8'b0???????:    insnsz = 3'd2;
                8'b1??00000:    insnsz = 3'd2;
                8'b1??00001:    insnsz = 3'd2;
                8'b1??00010:    insnsz = 3'd2;
                8'b1??00011:    insnsz = 3'd2;
                8'b1??00100:    insnsz = 3'd2;
                8'b1??00101:    insnsz = 3'd2;
                8'b1??00110:    insnsz = 3'd2;
                8'b1??01000:    insnsz = 3'd3;
                8'b1??01001:    insnsz = 3'd4;
                8'b1??01010:    insnsz = 3'd5;
                8'b1??01011:    insnsz = 3'd2;
                8'b1??01100:    insnsz = 3'd3;
                8'b1??01101:    insnsz = 3'd4;
                8'b1??01110:    insnsz = 3'd5;
                8'b1??01111:    insnsz = isFar ? 3'd5 : 3'd4;
                8'b1??11111:    insnsz = 3'd4;
                default:        insnsz = 3'd2;
                endcase
        else if (bitsPerByte==12)
                casez(ndxbyte)
                12'b0???????????:       insnsz = 3'd2;
                12'b1???00000000:       insnsz = 3'd2;
                12'b1???00000001:       insnsz = 3'd2;
                12'b1???00000010:       insnsz = 3'd2;
                12'b1???00000011:       insnsz = 3'd2;
                12'b1???00000100:       insnsz = 3'd2;
                12'b1???000?0101:       insnsz = 3'd2;
                12'b1???000?0110:       insnsz = 3'd2;
                12'b1???00001000:       insnsz = 3'd3;
                12'b1???00001001:       insnsz = 3'd4;
                12'b1???00001010:       insnsz = 3'd5;
                12'b1???000?1011:       insnsz = 3'd2;
                12'b1???00001100:       insnsz = 3'd3;
                12'b1???00001101:       insnsz = 3'd4;
                12'b1???00001110:       insnsz = 3'd5;
                12'b1??000001111:       insnsz = isFar ? 3'd5 : 3'd4;
                12'b1??100001111:       insnsz = 3'd4;
                default:        insnsz = 3'd2;
                endcase

// Source registers for transfer or exchange instructions.
// The second instruction byte carries two 4-bit register codes: the upper nibble
// selects src1 and the lower nibble selects src2.
// For byte-wide registers src1 fills the upper byte (all ones for accumulators,
// a duplicate copy for ccr/dpr), while src2 leaves the upper byte zero.
// Codes with no register assigned read as zero.
// Blocking assignments are used because this is purely combinational logic. The
// all-ones fill is sized with bitsPerByte; the original 12'hFFF literal gave the
// same value but relied on truncation in the 8-bit build.
reg [`DBLBYTE] src1,src2;
always_comb
        case(ir[bitsPerByte+7:bitsPerByte+4])
        4'b0000:        src1 = {acca[`LOBYTE],accb[`LOBYTE]};
        4'b0001:        src1 = xr;
        4'b0010:        src1 = yr;
        4'b0011:        src1 = usp;
        4'b0100:        src1 = ssp;
        4'b0101:        src1 = pcp2;
        4'b1000:        src1 = {{bitsPerByte{1'b1}},acca[`LOBYTE]};
        4'b1001:        src1 = {{bitsPerByte{1'b1}},accb[`LOBYTE]};
        4'b1010:        src1 = {ccr,ccr};
        4'b1011:        src1 = {dpr,dpr};
        4'b1100:        src1 = usppg;
        4'b1101:        src1 = 24'h0000;
`ifdef SUPPORT_6309
        4'b0110:        src1 = {acce[`LOBYTE],accf[`LOBYTE]};
        4'b1110:        src1 = {{bitsPerByte{1'b1}},acce};
        4'b1111:        src1 = {{bitsPerByte{1'b1}},accf};
`else
        4'b1110:        src1 = 24'h0000;
        4'b1111:        src1 = 24'h0000;
`endif
        default:        src1 = 24'h0000;
        endcase
always_comb
        case(ir[bitsPerByte+3:bitsPerByte])
        4'b0000:        src2 = {acca[`LOBYTE],accb[`LOBYTE]};
        4'b0001:        src2 = xr;
        4'b0010:        src2 = yr;
        4'b0011:        src2 = usp;
        4'b0100:        src2 = ssp;
        4'b0101:        src2 = pcp2;
        4'b1000:        src2 = acca[`LOBYTE];
        4'b1001:        src2 = accb[`LOBYTE];
        4'b1010:        src2 = ccr;
        4'b1011:        src2 = dpr;
        4'b1100:        src2 = usppg;
        4'b1101:        src2 = 24'h0000;
`ifdef SUPPORT_6309
        4'b0110:        src2 = {acce[`LOBYTE],accf[`LOBYTE]};
        4'b1110:        src2 = acce;
        4'b1111:        src2 = accf;
`else
        4'b1110:        src2 = 24'h0000;
        4'b1111:        src2 = 24'h0000;
`endif
        default:        src2 = 24'h0000;
        endcase

// Register-to-register ALU results computed from src1 and src2.
// The arithmetic results are one bit wider than the operands so that the
// carry / borrow out is retained; the logical results are operand width.
wire [bitsPerByte*2:0]   sum12;         // src1 + src2
wire [bitsPerByte*2:0]   sum12c;        // src1 + src2 + carry flag
wire [bitsPerByte*2:0]   dif12;         // src1 - src2
wire [bitsPerByte*2:0]   dif12c;        // src1 - src2 - carry flag
wire [bitsPerByte*2-1:0] and12;         // bitwise AND
wire [bitsPerByte*2-1:0] eor12;         // bitwise exclusive OR
wire [bitsPerByte*2-1:0] or12;          // bitwise OR

assign sum12  = src1 + src2;
assign sum12c = src1 + src2 + cf;
assign and12  = src1 & src2;
assign eor12  = src1 ^ src2;
assign or12   = src1 | src2;
assign dif12  = src1 - src2;
assign dif12c = src1 - src2 - cf;

// Opcode operates on accumulator A: the inherent single-register operations plus
// the immediate, direct-page, indexed and extended forms of the A arithmetic,
// logic, compare and load instructions.
wire isAcca =
        // inherent forms
        ir12==`NEGA || ir12==`COMA || ir12==`LSRA || ir12==`RORA ||
        ir12==`ASRA || ir12==`ASLA || ir12==`ROLA || ir12==`DECA ||
        ir12==`INCA || ir12==`TSTA || ir12==`CLRA ||
        // one line per operation: IMM, DP, NDX, EXT
        ir12==`SUBA_IMM || ir12==`SUBA_DP || ir12==`SUBA_NDX || ir12==`SUBA_EXT ||
        ir12==`CMPA_IMM || ir12==`CMPA_DP || ir12==`CMPA_NDX || ir12==`CMPA_EXT ||
        ir12==`SBCA_IMM || ir12==`SBCA_DP || ir12==`SBCA_NDX || ir12==`SBCA_EXT ||
        ir12==`ANDA_IMM || ir12==`ANDA_DP || ir12==`ANDA_NDX || ir12==`ANDA_EXT ||
        ir12==`BITA_IMM || ir12==`BITA_DP || ir12==`BITA_NDX || ir12==`BITA_EXT ||
        ir12==`LDA_IMM  || ir12==`LDA_DP  || ir12==`LDA_NDX  || ir12==`LDA_EXT  ||
        ir12==`EORA_IMM || ir12==`EORA_DP || ir12==`EORA_NDX || ir12==`EORA_EXT ||
        ir12==`ADCA_IMM || ir12==`ADCA_DP || ir12==`ADCA_NDX || ir12==`ADCA_EXT ||
        ir12==`ORA_IMM  || ir12==`ORA_DP  || ir12==`ORA_NDX  || ir12==`ORA_EXT  ||
        ir12==`ADDA_IMM || ir12==`ADDA_DP || ir12==`ADDA_NDX || ir12==`ADDA_EXT
        ;
// Add-with-carry opcodes for A, B and D, one line per addressing mode.
wire isAdc =
        ir12==`ADCA_IMM || ir12==`ADCB_IMM || ir12==`ADCD_IMM ||
        ir12==`ADCA_DP  || ir12==`ADCB_DP  || ir12==`ADCD_DP  ||
        ir12==`ADCA_NDX || ir12==`ADCB_NDX || ir12==`ADCD_NDX ||
        ir12==`ADCA_EXT || ir12==`ADCB_EXT || ir12==`ADCD_EXT
        ;
// Subtract-with-carry opcodes for A, B and D, one line per addressing mode.
wire isSbc =
        ir12==`SBCA_IMM || ir12==`SBCB_IMM || ir12==`SBCD_IMM ||
        ir12==`SBCA_DP  || ir12==`SBCB_DP  || ir12==`SBCD_DP  ||
        ir12==`SBCA_NDX || ir12==`SBCB_NDX || ir12==`SBCD_NDX ||
        ir12==`SBCA_EXT || ir12==`SBCB_EXT || ir12==`SBCD_EXT
        ;

// Accumulator operand selection.
// With SUPPORT_6309 the E and F accumulators take priority over A; accumulator B
// is the fall-through choice in both builds.
`ifdef SUPPORT_6309
// Opcode operates on accumulator E.
wire isAcce =
        ir12==`CLRE || ir12==`COME || ir12==`DECE || ir12==`INCE ||
        ir12==`ADDE_IMM || ir12==`ADDE_DP || ir12==`ADDE_NDX || ir12==`ADDE_EXT ||
        ir12==`SUBE_IMM || ir12==`SUBE_DP || ir12==`SUBE_NDX || ir12==`SUBE_EXT ||
        ir12==`LDE_IMM  || ir12==`LDE_DP  || ir12==`LDE_NDX  || ir12==`LDE_EXT  ||
        ir12==`CMPE_IMM || ir12==`CMPE_DP || ir12==`CMPE_NDX || ir12==`CMPE_EXT
        ;
// Opcode operates on accumulator F.
wire isAccf =
        ir12==`CLRF || ir12==`COMF || ir12==`DECF || ir12==`INCF ||
        ir12==`ADDF_IMM || ir12==`ADDF_DP || ir12==`ADDF_NDX || ir12==`ADDF_EXT ||
        ir12==`SUBF_IMM || ir12==`SUBF_DP || ir12==`SUBF_NDX || ir12==`SUBF_EXT ||
        ir12==`LDF_IMM  || ir12==`LDF_DP  || ir12==`LDF_NDX  || ir12==`LDF_EXT  ||
        ir12==`CMPF_IMM || ir12==`CMPF_DP || ir12==`CMPF_NDX || ir12==`CMPF_EXT
        ;
wire [`DBLBYTE] acc =
        isAcce ? acce :
        isAccf ? accf :
        isAcca ? acca :
                 accb;
`else
wire [`DBLBYTE] acc = isAcca ? acca : accb;
`endif

// Instruction-class flags, registered on the rising clock edge while the state
// machine is in DECODE. They hold their value in every other state.
always_ff @(posedge clk_i)
begin
        if (state==DECODE) begin
                // Stack push / pull instructions
                isPSHS <= ir12==`PSHS;
                isPSHU <= ir12==`PSHU;
                isPULS <= ir12==`PULS;
                isPULU <= ir12==`PULU;
                // Return instructions
                isRTS <= ir12==`RTS;
                isRTI <= ir12==`RTI;
                isRTF <= ir12==`RTF;
                // Load effective address
                isLEA <= ir12==`LEAX_NDX || ir12==`LEAY_NDX || ir12==`LEAU_NDX || ir12==`LEAS_NDX;
                // Read-modify-write memory instructions
                isRMW <= isRMW1;
                // Store instructions, one line per register: DP, NDX, EXT
                isStore <=
                        ir12==`STA_DP || ir12==`STA_NDX || ir12==`STA_EXT ||
                        ir12==`STB_DP || ir12==`STB_NDX || ir12==`STB_EXT ||
                        ir12==`STD_DP || ir12==`STD_NDX || ir12==`STD_EXT ||
                        ir12==`STX_DP || ir12==`STX_NDX || ir12==`STX_EXT ||
                        ir12==`STY_DP || ir12==`STY_NDX || ir12==`STY_EXT ||
                        ir12==`STU_DP || ir12==`STU_NDX || ir12==`STU_EXT ||
                        ir12==`STS_DP || ir12==`STS_NDX || ir12==`STS_EXT ||
                        ir12==`STE_DP || ir12==`STE_NDX || ir12==`STE_EXT ||
                        ir12==`STF_DP || ir12==`STF_NDX || ir12==`STF_EXT ||
                        ir12==`STW_DP || ir12==`STW_NDX || ir12==`STW_EXT
                        ;
        end
end

// hit0 and hit1 are declared here and not driven in this part of the file;
// ihit is asserted only when both are asserted.
wire hit0, hit1;
wire ihit = hit0 & hit1;
reg rhit0;

// lic_o is asserted in the following state / instruction combinations
// (presumably "last instruction cycle", going by the port name - TODO confirm):
//   - CALC, unless the instruction is a read-modify-write;
//   - DECODE, for the listed opcodes (inherent, immediate and branch forms);
//   - STORE2, on the final byte of a store that is not part of an interrupt or a
//     push sequence;
//   - PUSH2 / PULL1, once the instruction's `HIBYTE field is zero;
//   - OUTER_INDEXING2, for LEA;
//   - LOAD2, on the final byte of a load that is not part of RTI / PULU / PULS.
//
// Fix: the LOAD2 alternatives are now wrapped in their own parentheses. Because
// && binds tighter than ||, the original gated only the first alternative
// (`LW_ACCA) with state==LOAD2, so every other alternative asserted lic_o in any
// state. The STORE2 term was already grouped this way.
//
// NOTE(review): ASLA and ASLB are absent from the DECODE list although the other
// accumulator shifts are present - confirm whether that is intentional.
assign lic_o =  (state==CALC && !isRMW) ||
                                (state==DECODE && (
                                        ir12==`NOP || ir12==`ORCC || ir12==`ANDCC || ir12==`DAA || ir12==`LDMD || ir12==`BITMD || ir12==`TFR || ir12==`EXG ||
                                        ir12==`NEGA || ir12==`COMA || ir12==`LSRA || ir12==`RORA || ir12==`ASRA || ir12==`ROLA || ir12==`DECA || ir12==`INCA || ir12==`TSTA || ir12==`CLRA ||
                                        ir12==`DECE || ir12==`DECF || ir12==`DECD || ir12==`DECW || ir12==`INCE || ir12==`INCF || ir12==`INCD || ir12==`INCW ||
                                        ir12==`NEGB || ir12==`COMB || ir12==`LSRB || ir12==`RORB || ir12==`ASRB || ir12==`ROLB || ir12==`DECB || ir12==`INCB || ir12==`TSTB || ir12==`CLRB ||
                                        ir12==`COME || ir12==`COMF || ir12==`COMD || ir12==`COMW ||
                                        ir12==`ASLD || ir12==`ASRD || ir12==`TSTD || ir12==`ADDR || ir12==`ADCR || ir12==`ANDR ||
                                        ir12==`TSTE || ir12==`TSTF || ir12==`TSTW ||
                                        ir12==`LSRD || ir12==`LSRW || ir12==`NEGD || ir12==`ROLD || ir12==`ROLW || ir12==`RORD || ir12==`RORW ||
                                        ir12==`SUBA_IMM || ir12==`CMPA_IMM || ir12==`SBCA_IMM || ir12==`ANDA_IMM || ir12==`BITA_IMM || ir12==`LDA_IMM || ir12==`EORA_IMM || ir12==`ADCA_IMM || ir12==`ORA_IMM || ir12==`ADDA_IMM ||
                                        ir12==`SUBB_IMM || ir12==`CMPB_IMM || ir12==`SBCB_IMM || ir12==`ANDB_IMM || ir12==`BITB_IMM || ir12==`LDB_IMM || ir12==`EORB_IMM || ir12==`ADCB_IMM || ir12==`ORB_IMM || ir12==`ADDB_IMM ||
                                        ir12==`EORD_IMM || ir12==`ANDD_IMM || ir12==`ORD_IMM || ir12==`BITD_IMM || ir12==`ADDD_IMM || ir12==`ADCD_IMM || ir12==`SUBD_IMM || ir12==`SBCD_IMM || ir12==`LDD_IMM || ir12==`LDW_IMM ||
                                        ir12==`LDQ_IMM || ir12==`CMPD_IMM || ir12==`CMPX_IMM || ir12==`CMPY_IMM || ir12==`CMPU_IMM || ir12==`CMPS_IMM || ir12==`CMPW_IMM ||
                                        ir12==`LDE_IMM || ir12==`LDF_IMM ||
                                        ir12==`SUBE_IMM || ir12==`SUBF_IMM || ir12==`SUBW_IMM ||
                                        ir12==`BEQ || ir12==`BNE || ir12==`BMI || ir12==`BPL || ir12==`BVS || ir12==`BVC || ir12==`BRA || ir12==`BRN ||
                                        ir12==`BHI || ir12==`BLS || ir12==`BHS || ir12==`BLO ||
                                        ir12==`BGT || ir12==`BGE || ir12==`BLT || ir12==`BLE ||
                                        ir12==`LBEQ || ir12==`LBNE || ir12==`LBMI || ir12==`LBPL || ir12==`LBVS || ir12==`LBVC || ir12==`LBRA || ir12==`LBRN ||
                                        ir12==`LBHI || ir12==`LBLS || ir12==`LBHS || ir12==`LBLO ||
                                        ir12==`LBGT || ir12==`LBGE || ir12==`LBLT || ir12==`LBLE
                                        )
                                ) ||
                                (state==STORE2 && (
                                        (store_what==`SW_ACCQ3124 && wadr[1:0]==2'b00) ||
                                        (store_what==`SW_ACCQ70) ||
                                        (store_what==`SW_ACCA && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_ACCB && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_ACCDH && wadr[1:0]!=2'b11) ||
                                        (store_what==`SW_ACCDL) ||
                                        (store_what==`SW_X3124 && wadr[1:0]==2'b00 && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_XL && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_YL && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_USPL && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_SSPL && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_PCL && !(isINT || isPSHS || isPSHU) && !(ir12==`JSR_NDX && isIndirect)) ||
                                        (store_what==`SW_ACCA70 && !(isINT || isPSHS || isPSHU)) ||
                                        (store_what==`SW_ACCB70 && !(isINT || isPSHS || isPSHU))
                                )) ||
                                (state==PUSH2 && ir[`HIBYTE]==12'h000 && !isINT) ||
                                (state==PULL1 && ir[`HIBYTE]==12'h000) ||
                                (state==OUTER_INDEXING2 && isLEA) ||
                                (state==LOAD2 && (
                                        (load_what==`LW_ACCA && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_ACCB && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_DPR && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_XL && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_YL && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_USPL && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_SSPL && !(isRTI || isPULU || isPULS)) ||
                                        (load_what==`LW_PCL) ||
                                        (load_what==`LW_IAL && !isOuterIndexed && isLEA) ||
                                        (load_what==`LW_IA3124 && radr[1:0]==2'b00 && !isOuterIndexed && isLEA)
                                ))
                                ;

// lock_bus is true when isRMW is set, or when the pending load/store selector
// names one of the register parts listed below (the ...H and ...2316 parts).
// NOTE(review): presumably these are the cases where further bytes of the same
// value follow and the bus must stay locked -- confirm against the users of
// lock_bus.
wire lock_bus =
        isRMW ||
        (load_what inside {
                `LW_XH, `LW_YH, `LW_USPH, `LW_SSPH,
                `LW_PCH, `LW_BH, `LW_IAH, `LW_PC2316,
                `LW_IA2316, `LW_B2316,
                `LW_X2316, `LW_Y2316, `LW_USP2316, `LW_SSP2316
        }) ||
        (store_what inside {
                `SW_ACCDH, `SW_XH, `SW_YH, `SW_USPH, `SW_SSPH,
                `SW_PCH, `SW_PC2316, `SW_ACCQ2316,
                `SW_X2316, `SW_Y2316, `SW_USP2316, `SW_SSP2316
        });

// True when the current opcode (ir12) is one of the prefix codes PG2, PG3 or OUTER.
wire isPrefix = ir12 inside {`PG2, `PG3, `OUTER};

reg rty;                                // cleared to `FALSE on reset
reg [5:0] waitcnt;                      // down-counter loaded with 20 in ICACHE5/ICACHE9, counted down in ICACHE7
reg [3:0] iccnt;                        // index of the cache-line entry being fetched (ICACHE2/ICACHE4)
reg [bitsPerByte-1:0] icbuf [0:15];     // 16-entry buffer collecting one cache line from the bus
reg [bitsPerByte*16-1:0] icbuf2;        // icbuf flattened into one vector; written to the cache memory
reg [15:0] outstanding; // Outstanding async read cycles.
integer n4;                             // loop index used by the ICACHE7 state

// Instruction cache data memory.
// The flattened line buffer icbuf2 is written at icwa[11:0] while the state
// machine is in ICACHE6. The read port is clocked by the inverted clock and
// is addressed by the low 12 bits of the program counter; it returns insn.
rf6809_icachemem u1
(
        .wclk(clk_i),
        .wce(1'b1),
        .wr(state==ICACHE6),            // write only during ICACHE6
        .wa(icwa[11:0]),
        .i(icbuf2),
        .rclk(~clk_i),                  // read side uses the opposite clock edge
        .rce(1'b1),
        .pc(pc[11:0]),
        .insn(insn)
);
        
// Instruction cache tag memory.
// Updated in the same state (ICACHE6) as the data memory, using the
// `TRPBYTE slice of the line address. ic_invalidate is passed straight
// through. The hit0/hit1 outputs are consumed by the ICACHE1 state.
rf6809_itagmem u2
(
        .wclk(clk_i),
        .wce(1'b1),
        .wr(state==ICACHE6),            // write only during ICACHE6
        .wa(icwa[`TRPBYTE]),
        .invalidate(ic_invalidate),
        .rclk(~clk_i),                  // read side uses the opposite clock edge
        .rce(1'b1),
        .pc(pc),
        .hit0(hit0),
        .hit1(hit1)
);

// Result and carry nets of the BCD adder/subtractor/negate/multiply units.
// Names ending in "bo"/"bcf" belong to the single-byte units, the others to
// the double-byte units. They are only driven when SUPPORT_BCD is defined.
// (bcdsubbcf was previously misspelled "bcdsuubbcf", leaving the net that
// ubcds1 drives implicitly declared.)
wire bcdaddbcf, bcdsubbcf, bcdaddcf, bcdsubcf, bcdnegcf, bcdnegbcf;
wire [bitsPerByte-1:0] bcdaddbo, bcdsubbo, bcdnegbo;
wire [bitsPerByte*2-1:0] bcdaddo, bcdsubo, bcdnego;
wire [31:0] bcdmulo;

`ifdef SUPPORT_BCD
// Single-byte BCD add: acc + b12 (+ carry when isAdc).
BCDAddN #(.N(3)) ubcda1 (
        .ci(isAdc ? cf : 1'b0),
        .a(acc),
        .b(b12),
        .o(bcdaddbo),
        .co(bcdaddbcf)
);

// Double-byte BCD add: {acca,accb} + b (+ carry when isAdc).
BCDAddN #(.N(6)) ubcda2 (
        .ci(isAdc ? cf : 1'b0),
        .a({acca,accb}),
        .b(b),
        .o(bcdaddo),
        .co(bcdaddcf)
);

// Single-byte BCD subtract: acc - b12 (carry-in taken from cf when isSbc).
BCDSubN #(.N(3)) ubcds1 (
        .ci(isSbc ? cf : 1'b0),
        .a(acc),
        .b(b12),
        .o(bcdsubbo),
        .co(bcdsubbcf)
);

// Double-byte BCD subtract: {acca,accb} - b (carry-in taken from cf when isSbc).
BCDSubN #(.N(6)) ubcds2 (
        .ci(isSbc ? cf : 1'b0),
        .a({acca,accb}),
        .b(b),
        .o(bcdsubo),
        .co(bcdsubcf)
);

// Single-byte BCD negate, formed as 0 - acc.
BCDSubN #(.N(3)) ubcds3 (
        .ci(1'b0),
        .a(12'h0),
        .b(acc),
        .o(bcdnegbo),
        .co(bcdnegbcf)
);

// Double-byte BCD negate, formed as 0 - {acca,accb}.
// The zero operand is sized to match the two-byte (24-bit) 'b' operand
// instead of relying on implicit extension of a 12-bit literal.
BCDSubN #(.N(6)) ubcds4 (
        .ci(1'b0),
        .a(24'h0),
        .b({acca,accb}),
        .o(bcdnego),
        .co(bcdnegcf)
);

// BCD multiply of acca by accb, each padded with four leading zero bits.
BCDMul4 ubcdmul1
(
        .a({4'h0,acca}),
        .b({4'h0,accb}),
        .o(bcdmulo)
);
`endif

// BCD multiplier result pipeline.
// bcdmul_res[0] registers the low 24 bits of the BCD product; the value then
// shifts one stage per clock through bcdmul_res[1..15], and bcdmul_res16
// registers the output of the last stage.
//
// Fixes relative to the previous revision:
//  - bcdmul_res16 is fed from bcdmul_res[15]; it used to read muld_res[15],
//    which belongs to the binary multiplier and is never written.
//  - the shift stages use non-blocking assignments so that the result does
//    not depend on the order in which the always_ff processes are evaluated.
reg [bitsPerByte*2-1:0] bcdmul_res [0:15];
reg [bitsPerByte*2-1:0] bcdmul_res16;
genvar g5;
generate begin : gBCDMulPipe
        always_ff @(posedge clk_i)
                bcdmul_res[0] <= bcdmulo[23:0];
        always_ff @(posedge clk_i)
                bcdmul_res16 <= bcdmul_res[15];
        for (g5 = 1; g5 < 16; g5 = g5 + 1)
                always_ff @(posedge clk_i)
                        bcdmul_res[g5] <= bcdmul_res[g5-1];
end
endgenerate

// Multiplier logic
// Signed multiply of {acca,accb} by the low two bytes of b, followed by a
// register pipeline: muld_res[0] captures the product, stages 1..5 shift it
// along one stage per clock, and muld_res6 registers the output of stage 5.
// Only elements 0..5 of muld_res are written here.
//
// The shift stages use non-blocking assignments; with blocking assignments
// in separate always_ff processes the simulated pipeline depth depended on
// process evaluation order.
wire signed [`QUADBYTE] muld_prod = $signed({acca,accb}) * $signed(b[`DBLBYTE]);
reg [`QUADBYTE] muld_res [0:15];
reg [`QUADBYTE] muld_res6;
genvar g4;
generate begin : gMulPipe
        always_ff @(posedge clk_i)
                muld_res[0] <= muld_prod;
        always_ff @(posedge clk_i)
                muld_res6 <= muld_res[5];
        for (g4 = 1; g4 < 6; g4 = g4 + 1)
                always_ff @(posedge clk_i)
                        muld_res[g4] <= muld_res[g4-1];
end
endgenerate

// Divider logic
// divcnt is a cycle down-counter: DIV1 loads it (28 or 52) and the DIV2 and
// MUL2 states count it down to zero before moving on.
reg [5:0] divcnt;
/*
reg divsign;
reg [`DBLBYTE] dividend;
// Table of positive constants 1/0 to 1/2047, accurate to 35 bits
reg [26:0] divtbl [0:2047];     
genvar g2;
generate begin: gDivtbl
        for (g2 = 0; g2 < 2048; g2 = g2 + 1)
        initial begin
                divtbl[g2] = 27'h4000000 / g2;
        end
end
endgenerate
reg [49:0] divres;
always_comb
        divres = ({36'd0,dividend} * divtbl[b12]);
reg [11:0] divrem;
always_comb
        divrem = dividend - divres[49:26] * b12;
// Now create a 12-stage divider pipeline. Hopefully the synthesizer
// will backfill along this pipeline. Each multiplier requires only
// about 5 stages for best performance.
genvar g1;
reg [49:0] divrespipe [0:31];
reg [11:0] divrempipe [0:31];
reg [49:0] divres12;
reg [11:0] divrem12;
generate begin : gDivPipe
        always_ff @(posedge clk_i)
                divrespipe[0] <= divres;
        always_ff @(posedge clk_i)
                divrempipe[0] <= divrem;
        always_ff @(posedge clk_i)
                divres12 <= divrespipe[12];
        always_ff @(posedge clk_i)
                divrem12 <= divrempipe[12];
        for (g1 = 1; g1 < 13; g1 = g1 + 1)
        always_ff @(posedge clk_i) begin
                divrespipe[g1] <= divrespipe[g1-1];
                divrempipe[g1] <= divrempipe[g1-1];
        end
end
endgenerate
*/
// Divider outputs. Each pair is {quotient, remainder} as concatenated on the
// m_axis_dout_tdata port of the corresponding divider instance.
wire [23:0] divres24;   // udiv24by12 quotient
wire [15:0] divrem12;   // udiv24by12 remainder field (16 bits wide on the port)
wire [47:0] divres48;   // udiv48by24 quotient
wire [23:0] divrem24;   // udiv48by24 remainder
// NOTE(review): the four nets below are not driven anywhere in this part of
// the file; presumably reserved for an 8-bit-byte configuration -- confirm.
wire [15:0] divres16;
wire [7:0] divrem8;
wire [31:0] divres32;
wire [15:0] divrem16;

// Hardware dividers, only built when both SUPPORT_6309 and SUPPORT_DIVIDE are
// defined and bitsPerByte is 12. Both units are fed continuously (tvalid tied
// high); the state machine waits a fixed number of cycles (divcnt) before
// sampling the outputs, and the tvalid/tuser outputs are left unconnected.
`ifdef SUPPORT_6309
`ifdef SUPPORT_DIVIDE
generate begin : gDividers
        if (bitsPerByte==12) begin
                // {acca,accb} divided by b12 (zero-padded to 16 bits)
                div24by12 udiv24by12 (
                  .aclk(clk_i),                                      // input wire aclk
                  .s_axis_divisor_tvalid(1'b1),    // input wire s_axis_divisor_tvalid
                  .s_axis_divisor_tdata({4'h0,b12}),      // input wire [15 : 0] s_axis_divisor_tdata
                  .s_axis_dividend_tvalid(1'b1),  // input wire s_axis_dividend_tvalid
                  .s_axis_dividend_tdata({acca,accb}),    // input wire [23 : 0] s_axis_dividend_tdata
                  .m_axis_dout_tvalid(),          // output wire m_axis_dout_tvalid
                  .m_axis_dout_tuser(),            // output wire [0 : 0] m_axis_dout_tuser
                  .m_axis_dout_tdata({divres24,divrem12})            // output wire [39 : 0] m_axis_dout_tdata
                );

                // {acca,accb,acce,accf} divided by b
                div48by24 udiv48by24 (
                  .aclk(clk_i),                                      // input wire aclk
                  .s_axis_divisor_tvalid(1'b1),    // input wire s_axis_divisor_tvalid
                  .s_axis_divisor_tdata(b),      // input wire [23 : 0] s_axis_divisor_tdata
                  .s_axis_dividend_tvalid(1'b1),  // input wire s_axis_dividend_tvalid
                  .s_axis_dividend_tdata({acca,accb,acce,accf}),    // input wire [47 : 0] s_axis_dividend_tdata
                  .m_axis_dout_tvalid(),          // output wire m_axis_dout_tvalid
                  .m_axis_dout_tuser(),            // output wire [0 : 0] m_axis_dout_tuser
                  .m_axis_dout_tdata({divres48,divrem24})            // output wire [71 : 0] m_axis_dout_tdata
                );
        end
end
endgenerate
`endif
`endif

// For asynchronous reads,
// the read responses might come back in any order (the packets could loop
// around in the network).
// We need to buffer and reorder the responses correctly.

// Cache-line capture buffer.
// icgot has one flag per line entry and icbuf holds the entry data. The flags
// are cleared on reset and whenever the state machine is in ICACHE1. Incoming
// data (dati) is stored in the slot selected by the response tag (atag_i) when
// SUPPORT_AREAD is defined, otherwise in the slot selected by the low four
// address bits of the current bus cycle.
integer n3;
always_ff @(posedge clk_i)
if (rst_i) begin
        icgot <= 16'h0;
        for (n3 = 0; n3 < 16; n3 = n3 + 1)
                icbuf[n3] <= {bitsPerByte{1'b0}};
end
else begin
        // Clear first: a response arriving in the same cycle is assigned
        // later in the block, so its flag still ends up set.
        if (state==ICACHE1)
                icgot <= 16'h0;
`ifdef SUPPORT_AREAD
        if (aack_i) begin
                icgot[atag_i] <= 1'b1;
                icbuf[atag_i] <= dati;
        end
`else
        // Note: this captures on every ack_i, not only during cache fills.
        if (ack_i) begin
                icgot[adr_o[3:0]] <= 1'b1;
                icbuf[adr_o[3:0]] <= dati;
        end
`endif
end

// Flatten the 16-entry line buffer into the single vector icbuf2 that is
// written to the instruction cache memory: entry g3 occupies bits
// [(g3+1)*bitsPerByte-1 : g3*bitsPerByte].
// Combinational logic uses blocking assignment; the previous non-blocking
// assignment inside always_comb is flagged by lint/synthesis tools and can
// delay the update by a delta cycle in simulation.
genvar g3;
generate begin : gIcin
for (g3 = 0; g3 < 16; g3 = g3 + 1)
        always_comb
                icbuf2[(g3+1)*bitsPerByte-1:g3*bitsPerByte] = icbuf[g3];
end
endgenerate

// Bus timeout counter
// btocnt counts consecutive clocks during which a bus cycle is active
// (cyc_o and stb_o both high) and returns to zero as soon as it is not.
always_ff @(posedge clk_i)
if (rst_i)
        btocnt <= 24'd0;
else if (cyc_o & stb_o)
        btocnt <= btocnt + 2'd1;
else
        btocnt <= 24'd0;

// bto flags a timeout once the active cycle has lasted 10000 clocks or more.
always_comb
        bto = btocnt >= 24'd10000;

// Millisecond counter.
// ns_count counts clock cycles; when it reaches 40000 it wraps to zero and
// ms_count advances by one. A 40MHz clock is assumed.
// NOTE(review): ns_count takes the values 0..40000 inclusive, i.e. 40001
// clocks per tick. The checkpoint logic tests ns_count==40000, so the two
// would have to be changed together.
reg [23:0] ns_count;    // clock cycles elapsed within the current tick
reg [35:0] ms_count;    // number of ticks (milliseconds) since reset

always_ff @(posedge clk_i)
if (rst_i) begin
        ns_count <= 24'd0;
        ms_count <= 36'd0;
end
else if (ns_count>=24'd40000) begin
        ns_count <= 24'h0;
        ms_count <= ms_count + 2'd1;
end
else
        ns_count <= ns_count + 2'd1;

`ifdef SUPPORT_CHECKPOINT
// Checkpoint register.
// Set to all ones at the tick where ns_count is 40000 and the low ten bits of
// ms_count are all ones. Cleared on reset and when the state machine is in
// STORE1 with wadr equal to the all-ones address ending in 8'hE1. The clear
// is listed last so it wins if both conditions occur in the same clock.
always_ff @(posedge clk_i)
if (rst_i)
        chkpoint <= 12'h000;
else begin
        if (ns_count==16'd40000 && ms_count[9:0]==10'h3FF)
                chkpoint <= 12'hFFF;
        if (state==STORE1 && wadr=={{BPB*3-8{1'b1}},8'hE1})
                chkpoint <= 12'h000;
end
`endif

// Input sampling registers.
//  tsc_latched : tsc_i delayed by one clock.
//  nmi1        : previous-clock value of the NMI request, used for edge
//                detection. When iplMd is set the request is the AND of
//                nmi_i, firq_i and irq_i; otherwise it is nmi_i alone.
always_ff @(posedge clk_i) begin
        tsc_latched <= tsc_i;
        nmi1 <= iplMd ? &{nmi_i,firq_i,irq_i} : nmi_i;
end
// NMI edge latch.
// nmi_edge is set on a rising edge of the NMI request (current value high,
// previous-clock value nmi1 low) and cleared when an `INT opcode is in DECODE.
// With SUPPORT_CHECKPOINT it is also set, with priority, while the low ten
// bits of ms_count are all ones and chkpoint is non-zero.
// Note: there is no reset term; nmi_edge holds its value otherwise.
always_ff @(posedge clk_i)
`ifdef SUPPORT_CHECKPOINT
        if (ms_count[9:0]==10'h3FF && chkpoint!=12'h000)
                nmi_edge <= 1'b1;
        else 
`endif
        if ((iplMd ? &{nmi_i,firq_i,irq_i} : nmi_i) & !nmi1)
                nmi_edge <= 1'b1;
        else if (state==DECODE && ir12==`INT)
                nmi_edge <= 1'b0;

// Down-counter that delays the start of execution after reset (see RESET state).
reg [11:0] rst_cnt;

// Main processor state machine. The reset branch below initialises the
// architectural registers and control flags and parks the machine in RESET.
always @(posedge clk_i)
if (rst_i) begin
        wb_nack();
        // Mode flags
        natMd <= 1'b0;
        firqMd <= 1'b0;
        iplMd <= 1'b0;
        rty <= `FALSE;
        // Start-up delay is derived from the core id.
        rst_cnt <= {id,4'd0};
        next_state(RESET);
        sync_state <= `FALSE;
        wait_state <= `FALSE;
        md32 <= `FALSE;
        ipg <= 2'b00;
        isFar <= `FALSE;
`ifdef EIGHTBIT
        isOuterIndexed <= `FALSE;
`endif
        dpr <= 12'h000;
        ibufadr <= {BPB*3{1'b0}};
//      pc <= 24'hFFFFFE;
        pc <= {{BPB*3-1{1'b1}},1'b0};   // FF...FE
        // Instruction register and buffer are filled with NOPs.
        ir <= {4{`NOP}};
        ibuf <= {4{`NOP}};
        dm <= 1'b0;
        // Both interrupt masks start set; NMI is not armed.
        im <= 1'b1;
        firqim <= 1'b1;
        nmi_armed <= `FALSE;
        // Invalidate the instruction cache; released again in the RESET state.
        ic_invalidate <= `TRUE;
        first_ifetch <= `TRUE;
        // Accumulators, index registers and stack pointers
        acca <= 12'h0;
        accb <= 12'h0;
        accd <= 24'h0;
`ifdef SUPPORT_6309
        accw <= 24'h0;
`endif
        xr <= 24'h0;
        yr <= 24'h0;
        usppg <= 16'h0;
        usp <= 24'h0;
        ssp <= 24'h0;
        // ba_o/bs_o are both driven high while halt_i is asserted, low otherwise.
        if (halt_i) begin
                ba_o <= 1'b1;
                bs_o <= 1'b1;
        end
        else begin
                ba_o <= 1'b0;
                bs_o <= 1'b0;
        end
        outstanding <= 16'h0;
        iccnt <= 4'h0;
        //dividend <= 'b0;
        divcnt <= 'b0;
        //divsign <= 'b0;
end
else begin

// Release any bus lock during the last state of an instruction: lock_o is
// dropped when lic_o is asserted and the bus cycle is acknowledged while the
// machine is in STORE2 or LOAD2.
// NOTE(review): lic_o presumably means "last instruction cycle" -- confirm.
if (lic_o && ack_i && (state==STORE2 || state==LOAD2))
        lock_o <= 1'b0;

case(state)
// RESET: wait for rst_cnt to reach zero, then stop invalidating the cache and
// start loading the program counter from the reset vector.
RESET:
        if (rst_cnt==10'd0) begin
                ic_invalidate <= `FALSE;
                ba_o <= 1'b0;
                bs_o <= 1'b0;
                vect <= `RST_VECT;
                radr <= `RST_VECT;
                load_what <= `LW_PCH;
                next_state(LOAD1);
        end
        else
                rst_cnt <= rst_cnt - 2'd1;

// IFETCH performs the instruction fetch and, in the same clock, the writeback
// task. The remaining core states each delegate to one task.
IFETCH:
        begin
                tIfetch();
                tWriteback();
        end
DECODE: tDecode();
LOAD1:  tLoad1();
LOAD2:  tLoad2();
CALC:           tExecute();
STORE1: tStore1();
STORE1a:        tStore1a();
STORE2: tStore2();

// ============================================================================
// ============================================================================
// MUL2: wait state. Counts divcnt down to zero, then returns to IFETCH.
MUL2:
        if (divcnt != 6'd0)
                divcnt <= divcnt - 2'd1;
        else
                next_state(IFETCH);
// DIV1: load the wait counter with the divider latency for the instruction
// (28 clocks for the DIVD forms, 52 for the DIVQ forms), then go to DIV2.
// For any other opcode divcnt is left unchanged.
DIV1:
        begin
                /*
                divsign <= acca[bitsPerByte-1] ^ b12[bitsPerByte-1];
                if (acca[bitsPerByte-1])
                        dividend <= -{acca,accb};
                else
                        dividend <= {acca,accb};
                if (b12[bitsPerByte-1])
                        b <= -b;
                */
                case(ir12)
                `DIVD_IMM,`DIVD_DP,`DIVD_NDX,`DIVD_EXT:
                        divcnt <= 6'd28;
                `DIVQ_IMM,`DIVQ_DP,`DIVQ_NDX,`DIVQ_EXT:
                        divcnt <= 6'd52;
                endcase
                next_state(DIV2);
        end
// DIV2: wait for the divider; move to DIV3 when divcnt reaches zero.
DIV2:
        if (divcnt != 6'd0)
                divcnt <= divcnt - 2'd1;
        else
                next_state(DIV3);
// DIV3: collect the result. The low byte of res receives the low 12 bits of
// the quotient, the high byte receives the remainder (divrem12 is truncated
// to the width of the destination slice), and vf is set when the upper
// quotient bits are not a sign extension of bit 11.
// NOTE(review): only the 24-by-12 divider outputs are used here, even for the
// DIVQ opcodes that DIV1 recognises -- confirm whether DIVQ is finished.
DIV3:
        begin
                res[`LOBYTE] <= divres24[11:0];
                res[`HIBYTE] <= divrem12;
                vf <= divres24[23:12] != {12{divres24[11]}};
                next_state(IFETCH);
        end

// ============================================================================
// ============================================================================
// PUSH1: reserve stack space. The write address and the selected stack
// pointer are both moved down by cnt. Interrupts and PSHS use the system
// stack; otherwise (PSHU) the user stack is used, with usppg supplying the
// upper address bits of the write address.
PUSH1:
        begin
                next_state(PUSH2);
                if (isINT | isPSHS) begin
                        wadr <= (ssp - cnt);
                        ssp <= (ssp - cnt);
                end
                else begin      // PSHU
                        wadr <= ({usppg,{bitsPerByte{1'b0}}} + usp - cnt);
                        usp <= (usp - cnt);
                end
        end
// PUSH2: select the next register to store. The bits of ir above the opcode
// byte act as a pending-register mask: the first set bit found (in the
// priority order below) selects store_what, is cleared, and the machine goes
// to STORE1. When no bit is left the push is complete.
PUSH2:
        begin
                next_state(STORE1);
                if (ir[bitsPerByte]) begin
                        store_what <= `SW_CCR;
                        ir[bitsPerByte] <= 1'b0;
                end
                else if (ir[bitsPerByte+1]) begin
                        store_what <= `SW_ACCA;
                        ir[bitsPerByte+1] <= 1'b0;
                end
                else if (ir[bitsPerByte+2]) begin
                        store_what <= `SW_ACCB;
                        ir[bitsPerByte+2] <= 1'b0;
                end
`ifdef SUPPORT_6309
                else if (ir[bitsPerByte+8]) begin
                        store_what <= `SW_ACCE;
                        ir[bitsPerByte+8] <= 1'b0;
                end
                else if (ir[bitsPerByte+9]) begin
                        store_what <= `SW_ACCF;
                        ir[bitsPerByte+9] <= 1'b0;
                end
`endif
                else if (ir[bitsPerByte+3]) begin
                        store_what <= `SW_DPR;
                        ir[bitsPerByte+3] <= 1'b0;
                end
                else if (ir[bitsPerByte+4]) begin
                        store_what <= `SW_XH;
                        ir[bitsPerByte+4] <= 1'b0;
                end
                else if (ir[bitsPerByte+5]) begin
                        store_what <= `SW_YH;
                        ir[bitsPerByte+5] <= 1'b0;
                end
                // Bit 6 is "the other" stack pointer: U when pushing on S
                // (interrupt or PSHS), S when pushing on U.
                else if (ir[bitsPerByte+6]) begin
                        if (isINT | isPSHS)
                                store_what <= `SW_USPH;
                        else
                                store_what <= `SW_SSPH;
                        ir[bitsPerByte+6] <= 1'b0;
                end
                else if (ir[bitsPerByte+7]) begin
                        store_what <= isFar ? `SW_PC2316 : `SW_PCH;
                        ir[bitsPerByte+7] <= 1'b0;
                end
                else begin
                        // Nothing left to push. For an interrupt, continue at
                        // the handler: either jump straight to an externally
                        // supplied vector (vec_i non-zero) or load the PC
                        // from the vector address; otherwise resume fetching.
                        if (isINT) begin
                                dm <= 1'b0;
                                radr <= vect;
                                if (vec_i != 24'h0) begin
                                        $display("vector: %h", vec_i);
                                        pc <= vec_i;
                                        next_state(IFETCH);
                                end
                                else begin
                                        pc[`BYTE3] <= 8'h00;
                                        load_what <= `LW_PCH;
                                        next_state(LOAD1);
                                end
                        end
                        else
                                next_state(IFETCH);
                end
        end
// PULL1: select the next register to load. As in PUSH2, the bits of ir above
// the opcode byte form a pending-register mask; the first set bit found
// selects load_what, is cleared, and the machine goes to LOAD1. When the mask
// is empty the pull is complete and the machine returns to IFETCH.
PULL1:
        begin
                next_state(LOAD1);
                if (ir[bitsPerByte]) begin
                        load_what <= `LW_CCR;
                        ir[bitsPerByte] <= 1'b0;
                end
                else if (ir[bitsPerByte+1]) begin
                        load_what <= `LW_ACCA;
                        ir[bitsPerByte+1] <= 1'b0;
                end
                else if (ir[bitsPerByte+2]) begin
                        load_what <= `LW_ACCB;
                        ir[bitsPerByte+2] <= 1'b0;
                end
`ifdef SUPPORT_6309
                else if (ir[bitsPerByte+8]) begin
                        load_what <= `LW_ACCE;
                        ir[bitsPerByte+8] <= 1'b0;
                end
                else if (ir[bitsPerByte+9]) begin
                        load_what <= `LW_ACCF;
                        ir[bitsPerByte+9] <= 1'b0;
                end
`endif
                else if (ir[bitsPerByte+3]) begin
                        load_what <= `LW_DPR;
                        ir[bitsPerByte+3] <= 1'b0;
                end
                else if (ir[bitsPerByte+4]) begin
                        load_what <= `LW_XH;
                        ir[bitsPerByte+4] <= 1'b0;
                end
                else if (ir[bitsPerByte+5]) begin
                        load_what <= `LW_YH;
                        ir[bitsPerByte+5] <= 1'b0;
                end
                // Bit 6 is "the other" stack pointer: S for PULU, U otherwise.
                else if (ir[bitsPerByte+6]) begin
                        if (ir12==`PULU)
                                load_what <= `LW_SSPH;
                        else
                                load_what <= `LW_USPH;
                        ir[bitsPerByte+6] <= 1'b0;
                end
                else if (ir[bitsPerByte+7]) begin
                        load_what <= isFar ? `LW_PC2316 : `LW_PCH;
                        ir[bitsPerByte+7] <= 1'b0;
                end
                else
                        next_state(IFETCH);
        end

// ----------------------------------------------------------------------------
// Outer Indexing Support
// ----------------------------------------------------------------------------
// OUTER_INDEXING: add the index register value (ndxreg) to radr for every
// recognised index-byte encoding. The two post-increment encodings also bump
// the register selected by the index byte by 1 or 2. Unrecognised encodings
// leave radr unchanged. The casez arms are mutually exclusive, so grouping
// them does not change which arm is taken.
OUTER_INDEXING:
        begin
                if (bitsPerByte==8) begin
                        casez(ndxbyte)
                        // post-increment by one
                        8'b1???0000:
                                begin
                                        radr <= radr + ndxreg;
                                        case(ndxbyte[6:5])
                                        2'b00:  xr <= (xr + 2'd1);
                                        2'b01:  yr <= (yr + 2'd1);
                                        2'b10:  usp <= (usp + 2'd1);
                                        2'b11:  ssp <= (ssp + 2'd1);
                                        endcase
                                end
                        // post-increment by two
                        8'b1???0001:
                                begin
                                        radr <= radr + ndxreg;
                                        case(ndxbyte[6:5])
                                        2'b00:  xr <= (xr + 2'd2);
                                        2'b01:  yr <= (yr + 2'd2);
                                        2'b10:  usp <= (usp + 2'd2);
                                        2'b11:  ssp <= (ssp + 2'd2);
                                        endcase
                                end
                        // plain "add the index register" encodings
                        8'b0???????,
                        8'b1???0010, 8'b1???0011, 8'b1???0100,
                        8'b1???0101, 8'b1???0110,
                        8'b1???1000, 8'b1???1001, 8'b1???1010, 8'b1???1011:
                                radr <= radr + ndxreg;
                        default:
                                radr <= radr;
                        endcase
                end
                else if (bitsPerByte==12) begin
                        casez(ndxbyte)
                        // post-increment by one
                        12'b1????0000000:
                                begin
                                        radr <= radr + ndxreg;
                                        case(ndxbyte[10:9])
                                        2'b00:  xr <= (xr + 2'd1);
                                        2'b01:  yr <= (yr + 2'd1);
                                        2'b10:  usp <= (usp + 2'd1);
                                        2'b11:  ssp <= (ssp + 2'd1);
                                        endcase
                                end
                        // post-increment by two
                        12'b1????0000001:
                                begin
                                        radr <= radr + ndxreg;
                                        case(ndxbyte[10:9])
                                        2'b00:  xr <= (xr + 2'd2);
                                        2'b01:  yr <= (yr + 2'd2);
                                        2'b10:  usp <= (usp + 2'd2);
                                        2'b11:  ssp <= (ssp + 2'd2);
                                        endcase
                                end
                        // plain "add the index register" encodings
                        12'b0???????????,
                        12'b1????0000010, 12'b1????0000011, 12'b1????0000100,
                        12'b1????00?0101, 12'b1????00?0110,
                        12'b1????0001000, 12'b1????0001001, 12'b1????0001010,
                        12'b1????00?1011:
                                radr <= radr + ndxreg;
                        default:
                                radr <= radr;
                        endcase
                end
                next_state(OUTER_INDEXING2);
        end
// OUTER_INDEXING2: publish the computed address as the write address and as
// the result, restore the deferred load selector, and continue according to
// the instruction class (LEA finishes, stores go to STORE1, others to LOAD1).
OUTER_INDEXING2:
        begin
                wadr <= radr;
                res <= radr[`DBLBYTE];
                load_what <= load_what2;
                if (isLEA)
                        next_state(IFETCH);
                else if (isStore)
                        next_state(STORE1);
                else
                        next_state(LOAD1);
        end

// ============================================================================
// Cache Control
// ============================================================================
// ICACHE1: entry point of a cache-line fill. If both tag lookups hit there is
// nothing to do and execution resumes at IFETCH. Otherwise, once tsc is low
// and no acknowledge is pending, a read of a 16-entry line is started: the
// line containing pc when hit0 missed, else the line containing pcp16.
// hit0 is saved in rhit0 so that ICACHE3 can restart the same fill.
ICACHE1:
        begin
                iccnt <= 4'h0;
                outstanding <= 16'h0;
                if (hit0 & hit1)
                        next_state(IFETCH);
                else if (!tsc && !ack_i) begin
                        rhit0 <= hit0;
                        bte_o <= 2'b00;
                        cti_o <= 3'b001;
                        cyc_o <= 1'b1;
                        bl_o <= 6'd15;
                        stb_o <= 1'b1;
                        we_o <= 1'b0;
                        // line address: low four bits forced to zero
                        adr_o <= !hit0 ? {pc[bitsPerByte*3-1:4],4'b00} : {pcp16[bitsPerByte*3-1:4],4'b0000};
                        dat_o <= 12'd0;
                        next_state(ICACHE2);
                end
        end
// If tsc is asserted during an instruction cache fetch, then abort the fetch
// cycle, and wait until tsc deactivates.
// The instruction cache uses asynchronous reading through the network for
// better performance. The read request and the read response are two
// separate things.
//
// ICACHE2: one beat of the line fill. On completion of a beat the strobe is
// dropped, iccnt advances and ICACHE4 issues the next beat. cti_o is changed
// to 3'b111 for the beat after iccnt==14. After the beat with iccnt==15 the
// line address is saved in icwa and the bus is released.
// With SUPPORT_AREAD a beat also completes on rty_i or a bus timeout, and the
// fill continues in ICACHE5 (which checks that all responses have arrived).
// Without it, rty_i aborts the fill like tsc does, and the line is written
// directly in ICACHE6.
ICACHE2:
`ifdef SUPPORT_AREAD
        if (tsc) begin
                wb_nack();
                next_state(ICACHE3);
        end
        else if (ack_i|rty_i|bto) begin
                stb_o <= 1'b0;
                iccnt <= iccnt + 2'd1;
                next_state(ICACHE4);
                if (iccnt==4'b1110)
                        cti_o <= 3'b111;
                // Last beat: the later next_state call overrides ICACHE4.
                if (iccnt==4'b1111) begin
                        icwa <= adr_o;
                        wb_nack();
                        next_state(ICACHE5);
                end
        end
`else
        if (tsc|rty_i) begin
                wb_nack();
                next_state(ICACHE3);
        end
        else if (ack_i) begin
                stb_o <= 1'b0;
                iccnt <= iccnt + 2'd1;
                next_state(ICACHE4);
                if (iccnt==4'b1110)
                        cti_o <= 3'b111;
                // Last beat: the later next_state call overrides ICACHE4.
                if (iccnt==4'b1111) begin
                        icwa <= adr_o;
                        wb_nack();
                        next_state(ICACHE6);
                end
        end
`endif

// ICACHE4: once the previous acknowledge has gone away, present the next
// entry address (iccnt) and raise the strobe again.
ICACHE4:
        if (!ack_i) begin
                adr_o[3:0] <= iccnt;
                stb_o <= 1'b1;
                next_state(ICACHE2);
        end

// ICACHE6: the cache and tag memories are written while in this state (their
// write enables decode state==ICACHE6); then return to ICACHE1 to re-check
// the hit signals.
ICACHE6:
        next_state(ICACHE1);

// The following states to handle outstanding transfers.
// The transfer might retry several times if it has not registered.
`ifdef SUPPORT_AREAD
// ICACHE5: if every entry of the line has been received (all icgot flags
// set) go and write the line; otherwise wait 20 clocks and re-request.
ICACHE5:
        // Line loaded?
        if (icgot == 16'hFFFF)
                next_state(ICACHE6);
        else begin
                waitcnt <= 6'd20;
                next_state(ICACHE7);
        end
// ICACHE7: after the wait expires, re-issue a read for a missing entry.
// The loop runs from 15 down to 0 and uses non-blocking assignments, so the
// last matching iteration wins: the request goes out for the lowest-numbered
// entry still missing. If nothing is missing the state falls back to ICACHE5.
// NOTE(review): outstanding[] is set for every missing entry, not only the
// one actually requested -- confirm this is intended.
ICACHE7:
        if (waitcnt==6'd0) begin
                next_state(ICACHE5);
                adr_o <= icwa;
                for (n4 = 15; n4 >= 0; n4 = n4 - 1)
                        if (~icgot[n4]) begin// & ~outstanding[n4]) begin
                                cti_o <= 3'b001;
                                cyc_o <= `TRUE;
                                stb_o <= `TRUE;
                                adr_o[3:0] <= n4[3:0];
                                outstanding[n4[3:0]] <= 1'b1;
                                next_state(ICACHE9);
                        end
        end
        else
                waitcnt <= waitcnt - 2'd1;
// ICACHE9: wait for the re-issued request to be accepted (ack, retry or bus
// timeout), then release the bus and go back to waiting. A response clears
// its outstanding flag; a bus timeout clears them all.
ICACHE9:
        begin
                if (bto)
                        outstanding <= 16'h0;
                if (aack_i)
                        outstanding[atag_i] <= 1'b0;
                if (ack_i|rty_i|bto) begin
                        wb_nack();
                        waitcnt <= 6'd20;
                        next_state(ICACHE7);
                end
        end
`endif

// Restart a cache load aborted by the TSC signal. A registered version of the
// hit signal must be used as the cache may be partially updated.
// Once tsc is low again the same bus request as in ICACHE1 is issued, with
// the line chosen by rhit0 (saved in ICACHE1) instead of the live hit0.
ICACHE3:
        if (!tsc) begin
                bte_o <= 2'b00;
                cti_o <= 3'b001;
                cyc_o <= 1'b1;
                bl_o <= 6'd15;
                stb_o <= 1'b1;
                we_o <= 1'b0;
                adr_o <= !rhit0 ? {pc[bitsPerByte*3-1:4],4'b00} : {pcp16[bitsPerByte*3-1:4],4'b0000};
                dat_o <= 12'd0;
                next_state(ICACHE2);
        end

`ifdef SUPPORT_IBUF
// Instruction buffer fill (alternative to the cache, only with SUPPORT_IBUF).
// IBUF1 starts a read at the program counter; IBUF2..IBUF6 each capture one
// acknowledged data item into ibuf. tsc or rty_i in any of them aborts the
// bus cycle and restarts from IBUF1. IBUF6 records the buffer address and
// returns to IFETCH.
IBUF1:
        if (!tsc) begin
                bte_o <= 2'b00;
                cti_o <= 3'b001;
                cyc_o <= 1'b1;
                bl_o <= 6'd2;
                stb_o <= 1'b1;
                we_o <= 1'b0;
                adr_o <= pc[`DBLBYTE];
                dat_o <= 12'd0;
                next_state(IBUF2);
        end
IBUF2:
        if (tsc|rty_i) begin
                wb_nack();
                next_state(IBUF1);
        end
        else if (ack_i) begin
                adr_o <= adr_o + 2'd1;
                // NOTE(review): the whole of ibuf is assigned here (not just
                // the low byte); the upper bytes are overwritten by the
                // following states -- confirm this is intended.
                ibuf <= dat_i;
                next_state(IBUF3);
        end
IBUF3:
        if (tsc|rty_i) begin
                wb_nack();
                next_state(IBUF1);
        end
        else if (ack_i) begin
                cti_o <= 3'b111;
                adr_o <= adr_o + 2'd1;
                ibuf[`HIBYTE] <= dat_i;
                next_state(IBUF4);
        end
IBUF4:
        if (tsc|rty_i) begin
                wb_nack();
                next_state(IBUF1);
        end
        else if (ack_i) begin
                // NOTE(review): wb_nack() is called here, yet IBUF5 and IBUF6
                // still wait for ack_i. What wb_nack() does is not visible in
                // this part of the file -- confirm the remaining two items
                // can still be acknowledged.
                wb_nack();
                ibuf[`BYTE3] <= dat_i;
                next_state(IBUF5);
        end
IBUF5:
        if (tsc|rty_i) begin
                wb_nack();
                next_state(IBUF1);
        end
        else if (ack_i) begin
                wb_nack();
                ibuf[`BYTE4] <= dat_i;
                next_state(IBUF6);
        end
IBUF6:
        if (tsc|rty_i) begin
                wb_nack();
                next_state(IBUF1);
        end
        else if (ack_i) begin
                wb_nack();
                ibuf[`BYTE5] <= dat_i;
                ibufadr <= pc;
                next_state(IFETCH);
        end
`endif

endcase
end

// ============================================================================
// ============================================================================
// Supporting Tasks
// ============================================================================
// ============================================================================

// ============================================================================
// IFETCH
//
// Fetch instructions.
// ============================================================================

task tIfetch;
begin
        if (halt_i) begin
                ba_o <= 1'b1;
                bs_o <= 1'b1;
        end
        else begin
                ba_o <= 1'b0;
                bs_o <= 1'b0;
                next_state(DECODE);
                isFar <= `FALSE;
`ifdef EIGHTBIT
                isOuterIndexed <= `FALSE;
`endif
                ipg <= 2'b00;
                ia <= {bitsPerByte*3{1'b0}};
                res <= 24'd0;
                load_what <= `LW_NOTHING;
                store_what <= `SW_NOTHING;
                if (nmi_edge | firq_i | irq_i)
                        sync_state <= `FALSE;
                if (nmi_edge & nmi_armed) begin
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        vect <= `NMI_VECT;
                end
                else if ({nmi_i,firq_i,irq_i} > {im1,firqim,im} && !sync_state && iplMd) begin
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        case({nmi_i,firq_i,irq_i})
                        3'd1:   vect <= `IRQ_VECT;
                        3'd2:   vect <= `FIRQ_VECT;
                        default:        vect <= `DBG_VECT | {nmi_i,firq_i,irq_i,1'b0};
                        endcase
                end
                else if (firq_i & !firqim & !sync_state) begin
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        vect <= `FIRQ_VECT;
                end
                else if (irq_i & !im & !sync_state) begin
                        $display("**************************************");
                        $display("****** Interrupt *********************");
                        $display("**************************************");
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        vect <= `IRQ_VECT;
                end
`ifdef SUPPORT_DEBUG_REG
                // Check for instruction breakpoint hit.
                else if (brkctrl[0].en && brkctrl[0].match_type==BMT_IA && (pc & {{20{1'b1}},brkctrl[0].amask})==brkad[0]) begin
                        brkctrl[0].hit <= 1'b1;
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        vect <= `DBG_VECT;
                end
                else if (brkctrl[1].en && brkctrl[1].match_type==BMT_IA && (pc & {{20{1'b1}},brkctrl[1].amask})==brkad[1]) begin
                        brkctrl[1].hit <= 1'b1;
                        bs_o <= 1'b1;
                        ir[`LOBYTE] <= `INT;
                        ipg <= 2'b11;
                        vect <= `DBG_VECT;
                end
                else if (brkctrl[2].en && brkctrl[2].match_type==BMT_IA && (pc & {{20{1'b1}},brkctrl[2].amask})==brkad[2]) begin
                        brkctrl[2].hit <= 1'b