URL
https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor
Compare Revisions
- This comparison shows the changes necessary to convert path
/thor/trunk
- from Rev 59 to Rev 60
- ↔ Reverse comparison
Rev 59 → Rev 60
/FT64v7/doc/FT64v7.docx
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
FT64v7/doc/FT64v7.docx
Property changes :
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: FT64v7/rtl/common/FT64_AMO_alu.v
===================================================================
--- FT64v7/rtl/common/FT64_AMO_alu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_AMO_alu.v (revision 60)
@@ -0,0 +1,244 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_AMO_alu.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ALU for atomic memory operations (AMO)
+// AMO ops have their own limited ALU since they can't wait on the usual
+// ALU.
+// ============================================================================
+//
+`include "FT64_defines.vh"
+
+// ----------------------------------------------------------------------------
+// FT64_AMO_alu
+//
+// Purely combinational ALU used for atomic memory operations.
+//
+//  instr  instruction word. instr[5:0] is the major opcode, instr[30:26]
+//         selects the AMO operation and instr[23:21] selects the lane size.
+//  a, b   64-bit operands.
+//  res    64-bit result.
+//
+// Lane size encoding (instr[23:21]):
+//  0,4 : eight independent 8-bit lanes
+//  1,5 : four independent 16-bit lanes
+//  2,6 : two independent 32-bit lanes
+//  3,7 : one 64-bit lane
+//
+// SWAP returns b unchanged; AND/OR/XOR are bitwise, so lane size is irrelevant.
+// Shifts are logical, and every lane uses the same shift count taken from the
+// low bits of b (3, 4, 5 or 6 bits for 8/16/32/64-bit lanes).
+// MIN/MAX compare lanes as signed values, MINU/MAXU as unsigned values.
+// Any opcode or operation not listed produces 64'hDEADDEADDEADDEAD.
+//
+// Blocking assignments are used because this is combinational logic.
+// ----------------------------------------------------------------------------
+module FT64_AMO_alu(instr, a, b, res);
+input [31:0] instr;
+input [63:0] a;
+input [63:0] b;
+output reg [63:0] res;
+
+wire [4:0] op = instr[30:26];   // AMO operation selector
+wire [2:0] sz = instr[23:21];   // lane size selector
+
+always @*
+begin : amo_eval
+  integer k;                    // lane index
+  case(instr[5:0])
+  `AMO:
+    case(op)
+    `AMO_SWAP: res = b;
+    `AMO_ADD:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] + b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] + b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] + b[k*32 +: 32];
+      3'd3,3'd7: res = a + b;
+      endcase
+    `AMO_AND: res = a & b;
+    `AMO_OR:  res = a | b;
+    `AMO_XOR: res = a ^ b;
+
+    `AMO_SHL:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] << b[2:0];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] << b[3:0];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] << b[4:0];
+      3'd3,3'd7: res = a << b[5:0];
+      endcase
+
+    `AMO_SHR:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] >> b[2:0];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] >> b[3:0];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] >> b[4:0];
+      3'd3,3'd7: res = a >> b[5:0];
+      endcase
+
+    `AMO_MIN:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = $signed(a[k*8 +: 8]) < $signed(b[k*8 +: 8]) ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = $signed(a[k*16 +: 16]) < $signed(b[k*16 +: 16]) ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = $signed(a[k*32 +: 32]) < $signed(b[k*32 +: 32]) ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = $signed(a) < $signed(b) ? a : b;
+      endcase
+    `AMO_MAX:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = $signed(a[k*8 +: 8]) > $signed(b[k*8 +: 8]) ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = $signed(a[k*16 +: 16]) > $signed(b[k*16 +: 16]) ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = $signed(a[k*32 +: 32]) > $signed(b[k*32 +: 32]) ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = $signed(a) > $signed(b) ? a : b;
+      endcase
+    // Part-selects of a and b are unsigned, so a plain compare is unsigned.
+    `AMO_MINU:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = a[k*8 +: 8] < b[k*8 +: 8] ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = a[k*16 +: 16] < b[k*16 +: 16] ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = a[k*32 +: 32] < b[k*32 +: 32] ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = a < b ? a : b;
+      endcase
+    `AMO_MAXU:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = a[k*8 +: 8] > b[k*8 +: 8] ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = a[k*16 +: 16] > b[k*16 +: 16] ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = a[k*32 +: 32] > b[k*32 +: 32] ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = a > b ? a : b;
+      endcase
+    default: res = 64'hDEADDEADDEADDEAD;
+    endcase
+  `INC: res = a + b;
+  default: res = 64'hDEADDEADDEADDEAD;
+  endcase
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_BMM.v
===================================================================
--- FT64v7/rtl/common/FT64_BMM.v (nonexistent)
+++ FT64v7/rtl/common/FT64_BMM.v (revision 60)
@@ -0,0 +1,85 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_BMM.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+// ----------------------------------------------------------------------------
+// FT64_BMM - bit-matrix multiply
+//
+// Treats a and b as (N+1) x (N+1) bit matrices and forms their product:
+//   op = 0 (MOR)  : o[i][j] = OR  over k of (a[i][k] & b[k][j])
+//   op = 1 (MXOR) : o[i][j] = XOR over k of (a[i][k] & b[k][j])
+//
+// Matrix element [i][j] lives at vector bit (N-i)*(N+1) + (N-j), i.e. row 0
+// occupies the most significant N+1 bits and column 0 is the most
+// significant bit of each row.
+//
+// Parameters:
+//   DBW  width of the operand/result vectors; the product covers the low
+//        (N+1)*(N+1) bits, any bits above that are driven to zero.
+//   N    matrix dimension minus one.
+//
+// Everything is evaluated in one combinational block with block-local loop
+// variables, so no index variable is shared between always blocks.
+// ----------------------------------------------------------------------------
+module FT64_BMM(op,a,b,o);
+parameter DBW=64;
+parameter N=7;
+input op;	// 0 = MOR, 1 = MXOR
+input [DBW-1:0] a;
+input [DBW-1:0] b;
+output reg [DBW-1:0] o;
+
+always @*
+begin : bmm_eval
+  integer i, j, k;   // row, column, inner-product index
+  reg term;          // a[i][k] & b[k][j]
+  reg acc_or;        // running OR of the terms
+  reg acc_xor;       // running XOR of the terms
+  o = {DBW{1'b0}};
+  for (i = 0; i <= N; i = i + 1) begin
+    for (j = 0; j <= N; j = j + 1) begin
+      acc_or = 1'b0;
+      acc_xor = 1'b0;
+      for (k = 0; k <= N; k = k + 1) begin
+        term = a[(N-i)*(N+1)+(N-k)] & b[(N-k)*(N+1)+(N-j)];
+        acc_or = acc_or | term;
+        acc_xor = acc_xor ^ term;
+      end
+      o[(N-i)*(N+1)+(N-j)] = op ? acc_xor : acc_or;
+    end
+  end
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_EvalBranch.v
===================================================================
--- FT64v7/rtl/common/FT64_EvalBranch.v (nonexistent)
+++ FT64v7/rtl/common/FT64_EvalBranch.v (revision 60)
@@ -0,0 +1,79 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_EvalBranch.v
+// - FT64 branch evaluation
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`define TRUE 1'b1
+// Major opcodes (instr[5:0]) handled by the branch evaluator
+`define BBc 6'h26
+`define Bcc 6'h30
+`define BEQI 6'h32
+`define BCHK 6'h33
+`define CHK 6'h34
+
+// Bcc condition codes (instr[15:13])
+`define BEQ 3'h0
+`define BNE 3'h1
+`define BLT 3'h2
+`define BGE 3'h3
+`define BLTU 3'h6
+`define BGEU 3'h7
+
+// BBc sub-operations (instr[14:13]); 0 and 1 are bit-set / bit-clear tests
+`define IBNE 2'd2
+`define DBNZ 2'd3
+
+// ----------------------------------------------------------------------------
+// FT64_EvalBranch
+//
+// Combinational branch-condition evaluator.
+//
+//  instr    instruction word; instr[5:0] is the opcode.
+//  a, b, c  operand values.
+//  takb     1 when the branch condition holds.
+//
+// Conditions:
+//  Bcc      compare a with b as selected by instr[15:13]
+//           (EQ, NE, signed LT/GE, unsigned LTU/GEU).
+//  BEQI     a equals the 8-bit immediate {instr[22:18],instr[15:13]},
+//           sign-extended to WID bits.
+//  BBc      instr[14:13] = 0: bit {instr[22:18],instr[15]} of a is set
+//                          1: that bit of a is clear
+//                          IBNE: (a + 1) != b
+//                          DBNZ: (a - 1) != b
+//  CHK/BCHK b <= a < c (unsigned).
+// Any other opcode or condition code evaluates as taken.
+//
+// WID must be greater than 8 (the BEQI immediate is 8 bits wide).
+// NOTE(review): FT64_FCU_Calc selects IBNE/DBNZ with instr[20:19] while this
+// module uses instr[14:13] - confirm which field is correct.
+// ----------------------------------------------------------------------------
+module FT64_EvalBranch(instr, a, b, c, takb);
+parameter WID=64;
+input [47:0] instr;
+input [WID-1:0] a;
+input [WID-1:0] b;
+input [WID-1:0] c;
+output reg takb;
+
+wire [5:0] opcode = instr[5:0];
+// BEQI immediate, sign-extended to the operand width
+wire [WID-1:0] beqi_imm = {{(WID-8){instr[22]}},instr[22:18],instr[15:13]};
+// Bit number tested by the bit-set / bit-clear branches
+wire [5:0] bitno = {instr[22:18],instr[15]};
+// Constant one at operand width, so increment/decrement wrap at WID bits
+wire [WID-1:0] one = {{(WID-1){1'b0}},1'b1};
+
+//Evaluate branch condition
+always @*
+case(opcode)
+`Bcc:
+  case(instr[15:13])
+  `BEQ:  takb = a==b;
+  `BNE:  takb = a!=b;
+  `BLT:  takb = $signed(a) < $signed(b);
+  `BGE:  takb = $signed(a) >= $signed(b);
+  `BLTU: takb = a < b;
+  `BGEU: takb = a >= b;
+  default: takb = `TRUE;
+  endcase
+`BEQI: takb = a==beqi_imm;
+`BBc:
+  case(instr[14:13])
+  2'd0:  takb = a[bitno];   // BBS
+  2'd1:  takb = ~a[bitno];  // BBC
+  `IBNE: takb = (a + one) != b;
+  `DBNZ: takb = (a - one) != b;
+  default: takb = `TRUE;
+  endcase
+`CHK,`BCHK: takb = a >= b && a < c;
+default: takb = `TRUE;
+endcase
+
+endmodule
Index: FT64v7/rtl/common/FT64_FCU_Calc.v
===================================================================
--- FT64v7/rtl/common/FT64_FCU_Calc.v (nonexistent)
+++ FT64v7/rtl/common/FT64_FCU_Calc.v (revision 60)
@@ -0,0 +1,66 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_FCU_Calc.v
+// - FT64 flow control calcs
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include "FT64_defines.vh"
+
+// ----------------------------------------------------------------------------
+// FT64_FCU_Calc
+//
+// Combinational calculation of the value placed on the result bus by flow
+// control instructions.
+//
+//  ol       operating level (compared against `OL_USER for REX).
+//  instr    instruction word.
+//  tvec     value returned by REX when the interrupt-mask test passes.
+//  a        operand value.
+//  pc       not used by this module.
+//  nextpc   value returned by JAL and CALL, and by REX when the
+//           interrupt-mask test fails.
+//  im       interrupt mask used by the REX test.
+//  waitctr  counter examined by WAIT.
+//  bus      result.
+//
+// Result by opcode:
+//  BRK   a[7:0] when instr[16] is set, otherwise instr[15:8]; zero-extended.
+//  BBc   a + 1 for IBNE, a - 1 for DBNZ (selected by instr[20:19]).
+//  JAL   nextpc.
+//  CALL  nextpc.
+//  RET   a plus an immediate whose three low bits are zero; the immediate is
+//        instr[47:23] when instr[7:6] == 2'b01, otherwise instr[31:23].
+//  REX   see the case below.
+//  WAIT  1 when waitctr equals 1, otherwise 0.
+// Everything else yields 64'hCCCCCCCCCCCCCCCC.
+//
+// All assignments are blocking: this is combinational logic, and blocking
+// and non-blocking assignments to the same variable must not be mixed.
+// NOTE(review): FT64_EvalBranch decodes IBNE/DBNZ from instr[14:13] while
+// this module uses instr[20:19] - confirm which field is correct.
+// ----------------------------------------------------------------------------
+module FT64_FCU_Calc(ol, instr, tvec, a, pc, nextpc, im, waitctr, bus);
+parameter WID = 64;
+parameter AMSB = 31;
+input [1:0] ol;
+input [47:0] instr;
+input [WID-1:0] tvec;
+input [WID-1:0] a;
+input [AMSB:0] pc;
+input [AMSB:0] nextpc;
+input [3:0] im;
+input [WID-1:0] waitctr;
+output reg [WID-1:0] bus;
+
+always @*
+begin
+  casez(instr[`INSTRUCTION_OP])
+  `BRK: bus = instr[16] ? {56'd0,a[7:0]} : {56'b0,instr[15:8]};
+  `BBc:
+    case(instr[20:19])
+    `IBNE: bus = a + 64'd1;
+    `DBNZ: bus = a - 64'd1;
+    default: bus = 64'hCCCCCCCCCCCCCCCC;
+    endcase
+  `JAL: bus = nextpc;
+  `CALL: bus = nextpc;
+  `RET: bus = a + (instr[7:6]==2'b01 ? {instr[47:23],3'b0} : {instr[31:23],3'b0});
+  `REX:
+    case(ol)
+    `OL_USER: bus = 64'hCCCCCCCCCCCCCCCC;
+    // ToDo: fix im test
+    default: bus = (im < ~{ol,2'b00}) ? tvec : nextpc;
+    endcase
+  `WAIT: bus = waitctr==64'd1;
+  default: bus = 64'hCCCCCCCCCCCCCCCC;
+  endcase
+end
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_InsLength.v
===================================================================
--- FT64v7/rtl/common/FT64_InsLength.v (nonexistent)
+++ FT64v7/rtl/common/FT64_InsLength.v (revision 60)
@@ -0,0 +1,48 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_InsLength.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// Computes the length of an instruction.
+// There are also other places in code where the length is determined
+// without the use of this module.
+// ============================================================================
+//
+`include "FT64_config.vh"
+`include "FT64_defines.vh"
+
+// ----------------------------------------------------------------------------
+// FT64_InsLength
+//
+// Combinational instruction-length decoder.
+//
+//  ins      instruction word; ins[7:6] encodes the length class.
+//  len      decoded length (presumably in bytes - confirm against callers).
+//  pred_on  OR-ed into bit 0 of the length; the base lengths are all even,
+//           so this adds one when set.
+//
+// Length class (ins[7:6]): 0 -> 4, 1 -> 6, anything else -> 2.
+// When SUPPORT_DCI is defined, an instruction whose opcode is `CMPRSSD is
+// always length 2 regardless of ins[7:6].
+//
+// Blocking assignments are used because this is combinational logic.
+// ----------------------------------------------------------------------------
+module FT64_InsLength(ins, len, pred_on);
+input [47:0] ins;
+output reg [2:0] len;
+input pred_on;
+
+always @*
+begin
+`ifdef SUPPORT_DCI
+  if (ins[`INSTRUCTION_OP]==`CMPRSSD)
+    len = 3'd2 | pred_on;
+  else
+`endif
+  case(ins[7:6])
+  2'd0: len = 3'd4 | pred_on;
+  2'd1: len = 3'd6 | pred_on;
+  default: len = 3'd2 | pred_on;
+  endcase
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_RMW_alu.v
===================================================================
--- FT64v7/rtl/common/FT64_RMW_alu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_RMW_alu.v (revision 60)
@@ -0,0 +1,282 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_RMW_alu.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ALU for atomic memory operations (AMO)
+// AMO ops have their own limited ALU since they can't wait on the usual
+// ALU.
+// ============================================================================
+//
+`include "FT64_defines.vh"
+
+// ----------------------------------------------------------------------------
+// FT64_RMW_alu
+//
+// Purely combinational ALU for read-modify-write operations.
+//
+//  instr    instruction word. instr[5:0] is the major opcode, instr[30:26]
+//           selects the AMO operation, instr[31:26] the R2 function and
+//           instr[23:21] the lane size.
+//  a, b     64-bit operands.
+//  c        not used by this module.
+//  res      64-bit result.
+//
+// Lane size encoding (instr[23:21]):
+//  0,4 : eight independent 8-bit lanes
+//  1,5 : four independent 16-bit lanes
+//  2,6 : two independent 32-bit lanes
+//  3,7 : one 64-bit lane
+//
+// SWAP returns b unchanged; AND/OR/XOR are bitwise, so lane size is irrelevant.
+// Shifts are logical, and every lane uses the same shift count taken from the
+// low bits of b (3, 4, 5 or 6 bits for 8/16/32/64-bit lanes).
+// MIN/MAX compare lanes as signed values, MINU/MAXU as unsigned values.
+// R2 with function `INC, and a major opcode of `INC, both return a + b.
+// Any opcode or operation not listed produces 64'hDEADDEADDEADDEAD.
+//
+// The former "res[64] <= a[64]" statements in the 64-bit shift arms were
+// removed: both vectors are [63:0], so bit 64 does not exist.
+// Blocking assignments are used because this is combinational logic.
+// ----------------------------------------------------------------------------
+module FT64_RMW_alu(instr, a, b, c, res);
+input [47:0] instr;
+input [63:0] a;
+input [63:0] b;
+input [63:0] c;
+output reg [63:0] res;
+
+wire [4:0] op = instr[30:26];   // AMO operation selector
+wire [2:0] sz = instr[23:21];   // lane size selector
+
+always @*
+begin : rmw_eval
+  integer k;                    // lane index
+  case(instr[5:0])
+  `R2:
+    case(instr[31:26])
+    `INC: res = a + b;
+    default: res = 64'hDEADDEADDEADDEAD;
+    endcase
+  `AMO:
+    case(op)
+    `AMO_SWAP: res = b;
+    `AMO_ADD:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] + b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] + b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] + b[k*32 +: 32];
+      3'd3,3'd7: res = a + b;
+      endcase
+    `AMO_AND: res = a & b;
+    `AMO_OR:  res = a | b;
+    `AMO_XOR: res = a ^ b;
+    `AMO_SHL:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] << b[2:0];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] << b[3:0];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] << b[4:0];
+      3'd3,3'd7: res = a << b[5:0];
+      endcase
+    `AMO_SHR:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1) res[k*8 +: 8] = a[k*8 +: 8] >> b[2:0];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1) res[k*16 +: 16] = a[k*16 +: 16] >> b[3:0];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1) res[k*32 +: 32] = a[k*32 +: 32] >> b[4:0];
+      3'd3,3'd7: res = a >> b[5:0];
+      endcase
+    `AMO_MIN:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = $signed(a[k*8 +: 8]) < $signed(b[k*8 +: 8]) ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = $signed(a[k*16 +: 16]) < $signed(b[k*16 +: 16]) ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = $signed(a[k*32 +: 32]) < $signed(b[k*32 +: 32]) ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = $signed(a) < $signed(b) ? a : b;
+      endcase
+    `AMO_MAX:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = $signed(a[k*8 +: 8]) > $signed(b[k*8 +: 8]) ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = $signed(a[k*16 +: 16]) > $signed(b[k*16 +: 16]) ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = $signed(a[k*32 +: 32]) > $signed(b[k*32 +: 32]) ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = $signed(a) > $signed(b) ? a : b;
+      endcase
+    // Part-selects of a and b are unsigned, so a plain compare is unsigned.
+    `AMO_MINU:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = a[k*8 +: 8] < b[k*8 +: 8] ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = a[k*16 +: 16] < b[k*16 +: 16] ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = a[k*32 +: 32] < b[k*32 +: 32] ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = a < b ? a : b;
+      endcase
+    `AMO_MAXU:
+      case(sz)
+      3'd0,3'd4: for (k = 0; k < 8; k = k + 1)
+          res[k*8 +: 8] = a[k*8 +: 8] > b[k*8 +: 8] ? a[k*8 +: 8] : b[k*8 +: 8];
+      3'd1,3'd5: for (k = 0; k < 4; k = k + 1)
+          res[k*16 +: 16] = a[k*16 +: 16] > b[k*16 +: 16] ? a[k*16 +: 16] : b[k*16 +: 16];
+      3'd2,3'd6: for (k = 0; k < 2; k = k + 1)
+          res[k*32 +: 32] = a[k*32 +: 32] > b[k*32 +: 32] ? a[k*32 +: 32] : b[k*32 +: 32];
+      3'd3,3'd7: res = a > b ? a : b;
+      endcase
+    default: res = 64'hDEADDEADDEADDEAD;
+    endcase
+  `INC: res = a + b;
+  default: res = 64'hDEADDEADDEADDEAD;
+  endcase
+end
+endmodule
Index: FT64v7/rtl/common/FT64_RSB.v
===================================================================
--- FT64v7/rtl/common/FT64_RSB.v (nonexistent)
+++ FT64v7/rtl/common/FT64_RSB.v (revision 60)
@@ -0,0 +1,166 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_RSB.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include "FT64_defines.vh"
+
+// Return address stack predictor is updated during the fetch stage on the
+// assumption that previous flow controls (branches) predicted correctly.
+// Otherwise many small routines wouldn't predict the return address
+// correctly because they hit the RET before the CALL reaches the
+// commit stage.
+
+// ----------------------------------------------------------------------------
+// FT64_RSB - return address stack predictor
+//
+//  rst, clk          synchronous reset and clock.
+//  regLR             register number treated as the link register.
+//  queued1, queued2  queue indications from the fetch stage.
+//  fetchbufN_v       fetch buffer N holds a valid instruction.
+//  fetchbufN_pc      address of the instruction in fetch buffer N.
+//  fetchbufN_instr   instruction in fetch buffer N.
+//  stompedRets,
+//  stompedRet        currently unused (their handling is commented out below).
+//  pc                predicted return address: the entry at the stack pointer.
+//
+// The stack holds DEPTH entries and grows downward: a call decrements rasp
+// and stores the address following the call; a return increments rasp.
+// On reset every entry is loaded with RSTPC and rasp is cleared.
+//
+// The last instruction seen in each fetch buffer is remembered so that an
+// instruction held in the buffer for several cycles updates the stack once.
+//
+// NOTE(review): rasp is a fixed 4-bit register, which matches DEPTH = 16
+// only - confirm before changing DEPTH.
+// NOTE(review): the return address is pc + 6 when instr[6] is set, else
+// pc + 4; FT64_InsLength decodes the length from ins[7:6] - confirm that
+// calls never use the other encodings.
+// ----------------------------------------------------------------------------
+module FT64_RSB(rst, clk, regLR, queued1, queued2,
+  fetchbuf0_v, fetchbuf0_pc, fetchbuf0_instr,
+  fetchbuf1_v, fetchbuf1_pc, fetchbuf1_instr,
+  stompedRets, stompedRet,
+  pc
+);
+parameter AMSB = 31;
+parameter DEPTH = 16;
+input rst;
+input clk;
+input [4:0] regLR;
+input queued1;
+input queued2;
+input fetchbuf0_v;
+input [47:0] fetchbuf0_instr;
+input [AMSB:0] fetchbuf0_pc;
+input fetchbuf1_v;
+input [47:0] fetchbuf1_instr;
+input [AMSB:0] fetchbuf1_pc;
+input [3:0] stompedRets;
+input stompedRet;
+output [AMSB:0] pc;
+
+parameter RSTPC = 32'hFFFC0100;
+integer n;
+reg [AMSB:0] ras [0:DEPTH-1];
+reg [3:0] rasp;
+assign pc = ras[rasp];
+reg [47:0] lasti0, lasti1;
+
+// Apply the return-stack effect of one fetched instruction located at ipc.
+// Calls push the address of the following instruction; returns pop.
+task ras_update;
+input [47:0] ins;
+input [AMSB:0] ipc;
+begin
+  case(ins[`INSTRUCTION_OP])
+  `JAL:
+    // JAL LR,xxxx assume call
+    if (ins[`INSTRUCTION_RB]==regLR) begin
+      ras[((rasp-6'd1)&(DEPTH-1))] <= ipc + (ins[6] ? 32'd6 : 32'd4);
+      rasp <= rasp - 4'd1;
+    end
+    // JAL r0,[LR] assume a ret
+    else if (ins[`INSTRUCTION_RB]==5'd00 &&
+             ins[`INSTRUCTION_RA]==regLR) begin
+      rasp <= rasp + 4'd1;
+    end
+  `CALL:
+    begin
+      ras[((rasp-6'd1)&(DEPTH-1))] <= ipc + (ins[6] ? 32'd6 : 32'd4);
+      rasp <= rasp - 4'd1;
+    end
+  `RET:
+    begin
+      $display("RSP: Added 1");
+      rasp <= rasp + 4'd1;
+    end
+  default: ;
+  endcase
+end
+endtask
+
+always @(posedge clk)
+if (rst) begin
+  lasti0 <= `NOP_INSN;
+  lasti1 <= `NOP_INSN;
+  for (n = 0; n < DEPTH; n = n + 1)
+    ras[n] <= RSTPC;
+  rasp <= 4'd0;
+end
+else begin
+  // Each branch is wrapped in begin/end so that the "instruction changed"
+  // test and the stack update only run when that branch's valid/queued
+  // condition holds. Without the begin/end the tests ran unconditionally.
+  if (fetchbuf0_v && fetchbuf1_v && (queued1 || queued2)) begin
+    // Make sure the instruction changed between clock cycles.
+    lasti0 <= fetchbuf0_instr;
+    lasti1 <= fetchbuf1_instr;
+    // NOTE(review): only fetchbuf0 is examined here even when both buffers
+    // are valid - confirm this is intended.
+    if (fetchbuf0_instr != lasti0 || fetchbuf1_instr != lasti1)
+      ras_update(fetchbuf0_instr, fetchbuf0_pc);
+  end
+  else if (fetchbuf1_v && queued1) begin
+    lasti1 <= fetchbuf1_instr;
+    if (fetchbuf1_instr != lasti1)
+      ras_update(fetchbuf1_instr, fetchbuf1_pc);
+  end
+  else if (fetchbuf0_v && queued1) begin
+    lasti0 <= fetchbuf0_instr;
+    if (lasti0 != fetchbuf0_instr)
+      ras_update(fetchbuf0_instr, fetchbuf0_pc);
+  end
+/*
+  if (stompedRets > 4'd0) begin
+    $display("Stomped Rets: %d", stompedRets);
+    rasp <= rasp - stompedRets;
+  end
+  else if (stompedRet) begin
+    $display("Stomped Ret");
+    rasp <= rasp - 5'd1;
+  end
+*/
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_alu.v
===================================================================
--- FT64v7/rtl/common/FT64_alu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_alu.v (revision 60)
@@ -0,0 +1,1875 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_alu.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include "FT64_defines.vh"
+`include "FT64_config.vh"
+
+module FT64_alu(rst, clk, ld, abort, instr, sz, tlb, store, a, b, c, pc, Ra, tgt, tgt2, ven, vm,
+ csr, o, ob, done, idle, excen, exc, thrd, ptrmask, state, mem, shift,
+ ol, dl, ASID, icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o, uncached, tlb_miss,
+ exv_o, rdv_o, wrv_o
+`ifdef SUPPORT_SEGMENTATION
+ , zs_base, ds_base, es_base, fs_base, gs_base, hs_base, ss_base, cs_base,
+ zsub, dsub, esub, fsub, gsub, hsub, ssub, csub,
+ zslb, dslb, eslb, fslb, gslb, hslb, sslb, cslb
+`endif
+`ifdef SUPPORT_BBMS
+ , pb, cbl, cbu, ro, dbl, dbu, sbl, sbu, en
+`endif
+ );
+parameter DBW = 64;
+parameter ABW = 64;
+parameter BIG = 1'b1;
+parameter SUP_VECTOR = 1;
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+parameter PTR = 20'hFFF01;
+input rst;
+input clk;
+input ld;
+input abort;
+input [47:0] instr;
+input [2:0] sz;
+input tlb;
+input store;
+input [63:0] a;
+input [63:0] b;
+input [63:0] c;
+input [31:0] pc;
+input [11:0] Ra;
+input [11:0] tgt;
+input [7:0] tgt2;
+input [5:0] ven;
+input [15:0] vm;
+input [63:0] csr;
+output reg [63:0] o;
+output reg [63:0] ob;
+output reg done;
+output reg idle;
+input [4:0] excen;
+output reg [8:0] exc;
+input thrd;
+input [63:0] ptrmask;
+input [1:0] state;
+input mem;
+input shift;
+input [1:0] ol;
+input [1:0] dl;
+input [7:0] ASID;
+input icl_i;
+input cyc_i;
+input we_i;
+input [ABW-1:0] vadr_i;
+output cyc_o;
+output we_o;
+output [ABW-1:0] padr_o;
+output uncached;
+output tlb_miss;
+output wrv_o;
+output rdv_o;
+output exv_o;
+`ifdef SUPPORT_SEGMENTATION
+input [63:0] zs_base;
+input [63:0] ds_base;
+input [63:0] es_base;
+input [63:0] fs_base;
+input [63:0] gs_base;
+input [63:0] hs_base;
+input [63:0] ss_base;
+input [63:0] cs_base;
+input [63:0] zslb;
+input [63:0] dslb;
+input [63:0] eslb;
+input [63:0] fslb;
+input [63:0] gslb;
+input [63:0] hslb;
+input [63:0] sslb;
+input [63:0] cslb;
+input [63:0] zsub;
+input [63:0] dsub;
+input [63:0] esub;
+input [63:0] fsub;
+input [63:0] gsub;
+input [63:0] hsub;
+input [63:0] ssub;
+input [63:0] csub;
+`endif
+`ifdef SUPPORT_BBMS
+input [63:0] pb;
+input [63:0] cbl;
+input [63:0] cbu;
+input [63:0] ro;
+input [63:0] dbl;
+input [63:0] dbu;
+input [63:0] sbl;
+input [63:0] sbu;
+input [63:0] en;
+`endif
+
+parameter byt = 3'd0;
+parameter char = 3'd1;
+parameter half = 3'd2;
+parameter word = 3'd3;
+parameter byt_para = 3'd4;
+parameter char_para = 3'd5;
+parameter half_para = 3'd6;
+parameter word_para = 3'd7;
+
+integer n;
+
+reg adrDone, adrIdle;
+reg [63:0] usa; // unsegmented address
+`ifdef SUPPORT_SEGMENTATION
+reg [63:0] pb;
+reg [63:0] ub;
+reg [63:0] lb;
+// Segment selection for the address currently being formed.
+// usa[63:61] (the top three bits of the unsegmented address) picks one of
+// the eight segment register sets (zs, ds, es, fs, gs, hs, ss, cs).
+//   pb <- selected *_base input
+//   ub <- selected *sub input  (presumably the segment upper bound - confirm)
+//   lb <- selected *slb input  (presumably the segment lower bound - confirm)
+// The logic is purely combinational, so blocking assignments are used, and a
+// single case statement drives all three outputs since they share a selector.
+always @*
+begin
+    case(usa[63:61])
+    3'd0: begin pb = zs_base; ub = zsub; lb = zslb; end
+    3'd1: begin pb = ds_base; ub = dsub; lb = dslb; end
+    3'd2: begin pb = es_base; ub = esub; lb = eslb; end
+    3'd3: begin pb = fs_base; ub = fsub; lb = fslb; end
+    3'd4: begin pb = gs_base; ub = gsub; lb = gslb; end
+    3'd5: begin pb = hs_base; ub = hsub; lb = hslb; end
+    3'd6: begin pb = ss_base; ub = ssub; lb = sslb; end
+    3'd7: begin pb = cs_base; ub = csub; lb = cslb; end
+    endcase
+end
+`else
+`ifndef SUPPORT_BBMS
+reg [63:0] pb = 64'h0;
+`endif
+`endif
+reg [63:0] addro;
+reg [63:0] adr; // load / store address
+reg [63:0] shift8;
+
+wire [7:0] a8 = a[7:0];
+wire [15:0] a16 = a[15:0];
+wire [31:0] a32 = a[31:0];
+wire [7:0] b8 = b[7:0];
+wire [15:0] b16 = b[15:0];
+wire [31:0] b32 = b[31:0];
+wire [63:0] orb = instr[6] ? {34'd0,b[29:0]} : {50'd0,b[13:0]};
+wire [63:0] andb = b;//((instr[6]==1'b1) ? {34'h3FFFFFFFF,b[29:0]} : {50'h3FFFFFFFFFFFF,b[13:0]});
+
+wire [21:0] qimm = instr[39:18];
+wire [63:0] imm = {{45{instr[39]}},instr[39:21]};
+wire [DBW-1:0] divq, rem;
+wire divByZero;
+wire [15:0] prod80, prod81, prod82, prod83, prod84, prod85, prod86, prod87;
+wire [31:0] prod160, prod161, prod162, prod163;
+wire [63:0] prod320, prod321;
+wire [DBW*2-1:0] prod;
+wire mult_done8, mult_idle8, div_done8, div_idle8;
+wire mult_done80, mult_idle80, div_done80, div_idle80;
+wire mult_done81, mult_idle81, div_done81, div_idle81;
+wire mult_done82, mult_idle82, div_done82, div_idle82;
+wire mult_done83, mult_idle83, div_done83, div_idle83;
+wire mult_done84, mult_idle84, div_done84, div_idle84;
+wire mult_done85, mult_idle85, div_done85, div_idle85;
+wire mult_done86, mult_idle86, div_done86, div_idle86;
+wire mult_done87, mult_idle87, div_done87, div_idle87;
+wire mult_done16, mult_idle16, div_done16, div_idle16;
+wire mult_done160, mult_idle160, div_done160, div_idle160;
+wire mult_done161, mult_idle161, div_done161, div_idle161;
+wire mult_done162, mult_idle162, div_done162, div_idle162;
+wire mult_done163, mult_idle163, div_done163, div_idle163;
+wire mult_done320, mult_idle320, div_done320, div_idle320;
+wire mult_done321, mult_idle321, div_done321, div_idle321;
+wire mult_done, mult_idle, div_done, div_idle;
+wire aslo;
+wire [6:0] clzo,cloo,cpopo;
+wire [63:0] shftho;
+reg [63:0] shift9;
+
+// IsLoad: returns TRUE when instruction word 'isn' is one of the load opcodes.
+//   - MEMNDX (indexed memory ops): only when the L2 field is 2'b00, and then
+//     the result is the inverse of bit 31 (bit 31 clear selects the load group
+//     in the address-generation case statement of this module).
+//   - LB, LBU, Lx, LxU, LWR, LV, LVx, LVxU: always TRUE.
+//   - anything else: FALSE.
+function IsLoad;
+input [47:0] isn;
+begin
+    case(isn[`INSTRUCTION_OP])
+    `MEMNDX:
+        begin
+            if (isn[`INSTRUCTION_L2]==2'b00)
+                IsLoad = !isn[31];
+            else
+                IsLoad = FALSE;
+        end
+    `LB,`LBU,`Lx,`LxU,`LWR,`LV,`LVx,`LVxU:
+        IsLoad = TRUE;
+    default:
+        IsLoad = FALSE;
+    endcase
+end
+endfunction
+
+// IsMul: returns TRUE when 'isn' is a multiply that the multiplier instances
+// in this module should start on (it gates their ld inputs).
+// Recognised forms: IVECTOR VMUL/VMULS, the R2 multiplies
+// (MULU, MULSU, MUL, MULUH, MULSUH, MULH, FXMUL) and the immediate
+// forms MULUI / MULI. Everything else yields FALSE.
+function IsMul;
+input [47:0] isn;
+begin
+    IsMul = FALSE;                      // result unless a multiply is matched below
+    case(isn[`INSTRUCTION_OP])
+    `IVECTOR:
+        case(isn[`INSTRUCTION_S2])
+        `VMUL,`VMULS: IsMul = TRUE;
+        default: ;
+        endcase
+    `R2:
+        case(isn[`INSTRUCTION_S2])
+        `MULU,`MULSU,`MUL,`MULUH,`MULSUH,`MULH,`FXMUL: IsMul = TRUE;
+        default: ;
+        endcase
+    `MULUI,`MULI:
+        IsMul = TRUE;
+    default: ;
+    endcase
+end
+endfunction
+
+// IsDivmod: returns TRUE when 'isn' is a divide or modulo instruction; it
+// gates the ld input of the divider instance in this module.
+// Recognised forms: IVECTOR VDIV/VDIVS, R2 DIVU/DIVSU/DIV/MODU/MODSU/MOD and
+// the immediate forms DIVUI / DIVI / MODI. Everything else yields FALSE.
+function IsDivmod;
+input [47:0] isn;
+begin
+    IsDivmod = FALSE;                   // result unless a divide/modulo is matched below
+    case(isn[`INSTRUCTION_OP])
+    `IVECTOR:
+        case(isn[`INSTRUCTION_S2])
+        `VDIV,`VDIVS: IsDivmod = TRUE;
+        default: ;
+        endcase
+    `R2:
+        case(isn[`INSTRUCTION_S2])
+        `DIVU,`DIVSU,`DIV,`MODU,`MODSU,`MOD: IsDivmod = TRUE;
+        default: ;
+        endcase
+    `DIVUI,`DIVI,`MODI:
+        IsDivmod = TRUE;
+    default: ;
+    endcase
+end
+endfunction
+
+// IsSgn: value driven onto the 'sgn' input of the multiplier and divider
+// instances in this module.
+// TRUE for IVECTOR VMUL/VMULS/VDIV/VDIVS, for R2 MUL/DIV/MOD/MULH/FXMUL and
+// for the immediate forms MULI/DIVI/MODI; FALSE for every other instruction.
+function IsSgn;
+input [47:0] isn;
+begin
+    IsSgn = FALSE;                      // result unless one of the listed ops is matched
+    case(isn[`INSTRUCTION_OP])
+    `IVECTOR:
+        case(isn[`INSTRUCTION_S2])
+        `VMUL,`VMULS,`VDIV,`VDIVS: IsSgn = TRUE;
+        default: ;
+        endcase
+    `R2:
+        case(isn[`INSTRUCTION_S2])
+        `MUL,`DIV,`MOD,`MULH,`FXMUL: IsSgn = TRUE;
+        default: ;
+        endcase
+    `MULI,`DIVI,`MODI:
+        IsSgn = TRUE;
+    default: ;
+    endcase
+end
+endfunction
+
+// IsSgnus: value driven onto the 'sgnus' input of the multiplier and divider
+// instances in this module (presumably "signed by unsigned" - confirm against
+// FT64_multiplier / FT64_divider).
+// TRUE only for the R2 operations MULSU, MULSUH, DIVSU and MODSU.
+function IsSgnus;
+input [47:0] isn;
+begin
+    IsSgnus = FALSE;                    // result unless one of the listed ops is matched
+    case(isn[`INSTRUCTION_OP])
+    `R2:
+        case(isn[`INSTRUCTION_S2])
+        `MULSU,`MULSUH,`DIVSU,`MODSU: IsSgnus = TRUE;
+        default: ;
+        endcase
+    default: ;
+    endcase
+end
+endfunction
+
+// IsShiftAndOp: placeholder decode for combined shift-and-operate
+// instructions. It ignores 'isn' and always returns FALSE, so any start
+// condition that is ANDed with it (e.g. ld & IsShiftAndOp(instr) & BIG)
+// can never be true.
+function IsShiftAndOp;
+input [47:0] isn;
+IsShiftAndOp = FALSE;
+endfunction
+
+wire [63:0] bfout,shfto;
+wire [63:0] shftob;
+wire [63:0] shftco;
+
+// Pipeline register: shift9 is shift8 (the combinational shift result
+// selected in the main ALU case statement) delayed by one clock edge.
+always @(posedge clk)
+ shift9 <= shift8;
+
+wire tlb_done, tlb_idle;
+wire [DBW-1:0] tlbo;
+
+// Address path between vadr_i and padr_o.
+// With SUPPORT_TLB defined an FT64_TLB instance sits in that path; it is
+// started by (ld & tlb), takes its op / register number from
+// instr[25:22] / instr[21:18], receives operand 'a' as write data and returns
+// read data on tlbo (presumably it also performs the virtual-to-physical
+// translation - confirm against FT64_TLB).
+// Without SUPPORT_TLB the bus signals are passed straight through
+// (padr_o = vadr_i, cyc_o = cyc_i, we_o = we_i), the miss / uncached /
+// rdv / wrv / exv outputs are tied to 0 and tlbo reads as a 64'hDEAD...
+// marker pattern. Note tlb_done and tlb_idle are not driven in that branch.
+`ifdef SUPPORT_TLB
+FT64_TLB utlb1 (
+ .rst(rst),
+ .clk(clk),
+ .ld(ld & tlb),
+ .done(tlb_done),
+ .idle(tlb_idle),
+ .ol(ol),
+ .ASID(ASID),
+ .op(instr[25:22]),
+ .regno(instr[21:18]),
+ .dati(a),
+ .dato(tlbo),
+ .uncached(uncached),
+ .icl_i(icl_i),
+ .cyc_i(cyc_i),
+ .we_i(we_i),
+ .vadr_i(vadr_i),
+ .cyc_o(cyc_o),
+ .we_o(we_o),
+ .padr_o(padr_o),
+ .TLBMiss(tlb_miss),
+ .wrv_o(wrv_o),
+ .rdv_o(rdv_o),
+ .exv_o(exv_o),
+ .HTLBVirtPageo()
+);
+`else
+assign tlbo = 64'hDEADDEADDEADDEAD;
+assign uncached = 1'b0;
+assign padr_o = vadr_i;
+assign cyc_o = cyc_i;
+assign we_o = we_i;
+assign tlb_miss = 1'b0;
+assign wrv_o = 1'b0;
+assign rdv_o = 1'b0;
+assign exv_o = 1'b0;
+`endif
+
+FT64_bitfield #(DBW) ubf1
+(
+ .inst(instr),
+ .a(a),
+ .b(b),
+ .c(c),
+ .o(bfout),
+ .masko()
+);
+
+// Full-width (DBW-bit) multiplier.
+// Started when ld is asserted for an instruction accepted by IsMul() whose
+// operand size is word or word_para. 'prod' is declared 2*DBW bits wide;
+// mult_done / mult_idle report the unit's status.
+FT64_multiplier #(DBW) umult1
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr)&& (sz==word || sz==word_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a),
+ .b(b),
+ .o(prod),
+ .done(mult_done),
+ .idle(mult_idle)
+);
+
+FT64_multiplier #(32) umulth0
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==half || sz==half_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[31:0]),
+ .b(b[31:0]),
+ .o(prod320),
+ .done(mult_done320),
+ .idle(mult_idle320)
+);
+
+FT64_multiplier #(16) umultc0
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==char || sz==char_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[15:0]),
+ .b(b[15:0]),
+ .o(prod160),
+ .done(mult_done160),
+ .idle(mult_idle160)
+);
+
+FT64_multiplier #(8) umultb0
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt || sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[7:0]),
+ .b(b[7:0]),
+ .o(prod80),
+ .done(mult_done80),
+ .idle(mult_idle80)
+);
+
+`ifdef SIMD
+FT64_multiplier #(32) umulth1
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==half || sz==half_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[63:32]),
+ .b(b[63:32]),
+ .o(prod321),
+ .done(mult_done321),
+ .idle(mult_idle321)
+);
+
+FT64_multiplier #(16) umultc1
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==char_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[31:16]),
+ .b(b[31:16]),
+ .o(prod161),
+ .done(mult_done161),
+ .idle(mult_idle161)
+);
+
+FT64_multiplier #(16) umultc2
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==char_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[47:32]),
+ .b(b[47:32]),
+ .o(prod162),
+ .done(mult_done162),
+ .idle(mult_idle162)
+);
+
+FT64_multiplier #(16) umultc3
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==char_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[63:48]),
+ .b(b[63:48]),
+ .o(prod163),
+ .done(mult_done163),
+ .idle(mult_idle163)
+);
+
+FT64_multiplier #(8) umultb1
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[15:8]),
+ .b(b[15:8]),
+ .o(prod81),
+ .done(mult_done81),
+ .idle(mult_idle81)
+);
+
+FT64_multiplier #(8) umultb2
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[23:16]),
+ .b(b[23:16]),
+ .o(prod82),
+ .done(mult_done82),
+ .idle(mult_idle82)
+);
+
+FT64_multiplier #(8) umultb3
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[31:24]),
+ .b(b[31:24]),
+ .o(prod83),
+ .done(mult_done83),
+ .idle(mult_idle83)
+);
+
+FT64_multiplier #(8) umultb4
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[39:32]),
+ .b(b[39:32]),
+ .o(prod84),
+ .done(mult_done84),
+ .idle(mult_idle84)
+);
+
+FT64_multiplier #(8) umultb5
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[47:40]),
+ .b(b[47:40]),
+ .o(prod85),
+ .done(mult_done85),
+ .idle(mult_idle85)
+);
+
+FT64_multiplier #(8) umultb6
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[55:48]),
+ .b(b[55:48]),
+ .o(prod86),
+ .done(mult_done86),
+ .idle(mult_idle86)
+);
+
+FT64_multiplier #(8) umultb7
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsMul(instr) && (sz==byt_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a[63:56]),
+ .b(b[63:56]),
+ .o(prod87),
+ .done(mult_done87),
+ .idle(mult_idle87)
+);
+`endif
+
+// Full-width (DBW-bit) divider.
+// Started when ld is asserted for an instruction accepted by IsDivmod() whose
+// operand size is word or word_para. Outputs: divq on the qo port, rem on the
+// ro port (the ALU result mux uses divq for DIV* and rem for MOD* operations)
+// and divByZero on the dvByZr port; div_done / div_idle report unit status.
+FT64_divider #(DBW) udiv1
+(
+ .rst(rst),
+ .clk(clk),
+ .ld(ld && IsDivmod(instr) && (sz==word || sz==word_para)),
+ .abort(abort),
+ .sgn(IsSgn(instr)),
+ .sgnus(IsSgnus(instr)),
+ .a(a),
+ .b(b),
+ .qo(divq),
+ .ro(rem),
+ .dvByZr(divByZero),
+ .done(div_done),
+ .idle(div_idle)
+);
+
+wire [5:0] bshift = instr[31:26]==`SHIFTR ? b[5:0] : {instr[30],instr[22:18]};
+
+FT64_shift ushft1
+(
+ .instr(instr),
+ .a(a),
+ .b(bshift),
+ .res(shfto),
+ .ov(aslo)
+);
+
+FT64_shifth ushfthL
+(
+ .instr(instr),
+ .a(a[31:0]),
+ .b(bshift),
+ .res(shftho[31:0]),
+ .ov()
+);
+
+FT64_shifth ushfthH
+(
+ .instr(instr),
+ .a(a[63:32]),
+ .b(b[63:32]),
+ .res(shftho[63:32]),
+ .ov()
+);
+
+FT64_shiftc ushftc0
+(
+ .instr(instr),
+ .a(a[15:0]),
+ .b(bshift),
+ .res(shftco[15:0]),
+ .ov()
+);
+
+FT64_shiftc ushftc1
+(
+ .instr(instr),
+ .a(a[31:16]),
+ .b(b[31:16]),
+ .res(shftco[31:16]),
+ .ov()
+);
+
+FT64_shiftc ushftc2
+(
+ .instr(instr),
+ .a(a[47:32]),
+ .b(b[47:32]),
+ .res(shftco[47:32]),
+ .ov()
+);
+
+FT64_shiftc ushftc3
+(
+ .instr(instr),
+ .a(a[63:48]),
+ .b(b[63:48]),
+ .res(shftco[63:48]),
+ .ov()
+);
+
+FT64_shiftb ushftb0
+(
+ .instr(instr),
+ .a(a[7:0]),
+ .b(bshift),
+ .res(shftob[7:0]),
+ .ov()
+);
+
+FT64_shiftb ushftb1
+(
+ .instr(instr),
+ .a(a[15:8]),
+ .b(b[15:8]),
+ .res(shftob[15:8]),
+ .ov()
+);
+
+FT64_shiftb ushftb2
+(
+ .instr(instr),
+ .a(a[23:16]),
+ .b(b[23:16]),
+ .res(shftob[23:16]),
+ .ov()
+);
+
+FT64_shiftb ushftb3
+(
+ .instr(instr),
+ .a(a[31:24]),
+ .b(b[31:24]),
+ .res(shftob[31:24]),
+ .ov()
+);
+
+FT64_shiftb ushftb4
+(
+ .instr(instr),
+ .a(a[39:32]),
+ .b(b[39:32]),
+ .res(shftob[39:32]),
+ .ov()
+);
+
+FT64_shiftb ushftb5
+(
+ .instr(instr),
+ .a(a[47:40]),
+ .b(b[47:40]),
+ .res(shftob[47:40]),
+ .ov()
+);
+
+FT64_shiftb ushftb6
+(
+ .instr(instr),
+ .a(a[55:48]),
+ .b(b[55:48]),
+ .res(shftob[55:48]),
+ .ov()
+);
+
+FT64_shiftb ushftb7
+(
+ .instr(instr),
+ .a(a[63:56]),
+ .b(b[63:56]),
+ .res(shftob[63:56]),
+ .ov()
+);
+
+// Bit-count units feeding clzo / cloo / cpopo (used by CNTLZ, CNTLO, CNTPOP
+// and the vector pop-count operations). The operand is narrowed by sz:
+// 0 -> a[7:0], 1 -> a[15:0], 2 -> a[31:0], anything else -> all 64 bits.
+// NOTE(review): sz is 3 bits wide but is compared with 2-bit constants, so
+// the *_para sizes (4..7) all fall through to the full 64-bit operand.
+// NOTE(review): for the narrow sizes cntlz64 is fed with the unused upper
+// bits forced to ones, while cntlo64 / cntpop64 get the operand zero-extended.
+// Whether the leading-zero / leading-one counts come out as intended depends
+// on how cntlz64 / cntlo64 treat those upper bits - confirm against those
+// modules.
+cntlz64 uclz1
+(
+ .i(sz==2'd0 ? {56'hFFFFFFFFFFFFFF,a[7:0]} :
+ sz==2'd1 ? {48'hFFFFFFFFFFFF,a[15:0]} :
+ sz==2'd2 ? {32'hFFFFFFFF,a[31:0]} : a),
+ .o(clzo)
+);
+
+cntlo64 uclo1
+(
+ .i(sz==2'd0 ? a[7:0] : sz==2'd1 ? a[15:0] : sz==2'd2 ? a[31:0] : a),
+ .o(cloo)
+);
+
+cntpop64 ucpop1
+(
+ .i(sz==2'd0 ? a[7:0] : sz==2'd1 ? a[15:0] : sz==2'd2 ? a[31:0] : a),
+ .o(cpopo)
+);
+
+wire [7:0] bcdaddo,bcdsubo;
+wire [15:0] bcdmulo;
+BCDAdd ubcd1 (1'b0,a,b,bcdaddo);
+BCDSub ubcd2 (1'b0,a,b,bcdsubo);
+BCDMul2 ubcd3 (a,b,bcdmulo);
+
+wire [7:0] s8 = a[7:0] + b[7:0];
+wire [15:0] s16 = a[15:0] + b[15:0];
+wire [31:0] s32 = a[31:0] + b[31:0];
+wire [7:0] d8 = a[7:0] - b[7:0];
+wire [15:0] d16 = a[15:0] - b[15:0];
+wire [31:0] d32 = a[31:0] - b[31:0];
+wire [63:0] and64 = a & b;
+wire [63:0] or64 = a | b;
+wire [63:0] xor64 = a ^ b;
+// Reduction-OR of operand 'a' per lane, used by the REDOR operation.
+// Each lane's result is a single flag in the lane's least significant bit,
+// with the remaining bits of the lane zero:
+//   redor64: one 64-bit lane, redor32: two 32-bit lanes,
+//   redor16: four 16-bit lanes, redor8: eight 8-bit lanes.
+wire [63:0] redor64 = {63'd0,|a};
+wire [63:0] redor32 = {31'd0,|a[63:32],31'd0,|a[31:0]};
+wire [63:0] redor16 = {15'd0,|a[63:48],15'd0,|a[47:32],15'd0,|a[31:16],15'd0,|a[15:0]};
+// Every byte lane needs exactly 7 padding bits so the concatenation is 64
+// bits wide; a 6-bit pad on lane 6 would leave the vector one bit short and
+// move the lane-7 flag from bit 56 down to bit 55.
+wire [63:0] redor8 = {7'd0,|a[63:56],7'd0,|a[55:48],7'd0,|a[47:40],7'd0,|a[39:32],7'd0,
+ |a[31:24],7'd0,|a[23:16],7'd0,|a[15:8],7'd0,|a[7:0]};
+wire [63:0] zxb10 = {54'd0,b[9:0]};
+wire [63:0] sxb10 = {{54{b[9]}},b[9:0]};
+wire [63:0] zxb26 = {38'd0,instr[47:32],instr[27:18]};
+wire [63:0] sxb26 = {{38{instr[47]}},instr[47:32],instr[27:18]};
+reg [15:0] mask;
+wire [4:0] cpopom;
+wire signed [63:0] as = a;
+wire signed [63:0] bs = b;
+wire signed [63:0] cs = c;
+
+// Element mask: bits 0..ven of 'mask' are set, all higher bits are clear.
+// It is ANDed with vm before the 16-bit population count (cpopom).
+always @*
+begin
+    mask = 16'h0000;
+    for (n = 0; n < 16; n = n + 1)
+        if (n <= ven)
+            mask[n] = 1'b1;
+end
+
+cntpop16 ucpop2
+(
+ .i(vm & mask),
+ .o(cpopom)
+);
+
+wire [5:0] lsto, fsto;
+ffz24 uffo1
+(
+ .i(~{8'h00,a[15:0]}),
+ .o(lsto)
+);
+
+flz24 uflo1
+(
+ .i(~{8'h00,a[15:0]}),
+ .o(fsto)
+);
+
+wire [DBW-1:0] bmmo;
+FT64_BMM ubmm1
+(
+ .op(1'b0),
+ .a(a),
+ .b(b),
+ .o(bmmo)
+);
+
+always @*
+begin
+case(instr[`INSTRUCTION_OP])
+// Vector (IVECTOR) operations, decoded on the S2 field.
+// 'ven' indexes the element / mask bit being processed and 'vm' is the mask:
+// for the masked arithmetic and logic ops a clear vm[ven] passes c through
+// instead of the computed value. The set-compare ops (VSxx*) start from c and
+// replace only bit 'ven' with the comparison result. When SUP_VECTOR is 0 the
+// whole group returns the 64'hCCCC... "unsupported" pattern.
+`IVECTOR:
+    if (SUP_VECTOR)
+        case(instr[`INSTRUCTION_S2])
+        `VABS: o[63:0] = a[63] ? -a : a;
+        `VSIGN: o[63:0] = a[63] ? 64'hFFFFFFFFFFFFFFFF : a==64'd0 ? 64'd0 : 64'd1;
+        `VMxx:
+            case(instr[25:23])
+            `VMAND: o[63:0] = and64;
+            `VMOR: o[63:0] = or64;
+            `VMXOR: o[63:0] = xor64;
+            `VMXNOR: o[63:0] = ~(xor64);
+            `VMPOP: o[63:0] = {57'd0,cpopo};
+            `VMFILL: for (n = 0; n < 64; n = n + 1)
+                o[n] = (n < a);
+            // Change the following when VL > 16
+            `VMFIRST: o[63:0] = fsto==5'd31 ? 64'd64 : fsto;
+            `VMLAST: o[63:0] = lsto==5'd31 ? 64'd64 : lsto;
+            endcase
+        `VADD,`VADDS: o[63:0] = vm[ven] ? a + b : c;
+        `VSUB,`VSUBS: o[63:0] = vm[ven] ? a - b : c;
+        `VMUL,`VMULS: o[63:0] = vm[ven] ? prod[DBW-1:0] : c;
+        `VDIV,`VDIVS: o[63:0] = BIG ? (vm[ven] ? divq : c) : 64'hCCCCCCCCCCCCCCCC;
+        `VAND,`VANDS: o[63:0] = vm[ven] ? a & b : c;
+        `VOR,`VORS: o[63:0] = vm[ven] ? a | b : c;
+        `VXOR,`VXORS: o[63:0] = vm[ven] ? a ^ b : c;
+        `VCNTPOP: o[63:0] = {57'd0,cpopo};
+        `VSHLV: o[63:0] = a; // no masking here
+        `VSHRV: o[63:0] = a;
+        `VCMPRSS: o[63:0] = a;
+        `VCIDX: o[63:0] = a * ven;
+        `VSCAN: o[63:0] = a * (cpopom==0 ? 0 : cpopom-1);
+        // Signed set-compare: only bit 'ven' of the result is updated.
+        `VSxx,`VSxxS,
+        `VSxxb,`VSxxSb:
+            case({instr[26],instr[20:19]})
+            `VSEQ: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a==b : c[ven];
+                end
+            `VSNE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a!=b : c[ven];
+                end
+            `VSLT: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? $signed(a) < $signed(b) : c[ven];
+                end
+            `VSGE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? $signed(a) >= $signed(b) : c[ven];
+                end
+            `VSLE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? $signed(a) <= $signed(b) : c[ven];
+                end
+            `VSGT: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? $signed(a) > $signed(b) : c[ven];
+                end
+            default: o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+            endcase
+        // Unsigned set-compare: same structure with unsigned comparisons.
+        `VSxxU,`VSxxSU,
+        `VSxxUb,`VSxxSUb:
+            case({instr[26],instr[20:19]})
+            `VSEQ: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a==b : c[ven];
+                end
+            `VSNE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a!=b : c[ven];
+                end
+            `VSLT: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a < b : c[ven];
+                end
+            `VSGE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a >= b : c[ven];
+                end
+            `VSLE: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a <= b : c[ven];
+                end
+            `VSGT: begin
+                o[63:0] = c;
+                o[ven] = vm[ven] ? a > b : c[ven];
+                end
+            default: o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+            endcase
+        `VBITS2V: o[63:0] = vm[ven] ? a[ven] : c;
+        `V2BITS: begin
+            o[63:0] = b;
+            o[ven] = vm[ven] ? a[0] : b[ven];
+            end
+        `VSHL,`VSHR,`VASR: o[63:0] = BIG ? shfto : 64'hCCCCCCCCCCCCCCCC;
+        `VXCHG: o[63:0] = vm[ven] ? b : a;
+        default: o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+        endcase
+    else
+        // Blocking assignment, like every other write to 'o' in this
+        // combinational block (mixing in a nonblocking one is a lint /
+        // synthesis error in several tools).
+        o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+`R2:
+ if (instr[6])
+ case(instr[47:42])
+ // 48-bit shift form. shift8 receives the combinational shifter output for
+ // the operand size in instr[32:30]:
+ //   0,1,2 : a single byte / char / half result extended to 64 bits
+ //           (sign-extended for ASL/ASR/ROL/ROR, zero-extended for SHL/SHR)
+ //   3,7   : the full 64-bit shifter
+ //   4,5,6 : the eight-byte / four-char / two-half parallel lane results
+ // The value returned on 'o' is shift9, i.e. shift8 as registered on the
+ // previous clock edge.
+ `SHIFTR:
+     begin
+         case(instr[35:33])
+         `ASL,`ASR,`ROL,`ROR:
+             case(instr[32:30]) // size
+             3'd0: shift8 = {{56{shftob[7]}},shftob[7:0]};
+             // The sign bit must come from the char shifter (shftco), the
+             // same unit that supplies the low 16 bits.
+             3'd1: shift8 = {{48{shftco[15]}},shftco[15:0]};
+             3'd2: shift8 = {{32{shftho[31]}},shftho[31:0]};
+             3'd3,3'd7: shift8 = shfto;
+             3'd4: shift8 = shftob;
+             3'd5: shift8 = shftco;
+             3'd6: shift8 = shftho;
+             endcase
+         `SHL,`SHR:
+             case(instr[32:30]) // size
+             3'd0: shift8 = {56'd0,shftob[7:0]};
+             3'd1: shift8 = {48'd0,shftco[15:0]};
+             3'd2: shift8 = {32'd0,shftho[31:0]};
+             3'd3,3'd7: shift8 = shfto;
+             3'd4: shift8 = shftob;
+             3'd5: shift8 = shftco;
+             3'd6: shift8 = shftho;
+             endcase
+         default: o[63:0] = 64'hDCDCDCDCDCDCDCDC;
+         endcase
+         case(instr[35:33])
+         `ASL,`ASR,`SHL,`SHR,`ROL,`ROR:
+             o[63:0] = shift9;
+         default: o[63:0] = 64'hDCDCDCDCDCDCDCDC;
+         endcase
+     end
+ `MIN:
+ case(instr[30:28])
+ 3'd3:
+ if (as < bs && as < cs)
+ o[63:0] = as;
+ else if (bs < cs)
+ o[63:0] = bs;
+ else
+ o[63:0] = cs;
+ default: o = 64'hDEADDEADDEADDEAD;
+ endcase
+ `CMOVEZ: begin
+ o = (a==64'd0) ? b : c;
+ end
+ `CMOVNZ: if (instr[41:39]==3'd4)
+ o = (a!=64'd0) ? b : {{48{instr[38]}},instr[38:23]};
+ else
+ o = (a!=64'd0) ? b : c;
+ default: o = 64'hDEADDEADDEADDEAD;
+ endcase
+ else
+ casez(instr[`INSTRUCTION_S2])
+ `BCD:
+ case(instr[`INSTRUCTION_S1])
+ `BCDADD: o[63:0] = BIG ? bcdaddo : 64'hCCCCCCCCCCCCCCCC;
+ `BCDSUB: o[63:0] = BIG ? bcdsubo : 64'hCCCCCCCCCCCCCCCC;
+ `BCDMUL: o[63:0] = BIG ? bcdmulo : 64'hCCCCCCCCCCCCCCCC;
+ default: o[63:0] = 64'hDEADDEADDEADDEAD;
+ endcase
+ `MOV: begin
+ o[63:0] = a;
+ end
+ `VMOV: o[63:0] = a;
+ // Single-operand (R1) operations, decoded on the S1 field. Where an
+ // operation is size dependent the low two bits of sz select
+ // byte / char / half / word.
+ `R1:
+     case(instr[`INSTRUCTION_S1])
+     `CNTLZ: o[63:0] = BIG ? {57'd0,clzo} : 64'hCCCCCCCCCCCCCCCC;
+     `CNTLO: o[63:0] = BIG ? {57'd0,cloo} : 64'hCCCCCCCCCCCCCCCC;
+     `CNTPOP: o[63:0] = BIG ? {57'd0,cpopo} : 64'hCCCCCCCCCCCCCCCC;
+     // Absolute value. For the narrow sizes the negation is done inside a
+     // concatenation so that it is evaluated at the operand's own width;
+     // written directly in the 64-bit expression the operand would be
+     // zero-extended first and e.g. byte 8'hFF (-1) would yield
+     // 64'hFFFFFFFFFFFFFF01 instead of 1.
+     `ABS: case(sz[1:0])
+         2'd0: o[63:0] = BIG ? (a[7] ? {56'd0,-a[7:0]} : {56'd0,a[7:0]}) : 64'hCCCCCCCCCCCCCCCC;
+         2'd1: o[63:0] = BIG ? (a[15] ? {48'd0,-a[15:0]} : {48'd0,a[15:0]}) : 64'hCCCCCCCCCCCCCCCC;
+         2'd2: o[63:0] = BIG ? (a[31] ? {32'd0,-a[31:0]} : {32'd0,a[31:0]}) : 64'hCCCCCCCCCCCCCCCC;
+         2'd3: o[63:0] = BIG ? (a[63] ? -a : a) : 64'hCCCCCCCCCCCCCCCC;
+         endcase
+     // Logical NOT: one "lane is zero" flag per lane, packed into the low
+     // bits of the result (8, 4, 2 or 1 flags).
+     `NOT: case(sz[1:0])
+         2'd0: o = {~|a[63:56],~|a[55:48],~|a[47:40],~|a[39:32],~|a[31:24],~|a[23:16],~|a[15:8],~|a[7:0]};
+         2'd1: o = {~|a[63:48],~|a[47:32],~|a[31:16],~|a[15:0]};
+         2'd2: o = {~|a[63:32],~|a[31:0]};
+         2'd3: o = ~|a[63:0];
+         endcase
+     // Two's complement negate, applied independently to every lane.
+     `NEG:
+         case(sz[1:0])
+         2'd0: o = {-a[63:56],-a[55:48],-a[47:40],-a[39:32],-a[31:24],-a[23:16],-a[15:8],-a[7:0]};
+         2'd1: o = {-a[63:48],-a[47:32],-a[31:16],-a[15:0]};
+         2'd2: o = {-a[63:32],-a[31:0]};
+         2'd3: o = -a;
+         endcase
+     `REDOR: case(sz[1:0])
+         2'd0: o = redor8;
+         2'd1: o = redor16;
+         2'd2: o = redor32;
+         2'd3: o = redor64;
+         endcase
+     // Zero / sign extension of the low half, char or byte of a.
+     `ZXH: o[63:0] = {32'd0,a[31:0]};
+     `ZXC: o[63:0] = {48'd0,a[15:0]};
+     `ZXB: o[63:0] = {56'd0,a[7:0]};
+     `SXH: o[63:0] = {{32{a[31]}},a[31:0]};
+     `SXC: o[63:0] = {{48{a[15]}},a[15:0]};
+     `SXB: o[63:0] = {{56{a[7]}},a[7:0]};
+// 5'h1C: o[63:0] = tmem[a[9:0]];
+     default: o = 64'hDEADDEADDEADDEAD;
+     endcase
+ `BMM: o[63:0] = BIG ? bmmo : 64'hCCCCCCCCCCCCCCCC;
+ `SHIFT31,
+ `SHIFT63,
+ `SHIFTR:
+ begin
+ if (instr[25:23]==`SHL || instr[25:23]==`ASL)
+ o[63:0] = shfto;
+ else
+ o[63:0] = BIG ? shfto : 64'hCCCCCCCCCCCCCCCC;
+ $display("BIG=%d",BIG);
+ if(!BIG)
+ $stop;
+ end
+ `ADD:
+`ifdef SIMD
+ case(sz)
+ 3'd0,3'd4:
+ begin
+ o[7:0] = a[7:0] + b[7:0];
+ o[15:8] = a[15:8] + b[15:8];
+ o[23:16] = a[23:16] + b[23:16];
+ o[31:24] = a[31:24] + b[31:24];
+ o[39:32] = a[39:32] + b[39:32];
+ o[47:40] = a[47:40] + b[47:40];
+ o[55:48] = a[55:48] + b[55:48];
+ o[63:56] = a[63:56] + b[63:56];
+ end
+ 3'd1,3'd5:
+ begin
+ o[15:0] = a[15:0] + b[15:0];
+ o[31:16] = a[31:16] + b[31:16];
+ o[47:32] = a[47:32] + b[47:32];
+ o[63:48] = a[63:48] + b[63:48];
+ end
+ 3'd2,3'd6:
+ begin
+ o[31:0] = a[31:0] + b[31:0];
+ o[63:32] = a[63:32] + b[63:32];
+ end
+ default:
+ begin
+ o[63:0] = a + b;
+ end
+ endcase
+`else
+ o = a + b;
+`endif
+ `SUB:
+`ifdef SIMD
+ case(sz)
+ 3'd0,3'd4:
+ begin
+ o[7:0] = a[7:0] - b[7:0];
+ o[15:8] = a[15:8] - b[15:8];
+ o[23:16] = a[23:16] - b[23:16];
+ o[31:24] = a[31:24] - b[31:24];
+ o[39:32] = a[39:32] - b[39:32];
+ o[47:40] = a[47:40] - b[47:40];
+ o[55:48] = a[55:48] - b[55:48];
+ o[63:56] = a[63:56] - b[63:56];
+ end
+ 3'd1,3'd5:
+ begin
+ o[15:0] = a[15:0] - b[15:0];
+ o[31:16] = a[31:16] - b[31:16];
+ o[47:32] = a[47:32] - b[47:32];
+ o[63:48] = a[63:48] - b[63:48];
+ end
+ 3'd2,3'd6:
+ begin
+ o[31:0] = a[31:0] - b[31:0];
+ o[63:32] = a[63:32] - b[63:32];
+ end
+ default:
+ begin
+ o[63:0] = a - b;
+ end
+ endcase
+`else
+ o = a - b;
+`endif
+ `SLT: tskSlt(instr,instr[25:23],a,b,o);
+ `SLTU: tskSltu(instr,instr[25:23],a,b,o);
+ `SLE: tskSle(instr,instr[25:23],a,b,o);
+ `SLEU: tskSleu(instr,instr[25:23],a,b,o);
+ `AND: o = and64;
+ `OR: o = or64;
+ `XOR: o = xor64;
+ `NAND: o = ~and64;
+ `NOR: o = ~or64;
+ `XNOR: o = ~xor64;
+ `SEI: o = a | instr[21:16];
+ `RTI: o = a | instr[21:16];
+ // Bitwise multiplex: each result bit comes from b where a is 1, from c
+ // where a is 0. Blocking assignment, consistent with every other write to
+ // 'o' in this combinational block.
+ `MUX: for (n = 0; n < 64; n = n + 1)
+     o[n] = a[n] ? b[n] : c[n];
+ `MULU,`MULSU,`MUL:
+ case(sz)
+ byt_para: o[63:0] = {prod87[7:0],prod86[7:0],prod85[7:0],prod84[7:0],prod83[7:0],prod82[7:0],prod81[7:0],prod80[7:0]};
+ char_para: o[63:0] = {prod163[15:0],prod162[15:0],prod161[15:0],prod160[15:0]};
+ half_para: o[63:0] = {prod321[31:0],prod320[31:0]};
+ default: o[63:0] = prod[DBW-1:0];
+ endcase
+ `FXMUL:
+ case(sz)
+ half_para: o = {prod321[47:16] + prod321[15],prod320[47:16] + prod320[15]};
+ default: o = prod[95:32] + prod[31];
+ endcase
+ `MULF: o = a[23:0] * b[15:0];
+ `DIVU: o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+ `DIVSU: o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+ `DIV: o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+ `MODU: o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+ `MODSU: o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+ `MOD: o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+ `LEAX:
+ begin
+ o[63:0] = BIG ? a + (b << instr[22:21]) : 64'hCCCCCCCCEEEEEEEE;
+ //o[63:44] = PTR;
+ end
+ // Signed minimum of a and b. With SIMD defined the comparison is done
+ // independently per lane (sz 0/4: bytes, 1/5: chars, 2/6: halves,
+ // 3/7: full 64 bits); otherwise only the 64-bit form exists.
+ // When BIG is 0 every lane is filled with the 'hCC "unsupported" pattern,
+ // sized to the lane width.
+ `MIN:
+`ifdef SIMD
+     case(sz)
+     3'd0,3'd4:
+         begin
+             o[7:0] = BIG ? ($signed(a[7:0]) < $signed(b[7:0]) ? a[7:0] : b[7:0]) : 8'hCC;
+             o[15:8] = BIG ? ($signed(a[15:8]) < $signed(b[15:8]) ? a[15:8] : b[15:8]) : 8'hCC;
+             o[23:16] = BIG ? ($signed(a[23:16]) < $signed(b[23:16]) ? a[23:16] : b[23:16]) : 8'hCC;
+             o[31:24] = BIG ? ($signed(a[31:24]) < $signed(b[31:24]) ? a[31:24] : b[31:24]) : 8'hCC;
+             o[39:32] = BIG ? ($signed(a[39:32]) < $signed(b[39:32]) ? a[39:32] : b[39:32]) : 8'hCC;
+             o[47:40] = BIG ? ($signed(a[47:40]) < $signed(b[47:40]) ? a[47:40] : b[47:40]) : 8'hCC;
+             o[55:48] = BIG ? ($signed(a[55:48]) < $signed(b[55:48]) ? a[55:48] : b[55:48]) : 8'hCC;
+             o[63:56] = BIG ? ($signed(a[63:56]) < $signed(b[63:56]) ? a[63:56] : b[63:56]) : 8'hCC;
+         end
+     3'd1,3'd5:
+         begin
+             o[15:0] = BIG ? ($signed(a[15:0]) < $signed(b[15:0]) ? a[15:0] : b[15:0]) : 16'hCCCC;
+             // Lane 1 is bits 31:16; a 32:16 select would be 17 bits wide
+             // and overlap lane 2.
+             o[31:16] = BIG ? ($signed(a[31:16]) < $signed(b[31:16]) ? a[31:16] : b[31:16]) : 16'hCCCC;
+             o[47:32] = BIG ? ($signed(a[47:32]) < $signed(b[47:32]) ? a[47:32] : b[47:32]) : 16'hCCCC;
+             o[63:48] = BIG ? ($signed(a[63:48]) < $signed(b[63:48]) ? a[63:48] : b[63:48]) : 16'hCCCC;
+         end
+     3'd2,3'd6:
+         begin
+             o[31:0] = BIG ? ($signed(a[31:0]) < $signed(b[31:0]) ? a[31:0] : b[31:0]) : 32'hCCCCCCCC;
+             o[63:32] = BIG ? ($signed(a[63:32]) < $signed(b[63:32]) ? a[63:32] : b[63:32]) : 32'hCCCCCCCC;
+         end
+     3'd3,3'd7:
+         begin
+             o[63:0] = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+         end
+     endcase
+`else
+     o[63:0] = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+`endif
+ // Signed maximum of a and b. With SIMD defined the comparison is done
+ // independently per lane (sz 0/4: bytes, 1/5: chars, 2/6: halves,
+ // 3/7: full 64 bits); otherwise only the 64-bit form exists.
+ // When BIG is 0 every lane is filled with the 'hCC "unsupported" pattern,
+ // sized to the lane width.
+ `MAX:
+`ifdef SIMD
+     case(sz)
+     3'd0,3'd4:
+         begin
+             o[7:0] = BIG ? ($signed(a[7:0]) > $signed(b[7:0]) ? a[7:0] : b[7:0]) : 8'hCC;
+             o[15:8] = BIG ? ($signed(a[15:8]) > $signed(b[15:8]) ? a[15:8] : b[15:8]) : 8'hCC;
+             o[23:16] = BIG ? ($signed(a[23:16]) > $signed(b[23:16]) ? a[23:16] : b[23:16]) : 8'hCC;
+             o[31:24] = BIG ? ($signed(a[31:24]) > $signed(b[31:24]) ? a[31:24] : b[31:24]) : 8'hCC;
+             o[39:32] = BIG ? ($signed(a[39:32]) > $signed(b[39:32]) ? a[39:32] : b[39:32]) : 8'hCC;
+             o[47:40] = BIG ? ($signed(a[47:40]) > $signed(b[47:40]) ? a[47:40] : b[47:40]) : 8'hCC;
+             o[55:48] = BIG ? ($signed(a[55:48]) > $signed(b[55:48]) ? a[55:48] : b[55:48]) : 8'hCC;
+             o[63:56] = BIG ? ($signed(a[63:56]) > $signed(b[63:56]) ? a[63:56] : b[63:56]) : 8'hCC;
+         end
+     3'd1,3'd5:
+         begin
+             o[15:0] = BIG ? ($signed(a[15:0]) > $signed(b[15:0]) ? a[15:0] : b[15:0]) : 16'hCCCC;
+             // Lane 1 is bits 31:16; a 32:16 select would be 17 bits wide
+             // and overlap lane 2.
+             o[31:16] = BIG ? ($signed(a[31:16]) > $signed(b[31:16]) ? a[31:16] : b[31:16]) : 16'hCCCC;
+             o[47:32] = BIG ? ($signed(a[47:32]) > $signed(b[47:32]) ? a[47:32] : b[47:32]) : 16'hCCCC;
+             o[63:48] = BIG ? ($signed(a[63:48]) > $signed(b[63:48]) ? a[63:48] : b[63:48]) : 16'hCCCC;
+         end
+     3'd2,3'd6:
+         begin
+             o[31:0] = BIG ? ($signed(a[31:0]) > $signed(b[31:0]) ? a[31:0] : b[31:0]) : 32'hCCCCCCCC;
+             o[63:32] = BIG ? ($signed(a[63:32]) > $signed(b[63:32]) ? a[63:32] : b[63:32]) : 32'hCCCCCCCC;
+         end
+     3'd3,3'd7:
+         begin
+             o[63:0] = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+         end
+     endcase
+`else
+     o[63:0] = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+`endif
+ `MAJ: o = (a & b) | (a & c) | (b & c);
+ `CHK: o[63:0] = (a >= b && a < c);
+ /*
+ `RTOP: case(c[5:0])
+ `RTADD: o = a + b;
+ `RTSUB: o = a - b;
+ `RTAND: o = and64;
+ `RTOR: o = or64;
+ `RTXOR: o = xor64;
+ `RTNAND: o = ~and64;
+ `RTNOR: o = ~or64;
+ `RTXNOR: o = ~xor64;
+ `RTSLT: o = as < bs;
+ `RTSGE: o = as >= bs;
+ `RTSLE: o = as <= bs;
+ `RTSGT: o = as > bs;
+ `RTSEQ: o = as==bs;
+ `RTSNE: o = as!=bs;
+ endcase
+ */
+ `TLB: o = BIG ? tlbo : 64'hDEADDEADDEADDEAD;
+ default: o[63:0] = 64'hDEADDEADDEADDEAD;
+ endcase
+`MEMNDX:
+ if (instr[7:6]==2'b10) begin
+ if (instr[31])
+ case({instr[31:28],instr[17:16]})
+ `PUSH:
+ begin
+ usa = a - 4'd8;
+ o = {pb[50:0],13'd0} + usa;
+ end
+ default: o = 64'hDEADDEADDEADDEAD;
+ endcase
+ else
+ o = 64'hDEADDEADDEADDEAD;
+ end
+ else if (instr[7:6]==2'b00) begin
+ if (!instr[31])
+ case({instr[31:28],instr[22:21]})
+ `CACHEX,`LVX,
+ `LBX,`LBUX,`LCX,`LCUX,
+ `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+ `LHX,`LHUX,`LWX,`LWRX:
+ if (BIG) begin
+ usa = a + (c << instr[19:18]);
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCEEEEEEEE;
+ `LVX,`SVX:
+ if (BIG) begin
+ usa = a + (c << 2'd3);
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCCCCCCCCC;
+ `LVWS,`SVWS:
+ if (BIG) begin
+ usa = a + ({c * ven,3'b000});
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCCCCCCCCC;
+ default: o = 64'hDEADDEADDEADDEAD;
+ endcase
+ else
+ case({instr[31:28],instr[17:16]})
+ `PUSH:
+ begin
+ usa = a - 4'd8;
+ o = {pb[50:0],13'd0} + usa;
+ end
+ `SBX,`SCX,`SHX,`SWX,`SWCX:
+ if (BIG) begin
+ usa = a + (c << instr[14:13]);
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCEEEEEEEE;
+ `SVX: if (BIG) begin
+ usa = a + (c << 2'd3);
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCCCCCCCCC;
+ `SVWS:
+ if (BIG) begin
+ usa = a + ({c * ven,3'b000});
+ o = {pb[50:0],13'd0} + usa;
+ end
+ else
+ o = 64'hCCCCCCCCCCCCCCCC;
+ default: o = 64'hDEADDEADDEADDEAD;
+ endcase
+ end
+ else
+ o[63:0] = 64'hDEADDEADDEADDEAD;
+`AUIPC:
+ begin
+ if (instr[7:6]==2'b01)
+ o[63:0] = pc + {instr[47:18],instr[12:8],30'd0};
+ else
+ o[63:0] = pc + {{15{instr[31]}},instr[31:18],instr[12:8],30'd0};
+ o[29:0] = 30'd0;
+// o[63:44] = PTR;
+ end
+`LUI:
+ begin
+ if (instr[7:6]==2'b01)
+ o = {instr[47:18],instr[12:8],30'd0};
+ else
+ o = {{15{instr[31]}},instr[31:18],instr[12:8],30'd0};
+ end
+`ADDI: o = a + b;
+`SLTI: o = $signed(a) < $signed(b);
+`SLTUI: o = a < b;
+`SGTI: o = $signed(a) > $signed(b);
+`SGTUI: o = a > b;
+`ANDI: o = a & andb;
+`ORI: o = a | orb;
+`XORI: o = a ^ orb;
+`XNORI: o = ~(a ^ orb);
+`MULUI: o = prod[DBW-1:0];
+`MULI: o = prod[DBW-1:0];
+`MULFI: o = a[23:0] * b[15:0];
+`DIVUI: o = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+`DIVI: o = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+`MODI: o = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+`LB,`LBU,`SB:
+ begin
+ usa = a + b;
+ o = {pb[50:0],13'd0} + usa;
+ end
+`Lx,`LxU,`Sx,`LVx,`LVxU:
+ begin
+ casez(b[2:0])
+ 3'b100:
+ begin
+ usa = a + {b[63:3],3'b0}; // LW / SW
+ o = {pb[50:0],13'd0} + usa;
+ end
+ 3'b?10:
+ begin
+ usa = a + {b[63:2],2'b0}; // LH / LHU / SH
+ o = {pb[50:0],13'd0} + usa;
+ end
+ default:
+ begin
+ usa = a + {b[63:1],1'b0}; // LC / LCU / SC
+ o = {pb[50:0],13'd0} + usa;
+ end
+ endcase
+ end
+`LWR,`SWC,`CAS,`CACHE:
+ begin
+ usa = a + b;
+ o = {pb[50:0],13'd0} + usa;
+ end
+`LV,`SV:
+ begin
+ usa = a + b + {ven,3'b0};
+ o = {pb[50:0],13'd0} + usa;
+ end
+`CSRRW:
+ case(instr[27:18])
+ 10'h044: o = BIG ? (csr | {39'd0,thrd,24'h0}) : 64'hDDDDDDDDDDDDDDDD;
+ default: o = BIG ? csr : 64'hDDDDDDDDDDDDDDDD;
+ endcase
+`BITFIELD: o = BIG ? bfout : 64'hCCCCCCCCCCCCCCCC;
+default: o = 64'hDEADDEADDEADDEAD;
+endcase
+end
+
+// adrDone flag register.
+// Set by reset, cleared on the clock where ld is asserted, and set again
+// on a later clock where mem or shift is asserted while ld is low.
+// In every other case the flag holds its value.
+always @(posedge clk)
+begin
+  if (rst)
+    adrDone <= TRUE;
+  else if (ld)
+    adrDone <= FALSE;
+  else if (mem|shift)
+    adrDone <= TRUE;
+end
+
+// adrIdle flag register.
+// Follows exactly the same set/clear rules as adrDone: TRUE after reset,
+// FALSE once ld is seen, TRUE again when mem or shift is asserted with
+// ld low; otherwise the previous value is kept.
+always @(posedge clk)
+begin
+  if (rst)
+    adrIdle <= TRUE;
+  else if (ld)
+    adrIdle <= FALSE;
+  else if (mem|shift)
+    adrIdle <= TRUE;
+end
+
+// Registered second-stage result (addro) for R2 instructions whose
+// INSTRUCTION_L2 field is 2'b01. instr[47:42] must name one of the listed
+// first-stage operations; instr[41:36] then selects what is done with
+// shift8 (and c), or, for `R1, instr[22:18] selects a unary operation on
+// shift8 alone. Anything that does not decode loads a recognisable
+// filler pattern.
+//
+// Nonblocking assignments are used because this is a clocked block: with
+// blocking assignments any other posedge-clk block reading addro would
+// race against this one in simulation.
+always @(posedge clk)
+case(instr[`INSTRUCTION_OP])
+`R2:
+  if (instr[`INSTRUCTION_L2]==2'b01)
+    case(instr[47:42])
+    `ADD,`SUB,
+    `AND,`OR,`XOR,`NAND,`NOR,`XNOR,
+    `SHIFTR:
+      case(instr[41:36])
+      `R1:
+        case(instr[22:18])
+        `COM: addro[63:0] <= ~shift8;    // bitwise complement
+        `NOT: addro[63:0] <= ~|shift8;   // reduction NOR: 1 when shift8 is all zero
+        `NEG: addro[63:0] <= -shift8;    // two's complement negate
+        default: addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
+        endcase
+      `ADD: addro[63:0] <= shift8 + c;
+      `SUB: addro[63:0] <= shift8 - c;
+      `AND: addro[63:0] <= shift8 & c;
+      `OR: addro[63:0] <= shift8 | c;
+      `XOR: addro[63:0] <= shift8 ^ c;
+      default: addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
+      endcase
+    default: addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
+    endcase
+  else
+    addro <= 64'hCCCCCCCCCCCCCCCE;
+default: addro <= 64'hCCCCCCCCCCCCCCCE;
+endcase
+
+// Done/idle handshake flags for shift-and-op instructions. Both flags
+// always move together:
+//   reset                                  -> both 1
+//   ld with a shift-and-op instr (BIG)     -> both 0
+//   shift-and-op instr (BIG) without ld    -> both 1
+//   anything else                          -> hold
+reg sao_done;
+reg sao_idle;
+always @(posedge clk)
+begin
+  if (rst)
+    {sao_done, sao_idle} <= 2'b11;
+  else if (ld & IsShiftAndOp(instr) & BIG)
+    {sao_done, sao_idle} <= 2'b00;
+  else if (IsShiftAndOp(instr) & BIG)
+    {sao_done, sao_idle} <= 2'b11;
+end
+
+// Generate done signal
+// Combinational selection of the ALU "done" output.
+// The source of the flag depends on the instruction class:
+//   multiply      -> the mult_done* flag picked by the operand size sz
+//   divide/modulo -> div_done   (BIG configurations only)
+//   shift-and-op  -> sao_done   (BIG configurations only)
+//   shift         -> adrDone
+//   tlb           -> tlb_done   (BIG configurations only)
+//   anything else -> TRUE
+// During reset the output is forced TRUE.
+// Blocking assignments are used because this is purely combinational logic.
+always @*
+if (rst)
+  done = TRUE;
+else begin
+  if (IsMul(instr)) begin
+    case(sz)
+    byt,byt_para: done = mult_done80;
+    char,char_para: done = mult_done160;
+    half,half_para: done = mult_done320;
+    default: done = mult_done;
+    endcase
+  end
+  else if (IsDivmod(instr) & BIG)
+    done = div_done;
+  else if (IsShiftAndOp(instr) & BIG)
+    done = sao_done;
+  else if (shift)
+    done = adrDone;
+  else if (tlb & BIG)
+    done = tlb_done;
+  else
+    done = TRUE;
+end
+
+// Generate idle signal
+// Combinational selection of the ALU "idle" output.
+// Mirrors the done selection, using the matching *_idle flag:
+//   multiply      -> the mult_idle* flag picked by the operand size sz
+//   divide/modulo -> div_idle   (BIG configurations only)
+//   shift-and-op  -> sao_idle   (BIG configurations only)
+//   shift         -> adrIdle
+//   tlb           -> tlb_idle   (BIG configurations only)
+//   anything else -> TRUE
+// During reset the output is forced TRUE.
+// Blocking assignments are used because this is purely combinational logic.
+always @*
+if (rst)
+  idle = TRUE;
+else begin
+  if (IsMul(instr)) begin
+    case(sz)
+    byt,byt_para: idle = mult_idle80;
+    char,char_para: idle = mult_idle160;
+    half,half_para: idle = mult_idle320;
+    default: idle = mult_idle;
+    endcase
+  end
+  else if (IsDivmod(instr) & BIG)
+    idle = div_idle;
+  else if (IsShiftAndOp(instr) & BIG)
+    idle = sao_idle;
+  else if (shift)
+    idle = adrIdle;
+  else if (tlb & BIG)
+    idle = tlb_idle;
+  else
+    idle = TRUE;
+end
+
+// Signed overflow detection from sign bits only.
+//   op : 0 = add, 1 = sub
+//   a  : sign bit of the first operand
+//   b  : sign bit of the second operand
+//   s  : sign bit of the result
+// add: overflow when a and b have the same sign and s differs from it.
+// sub: overflow when a and b have different signs and s equals b's sign.
+function fnOverflow;
+input op;
+input a;
+input b;
+input s;
+fnOverflow = op ? ((a ^ b) & ~(s ^ b))
+                : (~(a ^ b) & (s ^ b));
+endfunction
+
+// Combinational exception-code generator. Produces exc for the
+// instruction currently presented on instr:
+//   - arithmetic overflow (`FLT_OFL) for ADD/SUB/MUL forms, gated by excen
+//   - divide by zero (`FLT_DBZ) for DIV/MOD forms, gated by excen[4]
+//   - `FLT_SEG for CSRRW when instr[27:21]==7'b0011011
+//   - bounds faults for memory operations when SUPPORT_SEGMENTATION or
+//     SUPPORT_BBMS is defined, and alignment faults (`FLT_ALN) based on
+//     the low bits of the computed address o
+//   - `FLT_UNIMP for memory-indexed encodings that are not decoded
+//
+// Notes on this revision:
+//   * Blocking assignments are used since the block is combinational.
+//   * The segmentation range test is "below lb OR above ub"; the earlier
+//     "below lb AND above ub" form could never be true for lb <= ub.
+//   * `LB/`LBU/`SB is a single case item with a final default so exc is
+//     assigned on every path (no inferred latch) and the case label is
+//     not duplicated when both bounds-check options are enabled.
+always @*
+begin
+//if ((tgt[4:0]==5'd31 || tgt[4:0]==5'd30) && (o[ABW-1:0] < {sbl[50:13],13'd0} || o[ABW-1:0] > {pl[50:0],13'h1FFF}))
+// exc <= `FLT_STK;
+//else
+case(instr[`INSTRUCTION_OP])
+`R2:
+  case(instr[`INSTRUCTION_S2])
+  `ADD: exc = (fnOverflow(0,a[63],b[63],o[63]) & excen[0] & instr[24]) ? `FLT_OFL : `FLT_NONE;
+  `SUB: exc = (fnOverflow(1,a[63],b[63],o[63]) & excen[1] & instr[24]) ? `FLT_OFL : `FLT_NONE;
+// `ASL,`ASLI: exc <= (BIG & aslo & excen[2]) ? `FLT_OFL : `FLT_NONE;
+  // Signed multiply: the upper half must be a pure sign extension of the lower half.
+  `MUL,`MULSU: exc = prod[63] ? (prod[127:64] != 64'hFFFFFFFFFFFFFFFF && excen[3] ? `FLT_OFL : `FLT_NONE ):
+                                (prod[127:64] != 64'd0 && excen[3] ? `FLT_OFL : `FLT_NONE);
+  `FXMUL: exc = prod[95] ? (prod[127:96] != 32'hFFFFFFFF && excen[3] ? `FLT_OFL : `FLT_NONE ):
+                           (prod[127:96] != 32'd0 && excen[3] ? `FLT_OFL : `FLT_NONE);
+  `MULU: exc = prod[127:64] != 64'd0 && excen[3] ? `FLT_OFL : `FLT_NONE;
+  `DIV,`DIVSU,`DIVU: exc = BIG && excen[4] & divByZero ? `FLT_DBZ : `FLT_NONE;
+  `MOD,`MODSU,`MODU: exc = BIG && excen[4] & divByZero ? `FLT_DBZ : `FLT_NONE;
+  default: exc = `FLT_NONE;
+  endcase
+`MULI: exc = prod[63] ? (prod[127:64] != 64'hFFFFFFFFFFFFFFFF & excen[3] ? `FLT_OFL : `FLT_NONE):
+                        (prod[127:64] != 64'd0 & excen[3] ? `FLT_OFL : `FLT_NONE);
+`DIVI: exc = BIG & excen[4] & divByZero & instr[27] ? `FLT_DBZ : `FLT_NONE;
+`MODI: exc = BIG & excen[4] & divByZero & instr[27] ? `FLT_DBZ : `FLT_NONE;
+`CSRRW: exc = (instr[27:21]==7'b0011011) ? `FLT_SEG : `FLT_NONE;
+`MEMNDX:
+  begin
+`ifdef SUPPORT_SEGMENTATION
+  if ((usa < {lb[50:0],13'h0000} || usa > {ub[50:0],13'h1fff}) && dl!=2'b00)
+    exc = (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB;
+  else
+`endif
+`ifdef SUPPORT_BBMS
+  if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00)
+    exc = `FLT_STK;
+  else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00)
+    exc = `FLT_WRV;
+  else
+`endif
+  begin
+    if (instr[7:6]==2'b10) begin
+      if (instr[31])
+        case({instr[31:28],instr[17:16]})
+        `PUSH: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+        default: exc = `FLT_UNIMP;
+        endcase
+      else
+        exc = `FLT_UNIMP;
+    end
+    else if (instr[7:6]==2'b00) begin
+      if (!instr[31]) begin
+        // instr[31]==0 : decoded with instr[22:21] as the low function bits
+        if (BIG) begin
+          case({instr[31:28],instr[22:21]})
+          `LBX,`LBUX,`LVBX,`LVBUX: exc = `FLT_NONE;
+          `LCX,`LCUX,`LVCX,`LVCUX: exc = |o[ 0] ? `FLT_ALN : `FLT_NONE;
+          `LVHX,`LVHUX,`LHX,`LHUX: exc = |o[1:0] ? `FLT_ALN : `FLT_NONE;
+          `LWX,`LVWX,`LWRX,
+          `CACHEX,
+          `LVX,`SVX,`LVWS,`SVWS: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+          default: exc = `FLT_UNIMP;
+          endcase
+        end
+        else
+          exc = `FLT_UNIMP;
+      end
+      else begin
+        // instr[31]==1 : decoded with instr[17:16] as the low function bits
+        if (BIG) begin
+          case({instr[31:28],instr[17:16]})
+          `PUSH: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+          `SBX: exc = `FLT_NONE;
+          `SCX: exc = |o[ 0] ? `FLT_ALN : `FLT_NONE;
+          `SHX: exc = |o[1:0] ? `FLT_ALN : `FLT_NONE;
+          `SWX,`SWCX: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+          `SVX: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+          `SVWS: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+          default: exc = `FLT_UNIMP;
+          endcase
+        end
+        else
+          exc = `FLT_UNIMP;
+      end
+    end
+    else
+      exc = `FLT_UNIMP;
+  end
+  end
+// Byte accesses have no alignment requirement; only bounds faults apply.
+`LB,`LBU,`SB:
+  begin
+`ifdef SUPPORT_SEGMENTATION
+  if ((usa < {lb[50:0],13'h0000} || usa > {ub[50:0],13'h1fff}) && dl!=2'b00)
+    exc = (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB;
+  else
+`endif
+`ifdef SUPPORT_BBMS
+  if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00)
+    exc = `FLT_STK;
+  else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00)
+    exc = `FLT_WRV;
+  else
+`endif
+    exc = `FLT_NONE;
+  end
+`Lx,`Sx,`LxU,`LVx,`LVxU:
+  begin
+`ifdef SUPPORT_SEGMENTATION
+  if ((usa < {lb[50:0],13'h0000} || usa > {ub[50:0],13'h1fff}) && dl!=2'b00)
+    exc = (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB;
+  else
+`endif
+`ifdef SUPPORT_BBMS
+  if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00)
+    exc = `FLT_STK;
+  else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00)
+    exc = `FLT_WRV;
+  else
+`endif
+    // The low bits of the displacement b encode the access size.
+    casez(b[2:0])
+    3'b100: exc = |o[2:0] ? `FLT_ALN : `FLT_NONE; // LW / SW
+    3'b?10: exc = |o[1:0] ? `FLT_ALN : `FLT_NONE; // LH / LHU / SH
+    default: exc = |o[ 0] ? `FLT_ALN : `FLT_NONE; // LC / LCU / SC
+    endcase
+  end
+`LWR,`SWC,`CAS,`CACHE:
+  begin
+`ifdef SUPPORT_SEGMENTATION
+  if ((usa < {lb[50:0],13'h0000} || usa > {ub[50:0],13'h1fff}) && dl!=2'b00)
+    exc = (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB;
+  else
+`endif
+`ifdef SUPPORT_BBMS
+  if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00)
+    exc = `FLT_STK;
+  else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00)
+    exc = `FLT_SGB;
+  else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00)
+    exc = `FLT_WRV;
+  else
+`endif
+    exc = |o[2:0] ? `FLT_ALN : `FLT_NONE;
+  end
+default: exc = `FLT_NONE;
+endcase
+end
+
+// One-clock delayed copies of shfto and c.
+reg [63:0] aa;
+reg [63:0] bb;
+
+always @(posedge clk)
+begin
+  bb <= c;
+  aa <= shfto;
+end
+
+// Set-if-less-than, signed.
+//   instr : instruction word (not referenced in the body)
+//   sz    : operand size selector (only examined when SIMD is defined)
+//   a, b  : operands
+//   o     : result; 1 where a < b, else 0
+// With SIMD defined, sz 0..3 compare a single 8/16/32/64-bit value and
+// sz 4..6 compare every 8/16/32-bit lane independently, each lane's 0/1
+// result being zero-extended to the lane width. sz 7 is a 64-bit compare.
+// Lane results are placed in the lane they were computed from (the most
+// significant lane is listed first in each concatenation); previously the
+// concatenations listed lane 0 first, which reversed the lane order.
+task tskSlt;
+input [47:0] instr;
+input [2:0] sz;
+input [63:0] a;
+input [63:0] b;
+output [63:0] o;
+begin
+`ifdef SIMD
+  case(sz[2:0])
+  3'd0: o[63:0] = $signed(a[7:0]) < $signed(b[7:0]);
+  3'd1: o[63:0] = $signed(a[15:0]) < $signed(b[15:0]);
+  3'd2: o[63:0] = $signed(a[31:0]) < $signed(b[31:0]);
+  3'd3: o[63:0] = $signed(a) < $signed(b);
+  3'd4: o[63:0] = {
+    7'h0,$signed(a[63:56]) < $signed(b[63:56]),
+    7'h0,$signed(a[55:48]) < $signed(b[55:48]),
+    7'h0,$signed(a[47:40]) < $signed(b[47:40]),
+    7'h0,$signed(a[39:32]) < $signed(b[39:32]),
+    7'h0,$signed(a[31:24]) < $signed(b[31:24]),
+    7'h0,$signed(a[23:16]) < $signed(b[23:16]),
+    7'h0,$signed(a[15:8]) < $signed(b[15:8]),
+    7'h0,$signed(a[7:0]) < $signed(b[7:0])
+    };
+  3'd5: o[63:0] = {
+    15'h0,$signed(a[63:48]) < $signed(b[63:48]),
+    15'h0,$signed(a[47:32]) < $signed(b[47:32]),
+    15'h0,$signed(a[31:16]) < $signed(b[31:16]),
+    15'h0,$signed(a[15:0]) < $signed(b[15:0])
+    };
+  3'd6: o[63:0] = {
+    31'h0,$signed(a[63:32]) < $signed(b[63:32]),
+    31'h0,$signed(a[31:0]) < $signed(b[31:0])
+    };
+  3'd7: o[63:0] = $signed(a[63:0]) < $signed(b[63:0]);
+  endcase
+`else
+  o[63:0] = $signed(a[63:0]) < $signed(b[63:0]);
+`endif
+end
+endtask
+
+// Set-if-less-than-or-equal, signed.
+//   instr : instruction word (not referenced in the body)
+//   sz    : operand size selector (only examined when SIMD is defined)
+//   a, b  : operands
+//   o     : result; 1 where a <= b, else 0
+// With SIMD defined, sz 0..3 compare a single 8/16/32/64-bit value and
+// sz 4..6 compare every 8/16/32-bit lane independently, each lane's 0/1
+// result being zero-extended to the lane width. sz 7 is a 64-bit compare.
+// Lane results are placed in the lane they were computed from (the most
+// significant lane is listed first in each concatenation); previously the
+// concatenations listed lane 0 first, which reversed the lane order.
+task tskSle;
+input [47:0] instr;
+input [2:0] sz;
+input [63:0] a;
+input [63:0] b;
+output [63:0] o;
+begin
+`ifdef SIMD
+  case(sz[2:0])
+  3'd0: o[63:0] = $signed(a[7:0]) <= $signed(b[7:0]);
+  3'd1: o[63:0] = $signed(a[15:0]) <= $signed(b[15:0]);
+  3'd2: o[63:0] = $signed(a[31:0]) <= $signed(b[31:0]);
+  3'd3: o[63:0] = $signed(a) <= $signed(b);
+  3'd4: o[63:0] = {
+    7'h0,$signed(a[63:56]) <= $signed(b[63:56]),
+    7'h0,$signed(a[55:48]) <= $signed(b[55:48]),
+    7'h0,$signed(a[47:40]) <= $signed(b[47:40]),
+    7'h0,$signed(a[39:32]) <= $signed(b[39:32]),
+    7'h0,$signed(a[31:24]) <= $signed(b[31:24]),
+    7'h0,$signed(a[23:16]) <= $signed(b[23:16]),
+    7'h0,$signed(a[15:8]) <= $signed(b[15:8]),
+    7'h0,$signed(a[7:0]) <= $signed(b[7:0])
+    };
+  3'd5: o[63:0] = {
+    15'h0,$signed(a[63:48]) <= $signed(b[63:48]),
+    15'h0,$signed(a[47:32]) <= $signed(b[47:32]),
+    15'h0,$signed(a[31:16]) <= $signed(b[31:16]),
+    15'h0,$signed(a[15:0]) <= $signed(b[15:0])
+    };
+  3'd6: o[63:0] = {
+    31'h0,$signed(a[63:32]) <= $signed(b[63:32]),
+    31'h0,$signed(a[31:0]) <= $signed(b[31:0])
+    };
+  3'd7: o[63:0] = $signed(a[63:0]) <= $signed(b[63:0]);
+  endcase
+`else
+  o[63:0] = $signed(a[63:0]) <= $signed(b[63:0]);
+`endif
+end
+endtask
+
+// Set-if-less-than, unsigned.
+//   instr : instruction word (not referenced in the body)
+//   sz    : operand size selector (only examined when SIMD is defined)
+//   a, b  : operands
+//   o     : result; 1 where a < b, else 0
+// With SIMD defined, this task treats sz 0/4, 1/5 and 2/6 alike as
+// per-lane 8/16/32-bit compares (each lane's 0/1 result zero-extended to
+// the lane width), and sz 3/7 as a single 64-bit compare.
+// NOTE(review): the sibling compare tasks treat sz 0..2 as single narrow
+// compares rather than per-lane compares - confirm which is intended.
+// Lane results are placed in the lane they were computed from (the most
+// significant lane is listed first in each concatenation); previously the
+// concatenations listed lane 0 first, which reversed the lane order.
+task tskSltu;
+input [47:0] instr;
+input [2:0] sz;
+input [63:0] a;
+input [63:0] b;
+output [63:0] o;
+begin
+`ifdef SIMD
+  case(sz[2:0])
+  3'd4,3'd0: o = {
+    7'h0,(a[63:56]) < (b[63:56]),
+    7'h0,(a[55:48]) < (b[55:48]),
+    7'h0,(a[47:40]) < (b[47:40]),
+    7'h0,(a[39:32]) < (b[39:32]),
+    7'h0,(a[31:24]) < (b[31:24]),
+    7'h0,(a[23:16]) < (b[23:16]),
+    7'h0,(a[15:8]) < (b[15:8]),
+    7'h0,(a[7:0]) < (b[7:0])
+    };
+  3'd5,3'd1: o = {
+    15'h0,(a[63:48]) < (b[63:48]),
+    15'h0,(a[47:32]) < (b[47:32]),
+    15'h0,(a[31:16]) < (b[31:16]),
+    15'h0,(a[15:0]) < (b[15:0])
+    };
+  3'd6,3'd2: o = {
+    31'h0,(a[63:32]) < (b[63:32]),
+    31'h0,(a[31:0]) < (b[31:0])
+    };
+  3'd7,3'd3: o = (a[63:0]) < (b[63:0]);
+  endcase
+`else
+  o = (a) < (b);
+`endif
+end
+endtask
+
+// Set-if-less-than-or-equal, unsigned.
+//   instr : instruction word (not referenced in the body)
+//   sz    : operand size selector (only examined when SIMD is defined)
+//   a, b  : operands
+//   o     : result; 1 where a <= b, else 0
+// With SIMD defined, sz 0..3 compare a single 8/16/32/64-bit value and
+// sz 4..6 compare every 8/16/32-bit lane independently, each lane's 0/1
+// result being zero-extended to the lane width. sz 7 is a 64-bit compare.
+// Lane results are placed in the lane they were computed from (the most
+// significant lane is listed first in each concatenation); previously the
+// concatenations listed lane 0 first, which reversed the lane order.
+task tskSleu;
+input [47:0] instr;
+input [2:0] sz;
+input [63:0] a;
+input [63:0] b;
+output [63:0] o;
+begin
+`ifdef SIMD
+  case(sz[2:0])
+  3'd0: o[63:0] = (a[7:0]) <= (b[7:0]);
+  3'd1: o[63:0] = (a[15:0]) <= (b[15:0]);
+  3'd2: o[63:0] = (a[31:0]) <= (b[31:0]);
+  3'd3: o[63:0] = (a) <= (b);
+  3'd4: o[63:0] = {
+    7'h0,(a[63:56]) <= (b[63:56]),
+    7'h0,(a[55:48]) <= (b[55:48]),
+    7'h0,(a[47:40]) <= (b[47:40]),
+    7'h0,(a[39:32]) <= (b[39:32]),
+    7'h0,(a[31:24]) <= (b[31:24]),
+    7'h0,(a[23:16]) <= (b[23:16]),
+    7'h0,(a[15:8]) <= (b[15:8]),
+    7'h0,(a[7:0]) <= (b[7:0])
+    };
+  3'd5: o[63:0] = {
+    15'h0,(a[63:48]) <= (b[63:48]),
+    15'h0,(a[47:32]) <= (b[47:32]),
+    15'h0,(a[31:16]) <= (b[31:16]),
+    15'h0,(a[15:0]) <= (b[15:0])
+    };
+  3'd6: o[63:0] = {
+    31'h0,(a[63:32]) <= (b[63:32]),
+    31'h0,(a[31:0]) <= (b[31:0])
+    };
+  3'd7: o[63:0] = (a[63:0]) <= (b[63:0]);
+  endcase
+`else
+  o[63:0] = (a[63:0]) <= (b[63:0]);
+`endif
+end
+endtask
+
+endmodule
Index: FT64v7/rtl/common/FT64_bitfield.v
===================================================================
--- FT64v7/rtl/common/FT64_bitfield.v (nonexistent)
+++ FT64v7/rtl/common/FT64_bitfield.v (revision 60)
@@ -0,0 +1,110 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_bitfield.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+// Bitfield operation codes (taken from inst[31:28]). Only defined here
+// when an including file has not already defined them.
+`ifndef BFSET
+`define BFSET 4'd0
+`define BFCLR 4'd1
+`define BFCHG 4'd2
+`define BFINS 4'd3
+`define BFINSI 4'd4
+`define BFEXT 4'd5
+`define BFEXTU 4'd6
+`define BFFFO 4'd8
+`endif
+
+// Bitfield unit.
+//   inst  : instruction word; supplies the operation, and either the
+//           immediate field offset/width or flags selecting a/b instead
+//   a     : field offset when inst[30] is set; also the BFINS source value
+//   b     : field width-1 when inst[31] is set; also the BFINS/BFINSI
+//           background value
+//   c     : data operand when inst[32] is set
+//   o     : operation result
+//   masko : the generated field mask (ones from bit mb through bit me)
+module FT64_bitfield(inst, a, b, c, o, masko);
+parameter DWIDTH=64;
+input [47:0] inst;
+input [DWIDTH-1:0] a;
+input [DWIDTH-1:0] b;
+input [DWIDTH-1:0] c;
+output [DWIDTH-1:0] o;
+reg [DWIDTH-1:0] o;
+output [DWIDTH-1:0] masko;
+
+reg [DWIDTH-1:0] o1;   // data operand with everything outside the field cleared
+reg [DWIDTH-1:0] o2;   // shifted intermediate used by insert / signed extract
+wire [6:0] ffoo;       // find-first-one result computed from o1
+
+// generate mask
+reg [DWIDTH-1:0] mask;
+assign masko = mask;
+wire [3:0] op = inst[31:28];
+wire [5:0] mb = inst[30] ? a[5:0] : {inst[28],inst[12:8]};
+wire [5:0] mw = inst[31] ? b[5:0] : {inst[29],inst[17:13]};
+wire [63:0] da = inst[32] ? c : {inst[43:33],inst[22:18]};
+wire [5:0] me = mb + mw;
+wire [5:0] ml = mw; // mask length-1
+wire [63:0] imm = {59'd0,inst[10:6]};
+
+integer nn,n;
+// A bit is in the mask when it lies between mb and me. The three-way XOR
+// also covers the wrapped case (me < mb), where the mask is the bits at
+// or above mb together with the bits at or below me.
+always @*
+  for (nn = 0; nn < DWIDTH; nn = nn + 1)
+    mask[nn] = (nn >= mb) ^ (nn <= me) ^ (me >= mb);
+
+ffo96 u1 ({32'h0,o1},ffoo);
+
+// Result selection. The implicit sensitivity list makes the block follow
+// ffoo (needed by BFFFO; it was missing from the earlier explicit list),
+// and o1/o2 receive a value on every pass so no latch is inferred.
+always @*
+begin
+  // Isolated field: used by the extract and find-first-one operations.
+  for (n = 0; n < DWIDTH; n = n + 1)
+    o1[n] = mask[n] ? da[n] : 1'b0;
+  o2 = {DWIDTH{1'b0}};
+  case (op)
+  // ToDo: Fix bitfield inserts
+  `BFINS: begin
+      o2 = a << mb;
+      for (n = 0; n < DWIDTH; n = n + 1) o[n] = (mask[n] ? o2[n] : b[n]);
+    end
+  `BFINSI: begin
+      o2 = imm << mb;
+      for (n = 0; n < DWIDTH; n = n + 1) o[n] = (mask[n] ? o2[n] : b[n]);
+    end
+  `BFSET: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? 1'b1 : da[n]; end
+  `BFCLR: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? 1'b0 : da[n]; end
+  `BFCHG: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? ~da[n] : da[n]; end
+  `BFEXTU: begin
+      // zero-extended extract: right-justify the isolated field
+      o = o1 >> mb;
+    end
+  `BFEXT: begin
+      // sign-extended extract: replicate the field's top bit (index ml)
+      o2 = o1 >> mb;
+      for (n = 0; n < DWIDTH; n = n + 1)
+        o[n] = n > ml ? o2[ml] : o2[n];
+    end
+  `BFFFO:
+    begin
+      o = (ffoo==7'd127) ? -64'd1 : ffoo; // ffoo returns 127 if no one was found
+    end
+`ifdef I_SEXT
+  `SEXT: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? da[mb] : da[n]; end
+`endif
+  default: o = {DWIDTH{1'b0}};
+  endcase
+end
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_config.vh
===================================================================
--- FT64v7/rtl/common/FT64_config.vh (nonexistent)
+++ FT64v7/rtl/common/FT64_config.vh (revision 60)
@@ -0,0 +1,119 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_config.vh
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+// The following line is to enable simulation versions of some modules.
+// Comment out for synthesis.
+//`define SIM 1'b1
+
+// Optional core features. Each is enabled by uncommenting its define.
+//`define SUPPORT_SMT 1'b1
+//`define SUPPORT_VECTOR 1'b1
+//`define SUPPORT_DCI 1'b1 // dynamically compressed instructions
+//`define SUPPORT_BBMS 1'b1
+//`define SUPPORT_SEGMENTATION 1'b1
+//`define SUPPORT_PREDICATION 1'b1
+//`define DEBUG_LOGIC 1'b1
+`define L1_ICACHE_SIZE 2 // 2 or 4 for 2 or 4 kB
+
+// One way to tweak the size of the core a little bit is to limit the number
+// of address bits processed. The test system for instance has only 512MB of
+// memory, so the address size is limited to 32 bits.
+// ** The ASID is stored in the upper 8 bits of the address
+`define AMSB 63
+`define ABITS `AMSB:0
+
+
+// Bitfield representing a queue entry index. The field must be large
+// enough to accommodate a queue entry number, determined by the number
+// of queue entries below.
+`define QBIT 4
+`define QBITS 3:0
+`define QBITSP1 4:0
+
+// The following bitfield spec is for the instruction sequence number. It
+// must have at least one more bit in it than the QBITS above as the counter
+// can overflow a little bit.
+`define SNBITS 4:0
+
+// If set greater than 10, then memory instructions won't
+// issue until they are within 10 of the head of the queue.
+`define QENTRIES 4
+
+// Bitfield for representing exception codes
+`define XBITS 7:0
+
+//`define SUPPORT_DBG 1'b1
+
+// Issue logic is not really required for every possible distance from
+// the head of the queue. Later queue entries tend to depend on prior
+// ones and hence may not be ready to be issued. Also note that
+// instruction decode takes a cycle making the last entry or two in the
+// queue not ready to be issued. Commenting out this line will limit
+// much of the issue logic to the first six queue slots relative to the
+// head of the queue.
+`define FULL_ISSUE_LOGIC 1'b1
+
+// The WAYS config define affects things like the number of ports on the
+// register file, the number of ports on the instruction cache, and how
+// many entries are contained in the fetch buffers. It also indirectly
+// affects how many instructions are queued.
+`define WAYS 1 // number of ways parallel (1-3 3 not working yet)
+`define NUM_IDU 1 // number of instruction decode units (1-3)
+`define NUM_ALU 1 // number of ALU's (1-2)
+`define NUM_MEM 1 // number of memory queues (1-3)
+`define NUM_FPU 0 // number of floating-point units (0-2)
+// Note that even with just a single commit bus, multiple instructions may
+// commit if they do not target any registers. Up to three instructions may
+// commit even with just a single bus.
+`define NUM_CMT 1 // number of commit busses (1-3)
+// Comment out the following to remove FCU enhancements (branch predictor, BTB, RSB)
+//`define FCU_ENH 1
+// Comment out the following to remove bypassing logic on the functional units
+//`define FU_BYPASS 1
+
+//`define SUPPORT_TLB 1
+
+// These are unit availability settings at reset.
+// NOTE(review): several units are marked available here (ID2, ALU1, FPU1,
+// MEM2) although the NUM_* counts above configure fewer of them - confirm
+// the core ignores availability bits for units that are not instantiated.
+`define ID1_AVAIL 1'b1
+`define ID2_AVAIL 1'b1
+`define ID3_AVAIL 1'b0
+`define ALU0_AVAIL 1'b1
+`define ALU1_AVAIL 1'b1
+`define FPU1_AVAIL 1'b1
+`define FPU2_AVAIL 1'b0
+`define MEM1_AVAIL 1'b1
+`define MEM2_AVAIL 1'b1
+`define MEM3_AVAIL 1'b0
+`define FCU_AVAIL 1'b1
+
+// Comment out to remove the write buffer from the core.
+`define HAS_WB 1'b1
+`define WB_DEPTH 5 // must be one more than desired depth
+
+// Uncomment to allow SIMD operations
+//`define SIMD 1'b1
+
+// Comment the following to disable registering the output of instruction decoders.
+// Inline decoding should not be registered.
+//`define REGISTER_DECODE 1'b1
+`define INLINE_DECODE 1'b1
Index: FT64v7/rtl/common/FT64_dcache.v
===================================================================
--- FT64v7/rtl/common/FT64_dcache.v (nonexistent)
+++ FT64v7/rtl/common/FT64_dcache.v (revision 60)
@@ -0,0 +1,202 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_dcache.v
+// - a simple direct mapped cache
+// - three cycle latency
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+// Data cache top level: a 256-bit-per-line data memory with per-byte valid
+// bits (dcache_mem) paired with a tag memory (FT64_dcache_tag).
+//   write side (wclk): wr/sel/wadr/i store up to eight bytes; whit reports
+//                      the tag compare for wadr
+//   read side  (rclk): radr/rdsize select the data; o is the addressed
+//                      64-bit word, lo the whole line, and rhit is the tag
+//                      compare qualified by the valid bits of the bytes read
+// The li port is declared but not referenced inside this module.
+module FT64_dcache(rst, dce, wclk, wr, sel, wadr, whit, i, li, rclk, rdsize, radr, o, lo, rhit);
+input rst;
+input dce; // data cache enable
+input wclk;
+input wr;
+input [7:0] sel;
+input [37:0] wadr;
+output whit;
+input [63:0] i;
+input [255:0] li; // line input
+input rclk;
+input [2:0] rdsize;
+input [37:0] radr;
+output reg [63:0] o;
+output reg [255:0] lo; // line out
+output reg rhit;
+parameter byt = 3'd0;
+parameter wyde = 3'd1;
+parameter tetra = 3'd2;
+parameter octa = 3'd3;
+
+wire [255:0] lineData;   // line read from the data memory
+wire [31:0] lineValid;   // one valid bit per byte of that line
+wire tagHit;             // read-side tag compare result
+
+dcache_mem u1 (
+  .rst(rst),
+  .clka(wclk),
+  .ena(dce & wr),
+  .wea(sel),
+  .addra(wadr[13:0]),
+  .dina(i),
+  .clkb(rclk),
+  .enb(dce),
+  .addrb(radr[13:0]),
+  .doutb(lineData),
+  .ov(lineValid)
+);
+
+// The tag entry is written only when the write targets the last 64-bit
+// word of a line (wadr[4:3]==2'b11).
+FT64_dcache_tag u3
+(
+  .wclk(wclk),
+  .dce(dce),
+  .wr(wr && wadr[4:3]==2'b11),
+  .wadr(wadr),
+  .rclk(rclk),
+  .radr(radr),
+  .whit(whit),
+  .rhit(tagHit)
+);
+
+// Valid bits of the eight bytes starting at the addressed byte.
+wire [7:0] validWin = lineValid >> radr[4:0];
+
+// All bytes covered by the requested access size must be valid.
+reg bytesValid;
+always @*
+  case(rdsize)
+  byt:     bytesValid = validWin[0];
+  wyde:    bytesValid = &validWin[1:0];
+  tetra:   bytesValid = &validWin[3:0];
+  default: bytesValid = &validWin[7:0];
+  endcase
+
+always @(posedge rclk)
+  rhit <= tagHit & bytesValid;
+
+// hit is also delayed by a clock already
+always @(posedge rclk)
+begin
+  o <= lineData >> {radr[4:3],6'b0};   // select the addressed 64-bit word
+  lo <= lineData;
+end
+
+endmodule
+
+// -----------------------------------------------------------------------------
+// -----------------------------------------------------------------------------
+
+// Data cache storage: 512 lines of 256 bits, plus one valid bit per byte.
+//   port A (clka): byte-enabled write of one 64-bit word. addra[13:5]
+//                  selects the line, addra[4:3] the word within it, and
+//                  wea[n] enables byte n. Each byte written also has its
+//                  valid bit set.
+//   port B (clkb): read of a whole line and its valid bits, passed through
+//                  two register stages (two-clock latency when enb is held).
+// Valid bits are cleared only by the initial block; the rst input is not
+// referenced in this module.
+module dcache_mem(rst, clka, ena, wea, addra, dina, clkb, enb, addrb, doutb, ov);
+input rst;
+input clka;
+input ena;
+input [7:0] wea;
+input [13:0] addra;
+input [63:0] dina;
+input clkb;
+input enb;
+input [13:0] addrb;
+output reg [255:0] doutb;
+output reg [31:0] ov;
+
+reg [255:0] mem [0:511];
+reg [31:0] valid [0:511];
+reg [255:0] doutb1;
+reg [31:0] ov1;
+
+integer n;
+
+initial begin
+  for (n = 0; n < 512; n = n + 1)
+    valid[n] = 32'h00;
+end
+
+// One write process per 64-bit word of the line. The loop body is a named
+// generate block, as required for a generate-for in Verilog-2001; the
+// bare "generate begin ... end" wrapper is not accepted by
+// IEEE 1364-2005 tools.
+genvar g;
+generate
+for (g = 0; g < 4; g = g + 1)
+begin : gWord
+always @(posedge clka)
+begin
+  if (ena & wea[0] & addra[4:3]==g) mem[addra[13:5]][g*64+7:g*64] <= dina[7:0];
+  if (ena & wea[1] & addra[4:3]==g) mem[addra[13:5]][g*64+15:g*64+8] <= dina[15:8];
+  if (ena & wea[2] & addra[4:3]==g) mem[addra[13:5]][g*64+23:g*64+16] <= dina[23:16];
+  if (ena & wea[3] & addra[4:3]==g) mem[addra[13:5]][g*64+31:g*64+24] <= dina[31:24];
+  if (ena & wea[4] & addra[4:3]==g) mem[addra[13:5]][g*64+39:g*64+32] <= dina[39:32];
+  if (ena & wea[5] & addra[4:3]==g) mem[addra[13:5]][g*64+47:g*64+40] <= dina[47:40];
+  if (ena & wea[6] & addra[4:3]==g) mem[addra[13:5]][g*64+55:g*64+48] <= dina[55:48];
+  if (ena & wea[7] & addra[4:3]==g) mem[addra[13:5]][g*64+63:g*64+56] <= dina[63:56];
+  if (ena & wea[0] & addra[4:3]==g) valid[addra[13:5]][g*8] <= 1'b1;
+  if (ena & wea[1] & addra[4:3]==g) valid[addra[13:5]][g*8+1] <= 1'b1;
+  if (ena & wea[2] & addra[4:3]==g) valid[addra[13:5]][g*8+2] <= 1'b1;
+  if (ena & wea[3] & addra[4:3]==g) valid[addra[13:5]][g*8+3] <= 1'b1;
+  if (ena & wea[4] & addra[4:3]==g) valid[addra[13:5]][g*8+4] <= 1'b1;
+  if (ena & wea[5] & addra[4:3]==g) valid[addra[13:5]][g*8+5] <= 1'b1;
+  if (ena & wea[6] & addra[4:3]==g) valid[addra[13:5]][g*8+6] <= 1'b1;
+  if (ena & wea[7] & addra[4:3]==g) valid[addra[13:5]][g*8+7] <= 1'b1;
+end
+end
+endgenerate
+
+// Read path, stage 1: fetch the line and its valid bits.
+always @(posedge clkb)
+  if (enb)
+    doutb1 <= mem[addrb[13:5]];
+always @(posedge clkb)
+  if (enb)
+    ov1 <= valid[addrb[13:5]];
+
+// Read path, stage 2: output registers.
+always @(posedge clkb)
+  if (enb)
+    doutb <= doutb1;
+always @(posedge clkb)
+  if (enb)
+    ov <= ov1;
+endmodule
+
+// -----------------------------------------------------------------------------
+// -----------------------------------------------------------------------------
+
+// Data cache tag store and comparators.
+// Address bits [13:5] index the tag memory and bits [37:14] form the tag.
+// Port A of the tag memory serves the write side (written when wr is
+// high, and read back for the write-hit compare); port B serves the read
+// side and is never written (web tied low).
+// whit and rhit are the registered results of comparing the stored tag
+// with the tag bits of wadr and radr respectively.
+module FT64_dcache_tag(wclk, dce, wr, wadr, rclk, radr, whit, rhit);
+input wclk;
+input dce; // data cache enable
+input wr;
+input [37:0] wadr;
+input rclk;
+input [37:0] radr;
+output reg whit; // write hit
+output reg rhit; // read hit
+
+wire [31:0] wrTag;   // tag read through port A (write address)
+wire [31:0] rdTag;   // tag read through port B (read address)
+
+FT64_dcache_tag2 u1 (
+  // port A: write side
+  .clka(wclk),
+  .ena(dce),
+  .wea(wr),
+  .addra(wadr[13:5]),
+  .dina(wadr[37:14]),
+  .douta(wrTag),
+  // port B: read side
+  .clkb(rclk),
+  .enb(dce),
+  .web(1'b0),
+  .addrb(radr[13:5]),
+  .dinb(32'd0),
+  .doutb(rdTag)
+);
+
+always @(posedge wclk)
+  whit <= (wadr[37:14] == wrTag[23:0]);
+
+always @(posedge rclk)
+  rhit <= (radr[37:14] == rdTag[23:0]);
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_defines.vh
===================================================================
--- FT64v7/rtl/common/FT64_defines.vh (nonexistent)
+++ FT64v7/rtl/common/FT64_defines.vh (revision 60)
@@ -0,0 +1,555 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_defines.vh
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+// Single-bit logic constants.
+`define HIGH 1'b1
+`define LOW 1'b0
+`define TRUE 1'b1
+`define FALSE 1'b0
+//`define Q2VECTORS 1'b1
+
+// 64-bit zero constant.
+`define ZERO 64'd0
+
+// 6-bit codes. Several values below are reused by unrelated names (e.g. BRK
+// and VCMPRSS are both 6'h00), so these macros are only meaningful within
+// the decode context they belong to.
+`define BRK 6'h00
+`define FVECTOR 6'h01
+// V* codes: presumably the function field of vector instructions -- TODO confirm
+// against the decoder.
+`define VCMPRSS 6'h00
+`define VCIDX 6'h01
+`define VSCAN 6'h02
+`define VABS 6'h03
+`define VADD 6'h04
+`define VSUB 6'h05
+`define VSxx 6'h06
+// 3-bit codes; presumably the condition selector of the VSxx (vector set)
+// family. Note there is no code 3'd6.
+`define VSEQ 3'd0
+`define VSNE 3'd1
+`define VSLT 3'd2
+`define VSGE 3'd3
+`define VSLE 3'd4
+`define VSGT 3'd5
+`define VSUN 3'd7
+`define VSxxS 6'h07
+`define VAND 6'h08
+`define VOR 6'h09
+`define VXOR 6'h0A
+`define VXCHG 6'h0B
+`define VSHL 6'h0C
+`define VSHR 6'h0D
+`define VASR 6'h0E
+`define VSxxSb 6'h0F
+`define VSHLV 6'h10
+`define VSHRV 6'h11
+`define VROLV 6'h12
+`define VRORV 6'h13
+`define VADDS 6'h14
+`define VSUBS 6'h15
+`define VSxxSU 6'h17
+`define VANDS 6'h18
+`define VORS 6'h19
+`define VXORS 6'h1A
+`define VSxxSUb 6'h1F
+`define VBITS2V 6'h20
+`define V2BITS 6'h21
+`define VEINS 6'h22
+`define VEX 6'h23
+`define VFLT2INT 6'h24
+`define VINT2FLT 6'h25
+`define VSIGN 6'h26
+`define VSxxU 6'h27
+`define VCNTPOP 6'h28
+`define VMULS 6'h2A
+`define VDIVS 6'h2E
+`define VSxxUb 6'h2F
+`define VMxx 6'h30
+// 3-bit codes; presumably the sub-operation of VMxx (vector mask ops).
+`define VMAND 3'h0
+`define VMOR 3'h1
+`define VMXOR 3'h2
+`define VMXNOR 3'h3
+`define VMPOP 3'h4
+`define VMFILL 3'h5
+`define VMFIRST 3'h6
+`define VMLAST 3'h7
+`define VMUL 6'h3A
+`define VDIV 6'h3E
+`define VSxxb 6'h3F
+// R2 and RR are two names for the same 6-bit value (6'h02); PCRELX below
+// also shares it.
+`define R2 6'h02
+`define RR 6'h02
+`define BCD 6'h00
+// 5-bit codes; presumably the BCD sub-operation.
+`define BCDADD 5'h00
+`define BCDSUB 5'h01
+`define BCDMUL 5'h02
+`define PCRELX 6'h02
+`define AUIPC 6'h03
+// 4-bit shift/rotate codes. Each *I name is its base code with bit 3 set
+// (SHL=0 / SHLI=8, ... ROR=5 / RORI=D); presumably the immediate-count form.
+`define SHL 4'h0
+`define SHR 4'h1
+`define ASL 4'h2
+`define ASR 4'h3
+`define ROL 4'h4
+`define ROR 4'h5
+`define SHLI 4'h8
+`define SHRI 4'h9
+`define ASLI 4'hA
+`define ASRI 4'hB
+`define ROLI 4'hC
+`define RORI 4'hD
+// Register / Miscellaneous (01) Ops
+// R1 is a 6-bit code; the names that follow it are 5-bit codes.
+`define R1 6'h01
+`define CNTLZ 5'h00
+`define CNTLO 5'h01
+`define CNTPOP 5'h02
+`define COM 5'h03
+`define ABS 5'h04
+`define NOT 5'h05
+`define REDOR 5'h06
+`define NEG 5'h07
+`define ZXH 5'h08
+`define ZXC 5'h09
+`define ZXB 5'h0A
+`define MEMDB 5'h10
+`define MEMSB 5'h11
+`define SYNC 5'h12
+`define CHAIN_OFF 5'h14
+`define CHAIN_ON 5'h15
+`define SETWB 5'h16
+`define SXH 5'h18
+`define SXC 5'h19
+`define SXB 5'h1A
+// Register-Register (02) Ops
+// 6-bit codes.
+`define RTOP 6'h00
+`define BMM 6'h03
+`define ADD 6'h04
+`define SUB 6'h05
+`define SLT 6'h06
+`define SLTU 6'h07
+`define AND 6'h08
+`define OR 6'h09
+`define XOR 6'h0A
+`define NAND 6'h0C
+`define NOR 6'h0D
+`define XNOR 6'h0E
+`define SHIFT31 6'h0F
+`define CMP 6'h12
+`define MODU 6'h14
+`define MODSU 6'h15
+`define MOD 6'h16
+`define LEAX 6'h18
+`define MUX 6'h1B
+`define SHIFT63 6'h1F
+// MOV carries a '?' wildcard in bit 0, so it only matches as intended inside
+// casez/casex items; an == comparison against it yields x.
+`define MOV 6'b01001?
+`define MULUH 6'h24
+`define MULSUH 6'h25
+`define MULH 6'h26
+// SLE/SLEU use the same values (6'h28, 6'h29) as CMOVEZ/CMOVNZ below.
+`define SLE 6'h28
+`define SLEU 6'h29
+`define MULF 6'h2A
+// The following two instructions are 48 bit ops
+`define CMOVEZ 6'h28
+`define CMOVNZ 6'h29
+`define MIN 6'h2C
+`define MAX 6'h2D
+`define MAJ 6'h2E
+`define SHIFTR 6'h2F
+`define SEI 6'h30
+`define WAIT 6'h31
+// RTI and RTE are two names for the same value (6'h32).
+`define RTI 6'h32
+`define RTE 6'h32
+`define VMOV 6'h33
+`define MULU 6'h38
+`define MULSU 6'h39
+`define MUL 6'h3A
+`define FXMUL 6'h3B
+`define DIVU 6'h3C
+`define DIVSU 6'h3D
+`define DIV 6'h3E
+`define SHIFTH 6'h3F
+// Root Level Ops
+// 6-bit codes unless a narrower width is written.
+`define ADDI 6'h04
+`define CSRRW 6'h05
+`define SLTI 6'h06
+`define SLTUI 6'h07
+`define ANDI 6'h08
+`define ORI 6'h09
+`define XORI 6'h0A
+`define EXEC 6'h0B
+`define REX 6'h0D
+`define XNORI 6'h0E
+`define FLOAT 6'h0F
+`define LDCS 6'h10
+`define LVxU 6'h11
+`define CMPI 6'h12
+`define LB 6'h13
+`define SB 6'h15
+`define MEMNDX 6'h16
+// The codes from LVBX to SVX restart at 6'h00; presumably they are the
+// function field under MEMNDX (indexed memory ops) -- TODO confirm.
+`define LVBX 6'h00
+`define LVBUX 6'h01
+`define LVCX 6'h02
+`define LVCUX 6'h03
+`define LVHX 6'h04
+`define LVHUX 6'h05
+`define LVWX 6'h06
+`define LCX 6'h08
+`define LCUX 6'h09
+`define LBUX 6'h0A
+`define LHX 6'h10
+`define LHUX 6'h11
+`define LWX 6'h12
+`define LBX 6'h13
+`define LWRX 6'h14
+`define LVWS 6'h18
+`define LVX 6'h19
+`define CACHEX 6'h1E
+`define SHX 6'h21
+`define SBX 6'h20
+`define SWX 6'h22
+`define SWCX 6'h23
+`define SCX 6'h24
+`define CASX 6'h25
+`define SVWS 6'h27
+`define INCX 6'h2A
+`define PUSH 6'h33
+`define SVX 6'h37
+// The numbering resumes at 6'h17, following MEMNDX (6'h16).
+`define SWC 6'h17
+`define JAL 6'h18
+`define CALL 6'h19
+`define INC 6'h1A
+`define LFx 6'h1B
+`define SGTUI 6'h1C
+`define LWR 6'h1D
+`define CACHE 6'h1E
+`define Lx 6'h20
+`define LxU 6'h21
+`define BITFIELD 6'h22
+// 4-bit code; presumably a BITFIELD sub-operation.
+`define BFINSI 4'h4
+`define LBU 6'h23
+`define Sx 6'h24
+`define CAS 6'h25
+`define BBc 6'h26
+// 2-bit codes; presumably selectors within the BBc group.
+`define IBNE 2'd2
+`define DBNZ 2'd3
+`define LUI 6'h27
+`define JMP 6'h28
+`define RET 6'h29
+`define MULFI 6'h2A
+`define SFx 6'h2B
+`define SGTI 6'h2C
+`define CMPRSSD 6'h2D
+`define MODI 6'h2E
+`define AMO 6'h2F
+// AMO_* codes are written 6 bits wide. FT64_AMO_alu extracts a 5-bit field
+// (instr[30:26]); all values here fit in 5 bits.
+`define AMO_SWAP 6'h00
+`define AMO_ADD 6'h04
+`define AMO_AND 6'h08
+`define AMO_OR 6'h09
+`define AMO_XOR 6'h0A
+`define AMO_SHL 6'h0C
+`define AMO_SHR 6'h0D
+`define AMO_MIN 6'h1C
+`define AMO_MAX 6'h1D
+`define AMO_MINU 6'h1E
+`define AMO_MAXU 6'h1F
+`define Bcc 6'h30
+// 3-bit codes; presumably the Bcc branch condition. Codes 4 and 5 are not
+// defined here.
+`define BEQ 3'd0
+`define BNE 3'd1
+`define BLT 3'd2
+`define BGE 3'd3
+`define BLTU 3'd6
+`define BGEU 3'd7
+`define IVECTOR 6'h31
+`define BEQI 6'h32
+`define BCHK 6'h33
+`define CHK 6'h34
+`define LV 6'h36
+`define SV 6'h37
+`define MULUI 6'h38
+`define MULSUI 6'h39
+`define MULI 6'h3A
+`define LVx 6'h3B
+`define DIVUI 6'h3C
+`define NOP 6'h3D
+`define DIVI 6'h3E
+
+// 6-bit F* codes; presumably function codes under FLOAT -- TODO confirm.
+`define FMOV 6'h10
+`define FTOI 6'h12
+`define ITOF 6'h13
+`define FNEG 6'h14
+`define FABS 6'h15
+`define FSIGN 6'h16
+`define FMAN 6'h17
+`define FNABS 6'h18
+`define FCVTSD 6'h19
+`define FCVTSQ 6'h1B
+`define FSTAT 6'h1C
+`define FTX 6'h20
+`define FCX 6'h21
+`define FEX 6'h22
+`define FDX 6'h23
+`define FRM 6'h24
+`define FCVTDS 6'h29
+`define FSYNC 6'h36
+
+`define FADD 6'h04
+`define FSUB 6'h05
+`define FCMP 6'h06
+`define FMUL 6'h08
+`define FDIV 6'h09
+
+`define EXR 8'h7F
+
+// NOP_INSN is 48 bits wide: 42 zero bits above the 6-bit NOP code.
+`define NOP_INSN {42'd0,`NOP}
+// 16-bit constants; by name presumably the instruction patterns injected for
+// the EXF / IBE / TLB faults -- TODO confirm.
+`define INSN_FLT_EXF 16'h1180
+`define INSN_FLT_IBE 16'h10A0
+`define INSN_FLT_TLB 16'h1280
+
+// Control and status register numbers (10 bits wide).
+// Entries containing '?' are wildcard patterns covering a range of registers;
+// they only match as intended inside casez/casex items.
+`define CSR_CR0 10'h000
+`define CSR_HARTID 10'h001
+`define CSR_TICK 10'h002
+`define CSR_PCR 10'h003
+`define CSR_PMR 10'h005
+`define CSR_CAUSE 10'h006
+`define CSR_BADADR 10'h007
+`define CSR_PCR2 10'h008
+`define CSR_SCRATCH 10'h009
+`define CSR_WBRCD 10'h00A
+`define CSR_BADINSTR 10'h00B
+`define CSR_SEMA 10'h00C
+`define CSR_KEYS 10'h00E
+`define CSR_TCB 10'h010
+`define CSR_FSTAT 10'h014
+`define CSR_DBAD0 10'h018
+`define CSR_DBAD1 10'h019
+`define CSR_DBAD2 10'h01A
+`define CSR_DBAD3 10'h01B
+`define CSR_DBCTRL 10'h01C
+`define CSR_DBSTAT 10'h01D
+`define CSR_CAS 10'h02C
+// Matches 10'h030..10'h037. Previously written with 11 digits for a 10-bit
+// literal; the extra leading 0 was silently truncated, so the value is
+// unchanged.
+`define CSR_TVEC 10'b0000110???
+`define CSR_IM_STACK 10'h040
+`define CSR_OL_STACK 10'h041
+`define CSR_PL_STACK 10'h042
+`define CSR_RS_STACK 10'h043
+`define CSR_STATUS 10'h044
+`define CSR_BRS_STACK 10'h046
+`define CSR_EPC0 10'h048
+`define CSR_EPC1 10'h049
+`define CSR_EPC2 10'h04A
+`define CSR_EPC3 10'h04B
+`define CSR_EPC4 10'h04C
+`define CSR_EPC5 10'h04D
+`define CSR_EPC6 10'h04E
+`define CSR_EPC7 10'h04F
+`define CSR_GOLEX0 10'h050
+`define CSR_GOLEX1 10'h051
+`define CSR_GOLEX2 10'h052
+`define CSR_GOLEX3 10'h053
+`define CSR_GOLEXVP 10'h054
+// Matches 10'h080..10'h0BF. Previously written with 11 digits for a 10-bit
+// literal; the extra leading 0 was silently truncated, so the value is
+// unchanged.
+`define CSR_CODEBUF 10'b0010??????
+`define CSR_TB 10'h0C0
+`define CSR_CBL 10'h0C1
+`define CSR_CBU 10'h0C2
+`define CSR_RO 10'h0C3
+`define CSR_DBL 10'h0C4
+`define CSR_DBU 10'h0C5
+`define CSR_SBL 10'h0C6
+`define CSR_SBU 10'h0C7
+`define CSR_ENU 10'h0C8
+`define CSR_PREGS 10'h0F0
+`define CSR_Q_CTR 10'h3C0
+`define CSR_BM_CTR 10'h3C1
+`define CSR_ICL_CTR 10'h3C2
+`define CSR_IRQ_CTR 10'h3C3
+`define CSR_TIME 10'h3E0
+// Matches 10'h3F0..10'h3FF.
+`define CSR_INFO 10'b11_1111_????
+
+// Operating levels (2 bits). Machine is 0 and user is 3.
+`define OL_USER 2'd3
+`define OL_SUPERVISOR 2'd2
+`define OL_HYPERVISOR 2'd1
+`define OL_MACHINE 2'd0
+
+// JALR and EXTENDED are synonyms
+`define EXTEND 3'd7
+
+// system-call subclasses:
+`define SYS_NONE 3'd0
+`define SYS_CALL 3'd1
+`define SYS_MFSR 3'd2
+`define SYS_MTSR 3'd3
+`define SYS_RFU1 3'd4
+`define SYS_RFU2 3'd5
+`define SYS_RFU3 3'd6
+`define SYS_EXC 3'd7 // doesn't need to be last, but what the heck
+
+// exception types:
+// 9-bit codes.
+`define EXC_NONE 9'd000
+`define EXC_HALT 9'd1
+`define EXC_TLBMISS 9'd2
+`define EXC_SIGSEGV 9'd3
+`define EXC_INVALID 9'd4
+
+// Fault cause codes (8 bits, decimal). The numbering is sparse.
+`define FLT_NONE 8'd00
+`define FLT_IBE 8'd01
+`define FLT_EXF 8'd02
+`define FLT_TLB 8'd04
+`define FLT_SSM 8'd32
+`define FLT_DBG 8'd33
+`define FLT_TGT 8'd34
+`define FLT_IADR 8'd36
+`define FLT_UNIMP 8'd37
+`define FLT_FLT 8'd38
+`define FLT_CHK 8'd39
+`define FLT_DBZ 8'd40
+`define FLT_OFL 8'd41
+`define FLT_SEG 8'd47
+`define FLT_ALN 8'd48
+`define FLT_DWF 8'd50
+`define FLT_DRF 8'd51
+`define FLT_SGB 8'd52
+`define FLT_PRIV 8'd53
+`define FLT_CMT 8'd54
+`define FLT_BD 8'd55
+`define FLT_STK 8'd56
+`define FLT_DBE 8'd60
+`define FLT_RET 8'd230
+`define FLT_CS 8'd231
+`define FLT_ZS_LD 8'd232
+`define FLT_DS_LD 8'd233
+`define FLT_ES_LD 8'd234
+`define FLT_FS_LD 8'd235
+`define FLT_GS_LD 8'd236
+`define FLT_HS_LD 8'd237
+`define FLT_SS_LD 8'd238
+`define FLT_CS_LD 8'd239
+
+// Bit ranges of fields within an instruction word, for use as part-selects
+// (e.g. isn[`INSTRUCTION_OP]). Ranges reaching bit 47 apply to 48-bit
+// instructions. Several fields overlap (e.g. RB and S1 are both 22:18).
+`define INSTRUCTION_OP 5:0
+`define INSTRUCTION_L2 7:6
+`define INSTRUCTION_RA 12:8
+`define INSTRUCTION_RT 17:13
+`define INSTRUCTION_RB 22:18
+`define INSTRUCTION_RC 27:23
+`define INSTRUCTION_IM 31:18
+`define INSTRUCTION_IML 47:18
+`define INSTRUCTION_SB 31
+`define INSTRUCTION_S1 22:18
+`define INSTRUCTION_S2 31:26
+`define INSTRUCTION_S2L 47:42
+`define INSTRUCTION_COND 21:18
+
+`define FORW_BRANCH 1'b0
+`define BACK_BRANCH 1'b1
+
+// 3-bit state codes; by name, states of a DRAM request slot.
+`define DRAMSLOT_AVAIL 3'b000
+`define DRAMSLOT_BUSY 3'b001
+`define DRAMSLOT_REQBUS 3'b101
+`define DRAMSLOT_HASBUS 3'b110
+`define DRAMREQ_READY 3'b111
+
+// Valid / invalid flag values.
+`define INV 1'b0
+`define VAL 1'b1
+
+//
+// define PANIC types
+//
+// 4-bit codes; value 8 is not assigned.
+`define PANIC_NONE 4'd0
+`define PANIC_FETCHBUFBEQ 4'd1
+`define PANIC_INVALIDISLOT 4'd2
+`define PANIC_MEMORYRACE 4'd3
+`define PANIC_IDENTICALDRAMS 4'd4
+`define PANIC_OVERRUN 4'd5
+`define PANIC_HALTINSTRUCTION 4'd6
+`define PANIC_INVALIDMEMOP 4'd7
+`define PANIC_INVALIDFBSTATE 4'd9
+`define PANIC_INVALIDIQSTATE 4'd10
+`define PANIC_BRANCHBACK 4'd11
+`define PANIC_BADTARGETID 4'd12
+`define PANIC_ALU0ONLY 4'd13
+
+// Bit positions / ranges within a 144-bit bus (highest field is 143:80).
+// FT64_idecoder declares a matching "output reg [143:0] bus"; presumably
+// these index that decoded-instruction bus -- TODO confirm.
+// Bits 79, 55:53, 50:48 and 29 have no name here.
+`define IB_CONST 143:80
+`define IB_LN 78:76
+`define IB_RT 75:71
+`define IB_RC 70:66
+`define IB_RB 65:61
+`define IB_RA 60:56
+`define IB_PRFW 52
+`define IB_CMP 51
+`define IB_PUSH 47
+`define IB_TLB 46
+`define IB_SZ 45:43
+`define IB_IRQ 42
+`define IB_RTI 41
+`define IB_BRK 40
+`define IB_RET 39
+`define IB_JAL 38
+`define IB_ODDBALL 37
+`define IB_STORE 36
+`define IB_MEMSZ 35:33
+`define IB_LOADV 32
+`define IB_IMM 31
+`define IB_MEM 30
+`define IB_BT 28
+`define IB_ALU 27
+`define IB_ALU0 26
+`define IB_FPU 25
+`define IB_FC 24
+`define IB_CANEX 23
+`define IB_LOAD 22
+`define IB_PRELOAD 21
+`define IB_MEMNDX 20
+`define IB_RMW 19
+`define IB_MEMDB 18
+`define IB_MEMSB 17
+`define IB_SHFT 16
+`define IB_SEI 15
+`define IB_AQ 14
+`define IB_RL 13
+`define IB_JMP 12
+`define IB_BR 11
+`define IB_SYNC 10
+`define IB_FSYNC 9
+`define IB_RFW 8
+`define IB_WE 7:0
+
+// TLB is a 6-bit code; the TLB_* names are 4-bit command codes.
+`define TLB 6'h3F
+`define TLB_NOP 4'd0
+`define TLB_P 4'd1
+`define TLB_RD 4'd2
+`define TLB_WR 4'd3
+`define TLB_WI 4'd4
+`define TLB_EN 4'd5
+`define TLB_DIS 4'd6
+`define TLB_RDREG 4'd7
+`define TLB_WRREG 4'd8
+`define TLB_INVALL 4'd9
+`define TLB_RDAGE 4'd10
+`define TLB_WRAGE 4'd11
+
+// 4-bit TLB register selectors; values 6 and 9 are not assigned.
+`define TLBWired 4'h0
+`define TLBIndex 4'h1
+`define TLBRandom 4'h2
+`define TLBPageSize 4'h3
+`define TLBVirtPage 4'h4
+`define TLBPhysPage 4'h5
+`define TLBASID 4'h7
+`define TLBMissAdr 4'd8
+`define TLBPageTblAddr 4'd10
+`define TLBPageTblCtrl 4'd11
+`define TLBAFC 4'd12
+`define TLBPageCount 4'd13
+
+// 6-bit values; by name presumably register-set selectors used for
+// exceptions and BRK -- TODO confirm.
+`define EXC_RGS 6'h00
+`define BRK_RGS 6'h10
Index: FT64v7/rtl/common/FT64_divider.v
===================================================================
--- FT64v7/rtl/common/FT64_divider.v (nonexistent)
+++ FT64v7/rtl/common/FT64_divider.v (revision 60)
@@ -0,0 +1,206 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// FT64 Superscalar
+// FT64_divider.v
+// - 64 bit divider
+//
+// ============================================================================
+//
+// Sequential shift-and-subtract divider producing one quotient bit per clock.
+//   rst    - synchronous reset
+//   ld     - start: operands a and b are captured while state is IDLE
+//   abort  - clears the iteration counter
+//   sgn    - both operands are treated as signed (magnitudes are divided and
+//            the sign is applied to the results)
+//   sgnus  - only the dividend a is treated as signed; b is used as is
+//   qo/ro  - quotient and remainder outputs
+//   dvByZr - latched at ld time when b is zero
+//   done   - high in state DONE, and also while IDLE with ld low
+//   idle   - high in state IDLE
+// Note: WID is the first parameter, so a positional override #(n) sets WID.
+module FT64_divider(rst, clk, ld, abort, sgn, sgnus, a, b, qo, ro, dvByZr, done, idle);
+parameter WID=64;
+parameter DIV=3'd3;
+parameter IDLE=3'd4;
+parameter DONE=3'd5;
+input clk;
+input rst;
+input ld;
+input abort;
+input sgn;
+input sgnus;
+input [WID-1:0] a;
+input [WID-1:0] b;
+output [WID-1:0] qo;
+reg [WID-1:0] qo;
+output [WID-1:0] ro;
+reg [WID-1:0] ro;
+output done;
+output idle;
+output dvByZr;
+reg dvByZr;
+
+reg [WID-1:0] bb;	// divisor (magnitude when sgn is set)
+reg so;				// result sign flag; not cleared by rst
+reg [2:0] state;
+reg [7:0] cnt;		// remaining iterations
+wire cnt_done = cnt==8'd0;
+assign done = state==DONE||(state==IDLE && !ld);
+assign idle = state==IDLE;
+reg ce1;
+reg [WID-1:0] q;	// dividend shifts out at the top, quotient bits shift in at the bottom
+reg [WID:0] r;		// partial remainder, one bit wider than the operands
+wire b0 = bb <= r;	// trial subtraction succeeds: next quotient bit
+wire [WID-1:0] r1 = b0 ? r - bb : r;
+
+// Simulation start-up values (constants are written 64 bits wide regardless
+// of WID).
+initial begin
+ q = 64'd0;
+ r = 64'd0;
+ qo = 64'd0;
+ ro = 64'd0;
+end
+
+// State machine: IDLE -> DIV on ld, DIV -> DONE on divide-by-zero or when the
+// counter reaches zero, DONE -> IDLE after one clock.
+always @(posedge clk)
+if (rst)
+ state <= IDLE;
+else
+ case(state)
+ IDLE:
+ if (ld)
+ state <= DIV;
+ DIV:
+ if (dvByZr)
+ state <= DONE;
+ else if (cnt_done)
+ state <= DONE;
+ DONE:
+ state <= IDLE;
+ default: state <= IDLE;
+ endcase
+
+// Iteration counter. abort has priority over ld. The load is not qualified
+// by state, so cnt is reloaded with WID+1 on every clock where ld is high.
+always @(posedge clk)
+if (rst)
+ cnt <= 8'h00;
+else begin
+ if (abort)
+ cnt <= 8'd00;
+ else if (ld)
+ cnt <= WID+1;
+ else if (!cnt_done)
+ cnt <= cnt - 8'd1;
+end
+
+// Divide-by-zero flag, sampled whenever ld is high.
+always @(posedge clk)
+if (rst)
+ dvByZr <= 1'b0;
+else begin
+ if (ld)
+ dvByZr <= b=={WID{1'b0}};
+end
+
+// Data path.
+always @(posedge clk)
+if (rst) begin
+ bb <= {WID{1'b0}};
+ q <= {WID{1'b0}};
+ r <= {WID{1'b0}};
+ qo <= {WID{1'b0}};
+ ro <= {WID{1'b0}};
+end
+else
+begin
+
+case(state)
+// Operand capture. Signed modes divide magnitudes and remember the result
+// sign in so.
+IDLE:
+ if (ld) begin
+ if (sgn) begin
+ q <= a[WID-1] ? -a : a;
+ bb <= b[WID-1] ? -b : b;
+ so <= a[WID-1] ^ b[WID-1];
+ end
+ else if (sgnus) begin
+ q <= a[WID-1] ? -a : a;
+ bb <= b;
+ so <= a[WID-1];
+ end
+ else begin
+ q <= a;
+ bb <= b;
+ so <= 1'b0;
+ $display("bb=%d", b);
+ end
+ r <= {WID{1'b0}};
+ end
+DIV:
+ // One iteration: shift the quotient bit into q and the top bit of q into
+ // the partial remainder.
+ if (!cnt_done && !dvByZr) begin
+ $display("cnt:%d r1=%h q[63:0]=%h", cnt,r1,q);
+ q <= {q[WID-2:0],b0};
+ r <= {r1,q[WID-1]};
+ end
+ // Final cycle (or divide by zero): publish the results. The remainder is
+ // r[WID:1]. On divide by zero, {1'b1,{WID-1{1'b0}}} is written when so is
+ // set; otherwise {WID-1{1'b1}}, which is only WID-1 bits wide and is
+ // therefore zero-extended (MSB 0) by the assignment.
+ else begin
+ $display("cnt:%d r1=%h q[63:0]=%h", cnt,r1,q);
+ if (sgn|sgnus) begin
+ if (so) begin
+ qo <= dvByZr ? {1'b1,{WID-1{1'b0}}} : -q;
+ ro <= dvByZr ? {1'b1,{WID-1{1'b0}}} : -r[WID:1];
+ end
+ else begin
+ qo <= dvByZr ? {WID-1{1'b1}} : q;
+ ro <= dvByZr ? {WID-1{1'b1}} : r[WID:1];
+ end
+ end
+ else begin
+ qo <= dvByZr ? {WID-1{1'b1}} : q;
+ ro <= dvByZr ? {WID-1{1'b1}} : r[WID:1];
+ end
+ end
+default: ;
+endcase
+end
+
+endmodule
+
+// Simulation-only testbench for FT64_divider.
+// Runs one signed division, 10005 / 27 (expected quotient 370, remainder 15),
+// prints the result and ends the simulation.
+module FT64_divider_tb();
+parameter WID=64;
+reg rst;
+reg clk;
+reg ld;
+wire done;
+wire idle;
+wire dvByZr;
+wire [WID-1:0] qo,ro;
+
+initial begin
+	clk = 1;
+	rst = 0;
+	ld = 0;				// start from a known level rather than x
+	#100 rst = 1;
+	#100 rst = 0;
+	// Pulse ld for exactly one clock, changing it away from the rising clock
+	// edges (which fall on multiples of 20). The divider reloads its
+	// iteration counter on every clock where ld is high, so a longer pulse
+	// would add extra shift steps.
+	#110 ld = 1;
+	#20 ld = 0;
+	wait (idle);		// divider has passed through DONE and returned to IDLE
+	$display("q=%d r=%d dvByZr=%b", qo, ro, dvByZr);
+	$finish;
+end
+
+always #10 clk = ~clk;	// 50 MHz
+
+// Connections match the FT64_divider port list. The previous version
+// connected non-existent ports (isDivi, imm) and left abort and sgnus open.
+FT64_divider #(WID) u1
+(
+	.rst(rst),
+	.clk(clk),
+	.ld(ld),
+	.abort(1'b0),
+	.sgn(1'b1),
+	.sgnus(1'b0),
+	.a(64'd10005),
+	.b(64'd27),
+	.qo(qo),
+	.ro(ro),
+	.dvByZr(dvByZr),
+	.done(done),
+	.idle(idle)
+);
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_icache.v
===================================================================
--- FT64v7/rtl/common/FT64_icache.v (nonexistent)
+++ FT64v7/rtl/common/FT64_icache.v (revision 60)
@@ -0,0 +1,729 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_icache.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+`define TRUE 1'b1
+`define FALSE 1'b0
+
+// -----------------------------------------------------------------------------
+// Small, 64 line cache memory (2kiB) made from distributed RAM. Access is
+// within a single clock cycle.
+// -----------------------------------------------------------------------------
+
+module FT64_L1_icache_mem(rst, clk, wr, en, lineno, i, o, ov, invall, invline);
+parameter pLines = 64;
+parameter pLineWidth = 298;
+localparam pLNMSB = pLines==128 ? 6 : 5;
+input rst;
+input clk;
+input wr;
+input [8:0] en;
+input [pLNMSB:0] lineno;
+input [pLineWidth-1:0] i;
+output [pLineWidth-1:0] o;
+output [8:0] ov;
+input invall;
+input invline;
+
+// Line storage with nine independently written lanes: eight 32-bit lanes
+// (bits 255:0) and one 42-bit lane (bits 297:256). Each lane has its own
+// valid bit per line. Reads are combinational on lineno.
+integer n;
+
+(* ram_style="distributed" *)
+reg [pLineWidth-1:0] mem [0:pLines-1];
+
+// One valid vector per lane, indexed by line number.
+reg [pLines-1:0] valid0;
+reg [pLines-1:0] valid1;
+reg [pLines-1:0] valid2;
+reg [pLines-1:0] valid3;
+reg [pLines-1:0] valid4;
+reg [pLines-1:0] valid5;
+reg [pLines-1:0] valid6;
+reg [pLines-1:0] valid7;
+reg [pLines-1:0] valid8;
+
+// Start simulation with every line zeroed.
+initial begin
+	for (n = 0; n < pLines; n = n + 1)
+		mem[n] <= {pLineWidth{1'b0}};
+end
+
+// Lane writes: a lane is stored when wr and its enable bit are both set.
+always @(posedge clk)
+begin
+	if (wr & en[0]) mem[lineno][31:0] <= i[31:0];
+	if (wr & en[1]) mem[lineno][63:32] <= i[63:32];
+	if (wr & en[2]) mem[lineno][95:64] <= i[95:64];
+	if (wr & en[3]) mem[lineno][127:96] <= i[127:96];
+	if (wr & en[4]) mem[lineno][159:128] <= i[159:128];
+	if (wr & en[5]) mem[lineno][191:160] <= i[191:160];
+	if (wr & en[6]) mem[lineno][223:192] <= i[223:192];
+	if (wr & en[7]) mem[lineno][255:224] <= i[255:224];
+	if (wr & en[8]) mem[lineno][297:256] <= i[297:256];
+end
+
+// Valid bits. Priority: reset / invalidate-all, then invalidate-line, then
+// lane writes (which set only the valid bits of the enabled lanes).
+always @(posedge clk)
+if (rst || invall) begin
+	valid0 <= {pLines{1'b0}};
+	valid1 <= {pLines{1'b0}};
+	valid2 <= {pLines{1'b0}};
+	valid3 <= {pLines{1'b0}};
+	valid4 <= {pLines{1'b0}};
+	valid5 <= {pLines{1'b0}};
+	valid6 <= {pLines{1'b0}};
+	valid7 <= {pLines{1'b0}};
+	valid8 <= {pLines{1'b0}};
+end
+else if (invline) begin
+	valid0[lineno] <= 1'b0;
+	valid1[lineno] <= 1'b0;
+	valid2[lineno] <= 1'b0;
+	valid3[lineno] <= 1'b0;
+	valid4[lineno] <= 1'b0;
+	valid5[lineno] <= 1'b0;
+	valid6[lineno] <= 1'b0;
+	valid7[lineno] <= 1'b0;
+	valid8[lineno] <= 1'b0;
+end
+else if (wr) begin
+	if (en[0]) valid0[lineno] <= 1'b1;
+	if (en[1]) valid1[lineno] <= 1'b1;
+	if (en[2]) valid2[lineno] <= 1'b1;
+	if (en[3]) valid3[lineno] <= 1'b1;
+	if (en[4]) valid4[lineno] <= 1'b1;
+	if (en[5]) valid5[lineno] <= 1'b1;
+	if (en[6]) valid6[lineno] <= 1'b1;
+	if (en[7]) valid7[lineno] <= 1'b1;
+	if (en[8]) valid8[lineno] <= 1'b1;
+end
+
+// Combinational read of the addressed line and its per-lane valid bits.
+assign o = mem[lineno];
+assign ov = {
+	valid8[lineno], valid7[lineno], valid6[lineno],
+	valid5[lineno], valid4[lineno], valid3[lineno],
+	valid2[lineno], valid1[lineno], valid0[lineno]
+};
+
+endmodule
+
+// -----------------------------------------------------------------------------
+// Fully associative (64 way) tag memory for L1 icache.
+//
+// -----------------------------------------------------------------------------
+
+module FT64_L1_icache_camtag(rst, clk, nxt, wlineno, wr, wadr, adr, hit, lineno);
+input rst;
+input clk;
+input nxt;
+output [5:0] wlineno;
+input wr;
+input [37:0] adr;
+input [37:0] wadr;
+output hit;
+output reg [5:0] lineno;
+
+// 36-bit tags: address bits 37:5 padded with zeros at the top.
+wire [35:0] wtagi = {3'b000,wadr[37:5]};
+wire [35:0] tagi = {3'b000,adr[37:5]};
+wire [63:0] match_addr;		// one match flag per CAM entry
+
+// Replacement pointer: a counter that advances whenever nxt is high and
+// names the entry the next write goes to.
+reg [5:0] cntr;
+always @(posedge clk)
+begin
+	if (rst)
+		cntr <= 6'd0;
+	else if (nxt)
+		cntr <= cntr + 6'd1;
+end
+assign wlineno = cntr;
+
+//wire [21:0] lfsro;
+//lfsr #(22,22'h0ACE1) u1 (rst, clk, !(wr3|wr2|wr), 1'b0, lfsro);
+
+cam36x64 u01 (rst, clk, wr, cntr[5:0], wtagi, tagi, match_addr);
+
+// A hit is any matching entry.
+assign hit = |match_addr;
+
+// Encode the match vector to a line number. The scan runs upward and keeps
+// overwriting, so the highest matching index wins; no match yields 0.
+integer k;
+always @*
+begin
+	lineno = 0;
+	for (k = 0; k < 64; k = k + 1)
+	begin
+		if (match_addr[k])
+			lineno = k;
+	end
+end
+
+endmodule
+
+
+// -----------------------------------------------------------------------------
+// Four way set associative tag memory for L1 cache.
+// -----------------------------------------------------------------------------
+
+// Four-way set-associative tag memory for the L1 instruction cache.
+//   rst, clk - synchronous reset and clock
+//   nxt      - passed to the lfsr instance (defined elsewhere); presumably
+//              advances the pseudo-random way selector
+//   wr       - store the tag of adr into the way selected by lfsro[1:0]
+//   adr      - address; bits pMSB:5 select the set, bits AMSB+8:5 form the tag
+//   lineno   - {way, set} of the matching way (way 3 when nothing matches)
+//   hit      - high when any way's stored tag equals the tag of adr
+// Lookups are combinational on adr.
+module FT64_L1_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit);
+parameter pLines = 64;
+parameter AMSB = 63;
+localparam pLNMSB = pLines==128 ? 6 : 5;
+localparam pMSB = pLines==128 ? 9 : 8;
+input rst;
+input clk;
+input nxt;
+input wr;
+input [AMSB+8:0] adr;
+output reg [pLNMSB:0] lineno;
+output hit;
+
+// A Verilog attribute instance binds only to the declaration that follows
+// it, so the RAM style hint is repeated for every way. Previously only mem0
+// carried it.
+(* ram_style="distributed" *)
+reg [AMSB+8-5:0] mem0 [0:pLines/4-1];
+(* ram_style="distributed" *)
+reg [AMSB+8-5:0] mem1 [0:pLines/4-1];
+(* ram_style="distributed" *)
+reg [AMSB+8-5:0] mem2 [0:pLines/4-1];
+(* ram_style="distributed" *)
+reg [AMSB+8-5:0] mem3 [0:pLines/4-1];
+integer n;
+initial begin
+	for (n = 0; n < pLines/4; n = n + 1)
+	begin
+		mem0[n] = 0;
+		mem1[n] = 0;
+		mem2[n] = 0;
+		mem3[n] = 0;
+	end
+end
+
+wire [21:0] lfsro;
+lfsr #(22,22'h0ACE3) u1 (rst, clk, nxt, 1'b0, lfsro);
+
+// Tag write. wlineno records the most recently written {way, set}; nothing
+// in this module reads it back.
+reg [pLNMSB:0] wlineno;
+always @(posedge clk)
+if (rst)
+	wlineno <= 6'h00;
+else begin
+	if (wr) begin
+		case(lfsro[1:0])
+		2'b00:	begin mem0[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b00,adr[pMSB:5]}; end
+		2'b01:	begin mem1[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b01,adr[pMSB:5]}; end
+		2'b10:	begin mem2[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b10,adr[pMSB:5]}; end
+		2'b11:	begin mem3[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b11,adr[pMSB:5]}; end
+		endcase
+	end
+end
+
+// Per-way tag comparison.
+wire hit0 = mem0[adr[pMSB:5]]==adr[AMSB+8:5];
+wire hit1 = mem1[adr[pMSB:5]]==adr[AMSB+8:5];
+wire hit2 = mem2[adr[pMSB:5]]==adr[AMSB+8:5];
+wire hit3 = mem3[adr[pMSB:5]]==adr[AMSB+8:5];
+
+// The lowest-numbered matching way wins; way 3 is also the fall-through
+// value when there is no hit at all.
+always @*
+	//if (wr2) lineno = wlineno;
+	if (hit0) lineno = {2'b00,adr[pMSB:5]};
+	else if (hit1) lineno = {2'b01,adr[pMSB:5]};
+	else if (hit2) lineno = {2'b10,adr[pMSB:5]};
+	else lineno = {2'b11,adr[pMSB:5]};
+assign hit = hit0|hit1|hit2|hit3;
+endmodule
+
+
+// -----------------------------------------------------------------------------
+// 32 way, 16 set associative tag memory for L2 cache
+// -----------------------------------------------------------------------------
+
+// CAM-based tag memory for the L2 instruction cache: sixteen 32-entry CAMs,
+// one per set.
+//   wr     - write the tag of adr into the CAM of the addressed set, at the
+//            entry chosen by lfsro[4:0]
+//   adr    - address; bits 13:10 select the set
+//   hit    - high when any entry of the addressed set matches
+//   lineno - {set, matching entry}; with several matches the highest entry
+//            index is reported, with none the entry field is 0
+// cam36x32 and lfsr are defined elsewhere.
+module FT64_L2_icache_camtag(rst, clk, wr, adr, hit, lineno);
+parameter AMSB=63;
+input rst;
+input clk;
+input wr;
+input [AMSB+8:0] adr;
+output hit;
+output [8:0] lineno;
+
+wire [3:0] set = adr[13:10];
+// Tag: the address with the set field removed, padded with zeros at the top.
+wire [AMSB+8-5:0] tagi = {7'd0,adr[AMSB+8:14],adr[9:5]};
+reg [4:0] encadr;
+assign lineno[4:0] = encadr;
+assign lineno[8:5] = adr[13:10];
+reg [15:0] we;
+wire [31:0] ma [0:15];
+
+// One-hot write enable for the addressed set. This block is combinational,
+// so it uses blocking assignments (it previously used non-blocking ones).
+always @*
+begin
+	we = 16'h0000;
+	we[set] = wr;
+end
+
+reg wr2;
+wire [21:0] lfsro;
+lfsr #(22,22'h0ACE2) u1 (rst, clk, !(wr2|wr), 1'b0, lfsro);
+
+always @(posedge clk)
+	wr2 <= wr;
+
+// One CAM per set. The loop body is a named block, which Verilog-2001
+// requires for generate-for loops.
+genvar g;
+generate
+for (g = 0; g < 16; g = g + 1)
+begin : camset
+	cam36x32 u01 (clk, we[g], lfsro[4:0], tagi, tagi, ma[g]);
+end
+endgenerate
+
+wire [31:0] match_addr = ma[set];
+assign hit = |match_addr;
+
+// Encode the match vector of the addressed set; the highest matching index
+// wins.
+integer n;
+always @*
+begin
+	encadr = 0;
+	for (n = 0; n < 32; n = n + 1)
+		if (match_addr[n]) encadr = n;
+end
+
+endmodule
+
+// -----------------------------------------------------------------------------
+// -----------------------------------------------------------------------------
+
+// L1 instruction cache: line storage (FT64_L1_icache_mem) plus a tag memory
+// selected by the FOURWAY / CAMTAGS parameters.
+//   wr, en, i - line write request, lane enables and 298-bit line data
+//   wadr      - write address (only connected in the CAMTAGS variant)
+//   adr       - lookup address
+//   o         - 56 bits taken from the cached line starting at the byte
+//               offset adr[4:0]
+//   fault     - bits 297:296 of the cached line
+//   hit       - tag hit and both lanes covering the access are valid
+//   wr_ack    - wr delayed by two clocks
+module FT64_L1_icache(rst, clk, nxt, wr, wr_ack, en, wadr, adr, i, o, fault, hit, invall, invline);
+parameter pSize = 2;
+parameter CAMTAGS = 1'b0;	// 32 way
+parameter FOURWAY = 1'b1;
+parameter AMSB = 63;
+localparam pLines = pSize==4 ? 128 : 64;
+localparam pLNMSB = pSize==4 ? 6 : 5;
+input rst;
+input clk;
+input nxt;
+input wr;
+output wr_ack;
+input [8:0] en;
+input [AMSB+8:0] adr;
+input [AMSB+8:0] wadr;
+input [297:0] i;
+output reg [55:0] o;
+output reg [1:0] fault;
+output hit;
+input invall;
+input invline;
+
+wire [297:0] ic;
+reg [297:0] i1, i2;
+wire [8:0] lv;				// line valid
+wire [pLNMSB:0] lineno;
+wire [pLNMSB:0] wlineno;
+wire taghit;
+reg wr1,wr2;
+reg [8:0] en1, en2;
+reg invline1, invline2;
+
+// Must update the cache memory on the cycle after a write to the tag memory.
+// Otherwise lineno won't be valid. Tag memory takes two clock cycles to update.
+// The write strobe, lane enables, data and invalidate request are therefore
+// carried through a two-stage delay line.
+always @(posedge clk)
+	wr1 <= wr;
+always @(posedge clk)
+	wr2 <= wr1;
+always @(posedge clk)
+	i1 <= i[297:0];
+always @(posedge clk)
+	i2 <= i1;
+always @(posedge clk)
+	en1 <= en;
+always @(posedge clk)
+	en2 <= en1;
+always @(posedge clk)
+	invline1 <= invline;
+always @(posedge clk)
+	invline2 <= invline1;
+
+generate begin : tags
+// Four-way variant: the line memory is written with the one-clock-delayed
+// signals.
+if (FOURWAY) begin
+
+FT64_L1_icache_mem #(.pLines(pLines)) u1
+(
+	.rst(rst),
+	.clk(clk),
+	.wr(wr1),
+	.en(en1),
+	.i(i1),
+	.lineno(lineno),
+	.o(ic),
+	.ov(lv),
+	.invall(invall),
+	.invline(invline1)
+);
+
+FT64_L1_icache_cmptag4way #(.pLines(pLines)) u3
+(
+	.rst(rst),
+	.clk(clk),
+	.nxt(nxt),
+	.wr(wr),
+	.adr(adr),
+	.lineno(lineno),
+	.hit(taghit)
+);
+end
+// CAM variant: the line memory is written with the two-clock-delayed signals.
+else if (CAMTAGS) begin
+
+FT64_L1_icache_mem u1
+(
+	.rst(rst),
+	.clk(clk),
+	.wr(wr2),
+	.en(en2),
+	.i(i2),
+	.lineno(lineno),
+	.o(ic),
+	.ov(lv),
+	.invall(invall),
+	.invline(invline2)
+);
+
+FT64_L1_icache_camtag u2
+(
+	.rst(rst),
+	.clk(clk),
+	.nxt(nxt),
+	.wlineno(wlineno),
+	.wadr(wadr),
+	.wr(wr),
+	.adr(adr),
+	.lineno(lineno),
+	.hit(taghit)
+);
+end
+end
+endgenerate
+
+// Valid if a 64-bit area encompassing a potential 48-bit instruction is valid.
+assign hit = taghit & lv[adr[4:2]] & lv[adr[4:2]+4'd1];
+
+// Output selection is purely combinational, so it uses blocking assignments
+// and a complete (@*) sensitivity list.
+always @*
+	o = ic >> {adr[4:0],3'h0};
+always @*
+	fault = ic[297:296];
+
+assign wr_ack = wr2;
+
+endmodule
+
+// -----------------------------------------------------------------------------
+// -----------------------------------------------------------------------------
+
+module FT64_L2_icache_mem(clk, wr, lineno, sel, i, fault, o, ov, invall, invline);
+input clk;
+input wr;
+input [8:0] lineno;
+input [2:0] sel;
+input [63:0] i;
+input [1:0] fault;
+output [297:0] o;
+output reg ov;
+input invall;
+input invline;
+
+// L2 line storage, 512 lines. A line is held as four 64-bit words, one
+// 40-bit word and a 2-bit fault field, each in its own array. Lines are
+// filled one word per write, steered by sel.
+(* ram_style="block" *)
+reg [63:0] mem0 [0:511];
+reg [63:0] mem1 [0:511];
+reg [63:0] mem2 [0:511];
+reg [63:0] mem3 [0:511];
+reg [39:0] mem4 [0:511];
+reg [1:0] memf [0:511];
+reg [511:0] valid;
+reg [8:0] rrcl;		// registered read line number
+
+integer n;
+initial begin
+	for (n = 0; n < 512; n = n + 1) begin
+		valid[n] <= 0;
+		memf[n] <= 2'b00;
+	end
+end
+
+// Line valid bits: invalidate-all beats invalidate-line, which beats a write.
+always @(posedge clk)
+begin
+	if (invall)
+		valid <= 512'd0;
+	else if (invline)
+		valid[lineno] <= 1'b0;
+	else if (wr)
+		valid[lineno] <= 1'b1;
+end
+
+// Data words: sel picks which word of the line receives i.
+always @(posedge clk)
+	if (wr) begin
+		case(sel[2:0])
+		3'd0:	mem0[lineno] <= i;
+		3'd1:	mem1[lineno] <= i;
+		3'd2:	mem2[lineno] <= i;
+		3'd3:	mem3[lineno] <= i;
+		3'd4:	mem4[lineno] <= i[39:0];
+		endcase
+	end
+
+// Fault bits: the first word of a line (sel 0) overwrites them, the
+// following words OR their fault bits in.
+always @(posedge clk)
+	if (wr) begin
+		case(sel[2:0])
+		3'd0:
+			memf[lineno] <= fault;
+		3'd1, 3'd2, 3'd3, 3'd4:
+			memf[lineno] <= memf[lineno] | fault;
+		endcase
+	end
+
+// Registered read: the line number and its valid bit are captured on the
+// clock and the line is read out from the captured number.
+always @(posedge clk)
+begin
+	rrcl <= lineno;
+	ov <= valid[lineno];
+end
+
+assign o = {memf[rrcl],mem4[rrcl],mem3[rrcl],mem2[rrcl],mem1[rrcl],mem0[rrcl]};
+
+endmodule
+
+// -----------------------------------------------------------------------------
+// Because the line to update is driven by the output of the cam tag memory,
+// the tag write should occur only during the first half of the line load.
+// Otherwise the line number would change in the middle of the line. The
+// first half of the line load is signified by an even hexibyte address (
+// address bit 4).
+// -----------------------------------------------------------------------------
+
+// L2 instruction cache: FT64_L2_icache_mem line storage plus a tag memory
+// chosen by the FOURWAY / CAMTAGS parameters (direct mapped when both are 0).
+//   wr, i        - write strobe and 64-bit write data, one word per write
+//   xsel, adr    - {xsel, adr[4:3]} selects which word of the line is written
+//   cnt          - the tag is written only for a write with cnt == 0
+//   err_i, exv_i - stored with the line as the 2-bit fault field
+//   o            - 298-bit line read from the line memory
+//   hit          - tag hit and line valid
+//   wr_ack       - wr delayed by two clocks
+//   rd_ack       - set once adr has been stable with no write in flight for
+//                  several clocks (see rdackx below)
+module FT64_L2_icache(rst, clk, nxt, wr, wr_ack, rd_ack, xsel, adr, cnt, exv_i, i, err_i, o, hit, invall, invline);
+parameter CAMTAGS = 1'b0; // 32 way
+parameter FOURWAY = 1'b1;
+parameter AMSB = 63;
+input rst;
+input clk;
+input nxt;
+input wr;
+output wr_ack;
+output rd_ack;
+input xsel;
+input [AMSB+8:0] adr;
+input [2:0] cnt;
+input exv_i;
+input [63:0] i;
+input err_i;
+output [297:0] o;
+output hit;
+input invall;
+input invline;
+
+wire lv; // line valid
+wire [8:0] lineno;
+wire taghit;
+reg wr1,wr2;
+reg [2:0] sel1,sel2;
+reg [63:0] i1,i2;
+reg [1:0] f1, f2;
+reg [AMSB+8:0] last_adr;
+
+// Must update the cache memory on the cycle after a write to the tag memory.
+// Otherwise lineno won't be valid. camTag memory takes two clock cycles to update.
+// The write strobe, word select, data and fault bits are each delayed by two
+// clocks before they reach the line memory.
+always @(posedge clk)
+ wr1 <= wr;
+always @(posedge clk)
+ wr2 <= wr1;
+always @(posedge clk)
+ sel1 <= {xsel,adr[4:3]};
+always @(posedge clk)
+ sel2 <= sel1;
+always @(posedge clk)
+ last_adr <= adr;
+always @(posedge clk)
+ f1 <= {err_i,exv_i};
+always @(posedge clk)
+ f2 <= f1;
+
+// Read acknowledge shift register. It is cleared whenever the address
+// changes or a write is in flight; otherwise a 1 is shifted in each clock
+// (the 5-bit concatenation is truncated to 4 bits), so bit 3 sets after four
+// undisturbed clocks.
+reg [3:0] rdackx;
+always @(posedge clk)
+if (rst)
+ rdackx <= 4'b0;
+else begin
+ if (last_adr != adr || wr || wr1 || wr2)
+ rdackx <= 4'b0;
+ else
+ rdackx <= {rdackx,~(wr|wr1|wr2)};
+end
+
+// rd_ack drops combinationally as soon as the address changes or a write
+// starts.
+assign rd_ack = rdackx[3] & ~(last_adr!=adr || wr || wr1 || wr2);
+
+always @(posedge clk)
+ i1 <= i;
+always @(posedge clk)
+ i2 <= i1;
+
+// Tag write strobe, taken from the pe output of edge_det (defined elsewhere;
+// presumably a one-clock pulse on the rising edge of its input) for a write
+// with cnt == 0.
+wire pe_wr;
+edge_det u3 (.rst(rst), .clk(clk), .ce(1'b1), .i(wr && cnt==3'd0), .pe(pe_wr), .ne(), .ee() );
+
+FT64_L2_icache_mem u1
+(
+ .clk(clk),
+ .wr(wr2),
+ .lineno(lineno),
+ .sel(sel2),
+ .i(i2),
+ .fault(f2),
+ .o(o),
+ .ov(lv),
+ .invall(invall),
+ .invline(invline)
+);
+
+// Tag memory variant: four-way when FOURWAY is set, else CAM when CAMTAGS is
+// set, else direct mapped.
+generate
+begin : tags
+if (FOURWAY)
+FT64_L2_icache_cmptag4way u2
+(
+ .rst(rst),
+ .clk(clk),
+ .nxt(nxt),
+ .wr(pe_wr),
+ .adr(adr),
+ .lineno(lineno),
+ .hit(taghit)
+);
+else if (CAMTAGS)
+FT64_L2_icache_camtag u2
+(
+ .rst(rst),
+ .clk(clk),
+ .wr(pe_wr),
+ .adr(adr),
+ .lineno(lineno),
+ .hit(taghit)
+);
+else
+FT64_L2_icache_cmptag u2
+(
+ .rst(rst),
+ .clk(clk),
+ .wr(pe_wr),
+ .adr(adr),
+ .lineno(lineno),
+ .hit(taghit)
+);
+end
+endgenerate
+
+assign hit = taghit & lv;
+assign wr_ack = wr2;
+
+endmodule
+
+// Four way set associative tag memory
+// Four-way set-associative tag memory for the L2 instruction cache
+// (128 sets of 4 ways).
+//   nxt    - passed to the lfsr instance (defined elsewhere); presumably
+//            advances the pseudo-random way selector
+//   wr     - store the tag of adr into the way selected by lfsro[1:0]
+//   adr    - address; bits 11:5 select the set, bits AMSB+8:5 form the tag
+//   lineno - {way, set}; for one clock after a write it reports the line
+//            just written, otherwise the matching way (way 3 when nothing
+//            matches)
+//   hit    - high when any way matches the registered address
+// Lookups use the address registered on the previous clock (rradr).
+module FT64_L2_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit);
+parameter AMSB = 63;
+input rst;
+input clk;
+input nxt;
+input wr;
+input [AMSB+8:0] adr;
+output reg [8:0] lineno;
+output hit;
+
+// A Verilog attribute instance binds only to the declaration that follows
+// it, so the RAM style hint is repeated for every way. Previously only mem0
+// carried it.
+(* ram_style="block" *)
+reg [AMSB+8-5:0] mem0 [0:127];
+(* ram_style="block" *)
+reg [AMSB+8-5:0] mem1 [0:127];
+(* ram_style="block" *)
+reg [AMSB+8-5:0] mem2 [0:127];
+(* ram_style="block" *)
+reg [AMSB+8-5:0] mem3 [0:127];
+reg [AMSB+8:0] rradr;
+integer n;
+initial begin
+	for (n = 0; n < 128; n = n + 1)
+	begin
+		mem0[n] = 0;
+		mem1[n] = 0;
+		mem2[n] = 0;
+		mem3[n] = 0;
+	end
+end
+
+reg wr2;
+wire [21:0] lfsro;
+lfsr #(22,22'h0ACE3) u1 (rst, clk, nxt, 1'b0, lfsro);
+reg [8:0] wlineno;
+
+// Tag write and address capture. While rst is high only wlineno is cleared;
+// wr2 and rradr keep their values.
+always @(posedge clk)
+if (rst)
+	wlineno <= 9'h000;
+else begin
+	wr2 <= wr;
+	if (wr) begin
+		case(lfsro[1:0])
+		2'b00:	begin mem0[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b00,adr[11:5]}; end
+		2'b01:	begin mem1[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b01,adr[11:5]}; end
+		2'b10:	begin mem2[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b10,adr[11:5]}; end
+		2'b11:	begin mem3[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b11,adr[11:5]}; end
+		endcase
+	end
+	rradr <= adr;
+end
+
+// Per-way comparison against the registered address.
+wire hit0 = mem0[rradr[11:5]]==rradr[AMSB+8:5];
+wire hit1 = mem1[rradr[11:5]]==rradr[AMSB+8:5];
+wire hit2 = mem2[rradr[11:5]]==rradr[AMSB+8:5];
+wire hit3 = mem3[rradr[11:5]]==rradr[AMSB+8:5];
+
+// The just-written line has priority, then the lowest-numbered matching way.
+always @*
+	if (wr2) lineno = wlineno;
+	else if (hit0) lineno = {2'b00,rradr[11:5]};
+	else if (hit1) lineno = {2'b01,rradr[11:5]};
+	else if (hit2) lineno = {2'b10,rradr[11:5]};
+	else lineno = {2'b11,rradr[11:5]};
+assign hit = hit0|hit1|hit2|hit3;
+endmodule
+
+// Simple tag array, 1-way direct mapped
+module FT64_L2_icache_cmptag(rst, clk, wr, adr, lineno, hit);
+parameter AMSB = 63;
+input rst;
+input clk;
+input wr;
+input [AMSB+8:0] adr;
+output reg [8:0] lineno;
+output hit;
+
+// Direct-mapped tag store: 512 entries indexed by address bits 13:5, each
+// holding address bits AMSB+8:14.
+reg [AMSB+8-14:0] mem [0:511];
+reg [AMSB+8:0] rradr;		// address captured on the previous clock
+reg [8:0] wlineno;			// line number of the most recent tag write
+reg wr2;					// wr delayed by one clock
+
+integer k;
+initial begin
+	for (k = 0; k < 512; k = k + 1)
+		mem[k] = 0;
+end
+
+// Sequential part: delay wr, capture the address, and on a write store the
+// tag and remember which line was written.
+always @(posedge clk)
+begin
+	wr2 <= wr;
+	rradr <= adr;
+	if (wr) begin
+		mem[adr[13:5]] <= adr[AMSB+8:14];
+		wlineno <= adr[13:5];
+	end
+end
+
+// The lookup compares against the registered address.
+assign hit = mem[rradr[13:5]]==rradr[AMSB+8:14];
+
+// For one clock after a write the written line is reported; otherwise the
+// line number is the index field of the registered address.
+always @*
+begin
+	if (wr2)
+		lineno = wlineno;
+	else
+		lineno = rradr[13:5];
+end
+endmodule
+
Index: FT64v7/rtl/common/FT64_idecoder.v
===================================================================
--- FT64v7/rtl/common/FT64_idecoder.v (nonexistent)
+++ FT64v7/rtl/common/FT64_idecoder.v (revision 60)
@@ -0,0 +1,1208 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_idecoder.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include ".\FT64_config.vh"
+`include ".\FT64_defines.vh"
+
+// Instruction decoder: classifies one 48-bit instruction word and packs the
+// resulting flags and fields into the 144-bit 'bus' output. Field positions
+// within bus are given by the `IB_* macros.
+module FT64_idecoder(clk,idv_i,id_i,instr,vl,ven,thrd,predict_taken,Rt,bus,id_o,idv_o,debug_on,pred_on);
+input clk;                 // only used when REGISTER_DECODE is defined
+input idv_i;               // copied to idv_o
+input [4:0] id_i;          // copied to id_o
+input [47:0] instr;        // instruction word to decode
+input [7:0] vl;            // not referenced by the active code in this module
+input [5:0] ven;           // not referenced by the active code in this module
+input thrd;                // not referenced by the active code in this module
+input predict_taken;       // not referenced by the active code in this module
+input [4:0] Rt;            // target register number; gates IB_RFW and IB_PRELOAD
+output reg [143:0] bus;    // decoded instruction bus
+output reg [4:0] id_o;
+output reg idv_o;
+input debug_on;            // read by fnCanException when SUPPORT_DBG is defined
+input pred_on;             // OR'd into the IB_LN field
+
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+// Memory access sizes
+parameter byt = 3'd0;
+parameter wyde = 3'd1;
+parameter tetra = 3'd2;
+parameter octa = 3'd3;
+
+// Really IsPredictableBranch
+// Does not include BccR's
+//function IsBranch;
+//input [47:0] isn;
+//casez(isn[`INSTRUCTION_OP])
+//`Bcc: IsBranch = TRUE;
+//`BBc: IsBranch = TRUE;
+//`BEQI: IsBranch = TRUE;
+//`CHK: IsBranch = TRUE;
+//default: IsBranch = FALSE;
+//endcase
+//endfunction
+
+// Bits 31:21 of the instruction word, used as the branch displacement.
+wire [10:0] brdisp = instr[31:21];
+
+// Stand-alone ALU classifier instance. Its output iAlu is not referenced
+// anywhere else in this module; the IB_ALU bus field is driven from the
+// IsALU reg declared further down.
+wire iAlu;
+mIsALU uialu1
+(
+ .instr(instr),
+ .IsALU(iAlu)
+);
+
+// TRUE only for an R2 opcode whose S2 function field selects TLB.
+function IsTLB;
+input [47:0] isn;
+begin
+  IsTLB = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `R2:
+    case(isn[`INSTRUCTION_S2])
+    `TLB: IsTLB = TRUE;
+    default: ;
+    endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// Combinational ALU classification of 'instr'. Everything is treated as an
+// ALU operation except: RTI (R2 with L2==00), the flow control opcodes,
+// FLOAT, and FVECTOR functions other than the vector shifts.
+reg IsALU;
+always @*
+begin
+  IsALU = TRUE;
+  case(instr[`INSTRUCTION_OP])
+  `R2:
+    if (instr[`INSTRUCTION_L2]==2'b00)
+      case(instr[`INSTRUCTION_S2])
+      `VMOV: ;
+      `RTI: IsALU = FALSE;
+      default: ;
+      endcase
+  `BRK,`Bcc,`BBc,`BEQI,`CHK,
+  `JAL,`JMP,`CALL,`RET:
+    IsALU = FALSE;
+  `FVECTOR:
+    case(instr[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: ;
+    default: IsALU = FALSE;
+    endcase
+  `IVECTOR: ;  // every integer vector operation stays TRUE
+  `FLOAT: IsALU = FALSE;
+  default: ;
+  endcase
+end
+
+// Returns TRUE when the instruction is flagged as ALU0-only (drives the
+// IB_ALU0 bus field): TLB, R1, multiply/divide/modulus, MIN/MAX, right
+// shifts, indexed memory ops, bitfield ops, CSRRW and the vector shifts.
+//
+// isn: the 48-bit instruction word to classify.
+//
+// The shift test reads the shift-type field from the function argument
+// (isn[25:23]) so that the result depends only on the word passed in, not on
+// the module-level instr signal.
+function IsAlu0Only;
+input [47:0] isn;
+begin
+case(isn[`INSTRUCTION_OP])
+`R2:
+ if (isn[`INSTRUCTION_L2]==2'b00)
+ case(isn[`INSTRUCTION_S2])
+ `TLB: IsAlu0Only = TRUE;
+ `R1: IsAlu0Only = TRUE;
+ `SHIFTR,`SHIFT31,`SHIFT63:
+ // Left shifts (SHL / ASL) are not restricted; all other shifts are.
+ IsAlu0Only = !(isn[25:23]==`SHL || isn[25:23]==`ASL);
+ `MULU,`MULSU,`MUL,
+ `MULUH,`MULSUH,`MULH,
+ `MODU,`MODSU,`MOD: IsAlu0Only = TRUE;
+ `DIVU,`DIVSU,`DIV: IsAlu0Only = TRUE;
+ `MIN,`MAX: IsAlu0Only = TRUE;
+ default: IsAlu0Only = FALSE;
+ endcase
+ else
+ IsAlu0Only = FALSE;
+`MEMNDX: IsAlu0Only = TRUE;
+`IVECTOR,`FVECTOR:
+ case(isn[`INSTRUCTION_S2])
+ `VSHL,`VSHR,`VASR: IsAlu0Only = TRUE;
+ default: IsAlu0Only = FALSE;
+ endcase
+`BITFIELD: IsAlu0Only = TRUE;
+`MULUI,`MULI,
+`DIVUI,`DIVI,
+`MODI: IsAlu0Only = TRUE;
+`CSRRW: IsAlu0Only = TRUE;
+default: IsAlu0Only = FALSE;
+endcase
+end
+endfunction
+
+// TRUE for FLOAT opcodes and for FVECTOR operations other than the vector
+// shifts (VSHL / VSHR / VASR).
+function IsFPU;
+input [47:0] isn;
+begin
+  IsFPU = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `FLOAT: IsFPU = TRUE;
+  `FVECTOR:
+    case(isn[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: ;
+    default: IsFPU = TRUE;
+    endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// Combinational flow-control classification of 'instr': TRUE for BRK, RTI
+// (R2 format), the branches, CHK, JAL, JMP, CALL and RET.
+// Blocking assignments are used because this is purely combinational logic;
+// non-blocking assignments in an always @* block can cause simulation /
+// synthesis mismatches.
+reg IsFlowCtrl;
+always @*
+case(instr[`INSTRUCTION_OP])
+`BRK: IsFlowCtrl = TRUE;
+`R2: case(instr[`INSTRUCTION_S2])
+ `RTI: IsFlowCtrl = TRUE;
+ default: IsFlowCtrl = FALSE;
+ endcase
+`Bcc: IsFlowCtrl = TRUE;
+`BBc: IsFlowCtrl = TRUE;
+`BEQI: IsFlowCtrl = TRUE;
+`CHK: IsFlowCtrl = TRUE;
+`JAL: IsFlowCtrl = TRUE;
+`JMP: IsFlowCtrl = TRUE;
+`CALL: IsFlowCtrl = TRUE;
+`RET: IsFlowCtrl = TRUE;
+default: IsFlowCtrl = FALSE;
+endcase
+
+//function IsFlowCtrl;
+//input [47:0] isn;
+//begin
+//case(isn[`INSTRUCTION_OP])
+//`BRK: IsFlowCtrl = TRUE;
+//`RR: case(isn[`INSTRUCTION_S2])
+// `RTI: IsFlowCtrl = TRUE;
+// default: IsFlowCtrl = FALSE;
+// endcase
+//`Bcc: IsFlowCtrl = TRUE;
+//`BBc: IsFlowCtrl = TRUE;
+//`BEQI: IsFlowCtrl = TRUE;
+//`CHK: IsFlowCtrl = TRUE;
+//`JAL: IsFlowCtrl = TRUE;
+//`JMP: IsFlowCtrl = TRUE;
+//`CALL: IsFlowCtrl = TRUE;
+//`RET: IsFlowCtrl = TRUE;
+//default: IsFlowCtrl = FALSE;
+//endcase
+//end
+//endfunction
+
+// fnCanException
+//
+// Used by memory issue logic (stores).
+// Returns TRUE if the instruction can cause an exception.
+// In debug mode any instruction could potentially cause a breakpoint exception.
+// Rather than check all the addresses for potential debug exceptions it's
+// simpler to just have it so that all instructions could exception. This will
+// slow processing down somewhat as stores will only be done at the head of the
+// instruction queue, but it's debug mode so we probably don't care.
+//
+// isn: the 48-bit instruction word to classify.
+// The branch displacement is taken from the argument (isn[31:21]) so the
+// result depends only on the word passed in. debug_on is still read from the
+// enclosing module (see the ToDo below).
+//
+function fnCanException;
+input [47:0] isn;
+begin
+// ToDo add debug_on as input
+`ifdef SUPPORT_DBG
+if (debug_on)
+ fnCanException = `TRUE;
+else
+`endif
+case(isn[`INSTRUCTION_OP])
+`FLOAT:
+ case(isn[`INSTRUCTION_S2])
+ `FDIV,`FMUL,`FADD,`FSUB,`FTX:
+ fnCanException = `TRUE;
+ default: fnCanException = `FALSE;
+ endcase
+`DIVI,`MODI,`MULI:
+ fnCanException = `TRUE;
+`R2:
+ case(isn[`INSTRUCTION_S2])
+ `MUL,
+ `DIV,`MULSU,`DIVSU,
+ `MOD,`MODSU:
+ fnCanException = TRUE;
+ `RTI: fnCanException = TRUE;
+ default: fnCanException = FALSE;
+ endcase
+// Had branches that could exception if looping to self. But in a tight loop
+// it affects store performance.
+// -> A branch may only exception if it loops back to itself.
+// The self-loop displacement tested depends on isn[7]: 11'h7FF when set,
+// 11'h7FE when clear.
+`Bcc,`BBc,`BEQI: fnCanException = isn[7] ? isn[31:21] == 11'h7FF : isn[31:21] == 11'h7FE;
+`CHK: fnCanException = TRUE;
+default:
+// Stores can still exception if there is a write buffer, but we allow following
+// stores to be issued by ignoring the fact they can exception because the stores
+// can be undone by invalidating the write buffer.
+`ifdef HAS_WB
+ fnCanException = IsMem(isn) && !IsStore(isn);
+`else
+ fnCanException = IsMem(isn);
+`endif
+endcase
+end
+endfunction
+
+// TRUE for load opcodes. For the indexed memory opcode (MEMNDX) the
+// instruction is a load when bit 31 is clear.
+function IsLoad;
+input [47:0] isn;
+begin
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX: IsLoad = !isn[31];
+  `LB,`LBU,
+  `Lx,`LxU,
+  `LWR,
+  `LV,`LVx,`LVxU:
+    IsLoad = TRUE;
+  default: IsLoad = FALSE;
+  endcase
+end
+endfunction
+
+// TRUE for the volatile load forms: LWR, LVx, LVxU, and the indexed
+// (MEMNDX, L2==00) LWRX / LVBX / LVBUX / LVCX / LVCUX / LVHX / LVHUX / LVWX.
+function IsVolatileLoad;
+input [47:0] isn;
+begin
+  IsVolatileLoad = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[22:21]})
+      `LWRX,
+      `LVBX,`LVBUX,
+      `LVCX,`LVCUX,
+      `LVHX,`LVHUX,
+      `LVWX:
+        IsVolatileLoad = TRUE;
+      default: ;
+      endcase
+  `LWR,`LVx,`LVxU: IsVolatileLoad = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// Returns TRUE when the instruction writes memory.
+//
+// isn: the 48-bit instruction word to classify.
+//
+// Direct opcodes SB, Sx, SWC, INC, SV, CAS and AMO are stores. For the
+// indexed opcode (MEMNDX) the function code {isn[31:28],isn[17:16]} is
+// decoded: with L2==2'b10 only PUSH (and only when isn[31] is set) counts;
+// with L2==2'b00 the indexed store, CASX and INCX function codes count.
+// The indexed increment is matched with `INCX, the same macro IsInc uses for
+// this selector, rather than the major opcode macro `INC.
+function IsStore;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+ if (isn[`INSTRUCTION_L2]==2'b10) begin
+ if (isn[31])
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsStore = TRUE;
+ default: IsStore = FALSE;
+ endcase
+ else
+ IsStore = FALSE;
+ end
+ else if (isn[`INSTRUCTION_L2]==2'b00)
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsStore = TRUE;
+ `SBX: IsStore = TRUE;
+ `SCX: IsStore = TRUE;
+ `SHX: IsStore = TRUE;
+ `SWX: IsStore = TRUE;
+ `SWCX: IsStore = TRUE;
+ `SVX: IsStore = TRUE;
+ `CASX: IsStore = TRUE;
+ `INCX: IsStore = TRUE;
+ default: IsStore = FALSE;
+ endcase
+ else
+ IsStore = FALSE;
+`SB: IsStore = TRUE;
+`Sx: IsStore = TRUE;
+`SWC: IsStore = TRUE;
+`INC: IsStore = TRUE;
+`SV: IsStore = TRUE;
+`CAS: IsStore = TRUE;
+`AMO: IsStore = TRUE;
+default: IsStore = FALSE;
+endcase
+endfunction
+
+// TRUE for the indexed-format PUSH. With L2==2'b10 the instruction must also
+// have bit 31 set; with L2==2'b00 the function code alone decides.
+function IsPush;
+input [47:0] isn;
+reg pushFn;   // function code {isn[31:28],isn[17:16]} selects PUSH
+begin
+  IsPush = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    begin
+      case({isn[31:28],isn[17:16]})
+      `PUSH: pushFn = TRUE;
+      default: pushFn = FALSE;
+      endcase
+      if (isn[`INSTRUCTION_L2]==2'b10) begin
+        if (isn[31])
+          IsPush = pushFn;
+      end
+      else if (isn[`INSTRUCTION_L2]==2'b00)
+        IsPush = pushFn;
+    end
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for every opcode that accesses memory (loads, stores, indexed memory
+// operations and the read-modify-write forms).
+function [0:0] IsMem;
+input [47:0] isn;
+begin
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX,`AMO,
+  `LB,`LBU,`Lx,`LxU,`LWR,
+  `LV,`SV,
+  `INC,
+  `SB,`Sx,`SWC,
+  `CAS,
+  `LVx,`LVxU:
+    IsMem = TRUE;
+  default:
+    IsMem = FALSE;
+  endcase
+end
+endfunction
+
+// TRUE when the opcode is the indexed memory opcode (MEMNDX).
+function IsMemNdx;
+input [47:0] isn;
+begin
+  IsMemNdx = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX: IsMemNdx = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// Returns the memory access size code for an instruction, using the module
+// parameters byt / wyde / tetra / octa. Anything not explicitly decoded
+// (including non-memory opcodes) yields octa.
+//
+// Indexed (MEMNDX) forms with L2==00 decode the function code: loads use
+// {isn[31:28],isn[22:21]}, everything else uses {isn[31:28],isn[17:16]}.
+// Lx / LxU / LVx / LVxU take the size from isn[20:18], Sx from isn[15:13],
+// and AMO from isn[23:21].
+function [2:0] MemSize;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+ if (isn[`INSTRUCTION_L2]==2'b00) begin
+ if (IsLoad(isn))
+ case({isn[31:28],isn[22:21]})
+ `LVBX,`LVBUX: MemSize = byt;
+ `LBX,`LBUX: MemSize = byt;
+ `LVCX,`LVCUX: MemSize = wyde;
+ `LCX,`LCUX: MemSize = wyde;
+ `LVHX,`LVHUX: MemSize = tetra;
+ `LHX: MemSize = tetra;
+ `LHUX: MemSize = tetra;
+ `LVWX: MemSize = octa;
+ `LWX: MemSize = octa;
+ `LWRX: MemSize = octa;
+ `LVX: MemSize = octa;
+ // NOTE(review): `LVx is used as a major opcode elsewhere in this module;
+ // here it is matched against the indexed function code - confirm intent.
+ `LVx:
+ case(isn[20:18])
+ 3'd0,3'd1: MemSize = byt;
+ 3'd2,3'd3: MemSize = wyde;
+ 3'd4,3'd5: MemSize = tetra;
+ default: MemSize = octa;
+ endcase
+ default: MemSize = octa;
+ endcase
+ else
+ case({isn[31:28],isn[17:16]})
+ `SBX: MemSize = byt;
+ `SCX: MemSize = wyde;
+ `SHX: MemSize = tetra;
+ `SWX: MemSize = octa;
+ `SWCX: MemSize = octa;
+ `SVX: MemSize = octa;
+ default: MemSize = octa;
+ endcase
+ end
+ else
+ MemSize = octa;
+`LB,`LBU: MemSize = byt;
+`Lx,`LxU,`LVx,`LVxU:
+ // casez items are tried in order: exactly 3'b100 is octa, then a low
+ // two-bit pattern of 10 is tetra, then any value with bit 0 set is wyde.
+ casez(isn[20:18])
+ 3'b100: MemSize = octa;
+ 3'b?10: MemSize = tetra;
+ 3'b??1: MemSize = wyde;
+ default: MemSize = octa;
+ endcase
+`LWR: MemSize = octa;
+`LV: MemSize = octa;
+`AMO:
+ case(isn[23:21])
+ 3'd0: MemSize = byt;
+ 3'd1: MemSize = wyde;
+ 3'd2: MemSize = tetra;
+ 3'd3: MemSize = octa;
+ default: MemSize = octa;
+ endcase
+`SB: MemSize = byt;
+`Sx:
+ // Same size encoding as the Lx group, taken from isn[15:13].
+ casez(isn[15:13])
+ 3'b100: MemSize = octa;
+ 3'b?10: MemSize = tetra;
+ 3'b??1: MemSize = wyde;
+ default: MemSize = octa;
+ endcase
+`SWC: MemSize = octa;
+`SV: MemSize = octa;
+default: MemSize = octa;
+endcase
+endfunction
+
+// TRUE for the CAS opcode and for the indexed CASX form (MEMNDX, L2==00).
+function IsCAS;
+input [47:0] isn;
+begin
+  IsCAS = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[17:16]})
+      `CASX: IsCAS = TRUE;
+      default: ;
+      endcase
+  `CAS: IsCAS = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE when the opcode is AMO (atomic memory operation).
+function IsAMO;
+input [47:0] isn;
+begin
+  IsAMO = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `AMO: IsAMO = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for the INC opcode and for the indexed INCX form (MEMNDX, L2==00).
+function IsInc;
+input [47:0] isn;
+begin
+  IsInc = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[17:16]})
+      `INCX: IsInc = TRUE;
+      default: ;
+      endcase
+  `INC: IsInc = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for FLOAT opcode, L2==00, function FSYNC.
+function IsFSync;
+input [47:0] isn;
+reg opOk, lenOk, fnOk;
+begin
+  opOk  = (isn[`INSTRUCTION_OP]==`FLOAT);
+  lenOk = (isn[`INSTRUCTION_L2]==2'b00);
+  fnOk  = (isn[`INSTRUCTION_S2]==`FSYNC);
+  IsFSync = (opOk && lenOk && fnOk);
+end
+endfunction
+
+// TRUE for R2 opcode, L2==00, function R1 with isn[22:18] selecting MEMDB.
+function IsMemdb;
+input [47:0] isn;
+reg isR1Form;
+begin
+  isR1Form = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1);
+  IsMemdb = (isR1Form && isn[22:18]==`MEMDB);
+end
+endfunction
+
+// Returns TRUE for the MEMSB barrier: R2 opcode, L2==00, function R1 with
+// isn[22:18] selecting MEMSB.
+// The major opcode is matched with `R2, the same macro the sibling IsMemdb
+// and IsSync decodes use for the R1 group.
+function IsMemsb;
+input [47:0] isn;
+IsMemsb = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMSB);
+endfunction
+
+// TRUE for R2 opcode, L2==00, function SEI.
+function IsSEI;
+input [47:0] isn;
+reg opOk, lenOk, fnOk;
+begin
+  opOk  = (isn[`INSTRUCTION_OP]==`R2);
+  lenOk = (isn[`INSTRUCTION_L2]==2'b00);
+  fnOk  = (isn[`INSTRUCTION_S2]==`SEI);
+  IsSEI = (opOk && lenOk && fnOk);
+end
+endfunction
+
+// TRUE for an R2 instruction with L2==01 whose function field in
+// isn[47:42] selects SHIFTR.
+function IsShift48;
+input [47:0] isn;
+begin
+  IsShift48 = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `R2:
+    if (isn[`INSTRUCTION_L2]==2'b01)
+      case(isn[47:42])
+      `SHIFTR: IsShift48 = TRUE;
+      default: ;
+      endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for an R2 instruction with L2==00 whose function field in
+// isn[31:26] selects SHIFTR, SHIFT31 or SHIFT63.
+function IsShift;
+input [47:0] isn;
+begin
+  IsShift = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `R2:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case(isn[31:26])
+      `SHIFTR,`SHIFT31,`SHIFT63: IsShift = TRUE;
+      default: ;
+      endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for the CMPI opcode and for the R2 (L2==00) CMP function in
+// isn[31:26].
+function IsCmp;
+input [47:0] isn;
+begin
+  IsCmp = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `R2:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case(isn[31:26])
+      `CMP: IsCmp = TRUE;
+      default: ;
+      endcase
+  `CMPI: IsCmp = TRUE;
+  default: ;
+  endcase
+end
+endfunction
+
+// TRUE for the indexed LWRX load (MEMNDX, L2==00, function code
+// {isn[31:28],isn[22:21]} == LWRX).
+function IsLWRX;
+input [47:0] isn;
+begin
+  IsLWRX = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[22:21]})
+      `LWRX: IsLWRX = TRUE;
+      default: ;
+      endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// Acquire / release bits are only available on indexed SWC / LWR.
+// TRUE for the indexed SWCX store (MEMNDX, L2==00, function code
+// {isn[31:28],isn[17:16]} == SWCX).
+function IsSWCX;
+input [47:0] isn;
+begin
+  IsSWCX = FALSE;
+  case(isn[`INSTRUCTION_OP])
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[17:16]})
+      `SWCX: IsSWCX = TRUE;
+      default: ;
+      endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// Opcode equals JMP.
+function IsJmp;
+input [47:0] isn;
+begin
+  IsJmp = (isn[`INSTRUCTION_OP] == `JMP);
+end
+endfunction
+
+// Really IsPredictableBranch: TRUE for Bcc, BBc, BEQI and CHK.
+// Does not include BccR's.
+function IsBranch;
+input [47:0] isn;
+begin
+  casez(isn[`INSTRUCTION_OP])
+  `Bcc,`BBc,`BEQI,`CHK:
+    IsBranch = TRUE;
+  default:
+    IsBranch = FALSE;
+  endcase
+end
+endfunction
+
+// Opcode equals JAL.
+function IsJAL;
+input [47:0] isn;
+begin
+  IsJAL = (isn[`INSTRUCTION_OP] == `JAL);
+end
+endfunction
+
+// Opcode equals RET.
+function IsRet;
+input [47:0] isn;
+begin
+  IsRet = (isn[`INSTRUCTION_OP] == `RET);
+end
+endfunction
+
+// TRUE for a BRK opcode whose isn[25:21] field is zero.
+function IsIrq;
+input [47:0] isn;
+reg brkOp, fieldZero;
+begin
+  brkOp = (isn[`INSTRUCTION_OP]==`BRK);
+  fieldZero = (isn[25:21]==5'h0);
+  IsIrq = (brkOp && fieldZero);
+end
+endfunction
+
+// Opcode equals BRK.
+function IsBrk;
+input [47:0] isn;
+begin
+  IsBrk = (isn[`INSTRUCTION_OP] == `BRK);
+end
+endfunction
+
+// Returns TRUE for the RTI instruction: R2 opcode with the S2 function
+// field selecting RTI.
+// The major opcode is matched with `R2, the macro used by the IsFlowCtrl
+// and IsALU decodes of RTI in this module.
+function IsRti;
+input [47:0] isn;
+IsRti = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_S2]==`RTI;
+endfunction
+
+// TRUE for the SYNC instruction (R2, L2==00, function R1, isn[22:18]==SYNC)
+// and also for RTI.
+function IsSync;
+input [47:0] isn;
+reg syncOp;
+begin
+  syncOp = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`SYNC);
+  IsSync = syncOp || IsRti(isn);
+end
+endfunction
+
+// Has an extendable 14-bit constant.
+// TRUE for the immediate ALU forms, the displacement loads and stores, and
+// JAL / CALL / RET.
+function HasConst;
+input [47:0] isn;
+begin
+  casez(isn[`INSTRUCTION_OP])
+  // immediate arithmetic / logic
+  `ADDI,`SLTI,`SLTUI,`SGTI,`SGTUI,
+  `ANDI,`ORI,`XORI,`XNORI,
+  `MULUI,`MULI,`MULFI,
+  `DIVUI,`DIVI,`MODI,
+  // loads
+  `LB,`LBU,`Lx,`LxU,`LWR,`LV,
+  // stores and read-modify-write
+  `SB,`Sx,`SWC,`INC,`SV,`CAS,
+  // flow control
+  `JAL,`CALL,`RET,
+  // volatile loads
+  `LVx,`LVxU:
+    HasConst = TRUE;
+  default:
+    HasConst = FALSE;
+  endcase
+end
+endfunction
+
+// Returns TRUE for the instructions flagged as "oddball" (drives the
+// IB_ODDBALL bus field): BRK, CSRRW, REX, CACHE, FLOAT, the IVECTOR VSxx
+// function, the R2 functions VMOV / SEI / RTI, and the indexed CACHEX.
+//
+// instr: the 48-bit instruction word to classify (this argument shadows the
+// module-level signal of the same name).
+//
+// The register-format group is matched with `R2, the macro used by the
+// other SEI / RTI decodes in this module (IsSEI, IsFlowCtrl).
+function IsOddball;
+input [47:0] instr;
+//if (|iqentry_exc[head])
+// IsOddball = TRUE;
+//else
+case(instr[`INSTRUCTION_OP])
+`BRK: IsOddball = TRUE;
+`IVECTOR:
+ case(instr[`INSTRUCTION_S2])
+ `VSxx: IsOddball = TRUE;
+ default: IsOddball = FALSE;
+ endcase
+`R2:
+ case(instr[`INSTRUCTION_S2])
+ `VMOV: IsOddball = TRUE;
+ `SEI,`RTI: IsOddball = TRUE;
+ default: IsOddball = FALSE;
+ endcase
+`MEMNDX:
+ case({instr[31:28],instr[17:16]})
+ `CACHEX: IsOddball = TRUE;
+ default: IsOddball = FALSE;
+ endcase
+`CSRRW,`REX,`CACHE,`FLOAT: IsOddball = TRUE;
+default: IsOddball = FALSE;
+endcase
+endfunction
+
+// Returns TRUE when the instruction is classed as a register file write.
+// The result feeds the IB_RFW bus field, where it is additionally forced to
+// zero when Rt is register 0.
+//
+// The decode is a casez on the major opcode, so the items are tried in the
+// order written. R2 instructions are further split by the L2 field (00, 01)
+// and then by isn[7]; MEMNDX instructions by L2 and by load versus store
+// function code.
+function IsRFW;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`IVECTOR: IsRFW = TRUE;
+`FVECTOR: IsRFW = TRUE;
+`R2:
+ if (isn[`INSTRUCTION_L2]==2'b00)
+ casez(isn[`INSTRUCTION_S2])
+ `TLB: IsRFW = TRUE;
+ `R1:
+ // Barrier / sync style R1 functions listed here do not write a register.
+ case(isn[22:18])
+ `MEMDB,`MEMSB,`SYNC,`SETWB,5'h14,5'h15: IsRFW = FALSE;
+ default: IsRFW = TRUE;
+ endcase
+ `ADD: IsRFW = TRUE;
+ `SUB: IsRFW = TRUE;
+ `SLT: IsRFW = TRUE;
+ `SLTU: IsRFW = TRUE;
+ `SLE: IsRFW = TRUE;
+ `SLEU: IsRFW = TRUE;
+ `AND: IsRFW = TRUE;
+ `OR: IsRFW = TRUE;
+ `XOR: IsRFW = TRUE;
+ `NAND: IsRFW = TRUE;
+ `NOR: IsRFW = TRUE;
+ `XNOR: IsRFW = TRUE;
+ `MULU: IsRFW = TRUE;
+ `MULSU: IsRFW = TRUE;
+ `MUL: IsRFW = TRUE;
+ `MULUH: IsRFW = TRUE;
+ `MULSUH: IsRFW = TRUE;
+ `MULH: IsRFW = TRUE;
+ `MULF: IsRFW = TRUE;
+ `FXMUL: IsRFW = TRUE;
+ `DIVU: IsRFW = TRUE;
+ `DIVSU: IsRFW = TRUE;
+ `DIV:IsRFW = TRUE;
+ `MODU: IsRFW = TRUE;
+ `MODSU: IsRFW = TRUE;
+ `MOD:IsRFW = TRUE;
+ `MOV: IsRFW = TRUE;
+ `VMOV: IsRFW = TRUE;
+ `SHIFTR,`SHIFT31,`SHIFT63:
+ IsRFW = TRUE;
+ `MIN,`MAX: IsRFW = TRUE;
+ `SEI: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else if (isn[`INSTRUCTION_L2]==2'b01)
+ // L2==01: the function code is taken from isn[47:42].
+ case(isn[47:42])
+ `CMOVEZ: IsRFW = TRUE;
+ `CMOVNZ: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else if (isn[7]==1'b1)
+ casez(isn[`INSTRUCTION_S2])
+ `ADD: IsRFW = TRUE;
+ `SUB: IsRFW = TRUE;
+ `AND: IsRFW = TRUE;
+ `OR: IsRFW = TRUE;
+ `XOR: IsRFW = TRUE;
+ `MOV: IsRFW = TRUE;
+ `SHIFTR,`SHIFT31,`SHIFT63:
+ IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else
+ IsRFW = FALSE;
+`MEMNDX:
+ if (isn[`INSTRUCTION_L2]==2'b10) begin
+ if (IsLoad(isn))
+ IsRFW = TRUE;
+ else
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsRFW = TRUE;
+ `CASX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ end
+ else if (isn[`INSTRUCTION_L2]==2'b00) begin
+ if (IsLoad(isn))
+ case({isn[31:28],isn[22:21]})
+ `LBX: IsRFW = TRUE;
+ `LBUX: IsRFW = TRUE;
+ `LCX: IsRFW = TRUE;
+ `LCUX: IsRFW = TRUE;
+ `LHX: IsRFW = TRUE;
+ `LHUX: IsRFW = TRUE;
+ `LWX: IsRFW = TRUE;
+ `LVBX: IsRFW = TRUE;
+ `LVBUX: IsRFW = TRUE;
+ `LVCX: IsRFW = TRUE;
+ `LVCUX: IsRFW = TRUE;
+ `LVHX: IsRFW = TRUE;
+ `LVHUX: IsRFW = TRUE;
+ `LVWX: IsRFW = TRUE;
+ // `LWX is listed a second time here; the earlier item already matches it.
+ `LWX: IsRFW = TRUE;
+ `LWRX: IsRFW = TRUE;
+ `LVX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsRFW = TRUE;
+ `CASX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ end
+ else
+ IsRFW = FALSE;
+`BBc: IsRFW = FALSE;
+`BITFIELD: IsRFW = TRUE;
+`ADDI: IsRFW = TRUE;
+`SLTI: IsRFW = TRUE;
+`SLTUI: IsRFW = TRUE;
+`SGTI: IsRFW = TRUE;
+`SGTUI: IsRFW = TRUE;
+`ANDI: IsRFW = TRUE;
+`ORI: IsRFW = TRUE;
+`XORI: IsRFW = TRUE;
+`XNORI: IsRFW = TRUE;
+`MULUI: IsRFW = TRUE;
+`MULI: IsRFW = TRUE;
+`MULFI: IsRFW = TRUE;
+`DIVUI: IsRFW = TRUE;
+`DIVI: IsRFW = TRUE;
+`MODI: IsRFW = TRUE;
+`JAL: IsRFW = TRUE;
+`CALL: IsRFW = TRUE;
+`RET: IsRFW = TRUE;
+`LB: IsRFW = TRUE;
+`LBU: IsRFW = TRUE;
+`Lx: IsRFW = TRUE;
+`LxU: IsRFW = TRUE;
+`LWR: IsRFW = TRUE;
+`LV: IsRFW = TRUE;
+`LVx: IsRFW = TRUE;
+`LVxU: IsRFW = TRUE;
+`CAS: IsRFW = TRUE;
+`AMO: IsRFW = TRUE;
+`CSRRW: IsRFW = TRUE;
+`AUIPC: IsRFW = TRUE;
+`LUI: IsRFW = TRUE;
+default: IsRFW = FALSE;
+endcase
+endfunction
+
+// Determines which lanes of the target register get updated.
+// Every instruction enables all eight lanes (8'hFF) except the compares
+// (R2 CMP function and CMPI), which enable none.
+function [7:0] fnWe;
+input [47:0] isn;
+begin
+  fnWe = 8'hFF;
+  casez(isn[`INSTRUCTION_OP])
+  `R2:
+    case(isn[`INSTRUCTION_S2])
+    `CMP: fnWe = 8'h00; // CMP sets predicate registers so doesn't update general register file.
+    default: ;
+    endcase
+  `CMPI: fnWe = 8'h00;
+  default: ;
+  endcase
+end
+/*
+casez(isn[`INSTRUCTION_OP])
+`R2:
+ case(isn[`INSTRUCTION_S2])
+ `R1:
+ case(isn[22:18])
+ `ABS,`CNTLZ,`CNTLO,`CNTPOP:
+ case(isn[25:23])
+ 3'b000: fnWe = 8'h01;
+ 3'b001: fnWe = 8'h03;
+ 3'b010: fnWe = 8'h0F;
+ 3'b011: fnWe = 8'hFF;
+ default: fnWe = 8'hFF;
+ endcase
+ default: fnWe = 8'hFF;
+ endcase
+ `SHIFT31: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
+ `SHIFT63: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
+ `SLT,`SLTU,`SLE,`SLEU,
+ `ADD,`SUB,
+ `AND,`OR,`XOR,
+ `NAND,`NOR,`XNOR,
+ `DIV,`DIVU,`DIVSU,
+ `MOD,`MODU,`MODSU,
+ `MUL,`MULU,`MULSU,
+ `MULH,`MULUH,`MULSUH,
+ `FXMUL:
+ case(isn[25:23])
+ 3'b000: fnWe = 8'h01;
+ 3'b001: fnWe = 8'h03;
+ 3'b010: fnWe = 8'h0F;
+ 3'b011: fnWe = 8'hFF;
+ default: fnWe = 8'hFF;
+ endcase
+ default: fnWe = 8'hFF;
+ endcase
+default: fnWe = 8'hFF;
+endcase
+*/
+endfunction
+
+// Detect if a source is automatically valid.
+// Source 1 is valid up front when the instruction does not read Ra at all,
+// or when Ra is register 0. IVECTOR instructions are never automatically
+// valid.
+function Source1Valid;
+input [47:0] isn;
+reg raIsZero;
+begin
+  raIsZero = (isn[`INSTRUCTION_RA]==5'd0);
+  casez(isn[`INSTRUCTION_OP])
+  `BRK: Source1Valid = TRUE;
+  // All of these read Ra; the R2 group gives the same answer for every
+  // function code.
+  `Bcc,`BBc,`BEQI,`CHK,
+  `R2,
+  `MEMNDX,
+  `ADDI,`SLTI,`SLTUI,`SGTI,`SGTUI,
+  `ANDI,`ORI,`XORI,`XNORI,
+  `MULUI,
+  `AMO,
+  `LB,`LBU,`Lx,`LxU,`LWR,`LV,`LVx,
+  `SB,`Sx,`SWC,`SV,
+  `INC,`CAS,
+  `JAL,`RET,
+  `CSRRW:
+    Source1Valid = raIsZero;
+  `BITFIELD:
+    case(isn[31:28])
+    `BFINSI: Source1Valid = TRUE;
+    default: Source1Valid = raIsZero;
+    endcase
+  `IVECTOR: Source1Valid = FALSE;
+  default: Source1Valid = TRUE;
+  endcase
+end
+endfunction
+
+// Returns TRUE when the second source operand needs no register read:
+// either the instruction does not use Rb, or Rb is register 0.
+// The decode is a casez on the major opcode, so items are tried in the order
+// written; opcodes not listed default to TRUE.
+function Source2Valid;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`BRK: Source2Valid = TRUE;
+`Bcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`BBc: Source2Valid = TRUE;
+`BEQI: Source2Valid = TRUE;
+`CHK: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`R2: case(isn[`INSTRUCTION_S2])
+ `R1: Source2Valid = TRUE;
+ // For the shifts, isn[25] set makes source 2 valid unconditionally
+ // (presumably an immediate shift count - confirm against the ISA).
+ `SHIFTR: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
+ `SHIFT31: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
+ `SHIFT63: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
+ default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+ endcase
+`MEMNDX:
+ // Indexed vector load / store (LVX, SVX) are never automatically valid.
+ if (IsLoad(isn))
+ case({isn[31:28],isn[22:21]})
+ `LVX: Source2Valid = FALSE;
+ default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+ endcase
+ else
+ case({isn[31:28],isn[17:16]})
+ `SVX: Source2Valid = FALSE;
+ default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+ endcase
+`ADDI: Source2Valid = TRUE;
+`SLTI: Source2Valid = TRUE;
+`SLTUI: Source2Valid = TRUE;
+`SGTI: Source2Valid = TRUE;
+`SGTUI: Source2Valid = TRUE;
+`ANDI: Source2Valid = TRUE;
+`ORI: Source2Valid = TRUE;
+`XORI: Source2Valid = TRUE;
+`XNORI: Source2Valid = TRUE;
+`MULUI: Source2Valid = TRUE;
+`LB: Source2Valid = TRUE;
+`LBU: Source2Valid = TRUE;
+`Lx: Source2Valid = TRUE;
+`LxU: Source2Valid = TRUE;
+`LWR: Source2Valid = TRUE;
+`LVx: Source2Valid = TRUE;
+`INC: Source2Valid = TRUE;
+`SB: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`Sx: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`SWC: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`CAS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`JAL: Source2Valid = TRUE;
+`RET: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`IVECTOR:
+ case(isn[`INSTRUCTION_S2])
+ `VABS: Source2Valid = TRUE;
+ `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
+ Source2Valid = FALSE;
+ `VADDS,`VSUBS,`VANDS,`VORS,`VXORS:
+ Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+ `VBITS2V: Source2Valid = TRUE;
+ `V2BITS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+ `VSHL,`VSHR,`VASR: Source2Valid = isn[22:21]==2'd2;
+ default: Source2Valid = FALSE;
+ endcase
+`LV: Source2Valid = TRUE;
+`SV: Source2Valid = FALSE;
+// AMO: isn[31] set makes source 2 valid regardless of Rb.
+`AMO: Source2Valid = isn[31] || isn[`INSTRUCTION_RB]==5'd0;
+default: Source2Valid = TRUE;
+endcase
+endfunction
+
+// Source 3 is automatically valid unless the instruction reads Rc, in which
+// case it is valid only when Rc is register 0. Rc is read by CHK, by the
+// R2 CMOVEZ / CMOVNZ (L2==01) and MAJ functions, and by the indexed stores
+// and CASX (MEMNDX, L2==00).
+function Source3Valid;
+input [47:0] isn;
+reg rcIsZero;
+begin
+  rcIsZero = (isn[`INSTRUCTION_RC]==5'd0);
+  Source3Valid = TRUE;
+  case(isn[`INSTRUCTION_OP])
+  `IVECTOR: ;  // always valid, whatever the function code
+  `CHK: Source3Valid = rcIsZero;
+  `R2:
+    if (isn[`INSTRUCTION_L2]==2'b01)
+      case(isn[47:42])
+      `CMOVEZ,`CMOVNZ: Source3Valid = rcIsZero;
+      default: ;
+      endcase
+    else
+      case(isn[`INSTRUCTION_S2])
+      `MAJ: Source3Valid = rcIsZero;
+      default: ;
+      endcase
+  `MEMNDX:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+      case({isn[31:28],isn[17:16]})
+      `SBX,`SCX,`SHX,`SWX,`SWCX,`CASX: Source3Valid = rcIsZero;
+      default: ;
+      endcase
+  default: ;
+  endcase
+end
+endfunction
+
+// Classification results that are used more than once in the decode below.
+wire isRet = IsRet(instr);
+wire isJal = IsJAL(instr);
+wire isBrk = IsBrk(instr);
+wire isRti = IsRti(instr);
+
+// Main decode: builds the decoded-instruction bus from 'instr'.
+// With REGISTER_DECODE defined the outputs are registered on clk; otherwise
+// the block is combinational.
+`ifdef REGISTER_DECODE
+always @(posedge clk)
+`else
+always @*
+`endif
+begin
+ // Clear every field first; the assignments below override individual fields.
+ bus <= 144'h0;
+ bus[`IB_CMP] <= IsCmp(instr);
+ // Sign-extended constant. instr[6] selects the long form (sign taken from
+ // bit 47) over the short form (sign taken from bit 31). Stores assemble the
+ // constant from the upper field plus instr[17:13]; everything else uses a
+ // contiguous field ending at bit 18.
+ if (IsStore(instr))
+ bus[`IB_CONST] <= instr[6]==1'b1 ? {{34{instr[47]}},instr[47:23],instr[17:13]} :
+ {{50{instr[31]}},instr[31:23],instr[17:13]};
+ else
+ bus[`IB_CONST] <= instr[6]==1'b1 ? {{34{instr[47]}},instr[47:18]} :
+ {{50{instr[31]}},instr[31:18]};
+ // IB_LN is derived from instr[7:6] (00 -> 4, 01 -> 6, otherwise 2) and has
+ // pred_on OR'd into it. Presumably the instruction length in bytes - confirm.
+`ifdef SUPPORT_DCI
+ if (instr[`INSTRUCTION_OP]==`CMPRSSD)
+ bus[`IB_LN] <= 3'd2 | pred_on;
+ else
+`endif
+ case(instr[7:6])
+ 2'b00: bus[`IB_LN] <= 3'd4 | pred_on;
+ 2'b01: bus[`IB_LN] <= 3'd6 | pred_on;
+ default: bus[`IB_LN] <= 3'd2 | pred_on;
+ endcase
+// bus[`IB_RT] <= fnRt(instr,ven,vl,thrd) | {thrd,7'b0};
+// bus[`IB_RC] <= fnRc(instr,ven,thrd) | {thrd,7'b0};
+// bus[`IB_RA] <= fnRa(instr,ven,vl,thrd) | {thrd,7'b0};
+ bus[`IB_IMM] <= HasConst(instr);
+// bus[`IB_A3V] <= Source3Valid(instr);
+// bus[`IB_A2V] <= Source2Valid(instr);
+// bus[`IB_A1V] <= Source1Valid(instr);
+ bus[`IB_TLB] <= IsTLB(instr);
+ bus[`IB_SZ] <= instr[`INSTRUCTION_OP]==`R2 ? instr[25:23] : 3'd3; // 3'd3=word size
+ bus[`IB_IRQ] <= IsIrq(instr);
+ bus[`IB_BRK] <= isBrk;
+ bus[`IB_RTI] <= isRti;
+ bus[`IB_RET] <= isRet;
+ bus[`IB_JAL] <= isJal;
+ // IB_BT is now used to indicate when to update the branch target buffer.
+ // This occurs when one of the instructions with an unknown or calculated
+ // target is present.
+ bus[`IB_BT] <= isJal | isRet | isBrk | isRti;
+ bus[`IB_ALU] <= IsALU;
+ bus[`IB_ALU0] <= IsAlu0Only(instr);
+ bus[`IB_FPU] <= IsFPU(instr);
+ bus[`IB_FC] <= IsFlowCtrl;
+ bus[`IB_CANEX] <= fnCanException(instr);
+ bus[`IB_LOADV] <= IsVolatileLoad(instr);
+ bus[`IB_LOAD] <= IsLoad(instr);
+ // A load whose target is register 0 is flagged as a preload.
+ bus[`IB_PRELOAD] <= IsLoad(instr) && Rt==5'd0;
+ bus[`IB_STORE] <= IsStore(instr);
+ bus[`IB_PUSH] <= IsPush(instr);
+ bus[`IB_ODDBALL] <= IsOddball(instr);
+ bus[`IB_MEMSZ] <= MemSize(instr);
+ bus[`IB_MEM] <= IsMem(instr);
+ bus[`IB_MEMNDX] <= IsMemNdx(instr);
+ // Read-modify-write memory operations.
+ bus[`IB_RMW] <= IsCAS(instr) || IsAMO(instr) || IsInc(instr);
+ bus[`IB_MEMDB] <= IsMemdb(instr);
+ bus[`IB_MEMSB] <= IsMemsb(instr);
+ bus[`IB_SHFT] <= IsShift48(instr);//|IsShift(instr);
+ bus[`IB_SEI] <= IsSEI(instr);
+ // Acquire / release come from instr[25] / instr[24] and only apply to AMO,
+ // LWRX and SWCX.
+ bus[`IB_AQ] <= (IsAMO(instr)|IsLWRX(instr)|IsSWCX(instr)) & instr[25];
+ bus[`IB_RL] <= (IsAMO(instr)|IsLWRX(instr)|IsSWCX(instr)) & instr[24];
+ bus[`IB_JMP] <= IsJmp(instr);
+ bus[`IB_BR] <= IsBranch(instr);
+ // IsSync already includes RTI, so the extra IsRti term here is redundant.
+ bus[`IB_SYNC] <= IsSync(instr)||IsBrk(instr)||IsRti(instr);
+ bus[`IB_FSYNC] <= IsFSync(instr);
+ // Writes targeting register 0 are suppressed.
+ bus[`IB_RFW] <= (Rt==5'd0) ? 1'b0 : IsRFW(instr);// && !IsCmp(instr);
+ bus[`IB_PRFW] <= IsCmp(instr);
+ bus[`IB_WE] <= fnWe(instr);
+ // Pass the id / valid pair through alongside the decoded bus.
+ id_o <= id_i;
+ idv_o <= idv_i;
+end
+
+endmodule
+
+// Stand-alone ALU classifier. IsALU is TRUE for everything except RTI
+// (R2 with L2==00), the flow control opcodes, FLOAT, and FVECTOR functions
+// other than the vector shifts.
+module mIsALU(instr, IsALU);
+input [47:0] instr;
+output reg IsALU;
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+
+always @*
+begin
+  IsALU = TRUE;
+  casez(instr[`INSTRUCTION_OP])
+  `R2:
+    if (instr[`INSTRUCTION_L2]==2'b00)
+      case(instr[`INSTRUCTION_S2])
+      `VMOV: ;
+      `RTI: IsALU = FALSE;
+      default: ;
+      endcase
+  `BRK,`Bcc,`BBc,`BEQI,`CHK,
+  `JAL,`JMP,`CALL,`RET:
+    IsALU = FALSE;
+  `FVECTOR:
+    case(instr[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: ;
+    default: IsALU = FALSE;
+    endcase
+  `IVECTOR: ;  // every integer vector operation stays TRUE
+  `FLOAT: IsALU = FALSE;
+  default: ;
+  endcase
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_iexpander.v
===================================================================
--- FT64v7/rtl/common/FT64_iexpander.v (nonexistent)
+++ FT64v7/rtl/common/FT64_iexpander.v (revision 60)
@@ -0,0 +1,406 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_iexpander.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include ".\FT64_defines.vh"
+
+module FT64_iexpander(cinstr,expand);
+input [15:0] cinstr;
+output reg [47:0] expand;
+
+// Maps a subset of registers for compressed instructions.
+// fnRp: translate the 3-bit register selector of a compressed instruction
+// into a 5-bit register number.
+//   0 -> r1  (return value 0)     4 -> r12 (regvar)
+//   1 -> r3  (temp)               5 -> r18 (arg1)
+//   2 -> r4  (temp)               6 -> r19 (arg2)
+//   3 -> r11 (regvar)             7 -> r20 (arg3)
+function [4:0] fnRp;
+input [2:0] rg;
+reg [39:0] regmap;
+begin
+	// Eight packed 5-bit entries; entry 0 occupies bits [4:0], entry 7 bits [39:35].
+	regmap = {5'd20,5'd19,5'd18,5'd12,5'd11,5'd4,5'd3,5'd1};
+	fnRp = regmap[rg*5 +: 5];
+end
+endfunction
+
+always @*
+casez({cinstr[15:12],cinstr[6]})
+5'b00000: // NOP / ADDI
+ case(cinstr[4:0])
+ 5'd31: begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'b0};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = cinstr[4:0];
+ expand[7:6] = 2'b10;
+ expand[5:0] = `ADDI;
+ end
+ default:
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = cinstr[4:0];
+ expand[7:6] = 2'b10;
+ expand[5:0] = `ADDI;
+ end
+ endcase
+5'b00010: // SYS
+ if (cinstr[4:0]==5'd0) begin
+ expand[47:32] = 16'h0000;
+ expand[5:0] = `BRK;
+ expand[7:6] = 2'b10;
+ expand[15:8] = {3'd1,cinstr[11:8],cinstr[5]};
+ expand[16] = 1'b0;
+ expand[20:17] = 4'd0;
+ expand[23:21] = 3'd1;
+ expand[31:24] = 8'd0;
+ end
+ // LDI
+ else begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = 5'd0;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `ADDI; // ADDI to sign extend
+ end
+5'b00100:	// RET / ANDI
+	// A register field of zero selects RET; any other value selects ANDI
+	// with the same register as source and target.
+	if (cinstr[4:0]==5'd0) begin
+		expand[47:32] = 16'h0000;
+		expand[31:23] = {4'd0,cinstr[11:8],cinstr[5]};	// 5-bit field from the compressed word, zero extended
+		expand[22:18] = 5'd29;
+		expand[17:13] = 5'd31;
+		expand[12:8] = 5'd31;
+		expand[7:6] = 2'b10;
+		expand[5:0] = `RET;
+	end
+	else begin
+		expand[47:32] = 16'h0000;
+		expand[5:0] = `ANDI;
+		expand[7:6] = 2'b10;
+		expand[12:8] = cinstr[4:0];
+		expand[17:13] = cinstr[4:0];
+		// 5-bit immediate sign extended to exactly the 14 bits of the
+		// field (9 + 4 + 1), so no bits are silently truncated.
+		expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
+	end
+5'b00110: // SHLI
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = 6'h0F; // immediate mode 0-31
+ expand[25:23] = 3'd0; // SHL
+ expand[22:18] = {cinstr[11:8],cinstr[5]}; // amount
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = cinstr[4:0];
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+5'b01000:
+ case(cinstr[5:4])
+ 2'd0: // SHRI
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = 6'h0F; // shift immediate 0-31
+ expand[25:23] = 3'd1; // SHR
+ expand[22:18] = {cinstr[11:8],cinstr[3]}; // amount
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ 2'd1: // ASRI
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = 6'h0F; // shift immediate 0-31
+ expand[25:23] = 3'd3; // ASR
+ expand[22:18] = {cinstr[11:8],cinstr[3]}; // amount
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ 2'd2: // ORI
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[3]};
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = `ORI;
+ end
+ 2'd3:
+ case(cinstr[11:10])
+ 2'd0: begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = `SUB;
+ expand[25:23] = 3'b011; // word size
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[3]});
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ 2'd1: begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = `AND;
+ expand[25:23] = 3'b011; // word size
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[3]});
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ 2'd2: begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = `OR;
+ expand[25:23] = 3'b011; // word size
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[3]});
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ 2'd3: begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = `XOR;
+ expand[25:23] = 3'b011; // word size
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[3]});
+ expand[17:13] = fnRp(cinstr[2:0]);
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = 8'h02; // R2 instruction
+ end
+ endcase
+ endcase
+5'b01110:
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{1{cinstr[11]}},{cinstr[11:8],cinstr[5:2]}};
+ expand[22:18] = 5'd0; // Rb = 0
+ expand[17:16] = cinstr[1:0];
+ expand[15:13] = 3'd0; // BEQ
+ expand[12:8] = 5'd0; // r0==r0
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Bcc; // 0x38
+ end
+5'b10??0:
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{4{cinstr[13]}},cinstr[13:9]};
+ expand[22:18] = 5'd0; // r0
+ expand[17:16] = {cinstr[8],cinstr[5]};
+ expand[15:13] = 3'd0; // BEQ
+ expand[12:8] = cinstr[4:0]; // Ra
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Bcc;
+ end
+5'b11??0:
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{4{cinstr[13]}},cinstr[13:9]};
+ expand[22:18] = 5'd0; // r0
+ expand[17:16] = {cinstr[8],cinstr[5]};
+ expand[15:13] = 3'd1; // BNE
+ expand[12:8] = cinstr[4:0]; // Ra
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Bcc;
+ end
+5'b00001:
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:26] = `MOV; // `MOV is 6'b01001?
+ expand[26] = 1'b0;
+ expand[25:23] = 3'd7; // move current to current
+ expand[22:18] = 5'd0; // register set (ignored)
+ expand[17:13] = {cinstr[11:8],cinstr[5]};
+ expand[12:8] = cinstr[4:0];
+ expand[7:6] = 2'b10;
+ expand[5:0] = 6'h02;
+ end
+5'b00011:	// ADD
+	// Compressed register-register add: expands to an R2-format ADD whose
+	// first source and target are both cinstr[4:0] and whose second source
+	// is {cinstr[11:8],cinstr[5]}.
+	begin
+		expand[47:32] = 16'h0000;
+		expand[31:26] = `ADD;
+		// The size field is three bits at [25:23], as in the SUB/AND/OR/XOR
+		// arms. Writing a wider slice here would clobber bits [27:26] of
+		// the function code assigned on the previous line.
+		expand[25:23] = 3'b011;		// word size
+		expand[22:18] = {cinstr[11:8],cinstr[5]};
+		expand[17:13] = cinstr[4:0];
+		expand[12:8] = cinstr[4:0];
+		expand[7:6] = 2'b10;
+		expand[5:0] = 6'h02;		// R2 instruction
+	end
+5'b00101: // JALR
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = 14'd0;
+ expand[17:13] = {cinstr[11:8],cinstr[5]};
+ expand[12:8] = cinstr[4:0];
+ expand[7:6] = 2'b10;
+ expand[5:0] = `JAL;
+ end
+5'b00111:
+	// Three sub-forms share this slot:
+	//   cinstr[11:8]==1                -> SEG instruction (R2 format, function 6'h36)
+	//   {cinstr[11:8],cinstr[5]}==0    -> PUSH of register cinstr[4:0]
+	//   anything else                  -> NOP
+	if (cinstr[11:8]==4'h1) begin
+		expand[47:32] = 16'h0000;
+		expand[31:26] = 6'h36;		// SEG instructions
+		// Every bit of expand must be driven on every path of this
+		// combinational block, otherwise bits [25:23] infer latches.
+		// NOTE(review): zero is used as a neutral filler - confirm
+		// against the SEG instruction encoding.
+		expand[25:23] = 3'd0;
+		expand[22:18] = {2'b0,cinstr[2:0]};
+		expand[17:13] = 5'd0;		// no target
+		expand[12:8] = 5'd0;
+		expand[7:6] = 2'b10;
+		expand[5:0] = 6'h02;
+	end
+	else if ({cinstr[11:8],cinstr[5]}==5'b0) begin	// PUSH
+		expand[47:32] = 16'h0000;
+		expand[31:28] = 4'hC;
+		expand[27:23] = 5'd0;
+		expand[22:18] = cinstr[4:0];
+		expand[17:13] = 5'd31;
+		expand[12:8] = 5'd31;
+		expand[7:6] = 2'b10;
+		expand[5:0] = `MEMNDX;
+	end
+	else begin
+		expand[47:8] = 40'd0;
+		expand[7:6] = 2'b10;
+		expand[5:0] = `NOP;
+	end
+5'b01001: // LH Rt,d[SP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{7{cinstr[11]}},cinstr[11:8],cinstr[5],2'd2};
+ expand[17:13] = {cinstr[4:0]};
+ expand[12:8] = 5'd31;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b01011: // LW Rt,d[SP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'd4};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = 5'd31;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b01101: // LH Rt,d[fP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{7{cinstr[11]}},cinstr[11:8],cinstr[5],2'd2};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = 5'd30;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b01111: // LW Rt,d[FP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'd4};
+ expand[17:13] = cinstr[4:0];
+ expand[12:8] = 5'd30;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b10001: // SH Rt,d[SP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
+ expand[22:18] = cinstr[4:0];
+ expand[17:13] = {cinstr[9:8],cinstr[5],2'd2};
+ expand[12:8] = 5'd31;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+5'b10011: // SW Rt,d[SP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{6{cinstr[11]}},cinstr[11:9]};
+ expand[22:18] = cinstr[4:0];
+ expand[17:13] = {cinstr[8],cinstr[5],3'd4};
+ expand[12:8] = 5'd31;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+5'b10101: // SH Rt,d[fP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
+ expand[22:18] = cinstr[4:0];
+ expand[17:13] = {cinstr[9:8],cinstr[5],2'd2};
+ expand[12:8] = 5'd30;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+5'b10111: // SW Rt,d[FP]
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{6{cinstr[11]}},cinstr[11:9]};
+ expand[22:18] = cinstr[4:0];
+ expand[17:13] = {cinstr[8],cinstr[5],3'd4};
+ expand[12:8] = 5'd30;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+5'b11001:
+ begin // LH
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{8{cinstr[11]}},cinstr[11:10],cinstr[4:3],2'd2};
+ expand[17:13] = fnRp({cinstr[9:8],cinstr[5]});
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b11011: // LW
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:18] = {{7{cinstr[11]}},cinstr[11:10],cinstr[4:3],3'd4};
+ expand[17:13] = fnRp({cinstr[9:8],cinstr[5]});
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Lx;
+ end
+5'b11101: // SH
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{8{cinstr[11]}},cinstr[11]};
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[5]});
+ expand[17:13] = {cinstr[10],cinstr[4:3],2'd2};
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+5'b11111: // SW
+ begin
+ expand[47:32] = 16'h0000;
+ expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
+ expand[22:18] = fnRp({cinstr[9:8],cinstr[5]});
+ expand[17:13] = {cinstr[4:3],3'd4};
+ expand[12:8] = fnRp(cinstr[2:0]);
+ expand[7:6] = 2'b10;
+ expand[5:0] = `Sx;
+ end
+default:
+ begin
+ expand[47:8] = 40'd0;
+ expand[7:6] = 2'b10;
+ expand[5:0] = `NOP;
+ end
+endcase
+
+endmodule
Index: FT64v7/rtl/common/FT64_ipt.v
===================================================================
--- FT64v7/rtl/common/FT64_ipt.v (nonexistent)
+++ FT64v7/rtl/common/FT64_ipt.v (revision 60)
@@ -0,0 +1,412 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_ipt.v
+// - 64 bit CPU inverted page table memory management unit
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`ifndef TRUE
+`define TRUE 1'b1
+`define FALSE 1'b0
+`endif
+
+module FT64_ipt(rst, clk, pkeys_i, ol_i, cti_i, cs_i, icl_i, cyc_i, stb_i, ack_o, we_i, sel_i, vadr_i, dat_i, dat_o,
+ cyc_o, ack_i, we_o, padr_o, exv_o, rdv_o, wrv_o, prv_o, page_fault);
+input rst;
+input clk;
+input [63:0] pkeys_i;
+input [1:0] ol_i;
+input [2:0] cti_i;
+input cs_i;
+input icl_i;
+input cyc_i;
+input stb_i;
+output reg ack_o;
+input we_i;
+input [7:0] sel_i;
+input [63:0] vadr_i;
+input [63:0] dat_i;
+output reg [63:0] dat_o;
+output reg cyc_o;
+input ack_i;
+output reg we_o;
+output reg [31:0] padr_o;
+output reg exv_o;
+output reg rdv_o;
+output reg wrv_o;
+output reg prv_o;
+output reg page_fault;
+
+parameter S_IDLE = 4'd0;
+parameter S_CMP1 = 4'd1;
+parameter S_CMP2 = 4'd2;
+parameter S_CMP3 = 4'd3;
+parameter S_CMP4 = 4'd4;
+parameter S_CMP5 = 4'd5;
+parameter S_CMP6 = 4'd6;
+parameter S_WAIT1 = 4'd7;
+parameter S_ACK = 4'd8;
+
+integer n;
+wire [9:0] pkey [0:5];
+assign pkey[0] = pkeys_i[9:0];
+assign pkey[1] = pkeys_i[19:10];
+assign pkey[2] = pkeys_i[29:20];
+assign pkey[3] = pkeys_i[39:30];
+assign pkey[4] = pkeys_i[49:40];
+assign pkey[5] = pkeys_i[59:50];
+reg [3:0] state;
+reg [15:0] pt_ad;
+reg upd;
+reg upd_done;
+reg probe, probe_done;
+reg pte_last;
+reg [7:0] pte_asid;
+reg [3:0] pte_drwx;
+reg [18:0] pte_vadr;
+reg [9:0] pte_key;
+reg pt_wr;
+reg [41:0] pt_dati;
+wire [41:0] pt_dat;
+
+FT64_iptram uram1 (
+ .clka(clk),
+ .ena(1'b1),
+ .wea(pt_wr),
+ .addra(pt_ad),
+ .dina(pt_dati),
+ .douta(pt_dat)
+);
+
+wire pt_last = pt_dat[23];
+wire [18:0] pt_vadr = pt_dat[22:4];
+wire [7:0] pt_asid = pt_dat[31:24];
+wire [3:0] pt_drwx = pt_dat[3:0];
+wire [9:0] pt_key = pt_dat[41:32];
+
+// keymatch: combinational flag, high when the key field of the page table
+// entry currently on pt_dat is zero or equals any of the six keys unpacked
+// from pkeys_i.
+reg keymatch;
+always @*
+begin
+	keymatch = 1'b0;
+	// A zero key in the entry matches regardless of the supplied keys.
+	if (pt_key==10'h0)
+		keymatch = 1'b1;
+	for (n = 0; n < 6; n = n + 1)
+		if (pt_key==pkey[n])
+			keymatch = 1'b1;
+end
+
+// Hash1: first page table index for a 40-bit address.
+// Result = {0, vadr[37:32], vadr[21:13]}; the cleared top bit selects the
+// lower half of the 16-bit index space.
+function [15:0] Hash1;
+input [39:0] vadr;
+begin
+	Hash1[15]   = 1'b0;
+	Hash1[14:9] = vadr[37:32];
+	Hash1[8:0]  = vadr[21:13];
+end
+endfunction
+
+// Hash2: second page table index for a 40-bit address.
+// Result = {1, vadr[37:32], vadr[21:13]}; the set top bit selects the
+// upper half of the 16-bit index space.
+function [15:0] Hash2;
+input [39:0] vadr;
+begin
+	Hash2[15]   = 1'b1;
+	Hash2[14:9] = vadr[37:32];
+	Hash2[8:0]  = vadr[21:13];
+end
+endfunction
+
+// Register readback. On every clock dat_o is loaded with the register
+// selected by vadr_i[5:3]:
+//   1      : pt_ad in bits [15:0]
+//   2      : key [41:32], asid [31:24], last [23], drwx[3] at bit 7,
+//            drwx[2:0] at bits [2:0]
+//   3      : pte_vadr in bits [18:0]
+//   others : zero
+// dat_o is cleared first so that bits not belonging to the selected
+// register always read as zero instead of holding data from an earlier
+// read. The later non-blocking assignments override the clear for the
+// bits they name.
+always @(posedge clk)
+begin
+	dat_o <= 64'd0;
+	case(vadr_i[5:3])
+	3'd1:
+		dat_o[15:0] <= pt_ad;
+	3'd2:
+		begin
+			dat_o[41:32] <= pte_key;
+			dat_o[31:24] <= pte_asid;
+			dat_o[23] <= pte_last;
+			dat_o[2:0] <= pte_drwx[2:0];
+			dat_o[7] <= pte_drwx[3];
+		end
+	3'd3:
+		dat_o[18:0] <= pte_vadr;
+	default:	;
+	endcase
+end
+
+always @(posedge clk)
+if (rst) begin
+ cyc_o <= 1'b0;
+ padr_o <= 32'hFFFC0100;
+ ack_o <= 1'b0;
+ exv_o <= 1'b0;
+ rdv_o <= 1'b0;
+ wrv_o <= 1'b0;
+ prv_o <= 1'b0;
+ pt_wr <= 1'b0;
+ upd <= 1'b0;
+ probe <= 1'b0;
+ upd_done <= 1'b0;
+ probe_done <= 1'b0;
+ goto(S_IDLE);
+end
+else begin
+ pt_wr <= 1'b0;
+ page_fault <= 1'b0;
+ ack_o <= 1'b0;
+case(state)
+// S_IDLE: wait for a bus cycle and decide how to service it.
+//  - cs_i set: access to this unit's own registers (selected by vadr_i[5:3]).
+//  - cs_i clear: memory access, either passed through untranslated or sent
+//    to the page table lookup starting at S_CMP1.
+//  - no cycle: the violation outputs are cleared.
+S_IDLE:
+	if (cyc_i) begin
+		if (cs_i) begin
+			// Register access is acknowledged immediately; ack_o is cleared
+			// again by the default assignment at the top of the clocked block.
+			ack_o <= 1'b1;
+			case(vadr_i[5:3])
+			// Command register: bit 0 starts a table update, bit 1 a probe.
+			// The *_done flags stop the same bus cycle from retriggering the
+			// command; they are cleared by the next non-register cycle below.
+			3'd0:
+				begin
+					if (dat_i[0] & !upd_done) begin
+						pt_ad <= Hash1({pte_asid,pte_vadr});
+						upd <= 1'b1;
+						goto(S_CMP1);
+					end
+					else if (dat_i[1] & !probe_done) begin
+						pt_ad <= Hash1({pte_asid,pte_vadr});
+						probe <= 1'b1;
+						goto(S_CMP1);
+					end
+				end
+			// Entry attribute register: key, asid, last flag and drwx bits.
+			3'd2:
+				begin
+					pte_key <= dat_i[41:32];
+					pte_asid <= dat_i[31:24];
+					// NOTE(review): "last" is taken from bit 22 here, but the
+					// readback path returns it on dat_o[23] and the table
+					// entry keeps it at pt_dat[23] - confirm which bit is intended.
+					pte_last <= dat_i[22];
+					pte_drwx <= {dat_i[7],dat_i[2:0]};
+				end
+			// Entry virtual address register.
+			3'd3:
+				begin
+					pte_vadr <= dat_i[18:0];
+				end
+			endcase
+		end
+		else begin
+			// Any non-register cycle re-arms the update / probe commands.
+			upd_done <= 1'b0;
+			probe_done <= 1'b0;
+			upd <= 1'b0;
+			probe <= 1'b0;
+			// Operating level 0: the address is passed through untranslated.
+			if (ol_i==2'b0) begin
+				cyc_o <= 1'b1;
+				we_o <= we_i;
+				padr_o <= vadr_i;
+				goto(S_ACK);
+			end
+			else begin
+				// Video frame buffer ($00xxxxxx) and ROM / IO ($FFxxxxxx) regions are
+				// not mapped.
+				if (vadr_i[31:24]==8'hFF || vadr_i[31:24]==8'h00) begin
+					cyc_o <= 1'b1;
+					we_o <= we_i;
+					padr_o <= vadr_i;
+					goto(S_ACK);
+				end
+				else begin
+					// Mapped region: start the lookup at the first hash location.
+					pt_ad <= Hash1({vadr_i[63:56],vadr_i});
+					goto(S_CMP1);
+				end
+			end
+		end
+	end
+	else begin
+		// No bus cycle in progress: drop any violation flags.
+		exv_o <= 1'b0;
+		rdv_o <= 1'b0;
+		wrv_o <= 1'b0;
+		prv_o <= 1'b0;
+	end
+
+S_CMP1:
+ goto(S_CMP2);
+S_CMP2:
+ goto(S_CMP3);
+S_CMP3:
+ if (pt_drwx[2:0]==3'b0) begin
+ if (upd) begin
+ pte_key <= 10'h0;
+ pte_last <= 1'b0;
+ pte_drwx <= 4'd0;
+ pt_wr <= 1'b1;
+ pt_dati <= {pte_key,pte_asid,pte_last,pte_vadr[18:0],pte_drwx};
+ upd_done <= 1'b1;
+ goto(S_IDLE);
+ end
+ else if (probe) begin
+ pte_drwx <= 3'b0;
+ pte_vadr <= 19'b0;
+ pte_asid <= 8'b0;
+ pte_last <= 1'b0;
+ pte_key <= 10'h0;
+ probe_done <= 1'b1;
+ goto(S_IDLE);
+ end
+ else begin
+ page_fault <= 1'b1;
+ goto(S_WAIT1);
+ end
+ end
+ else if (pt_asid==vadr_i[63:56] && pt_vadr==vadr_i[31:13]) begin
+ if (upd) begin
+ if (keymatch) begin
+ pte_key <= pt_key;
+ pte_last <= pt_last;
+ pte_drwx <= pt_drwx;
+ pt_wr <= 1'b1;
+ pt_dati <= {pte_key,pt_dat[31:4],pte_drwx};
+ end
+ else
+ prv_o <= 1'b1;
+ upd_done <= 1'b1;
+ goto(S_IDLE);
+ end
+ else if (probe) begin
+ if (keymatch) begin
+ pte_key <= pt_key;
+ pte_last <= pt_last;
+ pte_drwx <= pt_drwx;
+ end
+ else
+ prv_o <= 1'b1;
+ probe_done <= 1'b1;
+ goto(S_IDLE);
+ end
+ else if (~ack_i) begin
+ if (keymatch) begin
+ cyc_o <= 1'b1;
+ we_o <= we_i & pt_drwx[1];
+ if (!pt_drwx[1] & we_i) wrv_o <= 1'b1;
+ if (!pt_drwx[2] & ~we_i) rdv_o <= 1'b1;
+ if (!pt_drwx[0] & icl_i) exv_o <= 1'b1;
+ padr_o <= {pt_ad,vadr_i[12:0]};
+ end
+ else begin
+ cyc_o <= 1'b1;
+ we_o <= 1'b0;
+ padr_o <= 64'hFFFFFFFFFFFFFFF8;
+ prv_o <= 1'b1;
+ end
+ goto(S_ACK);
+ end
+ end
+ else begin
+ if (upd|probe)
+ pt_ad <= Hash2({pte_asid,pte_vadr});
+ else
+ pt_ad <= Hash2({vadr_i[63:56],vadr_i});
+ goto(S_CMP4);
+ end
+
+S_CMP4:
+ goto(S_CMP5);
+S_CMP5:
+ goto(S_CMP6);
+// S_CMP6: examine the entry read from the second-hash / rehash location.
+//  - drwx[2:0]==0 : the slot is treated as empty. An update writes the new
+//    entry there, a probe returns a zeroed entry, a memory access raises
+//    page_fault.
+//  - asid and address match : update / probe / translate using this entry,
+//    subject to the key check (prv_o is raised on a key mismatch).
+//  - otherwise : step pt_ad by 65 and read again via S_CMP4.
+S_CMP6:
+	if (pt_drwx[2:0]==3'b0) begin
+		if (upd) begin
+			pte_key <= 10'h0;
+			pte_last <= 1'b0;
+			pte_drwx <= 4'd0;
+			pt_wr <= 1'b1;
+			pt_dati <= {pte_key,pte_asid,pte_last,pte_vadr[18:0],pte_drwx};
+			upd_done <= 1'b1;
+			goto(S_IDLE);
+		end
+		else if (probe) begin
+			pte_key <= 10'h0;
+			pte_drwx <= 4'b0;
+			pte_vadr <= 19'b0;
+			pte_asid <= 8'b0;
+			pte_last <= 1'b0;
+			probe_done <= 1'b1;
+			goto(S_IDLE);
+		end
+		else begin
+			page_fault <= 1'b1;
+			goto(S_WAIT1);
+		end
+	end
+	else if (pt_asid==vadr_i[63:56] && pt_vadr==vadr_i[31:13]) begin
+		if (upd) begin
+			if (keymatch) begin
+				pte_key <= pt_key;
+				pte_last <= pt_last;
+				pte_drwx <= pt_drwx;
+				pt_wr <= 1'b1;
+				pt_dati <= {pte_key,pt_dat[31:4],pte_drwx};
+			end
+			else
+				prv_o <= 1'b1;
+			upd_done <= 1'b1;
+			goto(S_IDLE);
+		end
+		else if (probe) begin
+			if (keymatch) begin
+				pte_key <= pt_key;
+				pte_last <= pt_last;
+				pte_drwx <= pt_drwx;
+			end
+			else
+				prv_o <= 1'b1;
+			// The probe is finished whether or not the key matched, exactly
+			// as in S_CMP3. Without this S_IDLE could restart the same
+			// probe while the command cycle is still on the bus.
+			probe_done <= 1'b1;
+			goto(S_IDLE);
+		end
+		else if (~ack_i) begin
+			if (keymatch) begin
+				cyc_o <= 1'b1;
+				// A write is only forwarded when the entry grants write permission.
+				we_o <= we_i & pt_drwx[1];
+				if (!pt_drwx[1] & we_i) wrv_o <= 1'b1;
+				if (!pt_drwx[2] & ~we_i) rdv_o <= 1'b1;
+				if (!pt_drwx[0] & icl_i) exv_o <= 1'b1;
+				// Physical address = table index : page offset.
+				padr_o <= {pt_ad,vadr_i[12:0]};
+			end
+			else begin
+				// Key mismatch: issue a read of a fixed address and flag
+				// the privilege violation.
+				cyc_o <= 1'b1;
+				we_o <= 1'b0;
+				padr_o <= 64'hFFFFFFFFFFFFFFF8;
+				prv_o <= 1'b1;
+			end
+			goto(S_ACK);
+		end
+	end
+	else begin
+		pt_ad <= {pt_ad+8'd65};
+		goto(S_CMP4);
+	end
+
+// Wait a clock cycle for a page fault to register.
+S_WAIT1:
+ goto(S_IDLE);
+
+S_ACK:
+ if (ack_i) begin
+ if (cti_i==3'b000 || cti_i==3'b111) begin
+ cyc_o <= 1'b0;
+ we_o <= 1'b0;
+ goto(S_WAIT1);
+ end
+ end
+
+endcase
+end
+
+// goto: schedule a state change. The state register takes the value
+// next_state at the clock edge (non-blocking assignment), so call sites
+// read as goto(S_xxx).
+task goto;
+input [3:0] next_state;
+	state <= next_state;
+endtask
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_mmu.v
===================================================================
--- FT64v7/rtl/common/FT64_mmu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_mmu.v (revision 60)
@@ -0,0 +1,142 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_MMU.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+`define LOW 1'b0
+`define HIGH 1'b1
+
+module FT64_mmu(rst_i, clk_i, ol_i, pcr_i, pcr2_i, mapen_i, s_ex_i, s_cyc_i, s_stb_i, s_ack_o, s_wr_i, s_adr_i, s_dat_i, s_dat_o,
+ pea_o, cyc_o, stb_o,
+ exv_o, rdv_o, wrv_o);
+input rst_i;
+input clk_i;
+input [2:0] ol_i;
+input [31:0] pcr_i; // paging enabled
+input [63:0] pcr2_i;
+input mapen_i;
+input s_ex_i; // executable address
+input s_cyc_i;
+input s_stb_i;
+input s_wr_i; // write strobe
+output s_ack_o;
+input [31:0] s_adr_i; // virtual address
+input [31:0] s_dat_i;
+output [31:0] s_dat_o;
+output reg [31:0] pea_o;
+output reg cyc_o;
+output reg stb_o;
+output reg exv_o; // execute violation
+output reg rdv_o; // read violation
+output reg wrv_o; // write violation
+
+wire cs = s_cyc_i && s_stb_i && (s_adr_i[31:12]==20'hFFDC4);
+wire [5:0] okey = pcr_i[5:0];
+wire [5:0] akey = pcr_i[13:8];
+wire mol = ol_i==3'b000; // machine operating level
+
+reg ack1, ack2, ack3;
+always @(posedge clk_i)
+ ack1 <= cs;
+always @(posedge clk_i)
+ ack2 <= ack1 & (cs);
+assign s_ack_o = (cs) ? ack2 : 1'b0;
+
+reg cyc1,cyc2,stb1,stb2;
+wire [20:0] douta,doutb;
+wire [20:0] doutca;
+wire [2:0] cwrx = doutb[18:16];
+
+always @(posedge clk_i)
+ exv_o <= s_ex_i & ~cwrx[0] & cyc2 & stb2 & mapen_i;
+always @(posedge clk_i)
+ rdv_o <= ~(s_wr_i | s_ex_i) & ~cwrx[1] & cyc2 & stb2 & mapen_i;
+always @(posedge clk_i)
+ wrv_o <= s_wr_i & ~cwrx[2] & cyc2 & stb2 & mapen_i;
+
+wire [15:0] addra = {akey,s_adr_i[11:2]};
+wire [15:0] addrb = pcr2_i[okey] ? {okey,s_adr_i[28:19]} :
+ {okey,s_adr_i[22:13]};
+
+// Map table RAM with two ports.
+// Port A is the register-side port: enabled by cs, written when s_wr_i is
+// also set, and read back on douta (returned on s_dat_o).
+// Port B is the lookup port: read only, enabled by mapen_i, indexed by addrb.
+// The widths noted below are those of the signals connected in this module.
+FT64_MMURam1 u1 (
+  .clka(clk_i),    // port A clock
+  .ena(cs),      // port A enable: this unit is selected
+  .wea(cs & s_wr_i),      // port A write enable
+  .addra(addra),  // 16-bit address {akey,s_adr_i[11:2]}
+  .dina(s_dat_i[20:0]),    // 21-bit write data
+  .douta(douta),    // 21-bit read data
+  .clkb(clk_i),    // port B clock
+  .enb(mapen_i),      // port B enable: mapping enabled
+  .web(1'b0),    // port B is never written
+  .addrb(addrb),  // 16-bit lookup address built from okey and address bits
+  .dinb(21'h0),    // unused write data
+  .doutb(doutb)  // 21-bit lookup result
+);
+
+assign s_dat_o = {11'd0,douta};
+
+// The following delay reg is to keep all the address bits in sync
+// with the output of the map table. So there are no intermediate
+// invalid addresses.
+reg mapen1, mapen2;
+reg [31:0] s_adr1, s_adr2;
+reg _4MB1, _4MB2;
+always @(posedge clk_i)
+ s_adr1 <= s_adr_i;
+always @(posedge clk_i)
+ s_adr2 <= s_adr1;
+always @(posedge clk_i)
+ _4MB1 <= pcr2_i[okey];
+always @(posedge clk_i)
+ _4MB2 <= _4MB1 | !mapen1;
+always @(posedge clk_i)
+ mapen1 <= !mol && mapen_i && (s_adr_i[31:29]==3'h0);
+always @(posedge clk_i)
+ mapen2 <= mapen1;
+always @(posedge clk_i)
+ cyc1 <= s_cyc_i;
+always @(posedge clk_i)
+ cyc2 <= cyc1 & s_cyc_i;
+always @(posedge clk_i)
+ stb1 <= s_stb_i;
+always @(posedge clk_i)
+ stb2 <= stb1 & s_stb_i;
+
+always @(posedge clk_i)
+if (rst_i) begin
+ cyc_o <= 1'b0;
+ stb_o <= 1'b0;
+ pea_o <= 32'hFFFC0100;
+end
+else begin
+ pea_o[12:0] <= s_adr2[12:0];
+ pea_o[18:13] <= mapen2 ? (_4MB2 ? s_adr2[18:13] : doutb[5:0]) : s_adr2[18:13];
+ pea_o[28:19] <= mapen2 ? doutb[15:6] : s_adr2[28:19];
+ pea_o[31:29] <= s_adr2[31:29];
+ cyc_o <= cyc2 & s_cyc_i;
+ stb_o <= stb2 & s_stb_i;
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_mpu.v
===================================================================
--- FT64v7/rtl/common/FT64_mpu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_mpu.v (revision 60)
@@ -0,0 +1,309 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_MPU.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+//`define CARD_MEMORY 1'b1
+
+module FT64_mpu(hartid_i,rst_i, clk4x_i, clk_i, tm_clk_i,
+ pit_clk2, pit_gate2, pit_out2,
+ irq_o,
+ i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15,i16,i17,i18,i19,
+ i20,i21,i22,i23,i24,i25,i26,i27,i28,
+ cti_o,bte_o,bok_i,cyc_o,stb_o,ack_i,err_i,we_o,sel_o,adr_o,dat_o,dat_i,
+ sr_o, cr_o, rb_i);
+input [63:0] hartid_i;
+input rst_i;
+input clk4x_i;
+input clk_i;
+input tm_clk_i;
+input pit_clk2;
+input pit_gate2;
+output pit_out2;
+output [3:0] irq_o;
+input i1;
+input i2;
+input i3;
+input i4;
+input i5;
+input i6;
+input i7;
+input i8;
+input i9;
+input i10;
+input i11;
+input i12;
+input i13;
+input i14;
+input i15;
+input i16;
+input i17;
+input i18;
+input i19;
+input i20;
+input i21;
+input i22;
+input i23;
+input i24;
+input i25;
+input i26;
+input i27;
+input i28;
+output reg [2:0] cti_o;
+output reg [1:0] bte_o;
+input bok_i;
+output cyc_o;
+output reg stb_o;
+input ack_i;
+input err_i;
+output we_o;
+output reg [7:0] sel_o;
+output [31:0] adr_o;
+output reg [63:0] dat_o;
+input [63:0] dat_i;
+output sr_o;
+output cr_o;
+input rb_i;
+
+wire [3:0] cti;
+wire [2:0] bte;
+wire cyc,stb,we;
+wire [7:0] sel;
+wire [63:0] adr;
+reg [63:0] dati;
+wire [63:0] dato;
+wire [3:0] irq;
+wire [7:0] cause;
+wire pic_ack;
+wire [31:0] pic_dato;
+wire pit_ack;
+wire [31:0] pit_dato;
+wire pit_out0, pit_out1;
+wire crd_ack;
+wire [63:0] crd_dato;
+reg ack;
+wire [63:0] ipt_dato;
+wire ipt_ack;
+wire [1:0] ol;
+wire [31:0] pcr;
+wire [63:0] pcr2;
+wire icl; // instruction cache load
+wire exv,rdv,wrv;
+wire pulse60;
+wire sptr_o;
+wire [63:0] pkeys;
+
+always @(posedge clk_i)
+ cti_o <= cti;
+always @(posedge clk_i)
+ bte_o <= bte;
+//always @(posedge clk_i)
+// cyc_o <= cyc;
+always @(posedge clk_i)
+ stb_o <= stb;
+//always @(posedge clk_i)
+// we_o <= we;
+always @(posedge clk_i)
+ sel_o <= sel;
+//always @(posedge clk_i)
+// adr_o <= adr;
+always @(posedge clk_i)
+ dat_o <= dato;
+
+wire cs_pit = adr[31:8]==24'hFFDC11;
+wire cs_ipt = adr[31:8]==24'hFFDCD0;
+`ifdef CARD_MEMORY
+wire cs_crd = adr[31:11]==21'd0; // $00000000 in virtual address space
+`else
+wire cs_crd = 1'b0;
+`endif
+
+// Need to recreate the a2 address bit for 32 bit peripherals.
+wire [31:0] adr32 = {adr[31:3],|sel[7:4],2'b00};
+wire [31:0] dat32 = |sel[7:4] ? dato[63:32] : dato[31:0];
+
+FT64_pit upit1
+(
+ .rst_i(rst_i),
+ .clk_i(clk_i),
+ .cs_i(cs_pit),
+ .cyc_i(cyc_o),
+ .stb_i(stb_o),
+ .ack_o(pit_ack),
+ .sel_i(sel_o[7:4]|sel_o[3:0]),
+ .we_i(we_o),
+ .adr_i(adr32[5:0]),
+ .dat_i(dat32),
+ .dat_o(pit_dato),
+ .clk0(1'b0),
+ .gate0(1'b0),
+ .out0(pit_out0),
+ .clk1(1'b0),
+ .gate1(1'b0),
+ .out1(pit_out1),
+ .clk2(1'b0),
+ .gate2(1'b0),
+ .out2(pit_out2)
+);
+
+FT64_pic upic1
+(
+ .rst_i(rst_i), // reset
+ .clk_i(clk_i), // system clock
+ .cyc_i(cyc_o),
+ .stb_i(stb_o),
+ .ack_o(pic_ack), // controller is ready
+ .wr_i(we_o), // write
+ .adr_i(adr32), // address
+ .dat_i(dat32),
+ .dat_o(pic_dato),
+ .vol_o(), // volatile register selected
+ .i1(i1),
+ .i2(i2),
+ .i3(i3),
+ .i4(i4),
+ .i5(i5),
+ .i6(i6),
+ .i7(i7),
+ .i8(i8),
+ .i9(i9),
+ .i10(i10),
+ .i11(i11),
+ .i12(i12),
+ .i13(i13),
+ .i14(i14),
+ .i15(i15),
+ .i16(i16),
+ .i17(i17),
+ .i18(i18),
+ .i19(i19),
+ .i20(i20),
+ .i21(i21),
+ .i22(i22),
+ .i23(i23),
+ .i24(i24),
+ .i25(i25),
+ .i26(i26),
+ .i27(i27),
+ .i28(i28),
+ .i29(pit_out2), // garbage collector stop interrupt
+ .i30(pit_out1), // garbage collector interrupt
+ .i31(pit_out0), // time slice interrupt
+ .irqo(irq),
+ .nmii(1'b0),
+ .nmio(),
+ .causeo(cause)
+);
+
+assign irq_o = irq;
+
+`ifdef CARD_MEMORY
+CardMemory ucrd1
+(
+ .clk_i(clk_i),
+ .cs_i(cs_crd & cyc_o & stb_o),
+ .ack_o(crd_ack),
+ .wr_i(we_o),
+ .adr_i(adr),
+ .dat_i(dato),
+ .dat_o(crd_dato),
+ .stp(1'b0),
+ .mapno(pcr[5:0])
+);
+`else
+assign crd_dato = 64'd0;
+assign crd_ack = 1'b0;
+`endif
+
+FT64_ipt uipt1
+(
+ .rst(rst_i),
+ .clk(clk_i),
+ .pkeys_i(pkeys),
+ .ol_i(ol),
+ .cti_i(cti),
+ .cs_i(cs_ipt),
+ .icl_i(icl),
+ .cyc_i(cyc),
+ .stb_i(stb),
+ .ack_o(ipt_ack),
+ .we_i(we),
+ .sel_i(sel),
+ .vadr_i(adr),
+ .dat_i(dato),
+ .dat_o(ipt_dato),
+ .cyc_o(cyc_o),
+ .ack_i(ack),
+ .we_o(we_o),
+ .padr_o(adr_o),
+ .exv_o(exv),
+ .rdv_o(rdv),
+ .wrv_o(wrv)
+);
+
+always @(posedge clk_i)
+casez({pic_ack,pit_ack,crd_ack,cs_ipt})
+4'b1???: dati <= {2{pic_dato}};
+4'b01??: dati <= {2{pit_dato}};
+4'b001?: dati <= crd_dato;
+4'b0001: dati <= ipt_dato;
+default: dati <= dat_i;
+endcase
+
+always @(posedge clk_i)
+ ack <= ack_i|pic_ack|pit_ack|crd_ack|ipt_ack;
+
+FT64 ucpu1
+(
+ .hartid(hartid_i),
+ .rst(rst_i),
+ .clk_i(clk_i),
+ .clk4x(clk4x_i),
+ .tm_clk_i(tm_clk_i),
+ .irq_i(irq),
+ .vec_i(cause),
+ .cti_o(cti),
+ .bte_o(bte),
+ .bok_i(bok_i),
+ .cyc_o(cyc),
+ .stb_o(stb),
+ .ack_i(ack),
+ .err_i(err_i),
+ .we_o(we),
+ .sel_o(sel),
+ .adr_o(adr),
+ .dat_o(dato),
+ .dat_i(dati),
+ .ol_o(ol),
+ .pcr_o(pcr),
+ .pcr2_o(pcr2),
+ .pkeys_o(pkeys),
+ .icl_o(icl),
+ .sr_o(sr_o),
+ .cr_o(cr_o),
+ .rbi_i(rb_i)
+);
+
+endmodule
Index: FT64v7/rtl/common/FT64_multiplier.v
===================================================================
--- FT64v7/rtl/common/FT64_multiplier.v (nonexistent)
+++ FT64v7/rtl/common/FT64_multiplier.v (revision 60)
@@ -0,0 +1,185 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// FT64 Superscalar
+// FT64_multiplier.v
+// - 64 bit multiplier
+//
+// ============================================================================
+//
+// ----------------------------------------------------------------------------
+// FT64_multiplier
+//
+// Multi-cycle multiplier wrapper. Operands are latched when ld is sampled in
+// the IDLE state, fed to a clocked multiplier core (FT64_mult / FT64_mult32 /
+// FT64_mult16 / FT64_mult8, selected by WID) and the core output pp is
+// captured into o once the wait counter has run down.
+//
+// Signed operation is done as sign/magnitude: the magnitudes are multiplied
+// and the product is negated afterwards when the latched result sign (so)
+// is set.
+//
+// Ports:
+//   rst    synchronous reset: clears aa, bb, o and forces the IDLE state
+//   clk    clock
+//   ld     start request, honoured only while in IDLE
+//   abort  zeroes the wait counter (a load in the same cycle overrides this,
+//          because the load assigns cnt later in the block)
+//   sgn    1 = a and b are both signed
+//   sgnus  1 = a is signed, b is unsigned (takes priority over sgn)
+//   a, b   operands
+//   o      2*WID bit product
+//   done   high in the DONE state, and also while IDLE with ld low
+//   idle   high in the IDLE state
+//
+// NOTE(review): pp is a fixed 128 bit wire and the cores are all connected to
+// the WID-bit aa/bb; the port widths of the narrower cores are not visible
+// here - confirm they match when WID != 64.
+// NOTE(review): the SGNADJO parameter is not used by any state below.
+// ----------------------------------------------------------------------------
+module FT64_multiplier(rst, clk, ld, abort, sgn, sgnus, a, b, o, done, idle);
+parameter WID=64;
+parameter SGNADJO=3'd2;
+parameter MULT=3'd3;
+parameter IDLE=3'd4;
+parameter DONE=3'd5;
+input clk;
+input rst;
+input ld;
+input abort;
+input sgn;
+input sgnus;
+input [WID-1:0] a;
+input [WID-1:0] b;
+output [WID*2-1:0] o;
+reg [WID*2-1:0] o;
+output done;
+output idle;
+
+reg [WID-1:0] aa,bb;        // operand magnitudes presented to the core
+reg so;                     // sign to apply to the product
+reg [2:0] state;
+reg [7:0] cnt;              // clocks left to wait for the core
+wire cnt_done = cnt==8'd0;
+assign done = state==DONE || (state==IDLE && !ld);
+assign idle = state==IDLE;
+
+wire [127:0] pp;            // raw product from the multiplier core
+
+// Pick the multiplier core that fits the configured operand width.
+generate begin : gMults
+if (WID > 32) begin
+FT64_mult umul1
+(
+    .CLK(clk),
+    .A(aa),
+    .B(bb),
+    .P(pp)
+);
+end
+else if (WID > 16) begin
+FT64_mult32 umul1
+(
+    .CLK(clk),
+    .A(aa),
+    .B(bb),
+    .P(pp)
+);
+end
+else if (WID > 8) begin
+FT64_mult16 umul1
+(
+    .CLK(clk),
+    .A(aa),
+    .B(bb),
+    .P(pp)
+);
+end
+else begin
+FT64_mult8 umul1
+(
+    .CLK(clk),
+    .A(aa),
+    .B(bb),
+    .P(pp)
+);
+end
+end
+endgenerate
+
+always @(posedge clk)
+if (rst) begin
+    aa <= {WID{1'b0}};
+    bb <= {WID{1'b0}};
+    o <= {WID*2{1'b0}};
+    state <= IDLE;
+end
+else
+begin
+// Wait counter: free-runs down to zero; abort short-circuits the wait.
+if (abort)
+    cnt <= 8'd00;
+else if (!cnt_done)
+    cnt <= cnt - 8'd1;
+
+case(state)
+IDLE:
+    if (ld) begin
+        if (sgnus) begin
+            // signed a, unsigned b: only a contributes to the result sign
+            aa <= a[WID-1] ? -a : a;
+            bb <= b;
+            // Fixed: was a blocking assignment ("so = ..."), which mixed
+            // blocking and non-blocking writes to so in one clocked block.
+            so <= a[WID-1];
+        end
+        else if (sgn) begin
+            aa <= a[WID-1] ? -a : a;
+            bb <= b[WID-1] ? -b : b;
+            so <= a[WID-1] ^ b[WID-1];
+        end
+        else begin
+            aa <= a;
+            bb <= b;
+            so <= 1'b0;
+        end
+        // Presumably sized to cover the latency of the multiplier core -
+        // confirm against the core configuration.
+        cnt <= 8'd20;
+        state <= MULT;
+    end
+MULT:
+    if (cnt_done) begin
+        // sgn/sgnus are sampled again here, so they must still be valid
+        // when the product is captured.
+        if (sgn|sgnus) begin
+            if (so)
+                o <= -pp;
+            else
+                o <= pp;
+        end
+        else
+            o <= pp;
+        state <= DONE;
+    end
+DONE:
+    state <= IDLE;
+default:
+    state <= IDLE;
+endcase
+end
+
+endmodule
+
+// Minimal smoke-test bench for FT64_multiplier: applies a reset pulse, then a
+// single load request with a = 0, b = 48 in signed mode.
+// Fixed relative to the previous version:
+//  - the instance connected a port named isMuli, which FT64_multiplier does
+//    not have; the real control inputs sgnus and abort are now tied off
+//  - ld is given a defined value from time zero instead of starting as X
+//  - done and idle are brought out so they can be observed in the waveform
+module FT64_multiplier_tb();
+
+reg rst;
+reg clk;
+reg ld;
+wire [127:0] o;
+wire done;
+wire idle;
+
+initial begin
+    clk = 1;
+    rst = 0;
+    ld = 0;
+    #100 rst = 1;
+    #100 rst = 0;
+    #100 ld = 1;
+    #150 ld = 0;
+end
+
+always #10 clk = ~clk;    // 20 time-unit period (50 MHz if the time unit is 1 ns)
+
+
+FT64_multiplier u1
+(
+    .rst(rst),
+    .clk(clk),
+    .ld(ld),
+    .abort(1'b0),
+    .sgn(1'b1),
+    .sgnus(1'b0),
+    .a(64'd0),
+    .b(64'd48),
+    .o(o),
+    .done(done),
+    .idle(idle)
+);
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_pic.v
===================================================================
--- FT64v7/rtl/common/FT64_pic.v (nonexistent)
+++ FT64v7/rtl/common/FT64_pic.v (revision 60)
@@ -0,0 +1,189 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Encodes discrete interrupt request signals into five
+// bit code using a priority encoder.
+//
+// reg
+// 0x00 - encoded request number (read / write)
+// This register contains the number identifying
+// the current requester in bits 0 to 4
+// If there is no
+// active request, then this number will be
+// zero.
+// bits 8 to 15 set the base number for the vector
+//
+// 0x04 - request enable (read / write)
+// this register contains request enable bits
+// for each request line. 1 = request
+// enabled, 0 = request disabled. On reset this
+// register is set to zero (disable all ints).
+// bit zero is specially reserved for nmi
+//
+// 0x08 - write only
+// this register disables the interrupt indicated
+// by the low order five bits of the input data
+//
+// 0x0C - write only
+// this register enables the interrupt indicated
+// by the low order five bits of the input data
+//
+// 0x10 - write only
+// this register indicates which interrupt inputs are
+// edge sensitive
+//
+// 0x14 - write only
+// This register resets the edge sense circuitry
+// indicated by the low order five bits of the input data.
+//
+// 0x80 - irq control for irq #0
+// 0x84 - irq control for irq #1
+// bits 0 to 7 = cause code to issue
+// bits 8 to 11 = irq level to issue
+// bit 16 = irq enable
+// bit 17 = edge sensitivity
+//=============================================================================
+
+// ----------------------------------------------------------------------------
+// FT64_pic - priority interrupt controller
+//
+// Inputs i1..i31 are request lines; bit 0 of the internal request vector is
+// the nmi input. Each source has an enable bit (ie), an edge/level select
+// (es, 1 = edge sensitive), a 4 bit level (irq[]) and an 8 bit cause code
+// (cause[]). The lowest numbered active, enabled source wins and its level
+// and cause are driven on irqo / causeo.
+//
+// Bus interface: the block is selected when cyc_i and stb_i are high and
+// adr_i[31:8] matches pIOAddress[31:8]. Writes are acknowledged immediately,
+// reads one clock after the select is first seen.
+//
+// Fixed relative to the previous version: the edge-capture register iedge was
+// never initialised or reset, so an edge latched before a reset stayed
+// pending and fired as soon as the source was enabled again. iedge is now
+// cleared in the initial block and while rst_i is asserted.
+//
+// NOTE(review): causeo is 7 bits wide but is driven from the 8 bit cause[]
+// entries, so bit 7 of a programmed cause code is dropped. Widening the port
+// would change the module interface, so it is left as is.
+// ----------------------------------------------------------------------------
+module FT64_pic
+(
+    input rst_i,        // reset
+    input clk_i,        // system clock
+    input cyc_i,
+    input stb_i,
+    output ack_o,       // controller is ready
+    input wr_i,         // write
+    input [31:0] adr_i, // address
+    input [31:0] dat_i,
+    output reg [31:0] dat_o,
+    output vol_o,       // volatile register selected
+    input i1, i2, i3, i4, i5, i6, i7,
+        i8, i9, i10, i11, i12, i13, i14, i15,
+        i16, i17, i18, i19, i20, i21, i22, i23,
+        i24, i25, i26, i27, i28, i29, i30, i31,
+    output [3:0] irqo,  // normally connected to the processor irq
+    input nmii,         // nmi input connected to nmi requester
+    output nmio,        // normally connected to the nmi of cpu
+    output [6:0] causeo
+);
+parameter pIOAddress = 32'hFFDC_0F00;
+
+reg [31:0] ie;          // interrupt enable register
+reg rdy1;               // select delayed one clock, used as the read ack
+reg [4:0] irqenc;       // number of the winning request, 0 = none
+wire [31:0] i = { i31,i30,i29,i28,i27,i26,i25,i24,i23,i22,i21,i20,i19,i18,i17,i16,
+                  i15,i14,i13,i12,i11,i10,i9,i8,i7,i6,i5,i4,i3,i2,i1,nmii};
+reg [31:0] ib;          // previous sample of i, for rising edge detection
+reg [31:0] iedge;       // latched rising edges, cleared through rste
+reg [31:0] rste;        // one-clock "reset edge" strobes
+reg [31:0] es;          // 1 = source is edge sensitive
+reg [3:0] irq [0:31];   // level to issue per source
+reg [7:0] cause [0:31]; // cause code to issue per source
+integer n;
+
+initial begin
+    ie <= 32'h0;
+    es <= 32'hFFFFFFFF;
+    rste <= 32'h0;
+    iedge <= 32'h0;
+    for (n = 0; n < 32; n = n + 1) begin
+        cause[n] <= 8'h00;
+        irq[n] <= 4'h8;
+    end
+end
+
+wire cs = cyc_i && stb_i && adr_i[31:8]==pIOAddress[31:8];
+assign vol_o = cs;
+
+always @(posedge clk_i)
+    rdy1 <= cs;
+assign ack_o = cs ? (wr_i ? 1'b1 : rdy1) : 1'b0;
+
+// write registers
+always @(posedge clk_i)
+    if (rst_i) begin
+        ie <= 32'h0;
+        rste <= 32'h0;
+    end
+    else begin
+        rste <= 32'h0;      // strobes last a single clock
+        if (cs & wr_i) begin
+            casez (adr_i[7:2])
+            6'd0: ;
+            6'd1:
+                begin
+                    ie[31:0] <= dat_i[31:0];
+                end
+            // 0x08 disables, 0x0C enables the source numbered in dat_i[4:0];
+            // adr_i[2] distinguishes the two addresses.
+            6'd2,6'd3:
+                ie[dat_i[4:0]] <= adr_i[2];
+            6'd4:   es <= dat_i[31:0];
+            6'd5:   rste[dat_i[4:0]] <= 1'b1;
+            // 0x80 and up: per-source control word, source = adr_i[6:2]
+            6'b1?????:
+                begin
+                    cause[adr_i[6:2]] <= dat_i[7:0];
+                    irq[adr_i[6:2]] <= dat_i[11:8];
+                    ie[adr_i[6:2]] <= dat_i[16];
+                    es[adr_i[6:2]] <= dat_i[17];
+                end
+            endcase
+        end
+    end
+
+// read registers
+always @(posedge clk_i)
+begin
+    if (irqenc!=5'd0)
+        $display("PIC: %d",irqenc);
+    if (cs)
+        casez (adr_i[7:2])
+        6'd0:       dat_o <= cause[irqenc];
+        6'b1?????:  dat_o <= {es[adr_i[6:2]],ie[adr_i[6:2]],4'b0,irq[adr_i[6:2]],cause[adr_i[6:2]]};
+        default:    dat_o <= ie;
+        endcase
+    else
+        dat_o <= 32'h0000;
+end
+
+assign irqo = (irqenc == 5'h0) ? 4'd0 : irq[irqenc];
+assign causeo = (irqenc == 5'h0) ? 8'd0 : cause[irqenc];
+assign nmio = nmii & ie[0];
+
+// Edge detect circuit
+// ib keeps tracking the inputs during reset so that a line which is already
+// high when reset is released is not mistaken for a new rising edge.
+always @(posedge clk_i)
+begin
+    for (n = 1; n < 32; n = n + 1)
+    begin
+        ib[n] <= i[n];
+        if (i[n] & !ib[n]) iedge[n] <= 1'b1;
+        if (rste[n]) iedge[n] <= 1'b0;
+        if (rst_i) iedge[n] <= 1'b0;    // drop pending edges on reset
+    end
+end
+
+// irq requests are latched on every rising clock edge to prevent
+// misreads
+// nmi is not encoded
+// The loop runs from 31 down to 1 and the last assignment wins, so the
+// lowest numbered active source has priority.
+always @(posedge clk_i)
+begin
+    irqenc <= 5'd0;
+    for (n = 31; n > 0; n = n - 1)
+        if (ie[n] & (es[n] ? iedge[n] : i[n])) irqenc <= n;
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_pit.v
===================================================================
--- FT64v7/rtl/common/FT64_pit.v (nonexistent)
+++ FT64v7/rtl/common/FT64_pit.v (revision 60)
@@ -0,0 +1,184 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_pit.v
+// - programmable interval timer
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Reg Description
+// 00 current count (read only)
+// 04 max count (read-write)
+// 08 on time (read-write)
+// 0C control
+// byte 0 for counter 0, byte 1 for counter 1, byte 2 for counter 2
+// bit in byte
+// 0 = 1 = load, automatically clears
+// 1 = 1 = enable counting, 0 = disable counting
+// 2 = 1 = auto-reload on terminal count, 0 = no reload
+// 3 = 1 = use external clock, 0 = internal clk_i
+// 4 = 1 = use gate to enable count, 0 = ignore gate
+// 10 current count 1
+// 14 max count 1
+// 18 on time 1
+// 20 current count 2
+// 24 max count 2
+// 28 on time 2
+//
+// - all three counter controls can be written at the same time with a
+// single instruction allowing synchronization of the counters.
+// ============================================================================
+//
+// ----------------------------------------------------------------------------
+// FT64_pit - three channel programmable interval timer
+//
+// Each channel n has a down counter count[n], a reload value maxcount[n] and
+// an "on time" ont[n]. While counting, out[n] is set when the count equals
+// ont[n] and cleared when the count reaches zero; at zero the counter either
+// reloads from maxcount[n] (ar[n] = 1) or stops (ce[n] cleared).
+//
+// Register select: adr_i[5:4] = channel, adr_i[3:2] = register
+//   0 current count (read), 1 max count, 2 on time, 3 control.
+// A control write updates all three channels at once, one byte lane each,
+// qualified by sel_i[0..2].
+//
+// Fixed relative to the previous version: the self-clearing default
+// "ld[n] <= 0" used to sit inside the per-channel loop. A control write is
+// decoded in the iteration for the addressed channel, so for a write at 0x0C
+// (channel 0) the later iterations re-assigned ld[1] and ld[2] to zero and
+// the load request for those channels was lost. The defaults are now assigned
+// before the loop so that any register write overrides them.
+//
+// NOTE(review): when ont[n] is zero the "count == ont" test also matches at
+// count zero, so the terminal-count branch (clear out / reload / stop) is
+// never reached for that channel - confirm whether that is intended.
+// NOTE(review): dat_o is updated on every clock from adr_i, independent of cs.
+// ----------------------------------------------------------------------------
+module FT64_pit(rst_i, clk_i, cs_i, cyc_i, stb_i, ack_o, sel_i, we_i, adr_i, dat_i, dat_o,
+    clk0, gate0, out0, clk1, gate1, out1, clk2, gate2, out2
+    );
+input rst_i;
+input clk_i;
+input cs_i;
+input cyc_i;
+input stb_i;
+output ack_o;
+input [3:0] sel_i;
+input we_i;
+input [5:0] adr_i;
+input [31:0] dat_i;
+output reg [31:0] dat_o;
+input clk0;
+input gate0;
+output out0;
+input clk1;
+input gate1;
+output out1;
+input clk2;
+input gate2;
+output out2;
+
+integer n;
+reg [31:0] maxcount [0:2];
+reg [31:0] count [0:2];
+reg [31:0] ont [0:2];
+wire [2:0] gate;
+wire [2:0] pulse;       // pe outputs of the edge detectors on clk0..clk2
+reg ld [0:2];           // load request, self clearing
+reg ce [0:2];           // count enable
+reg ar [0:2];           // auto reload at terminal count
+reg ge [0:2];           // gate enable
+reg xc [0:2];           // use external clock
+reg out [0:2];
+
+// Writes are acknowledged immediately, reads one clock after select.
+wire cs = cyc_i & stb_i & cs_i;
+reg rdy;
+always @(posedge clk_i)
+    rdy <= cs;
+assign ack_o = cs ? (we_i ? 1'b1 : rdy) : 1'b0;
+
+assign out0 = out[0];
+assign out1 = out[1];
+assign out2 = out[2];
+assign gate[0] = gate0;
+assign gate[1] = gate1;
+assign gate[2] = gate2;
+
+edge_det ued0 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk0), .pe(pulse[0]), .ne());
+edge_det ued1 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk1), .pe(pulse[1]), .ne());
+edge_det ued2 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk2), .pe(pulse[2]), .ne());
+
+initial begin
+    for (n = 0; n < 3; n = n + 1) begin
+        maxcount[n] <= 32'd0;
+        count[n] <= 32'd0;
+        ont[n] <= 32'd0;
+        ld[n] <= 1'b0;
+        ce[n] <= 1'b0;
+        ar[n] <= 1'b0;
+        ge[n] <= 1'b0;
+        xc[n] <= 1'b0;
+        out[n] <= 1'b0;
+    end
+end
+
+always @(posedge clk_i)
+if (rst_i) begin
+    for (n = 0; n < 3; n = n + 1) begin
+        ld[n] <= 1'b0;
+        ce[n] <= 1'b0;
+        ar[n] <= 1'b1;
+        ge[n] <= 1'b0;
+        out[n] <= 1'b0;
+    end
+end
+else begin
+    // Load requests last one clock. These defaults must come before the
+    // register decode below so that a control write to any channel wins.
+    for (n = 0; n < 3; n = n + 1)
+        ld[n] <= 1'b0;
+
+    for (n = 0; n < 3; n = n + 1) begin
+        // register writes
+        if (cs && we_i && adr_i[5:4]==n)
+            case(adr_i[3:2])
+            2'd1:   maxcount[n] <= dat_i;
+            2'd2:   ont[n] <= dat_i;
+            2'd3:   begin
+                        if (sel_i[0]) begin
+                            ld[0] <= dat_i[0];
+                            ce[0] <= dat_i[1];
+                            ar[0] <= dat_i[2];
+                            xc[0] <= dat_i[3];
+                            ge[0] <= dat_i[4];
+                        end
+                        if (sel_i[1]) begin
+                            ld[1] <= dat_i[8];
+                            ce[1] <= dat_i[9];
+                            ar[1] <= dat_i[10];
+                            xc[1] <= dat_i[11];
+                            ge[1] <= dat_i[12];
+                        end
+                        if (sel_i[2]) begin
+                            ld[2] <= dat_i[16];
+                            ce[2] <= dat_i[17];
+                            ar[2] <= dat_i[18];
+                            xc[2] <= dat_i[19];
+                            ge[2] <= dat_i[20];
+                        end
+                    end
+            endcase
+        // register reads; the ld bits always read back as zero
+        if (adr_i[5:4]==n)
+            case(adr_i[3:2])
+            2'd0:   dat_o <= count[n];
+            2'd1:   dat_o <= maxcount[n];
+            2'd2:   dat_o <= ont[n];
+            2'd3:   dat_o <= {ge[2],xc[2],ar[2],ce[2],4'b0,ge[1],xc[1],ar[1],ce[1],4'b0,ge[0],xc[0],ar[0],ce[0],1'b0};
+            endcase
+
+        // counter: a load has priority over counting
+        if (ld[n])
+            count[n] <= maxcount[n];
+        else if ((xc[n] ? pulse[n] & ce[n] : ce[n]) & (ge[n] ? gate[n] : 1'b1)) begin
+            count[n] <= count[n] - 32'd1;
+            if (count[n]==ont[n])
+                out[n] <= 1'b1;
+            else if (count[n]==32'd0) begin
+                out[n] <= 1'b0;
+                if (ar[n])
+                    count[n] <= maxcount[n];
+                else
+                    ce[n] <= 1'b0;
+            end
+        end
+    end
+end
+
+endmodule
Index: FT64v7/rtl/common/FT64_pmmu.v
===================================================================
--- FT64v7/rtl/common/FT64_pmmu.v (nonexistent)
+++ FT64v7/rtl/common/FT64_pmmu.v (revision 60)
@@ -0,0 +1,715 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2007-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_pmmu.v
+// - 64 bit CPU paged memory management unit
+// - 512 entry TLB, 8 way associative
+// - variable page table depth
+// - address short-cutting for larger page sizes (8MB)
+// - hardware clearing of access bit
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`ifndef TRUE
+`define TRUE 1'b1
+`define FALSE 1'b0
+`endif
+// Bit index of the large (8MB) page flag, used below on both the TLB flags
+// word and on a PTE read from memory.
+`define _8MBPG 5
+
+// ----------------------------------------------------------------------------
+// FT64_pmmu - paged memory management unit
+//
+// Structure visible in this module:
+//  - pAssociativity ways of pTLB_size-entry RAMs, indexed by address bits
+//    18:13 and tagged with address bits 63:19
+//  - a clocked lookup block that either passes the address through
+//    (unmapped ranges, or paging disabled via pta[11]) or translates it from
+//    the matching way, producing the permission flags used for cyc_o, we_o
+//    and the violation outputs
+//  - a state machine that walks the page table over the m_* master port on
+//    a miss; the starting table level is selected by pta[10:8]
+//  - goto / call / return tasks giving the state machine a one-deep
+//    subroutine mechanism through stkstate
+//
+// NOTE(review): the module does not look complete as it stands. Names that
+// are used but not declared anywhere in this module:
+//   ack_o (assigned, not a port), pl, wr, age_tick, pe_age_tick, clock,
+//   clock_adr, tlb_wr,
+//   states S_AGE, S_COUNT, S_RD_PTL0, S_RD_PTL1, S_WR_PTL0
+// Declared but apparently unused / mismatched:
+//   pe_age_rtick, S_WR_PTL0L/H, S_RD_PTL0L/H, S_RD_PTL1L/H
+// Width mismatches to check: padr_o is [AMSB:0] with AMSB = 31 but bits
+// 47:13 are assigned; m_sel_o is 16 bits but is assigned 8 bit constants;
+// tlb_refcount is 32 bits but is sliced [63:42] / [63:33]; pte_adr is
+// [AMSB-4:0] but feeds a RAM of width AMSB+1; state is 4 bits.
+// ----------------------------------------------------------------------------
+module FT64_pmmu
+#(
+parameter
+ AMSB = 31,
+ pAssociativity = 8, // number of ways (parallel compares)
+ pTLB_size = 64,
+ S_WAIT_MISS = 0,
+ S_WR_PTL0L = 1,
+ S_WR_PTL0H = 2,
+ S_RD_PTL0L = 3,
+ S_RD_PTL0H = 4,
+ S_RD_PTL1L = 5,
+ S_RD_PTL1H = 6,
+ S_RD_PTL2 = 7,
+ S_RD_PTL3 = 8,
+ S_RD_PTL4 = 9,
+ S_RD_PTL5 = 10,
+ S_RD_PTL5_ACK = 11,
+ S_RD_PTL = 12,
+ S_WR_PTL = 13
+)
+(
+// syscon
+input rst_i,
+input clk_i,
+
+input age_tick_i, // indicates when to age reference counts
+
+// master
+output reg m_cyc_o, // valid memory address
+output reg m_lock_o, // lock the bus
+input m_ack_i, // acknowledge from memory system
+output reg m_we_o, // write enable output
+output reg [15:0] m_sel_o, // lane selects (always all active)
+output reg [AMSB:0] m_adr_o,
+input [127:0] m_dat_i, // data input from memory
+output reg [127:0] m_dat_o, // data to memory
+
+// Translation request / control
+input invalidate, // invalidate a specific entry
+input invalidate_all, // causes all entries to be invalidated
+input [47:0] pta, // page directory/table address register
+output reg page_fault,
+
+input [7:0] asid_i,
+input [7:0] pl_i,
+input [1:0] ol_i, // operating level
+input icl_i, // instruction cache load
+input cyc_i,
+input we_i, // cpu is performing write cycle
+input [7:0] sel_i,
+input [63:0] vadr_i, // virtual address to translate
+
+output reg cyc_o,
+output reg we_o,
+output reg [7:0] sel_o,
+output reg [AMSB:0] padr_o, // translated address
+output reg cac_o, // cachable
+output reg prv_o, // privilege violation
+output reg exv_o, // execute violation
+output reg rdv_o, // read violation
+output reg wrv_o // write violation
+);
+
+integer nn;
+reg [8:0] tlb_wa;
+reg [8:0] tlb_ra;
+reg [8:0] tlb_ua;
+reg [AMSB:0] tmpadr;
+// Results of the lookup block: privilege violation, valid, and the
+// read / write / execute / cacheable / accessed flags of the matching entry.
+reg pv_o;
+reg v_o;
+reg r_o;
+reg w_o;
+reg x_o;
+reg c_o;
+reg a_o;
+reg [2:0] nnx;
+reg [127:0] pte; // holding place for data
+reg [AMSB-4:0] pte_adr;
+reg [3:0] state;
+reg [3:0] stkstate;
+reg [2:0] cnt; // tlb replacement counter
+reg [2:0] whichSet; // which set to update
+reg dbit; // temp dirty bit
+reg miss;
+reg proc;
+reg [63:0] miss_adr;
+wire pta_changed;
+// NOTE(review): ack_o is not in the port list.
+assign ack_o = !miss||page_fault;
+// Paging enable, taken from bit 11 of the page table address register.
+wire pgen = pta[11];
+
+wire [AMSB:0] tlb_pte_adr [pAssociativity-1:0];
+wire [pAssociativity-1:0] tlb_d;
+wire [ 6: 0] tlb_flags [pAssociativity-1:0];
+wire [ 7: 0] tlb_pl [pAssociativity-1:0];
+wire [ 7: 0] tlb_asid [pAssociativity-1:0];
+wire [31: 0] tlb_refcount [pAssociativity-1:0];
+wire tlb_g [pAssociativity-1:0];
+wire [63:19] tlb_vadr [pAssociativity-1:0];
+wire [34:0] tlb_tadr [pAssociativity-1:0];
+
+//wire wr_tlb = state==S_WR_PTL0;
+reg wr_tlb;
+// Registered bus outputs. A cycle is only passed on for a valid translation
+// without a privilege violation; writes additionally need the write flag.
+// The violation outputs are suppressed when ol_i is 2'b00.
+always @(posedge clk_i)
+ cyc_o <= cyc_i & v_o & ~pv_o;
+always @(posedge clk_i)
+ we_o <= we_i & v_o & ~pv_o & w_o;
+always @(posedge clk_i)
+ sel_o <= sel_i & {8{~pv_o}};
+always @(posedge clk_i)
+ prv_o <= pv_o & v_o && ol_i!=2'b00;
+always @(posedge clk_i)
+ exv_o <= icl_i & v_o & ~x_o && ol_i!=2'b00;
+always @(posedge clk_i)
+ rdv_o <= ~icl_i & v_o & ~r_o && ol_i!=2'b00;
+always @(posedge clk_i)
+ wrv_o <= ~icl_i & v_o & ~w_o && ol_i!=2'b00;
+always @(posedge clk_i)
+ cac_o <= c_o & v_o;
+
+// One group of RAMs per way. All are read at index vadr_i[18:13]; unless
+// noted otherwise they are written at index miss_adr[18:13] when wr_tlb is
+// set and whichSet selects the way.
+genvar g;
+generate
+ for (g = 0; g < pAssociativity; g = g + 1)
+ begin : genTLB
+ ram_ar1w1r #(45,pTLB_size) tlbVadr
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(miss_adr[63:19]),
+ .o(tlb_vadr[g])
+ );
+ ram_ar1w1r #(AMSB+1,pTLB_size) tlbPteAdr
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte_adr),
+ .o(tlb_pte_adr[g])
+ );
+ ram_ar1w1r #( 7,pTLB_size) tlbFlag
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[6:0]),
+ .o(tlb_flags[g])
+ );
+ ram_ar1w1r #(8,pTLB_size) tlbPL
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[15:8]),
+ .o(tlb_pl[g])
+ );
+ ram_ar1w1r #( 1,pTLB_size) tlbG
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[23]),
+ .o(tlb_g[g])
+ );
+ ram_ar1w1r #(8,pTLB_size) tlbASID
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[31:24]),
+ .o(tlb_asid[g])
+ );
+ ram_ar1w1r #(32,pTLB_size) tlbRefCount
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[63:32]),
+ .o(tlb_refcount[g])
+ );
+ // NOTE(review): second instance named tlbRefCount in the same scope, also
+ // driving tlb_refcount[g]. This variant is additionally written on a hit
+ // in S_WAIT_MISS for the way nnx. One of the two presumably has to go.
+ ram_ar1w1r #(32,pTLB_size) tlbRefCount
+ (
+ .clk(clk_i),
+ .ce(wr_tlb?whichSet==g:nnx==g),
+ .we(wr_tlb||state==S_WAIT_MISS && !miss && cyc_i),
+ .wa(wr_tlb?miss_adr[18:13]:vadr_i[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[63:32]),
+ .o(tlb_refcount[g])
+ );
+ ram_ar1w1r #(35,pTLB_size) tlbTadr
+ (
+ .clk(clk_i),
+ .ce(whichSet==g),
+ .we(wr_tlb),
+ .wa(miss_adr[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(pte[98:64]),
+ .o(tlb_tadr[g])
+ );
+ // Dirty bit: written with 0 when the entry is loaded (wr_tlb) and with 1
+ // by the hit-time write.
+ // NOTE(review): wr is not declared in this module - presumably the CPU
+ // write strobe (we_i) is meant; confirm.
+ ram_ar1w1r #( 1,pTLB_size) tlbD
+ (
+ .clk(clk_i),
+ .ce(wr_tlb?whichSet==g:nnx==g),
+ .we(wr_tlb||state==S_WAIT_MISS && wr && !miss && cyc_i),
+ .wa(wr_tlb?miss_adr[18:13]:vadr_i[18:13]),
+ .ra(vadr_i[18:13]),
+ .i(!wr_tlb),
+ .o(tlb_d[g])
+ );
+ end
+endgenerate
+
+reg [pAssociativity*pTLB_size-1:0] tlb_v; // valid
+
+// The following reg allows detection of when the page table address changes
+// NOTE(review): pta_changed is not read anywhere else in this module.
+change_det #(48) u1
+(
+ .rst(rst_i),
+ .clk(clk_i),
+ .ce(1'b1),
+ .i(pta),
+ .cd(pta_changed)
+);
+
+// This must be fast !!!
+// Lookup the virtual address in the tlb
+// Translate the address
+// I/O and system BIOS addresses are not mapped
+// Cxxx_xxxx_xxxx_xxxx to FFFF_FFFF_FFFF_FFFF not mapped (kernel segment)
+// 0000_0000_0000_0000 to 0000_0000_0000_xxxx not mapped (kernel data segement)
+always @(posedge clk_i)
+begin
+ // Defaults: report a miss with the address passed through unchanged and
+ // all permission flags set; the branches below override as needed.
+ miss <= 1;
+ nnx <= pAssociativity;
+ a_o <= 1;
+ c_o <= 1;
+ r_o <= 1;
+ x_o <= 1;
+ w_o <= 1;
+ v_o <= 0;
+ pv_o <= 0;
+ padr_o[12: 0] <= vadr_i[12: 0];
+ padr_o[47:13] <= vadr_i[47:13];
+ if (vadr_i[63:16]==48'h0 || vadr_i[63:20]==44'hFFFF_FFFF_FFD) begin
+ miss <= 0;
+ c_o <= 1;
+ v_o <= 1;
+ end
+ else if (&vadr_i[47:46]) begin
+ miss <= 0;
+ c_o <= vadr_i[45:44]==2'b00; // C000_0000_0000 to CFFF_FFFF_FFFF is cacheable
+ v_o <= 1;
+ end
+ else begin
+ if (!pgen) begin
+ miss <= 0;
+ v_o <= 1;
+ end
+ else
+ // Compare every way; a later matching way overrides an earlier one.
+ // A hit needs the valid bit and a tag match on address bits 63:19.
+ for (nn = 0; nn < pAssociativity; nn = nn + 1)
+ if (tlb_v[{nn,vadr_i[18:13]}] && vadr_i[63:19]==tlb_vadr[nn]) begin
+ if (tlb_flags[nn][`_8MBPG])
+ padr_o[47:13] <= {tlb_tadr[nn][34:10],vadr_i[22:13]};
+ else
+ padr_o[47:13] <= tlb_tadr[nn];
+ miss <= 1'b0;
+ nnx <= nn;
+ a_o <= tlb_flags[nn][4];
+ c_o <= tlb_flags[nn][3];
+ r_o <= tlb_flags[nn][2];
+ w_o <= tlb_flags[nn][1];
+ x_o <= tlb_flags[nn][0];
+ v_o <= tlb_flags[nn][2]|tlb_flags[nn][1]|tlb_flags[nn][0];
+ // NOTE(review): pl is not declared - presumably the pl_i port is meant.
+ pv_o <= (cyc_i & icl_i) ? pl != tlb_pl[nn] && pl!=8'h00 : pl > tlb_pl[nn];
+ end
+ end
+end
+
+reg age_tick_r;
+wire pe_age_rtick;
+// NOTE(review): the port is named age_tick_i and the declared wire is
+// pe_age_rtick, but age_tick / pe_age_tick are used here and below.
+edge_det ued1(.clk(clk_i), .ce(1'b1), .i(age_tick), .pe(pe_age_tick), .ne(), .ee());
+
+// The following state machine loads the tlb buffer on a
+// miss.
+always @(posedge clk_i)
+if (rst_i) begin
+ nack();
+ wr_tlb <= 1'b0;
+ m_adr_o <= 1'b0;
+ goto(S_WAIT_MISS);
+ dbit <= 1'b0;
+ whichSet <= 1'b0;
+ for (nn = 0; nn < pAssociativity * pTLB_size; nn = nn + 1)
+ tlb_v[nn] <= 1'b0; // all entries are invalid on reset
+ page_fault <= `FALSE;
+ age_tick_r <= 1'b0;
+end
+else begin
+ wr_tlb <= 1'b0;
+
+ // page fault pulses
+ page_fault <= `FALSE;
+
+ if (pe_age_tick)
+ age_tick_r <= 1'b1;
+
+ // changing the address of the page table invalidates all entries
+ if (invalidate_all)
+ for (nn = 0; nn < pAssociativity * pTLB_size; nn = nn + 1)
+ tlb_v[nn] <= 1'b0;
+
+ // handle invalidate command
+ if (invalidate)
+ for (nn = 0; nn < pAssociativity; nn = nn + 1)
+ if (vadr_i[63:19]==tlb_vadr[nn] && (tlb_g[nn] || tlb_asid[nn]==asid_i))
+ tlb_v[{nn,vadr_i[18:13]}] <= 1'b0;
+
+ case (state) // synopsys full_case parallel_case
+
+ // Wait for a miss to occur. then initiate bus cycle
+ // Output either the page directory address
+ // or the page table address, depending on the
+ // size of the app.
+ S_WAIT_MISS:
+ begin
+ goto(S_WAIT_MISS);
+ dbit <= we_i;
+ proc <= `FALSE;
+
+ if (miss) begin
+ proc <= `TRUE;
+ miss_adr <= vadr_i;
+ // try and pick an empty tlb entry
+ whichSet <= cnt;
+ for (nn = 0; nn < pAssociativity; nn = nn + 1)
+ if (!tlb_v[{nn,vadr_i[18:13]}])
+ whichSet <= nn;
+ goto(S_RD_PTL5);
+ end
+ // If there's a write cycle, check to see if the
+ // dirty bit is set. If the dirty bit hasn't been
+ // set yet, then set it and write the dirty status
+ // to memory.
+ else if (cyc_i && we_i && !tlb_d[nnx]) begin
+ miss_adr <= vadr_i;
+ whichSet <= nnx;
+ goto(S_RD_PTL5);
+ end
+ // NOTE(review): S_AGE and S_COUNT are not among the state parameters.
+ else if (age_tick_r) begin
+ age_tick_r <= 1'b0;
+ tlb_wa <= tlb_ua + 3'd1;
+ tlb_ra <= tlb_ua + 3'd1;
+ tlb_ua <= tlb_ua + 3'd1;
+ goto(S_AGE);
+ end
+ else begin
+ tlb_wa <= {nnx,vadr_i[18:13]};
+ tlb_ra <= {nnx,vadr_i[18:13]};
+ goto(S_COUNT);
+ end
+ end
+
+ // Start the first table read. pta[10:8] selects both the level the walk
+ // starts at and which slice of the miss address indexes that table.
+ S_RD_PTL5:
+ if (~m_ack_i & ~m_cyc_o) begin
+ tlb_ra <= {whichSet,miss_adr[18:13]};
+ tlb_wa <= {whichSet,miss_adr[18:13]};
+ m_cyc_o <= 1'b1;
+ m_sel_o <= 8'hFF;
+ m_lock_o <= 1'b0;
+ m_we_o <= 1'b0;
+ case(pta[10:8])
+ 3'd0: state <= S_RD_PTL0L;
+ 3'd1: state <= S_RD_PTL1L;
+ 3'd2: state <= S_RD_PTL2;
+ 3'd3: state <= S_RD_PTL3;
+ 3'd4: state <= S_RD_PTL4;
+ 3'd5: state <= S_RD_PTL5_ACK;
+ default: ;
+ endcase
+ // Set page table address for lookup
+ case(pta[10:8])
+ 3'b000: m_adr_o <= {pta[47:14],miss_adr[22:13],4'h0}; // 8MB translations
+ 3'b001: m_adr_o <= {pta[47:14],miss_adr[32:23],4'h0}; // 8GB translations
+ 3'b010: m_adr_o <= {pta[47:14],miss_adr[42:33],4'h8}; // 8TB translations
+ 3'b011: m_adr_o <= {pta[47:14],miss_adr[52:43],4'h8}; // 8XB translations
+ 3'b100: m_adr_o <= {pta[47:14],miss_adr[62:53],4'h8}; // translations
+ 3'b101: m_adr_o <= {pta[47:14],9'b00,miss_adr[63],4'h8}; // translations
+ default: ;
+ endcase
+ end
+ // Wait for ack from system
+ // Setup to access page table
+ // If app uses a page directory, now address the page table
+ // NOTE(review): clock and clock_adr, used in the invalid-entry branches of
+ // this and the following level states, are not declared in this module.
+ S_RD_PTL5_ACK:
+ if (m_ack_i) begin
+ nack();
+ if (|m_dat_i[2:0]) begin // pte valid bit
+ tmpadr <= {m_dat_i[33:0],miss_adr[62:53],4'h8};
+ call(S_RD_PTL,S_RD_PTL4);
+ end
+ else begin
+ if (clock) begin
+ clock_adr[64:63] <= clock_adr[64:63] + 4'h1;
+ clock_adr[62:0] <= 4'h0;
+ goto (S_WAIT_MISS);
+ end
+ else
+ raise_page_fault();
+ // not a valid translation
+ // OS messed up ?
+ end
+ end
+
+ // Wait for ack from system
+ // Setup to access page table
+ // If app uses a page directory, now address the page table
+ S_RD_PTL4:
+ if (m_ack_i) begin
+ nack();
+ if (|m_dat_i[2:0]) begin // pte valid bit
+ tmpadr <= {m_dat_i[50:16],miss_adr[52:43],3'b0};
+ call(S_RD_PTL,S_RD_PTL3);
+ end
+ else begin
+ if (clock) begin
+ clock_adr[64:53] <= clock_adr[64:53] + 4'h1;
+ clock_adr[52:0] <= 4'h0;
+ goto (S_WAIT_MISS);
+ end
+ else
+ raise_page_fault();
+ end
+ end
+
+ // Wait for ack from system
+ // Setup to access page table
+ // If app uses a page directory, now address the page table
+ S_RD_PTL3:
+ if (m_ack_i) begin
+ nack();
+ if (|m_dat_i[2:0]) begin // pte valid bit
+ tmpadr <= {m_dat_i[50:16],miss_adr[42:33],3'b0};
+ call(S_RD_PTL,S_RD_PTL2);
+ end
+ else begin
+ if (clock) begin
+ clock_adr[64:43] <= clock_adr[64:43] + 4'h1;
+ clock_adr[32:0] <= 4'h0;
+ goto (S_WAIT_MISS);
+ end
+ else
+ raise_page_fault();
+ end
+ end
+
+ // Wait for ack from system
+ // Setup to access page table
+ // If app uses a page directory, now address the page table
+ // NOTE(review): S_RD_PTL1 (and S_RD_PTL0 / S_WR_PTL0 below) are not among
+ // the state parameters; only the ...L / ...H variants are declared.
+ S_RD_PTL2:
+ if (m_ack_i) begin
+ nack();
+ if (|m_dat_i[2:0]) begin // pte valid bit
+ tmpadr <= {m_dat_i[50:16],miss_adr[32:23],3'b0};
+ call(S_RD_PTL,S_RD_PTL1);
+ end
+ else begin
+ if (clock) begin
+ clock_adr[64:33] <= clock_adr[64:33] + 4'h1;
+ clock_adr[32:0] <= 4'h0;
+ goto (S_WAIT_MISS);
+ end
+ else
+ raise_page_fault();
+ end
+ end
+
+ // Wait for ack from system
+ // Setup to access page table
+ // If app uses a page directory, now address the page table
+ S_RD_PTL1:
+ if (m_ack_i) begin
+ nack();
+ if (|m_dat_i[2:0]) begin // pte valid bit
+ // Shortcut 8MiB page ?
+ if (m_dat_i[`_8MBPG]) begin
+ pte <= m_dat_i;
+ m_dat_o <= m_dat_i|{dbit,2'b00,~clock,4'b0};
+ m_dat_o[4] <= ~clock;
+ call(S_WR_PTL,S_WR_PTL0);
+ end
+ else begin
+ tmpadr <= {m_dat_i[50:16],miss_adr[22:13],3'b0};
+ call(S_RD_PTL,S_RD_PTL0);
+ end
+ end
+ else begin
+ if (clock) begin
+ clock_adr[64:23] <= clock_adr[64:23] + 4'h1;
+ clock_adr[22:0] <= 4'h0;
+ goto (S_WAIT_MISS);
+ end
+ else
+ raise_page_fault();
+ end
+ end
+
+ //---------------------------------------------------
+ // This section of the state machine performs a
+ // read then write of a PTE
+ //---------------------------------------------------
+ // Perform a read cycle of page table level 0 entry
+ // NOTE(review): tlb_wr is assigned here and in the states below, but the
+ // declared write strobe connected to the TLB RAMs is wr_tlb.
+ S_RD_PTL0:
+ // The tlb has been updated so the page must have been accessed
+ // set the accessed bit for the page table entry
+ // Also set dirty bit if a write access.
+ if (m_ack_i) begin
+ nack();
+ tlb_wr <= 1'b1;
+ pte_adr <= m_adr_o[AMSB:4];
+ m_dat_o <= m_dat_i|{dbit,2'b00,1'b1,4'b0}; // This line will only set bits
+ pte <= m_dat_i|{dbit,2'b00,1'b1,4'b0};
+ // If the tlb entry is already marked dirty don't bother with updating
+ // the pte in memory. Only write on a new dirty status.
+ if (tlb_d[tlb_ra[8:6]])
+ goto(S_WAIT_MISS);
+ else
+ call(S_WR_PTL,S_WR_PTL0);
+ end
+
+ // The entry becomes valid only if one of the low three PTE bits is set;
+ // otherwise a page fault is raised.
+ S_WR_PTL0:
+ if (m_ack_i) begin
+ tlb_wr <= 1'b1;
+ nack();
+ tlb_v[tlb_wa] <= |pte[2:0];
+ if (~|pte[2:0])
+ raise_page_fault();
+ goto(S_WAIT_MISS);
+ end
+
+ //---------------------------------------------------
+ // Take care of reference counting and aging.
+ //---------------------------------------------------
+
+ // Rebuild pte from the TLB read-back and bump the reference count.
+ // NOTE(review): tlb_refcount is declared [31:0] but is sliced [63:42] and
+ // [41:32] here (and [63:33] in S_AGE).
+ S_COUNT:
+ begin
+ pte[6:0] <= tlb_flags[tlb_ra[8:6]];
+ pte[7] <= tlb_d[tlb_ra[8:6]];
+ pte[15:8] <= tlb_pl[tlb_ra[8:6]];
+ pte[23] <= tlb_g[tlb_ra[8:6]];
+ pte[31:24] <= tlb_asid[tlb_ra[8:6]];
+ pte[63:32] <= {tlb_refcount[tlb_ra[8:6]][63:42] + 4'd1,tlb_refcount[tlb_ra[8:6]][41:32]};
+ pte[127:64] <= tlb_tadr[tlb_ra[8:6]];
+ tlb_wr <= 1'b1;
+ goto(S_WAIT_MISS);
+ end
+
+ // Same as S_COUNT, but the reference count is shifted right by one bit.
+ S_AGE:
+ begin
+ pte[6:0] <= tlb_flags[tlb_ra[8:6]];
+ pte[7] <= tlb_d[tlb_ra[8:6]];
+ pte[15:8] <= tlb_pl[tlb_ra[8:6]];
+ pte[23] <= tlb_g[tlb_ra[8:6]];
+ pte[31:24] <= tlb_asid[tlb_ra[8:6]];
+ pte[63:32] <= {1'b0,tlb_refcount[tlb_ra[8:6]][63:33]};
+ pte[127:64] <= tlb_tadr[tlb_ra[8:6]];
+ tlb_wr <= 1'b1;
+ goto(S_WAIT_MISS);
+ end
+
+ //---------------------------------------------------
+ // Subroutine: initiate read cycle
+ //---------------------------------------------------
+ S_RD_PTL:
+ if (~m_ack_i & ~m_cyc_o) begin
+ m_cyc_o <= 1'b1;
+ m_sel_o <= 8'hFF;
+ m_lock_o <= 1'b0;
+ m_we_o <= 1'b0;
+ m_adr_o <= tmpadr;
+ return();
+ end
+
+ //---------------------------------------------------
+ // Subroutine: initiate write cycle
+ //---------------------------------------------------
+ S_WR_PTL:
+ if (~m_ack_i & ~m_cyc_o) begin
+ m_cyc_o <= 1'b1;
+ m_sel_o <= 8'hFF;
+ m_lock_o <= 1'b0;
+ m_we_o <= 1'b1;
+ // Address comes from a previous read address
+// m_adr_o <= tmpadr;
+ return();
+ end
+
+ //---------------------------------------------------
+ // This state can't happen without a hardware error
+ //---------------------------------------------------
+ default:
+ begin
+ nack();
+ goto(S_WAIT_MISS);
+ end
+
+ endcase
+end
+
+
+// This counter is used to select the tlb entry that gets
+// replaced when a new entry is entered into the buffer.
+// It just increments every time an entry is updated.
+always @(posedge clk_i)
+if (rst_i)
+ cnt <= 0;
+else if (state==S_WAIT_MISS && miss) begin
+ if (cnt == pAssociativity-1)
+ cnt <= 0;
+ else
+ cnt <= cnt + 1;
+end
+
+// Release the master bus: drop cycle, selects, lock and write enable.
+task nack;
+begin
+ m_cyc_o <= 1'b0;
+ m_sel_o <= 8'h00;
+ m_lock_o <= 1'b0;
+ m_we_o <= 1'b0;
+end
+endtask
+
+// Abort the walk and return to S_WAIT_MISS. page_fault is only pulsed when
+// the walk was started by a miss (proc set), not by a dirty-bit update.
+task raise_page_fault;
+begin
+ nack();
+ if (proc)
+ page_fault <= `TRUE;
+ proc <= `FALSE;
+ state <= S_WAIT_MISS;
+end
+endtask
+
+// Set the next state.
+task goto;
+input [3:0] nst;
+begin
+ state <= nst;
+end
+endtask
+
+// Enter subroutine state nst and remember rst as the state to continue in
+// when the subroutine executes return(). Only one level is stored.
+task call;
+input [3:0] nst;
+input [3:0] rst;
+begin
+ goto(nst);
+ stkstate <= rst;
+end
+endtask
+
+// Continue in the state saved by the most recent call().
+task return;
+begin
+ state <= stkstate;
+end
+endtask
+
+endmodule
+
Index: FT64v7/rtl/common/FT64_shift.v
===================================================================
--- FT64v7/rtl/common/FT64_shift.v (nonexistent)
+++ FT64v7/rtl/common/FT64_shift.v (revision 60)
@@ -0,0 +1,135 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_shift.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+//`ifndef SHL
+// Values matched against the major opcode field, instr[5:0], and against
+// the function field, instr[31:26]. Note that SHIFTR and AMO share the
+// value 6'h2F but are compared against different fields.
+`define IVECTOR 6'h01
+`define VSHL 6'h0C
+`define VSHR 6'h0D
+`define VASR 6'h0E
+`define RR 6'h02
+`define SHIFTR 6'h2F
+`define SHIFT31 6'h0F
+`define SHIFT63 6'h1F
+`define AMO 6'h2F
+`define AMOSHL 6'h0C
+`define AMOSHR 6'h0D
+`define AMOASR 6'h0E
+`define AMOROL 6'h0F
+`define AMOSHLI 6'h2C
+`define AMOSHRI 6'h2D
+`define AMOASRI 6'h2E
+`define AMOROLI 6'h2F
+// Values matched against the shift operation field, instr[25:23]
+`define SHL 3'h0
+`define SHR 3'h1
+`define ASL 3'h2
+`define ASR 3'h3
+`define ROL 3'h4
+`define ROR 3'h5
+//`endif
+// Upper half of the 128-bit shift intermediates
+`define HIGHWORD 127:64
+
+// ----------------------------------------------------------------------------
+// FT64_shift
+// Combinational shift / rotate unit.
+//
+//   instr  instruction word; opcode = instr[5:0], func = instr[31:26],
+//          shiftop = instr[25:23]
+//   a      value to be shifted
+//   b      shift amount; only b[5:0] is used
+//   res    shift result; zero for any instruction not decoded here
+//   ov     set when the upper half of the left-shift intermediate differs
+//          from a replicated copy of a[63]
+//
+// Parameters:
+//   DMSB         most significant bit index of the data path
+//   SUP_VECTOR   when zero, IVECTOR instructions return zero
+//   ROTATE_INSN  when zero, rotate operations return a marker constant
+// ----------------------------------------------------------------------------
+module FT64_shift(instr, a, b, res, ov);
+parameter DMSB=63;
+parameter SUP_VECTOR = 1;
+input [47:0] instr;
+input [DMSB:0] a;
+input [DMSB:0] b;
+output [DMSB:0] res;
+reg [DMSB:0] res;
+output ov;
+parameter ROTATE_INSN = 1;
+
+wire [5:0] opcode = instr[5:0];
+wire [5:0] func = instr[31:26];
+wire [2:0] shiftop = instr[25:23];
+
+// Double-width intermediates: the bits shifted out of one half land in
+// the other half, which is what lets a rotate be formed by OR-ing the
+// two halves together.
+wire [127:0] shl = {64'd0,a} << b[5:0];
+wire [127:0] shr = {a,64'd0} >> b[5:0];
+
+// NOTE(review): shl is built from a zero-extended 'a', so with a[63] set
+// and a shift amount of zero the upper half is all zeros and this
+// comparison reports overflow. Confirm that is the intended behaviour.
+assign ov = shl[127:64] != {64{a[63]}};
+
+// Each distinct shift result is formed once and shared by every decode
+// path below.
+wire [DMSB:0] lsl_res = shl[DMSB:0];
+wire [DMSB:0] lsr_res = shr[`HIGHWORD];
+// NOTE(review): the literal below has only 12 hex digits, so it is
+// zero-extended to 64'h0000DEADDEADDEAD.
+wire [DMSB:0] rol_res = ROTATE_INSN ? shl[63:0]|shl[`HIGHWORD] : 64'hDEADDEADDEAD;
+wire [DMSB:0] ror_res = ROTATE_INSN ? shr[63:0]|shr[`HIGHWORD] : 64'hDEADDEADDEAD;
+
+// Arithmetic right shift: the logical result with the vacated upper bits
+// filled with ones when the operand is negative.
+reg [DMSB:0] asr_res;
+always @*
+  if (a[DMSB])
+    asr_res = (shr[`HIGHWORD]) | ~({64{1'b1}} >> b[5:0]);
+  else
+    asr_res = shr[`HIGHWORD];
+
+always @*
+case(opcode)
+`IVECTOR:
+  if (SUP_VECTOR)
+    case(func)
+    `VSHL: res <= lsl_res;
+    `VSHR: res <= lsr_res;
+    `VASR: res <= asr_res;
+    default: res <= 64'd0;
+    endcase
+  else
+    res <= 64'd0;
+`RR:
+  case(func)
+  // All three register-format shift functions decode shiftop identically.
+  `SHIFTR,
+  `SHIFT31,
+  `SHIFT63:
+    case(shiftop)
+    `SHL,`ASL: res <= lsl_res;
+    `SHR: res <= lsr_res;
+    `ASR: res <= asr_res;
+    `ROL: res <= rol_res;
+    `ROR: res <= ror_res;
+    default: res <= 64'd0;
+    endcase
+  default: res <= 64'd0;
+  endcase
+`AMO:
+  case(func)
+  `AMOSHL,`AMOSHLI: res <= lsl_res;
+  `AMOSHR,`AMOSHRI: res <= lsr_res;
+  `AMOASR,`AMOASRI: res <= asr_res;
+  `AMOROL,`AMOROLI: res <= rol_res;
+  default: res <= 64'd0;
+  endcase
+default: res <= 64'd0;
+endcase
+
+endmodule
+
+
Index: FT64v7/rtl/common/FT64_shiftb.v
===================================================================
--- FT64v7/rtl/common/FT64_shiftb.v (nonexistent)
+++ FT64v7/rtl/common/FT64_shiftb.v (revision 60)
@@ -0,0 +1,77 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_shiftb.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+//`ifndef SHL
+// Major opcode (instr[5:0]) decoded by this unit
+`define R2 6'h02
+// Values matched against the shift operation field, instr[35:33]
+`define SHL 3'h0
+`define SHR 3'h1
+`define ASL 3'h2
+`define ASR 3'h3
+`define ROL 3'h4
+`define ROR 3'h5
+//`endif
+// Upper byte of the 16-bit shift intermediates
+`define HIGHWORDB 15:8
+
+// ----------------------------------------------------------------------------
+// FT64_shiftb
+// Combinational byte-wide shift / rotate unit.
+//
+//   instr  instruction word; opcode = instr[5:0], shiftop = instr[35:33]
+//   a      value to be shifted
+//   b      shift amount, used when instr[29] is clear (b[2:0] only)
+//   res    shift result; zero for any instruction not decoded here
+//   ov     always zero
+//
+// When instr[29] is set the shift amount is taken from instr[15:13]
+// instead of from b.
+// ----------------------------------------------------------------------------
+module FT64_shiftb(instr, a, b, res, ov);
+parameter DMSB=7;
+input [47:0] instr;
+input [DMSB:0] a;
+input [DMSB:0] b;
+output [DMSB:0] res;
+reg [DMSB:0] res;
+output ov;
+parameter ROTATE_INSN = 1;
+
+wire [5:0] opcode = instr[5:0];
+// Declared three bits wide to match the three-bit slice it is taken from.
+wire [2:0] shiftop = instr[35:33];
+wire [2:0] bb = instr[29] ? instr[15:13] : b[2:0];
+
+// Double-width intermediates: bits shifted out of one byte land in the
+// other, so a rotate is the OR of the two bytes.
+wire [15:0] shl = {8'd0,a} << bb[2:0];
+wire [15:0] shr = {a,8'd0} >> bb[2:0];
+
+assign ov = 1'b0;
+
+always @*
+case(opcode)
+`R2:
+  if (instr[7:6]==2'b01)
+    case(shiftop)
+    `SHL,`ASL: res <= shl[DMSB:0];
+    `SHR: res <= shr[`HIGHWORDB];
+    // Arithmetic shift: fill the vacated upper bits with ones when the
+    // operand is negative.
+    `ASR: if (a[DMSB])
+        res <= (shr[`HIGHWORDB]) | ~({8{1'b1}} >> bb[2:0]);
+      else
+        res <= shr[`HIGHWORDB];
+    `ROL: res <= ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDB] : 8'hDE;
+    `ROR: res <= ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDB] : 8'hDE;
+    default: res <= 8'd0;
+    endcase
+  else
+    // Without this branch res was left unassigned whenever instr[7:6]
+    // was not 2'b01, which makes this combinational block infer a latch.
+    res <= 8'd0;
+default: res <= 8'd0;
+endcase
+
+endmodule
+
+
Index: FT64v7/rtl/common/FT64_shiftc.v
===================================================================
--- FT64v7/rtl/common/FT64_shiftc.v (nonexistent)
+++ FT64v7/rtl/common/FT64_shiftc.v (revision 60)
@@ -0,0 +1,76 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_shiftc.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+//`ifndef SHL
+// Major opcode (instr[5:0]) decoded by this unit
+`define R2 6'h02
+// Values matched against the shift operation field, instr[35:33]
+`define SHL 3'h0
+`define SHR 3'h1
+`define ASL 3'h2
+`define ASR 3'h3
+`define ROL 3'h4
+`define ROR 3'h5
+//`endif
+// Upper half of the 32-bit shift intermediates
+`define HIGHWORDC 31:16
+
+// ----------------------------------------------------------------------------
+// FT64_shiftc
+// Combinational 16-bit shift / rotate unit.
+//
+//   instr  instruction word; opcode = instr[5:0], shiftop = instr[35:33]
+//   a      value to be shifted
+//   b      shift amount, used when instr[29] is clear (b[3:0] only)
+//   res    shift result; zero for any instruction not decoded here
+//   ov     always zero
+//
+// When instr[29] is set the shift amount is taken from instr[16:13]
+// instead of from b.
+// ----------------------------------------------------------------------------
+module FT64_shiftc(instr, a, b, res, ov);
+parameter DMSB=15;
+input [47:0] instr;
+input [DMSB:0] a;
+input [DMSB:0] b;
+output [DMSB:0] res;
+reg [DMSB:0] res;
+output ov;
+parameter ROTATE_INSN = 1;
+
+wire [5:0] opcode = instr[5:0];
+// Declared three bits wide to match the three-bit slice it is taken from.
+wire [2:0] shiftop = instr[35:33];
+wire [3:0] bb = instr[29] ? instr[16:13] : b[3:0];
+
+// Double-width intermediates: bits shifted out of one half land in the
+// other, so a rotate is the OR of the two halves.
+wire [31:0] shl = {16'd0,a} << bb;
+wire [31:0] shr = {a,16'd0} >> bb;
+
+assign ov = 1'b0;
+
+always @*
+case(opcode)
+// This file defines `R2 but not `RR; the original selector `RR only
+// resolved if another source file had happened to define it earlier in
+// the same compilation.
+`R2:
+  case(shiftop)
+  `SHL,`ASL: res <= shl[DMSB:0];
+  `SHR: res <= shr[`HIGHWORDC];
+  // Arithmetic shift: fill the vacated upper bits with ones when the
+  // operand is negative.
+  `ASR: if (a[DMSB])
+      res <= (shr[`HIGHWORDC]) | ~({16{1'b1}} >> bb);
+    else
+      res <= shr[`HIGHWORDC];
+  `ROL: res <= ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDC] : 16'hDEAD;
+  `ROR: res <= ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDC] : 16'hDEAD;
+  default: res <= 16'd0;
+  endcase
+default: res <= 16'd0;
+endcase
+
+endmodule
+
+
Index: FT64v7/rtl/common/FT64_shifth.v
===================================================================
--- FT64v7/rtl/common/FT64_shifth.v (nonexistent)
+++ FT64v7/rtl/common/FT64_shifth.v (revision 60)
@@ -0,0 +1,97 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_shifth.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+//`ifndef SHL
+// Major opcodes (instr[5:0]) decoded by this unit
+`define R2 6'h02
+`define AMO 6'h2F
+// Values matched against the function field, instr[31:26], for AMO
+`define AMOSHL 6'h0C
+`define AMOSHR 6'h0D
+`define AMOASR 6'h0E
+`define AMOROL 6'h0F
+`define AMOSHLI 6'h2C
+`define AMOSHRI 6'h2D
+`define AMOASRI 6'h2E
+`define AMOROLI 6'h2F
+// Values matched against the shift operation field, instr[35:33], for R2
+`define SHL 3'h0
+`define SHR 3'h1
+`define ASL 3'h2
+`define ASR 3'h3
+`define ROL 3'h4
+`define ROR 3'h5
+//`endif
+// Upper half of the 64-bit shift intermediates
+`define HIGHWORDH 63:32
+
+// ----------------------------------------------------------------------------
+// FT64_shifth
+// Combinational 32-bit shift / rotate unit.
+//
+//   instr  instruction word; opcode = instr[5:0], func = instr[31:26],
+//          shiftop = instr[35:33]
+//   a      value to be shifted
+//   b      shift amount, used when instr[29] is clear (b[4:0] only)
+//   res    shift result; zero for any instruction not decoded here
+//   ov     always zero
+//
+// When instr[29] is set the shift amount is taken from instr[17:13]
+// instead of from b.
+// ----------------------------------------------------------------------------
+module FT64_shifth(instr, a, b, res, ov);
+parameter DMSB=31;
+input [47:0] instr;
+input [DMSB:0] a;
+input [DMSB:0] b;
+output [DMSB:0] res;
+reg [DMSB:0] res;
+output ov;
+parameter ROTATE_INSN = 1;
+
+wire [5:0] opcode = instr[5:0];
+wire [5:0] func = instr[31:26];
+// Declared three bits wide to match the three-bit slice it is taken from.
+wire [2:0] shiftop = instr[35:33];
+wire [4:0] bb = instr[29] ? instr[17:13] : b[4:0];
+
+// Double-width intermediates: bits shifted out of one half land in the
+// other, so a rotate is the OR of the two halves.
+wire [63:0] shl = {32'd0,a} << bb;
+wire [63:0] shr = {a,32'd0} >> bb;
+
+assign ov = 1'b0;
+
+always @*
+case(opcode)
+`R2:
+  case(shiftop)
+  `SHL,`ASL: res <= shl[DMSB:0];
+  `SHR: res <= shr[`HIGHWORDH];
+  // Arithmetic shift: fill the vacated upper bits with ones when the
+  // operand is negative.
+  `ASR: if (a[DMSB])
+      res <= (shr[`HIGHWORDH]) | ~({32{1'b1}} >> bb);
+    else
+      res <= shr[`HIGHWORDH];
+  `ROL: res <= ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
+  `ROR: res <= ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDH] : 32'hDEADDEAD;
+  default: res <= 32'd0;
+  endcase
+`AMO:
+  // NOTE(review): every AMO shift function code listed above has bit 3
+  // set, and func[3] is instr[29], so bb always selects instr[17:13] on
+  // this path. Confirm that is the intended source of the shift amount.
+  case(func)
+  `AMOSHL,`AMOSHLI: res <= shl[DMSB:0];
+  `AMOSHR,`AMOSHRI: res <= shr[`HIGHWORDH];
+  // The sign-fill mask must be built from the same amount (bb) that
+  // produced shr. The original used b[4:0] here, which gives a wrong
+  // result whenever b[4:0] and bb differ.
+  `AMOASR,`AMOASRI: if (a[DMSB])
+      res <= (shr[`HIGHWORDH]) | ~({32{1'b1}} >> bb);
+    else
+      res <= shr[`HIGHWORDH];
+  `AMOROL: res <= ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
+  `AMOROLI: res <= ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
+  default: res <= 32'd0;
+  endcase
+default: res <= 32'd0;
+endcase
+
+endmodule
+
+
Index: FT64v7/rtl/lib/BCDMath.v
===================================================================
--- FT64v7/rtl/lib/BCDMath.v (nonexistent)
+++ FT64v7/rtl/lib/BCDMath.v (revision 60)
@@ -0,0 +1,291 @@
+`timescale 1ns / 1ps
+//=============================================================================
+// __
+// \\__/ o\ (C) 2012 Robert Finch
+// \ __ / All rights reserved.
+// \/_// robfinch@opencores.org
+// ||
+//
+// BCDMath.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+//=============================================================================
+//
+// Add two packed BCD bytes (two digits each) plus a carry-in.
+//   ci  carry input
+//   a,b operands, one BCD digit per nibble
+//   o   two-digit BCD sum
+//   c   carry out of the upper digit
+module BCDAdd(ci,a,b,o,c);
+input ci;		// carry input
+input [7:0] a;
+input [7:0] b;
+output [7:0] o;
+output c;
+
+// Carry from the low digit into the high digit.
+// (The unused wire c1 of the original declaration has been dropped.)
+wire c0;
+
+// Five-bit binary sums of each digit pair; the fifth bit keeps the carry.
+wire [4:0] hsN0 = a[3:0] + b[3:0] + ci;
+wire [4:0] hsN1 = a[7:4] + b[7:4] + c0;
+
+// Convert each binary digit sum back to a BCD digit plus carry.
+BCDAddAdjust u1 (.i(hsN0), .o(o[3:0]), .c(c0));
+BCDAddAdjust u2 (.i(hsN1), .o(o[7:4]), .c(c));
+
+endmodule
+
+// Add two four-digit packed BCD values plus a carry-in.
+//   ci  carry input
+//   a,b operands, one BCD digit per nibble
+//   o   four-digit BCD sum
+//   c   carry out of the most significant digit
+module BCDAdd4(ci,a,b,o,c);
+input ci;		// carry input
+input [15:0] a;
+input [15:0] b;
+output [15:0] o;
+output c;
+
+// Carries rippling from each digit into the next higher one
+wire c0,c1,c2;
+
+// Five-bit binary sums of each digit pair; the fifth bit keeps the carry.
+wire [4:0] hsN0 = a[3:0] + b[3:0] + ci;
+wire [4:0] hsN1 = a[7:4] + b[7:4] + c0;
+wire [4:0] hsN2 = a[11:8] + b[11:8] + c1;
+wire [4:0] hsN3 = a[15:12] + b[15:12] + c2;
+
+// Convert each binary digit sum back to a BCD digit plus carry.
+BCDAddAdjust u1 (.i(hsN0), .o(o[3:0]),   .c(c0));
+BCDAddAdjust u2 (.i(hsN1), .o(o[7:4]),   .c(c1));
+BCDAddAdjust u3 (.i(hsN2), .o(o[11:8]),  .c(c2));
+BCDAddAdjust u4 (.i(hsN3), .o(o[15:12]), .c(c));
+
+endmodule
+
+// Subtract packed BCD byte b (and ci) from packed BCD byte a.
+//   ci  carry input; it is subtracted from the low digit, i.e. it acts
+//       as a borrow-in
+//   a,b operands, one BCD digit per nibble
+//   o   two-digit BCD difference
+//   c   borrow out of the upper digit
+module BCDSub(ci,a,b,o,c);
+input ci;		// carry input
+input [7:0] a;
+input [7:0] b;
+output [7:0] o;
+output c;
+
+// Borrow from the low digit into the high digit.
+// (The unused wire c1 of the original declaration has been dropped.)
+wire c0;
+
+// Five-bit binary differences of each digit pair; a negative result
+// appears as a value with the fifth bit set.
+wire [4:0] hdN0 = a[3:0] - b[3:0] - ci;
+wire [4:0] hdN1 = a[7:4] - b[7:4] - c0;
+
+// Convert each binary digit difference back to a BCD digit plus borrow.
+BCDSubAdjust u1 (.i(hdN0), .o(o[3:0]), .c(c0));
+BCDSubAdjust u2 (.i(hdN1), .o(o[7:4]), .c(c));
+
+endmodule
+
+// Convert a five-bit binary digit sum into one BCD digit and a carry.
+//   i  binary sum
+//   o  BCD digit
+//   c  carry
+// Sums 0..9 pass through with no carry. Sums 10..19 produce (sum - 10)
+// with carry. Any other value produces digit 9 with carry.
+module BCDAddAdjust(i,o,c);
+input [4:0] i;
+output [3:0] o;
+reg [3:0] o;
+output c;
+reg c;
+always @(i)
+begin
+  if (i <= 5'h09) begin
+    o = i[3:0];
+    c = 1'b0;
+  end
+  else if (i <= 5'h13) begin
+    // Adding 6 modulo 16 is the same as subtracting 10.
+    o = i[3:0] + 4'd6;
+    c = 1'b1;
+  end
+  else begin
+    o = 4'h9;
+    c = 1'b1;
+  end
+end
+endmodule
+
+// Convert a five-bit binary digit difference into one BCD digit and a
+// borrow.
+//   i  binary difference
+//   o  BCD digit
+//   c  borrow
+// Values 0..9 pass through with no borrow. Values 5'h16..5'h1F (the
+// five-bit encodings of -10..-1) produce (difference + 10) with borrow.
+// Any other value produces digit 9 with borrow.
+module BCDSubAdjust(i,o,c);
+input [4:0] i;
+output [3:0] o;
+reg [3:0] o;
+output c;
+reg c;
+always @(i)
+begin
+  if (i <= 5'h09) begin
+    o = i[3:0];
+    c = 1'b0;
+  end
+  else if (i >= 5'h16) begin
+    // Subtracting 6 modulo 16 is the same as adding 10.
+    o = i[3:0] - 4'd6;
+    c = 1'b1;
+  end
+  else begin
+    o = 4'h9;
+    c = 1'b1;
+  end
+end
+endmodule
+
+// Multiply two BCD digits
+// Method used is table lookup
+//   a,b  single BCD digits
+//   o    two-digit packed BCD product
+// Any selector value outside the table (either input above 9) produces
+// zero.
+//
+// The table holds no wildcard entries, so a plain 'case' is used. The
+// original 'casex' treated X/Z bits on the inputs as don't-care, so an
+// unknown input could match a table entry in simulation instead of
+// reaching the default.
+module BCDMul1(a,b,o);
+input [3:0] a;
+input [3:0] b;
+output [7:0] o;
+reg [7:0] o;
+
+always @(a or b)
+case({a,b})
+// 0 x n
+8'h00: o = 8'h00;
+8'h01: o = 8'h00;
+8'h02: o = 8'h00;
+8'h03: o = 8'h00;
+8'h04: o = 8'h00;
+8'h05: o = 8'h00;
+8'h06: o = 8'h00;
+8'h07: o = 8'h00;
+8'h08: o = 8'h00;
+8'h09: o = 8'h00;
+// 1 x n
+8'h10: o = 8'h00;
+8'h11: o = 8'h01;
+8'h12: o = 8'h02;
+8'h13: o = 8'h03;
+8'h14: o = 8'h04;
+8'h15: o = 8'h05;
+8'h16: o = 8'h06;
+8'h17: o = 8'h07;
+8'h18: o = 8'h08;
+8'h19: o = 8'h09;
+// 2 x n
+8'h20: o = 8'h00;
+8'h21: o = 8'h02;
+8'h22: o = 8'h04;
+8'h23: o = 8'h06;
+8'h24: o = 8'h08;
+8'h25: o = 8'h10;
+8'h26: o = 8'h12;
+8'h27: o = 8'h14;
+8'h28: o = 8'h16;
+8'h29: o = 8'h18;
+// 3 x n
+8'h30: o = 8'h00;
+8'h31: o = 8'h03;
+8'h32: o = 8'h06;
+8'h33: o = 8'h09;
+8'h34: o = 8'h12;
+8'h35: o = 8'h15;
+8'h36: o = 8'h18;
+8'h37: o = 8'h21;
+8'h38: o = 8'h24;
+8'h39: o = 8'h27;
+// 4 x n
+8'h40: o = 8'h00;
+8'h41: o = 8'h04;
+8'h42: o = 8'h08;
+8'h43: o = 8'h12;
+8'h44: o = 8'h16;
+8'h45: o = 8'h20;
+8'h46: o = 8'h24;
+8'h47: o = 8'h28;
+8'h48: o = 8'h32;
+8'h49: o = 8'h36;
+// 5 x n
+8'h50: o = 8'h00;
+8'h51: o = 8'h05;
+8'h52: o = 8'h10;
+8'h53: o = 8'h15;
+8'h54: o = 8'h20;
+8'h55: o = 8'h25;
+8'h56: o = 8'h30;
+8'h57: o = 8'h35;
+8'h58: o = 8'h40;
+8'h59: o = 8'h45;
+// 6 x n
+8'h60: o = 8'h00;
+8'h61: o = 8'h06;
+8'h62: o = 8'h12;
+8'h63: o = 8'h18;
+8'h64: o = 8'h24;
+8'h65: o = 8'h30;
+8'h66: o = 8'h36;
+8'h67: o = 8'h42;
+8'h68: o = 8'h48;
+8'h69: o = 8'h54;
+// 7 x n
+8'h70: o = 8'h00;
+8'h71: o = 8'h07;
+8'h72: o = 8'h14;
+8'h73: o = 8'h21;
+8'h74: o = 8'h28;
+8'h75: o = 8'h35;
+8'h76: o = 8'h42;
+8'h77: o = 8'h49;
+8'h78: o = 8'h56;
+8'h79: o = 8'h63;
+// 8 x n
+8'h80: o = 8'h00;
+8'h81: o = 8'h08;
+8'h82: o = 8'h16;
+8'h83: o = 8'h24;
+8'h84: o = 8'h32;
+8'h85: o = 8'h40;
+8'h86: o = 8'h48;
+8'h87: o = 8'h56;
+8'h88: o = 8'h64;
+8'h89: o = 8'h72;
+// 9 x n
+8'h90: o = 8'h00;
+8'h91: o = 8'h09;
+8'h92: o = 8'h18;
+8'h93: o = 8'h27;
+8'h94: o = 8'h36;
+8'h95: o = 8'h45;
+8'h96: o = 8'h54;
+8'h97: o = 8'h63;
+8'h98: o = 8'h72;
+8'h99: o = 8'h81;
+default:	o = 8'h00;
+endcase
+endmodule
+
+
+// Multiply two pairs of BCD digits
+// handles from 0x0 to 99x99
+//   a,b  two-digit packed BCD operands
+//   o    four-digit packed BCD product
+// The four digit-by-digit partial products are formed by table lookup
+// and then summed with BCD adders: the low*low and high*high products
+// occupy the outer digit pairs, and the two cross products are each
+// added in one digit position (a factor of ten) higher.
+module BCDMul2(a,b,o);
+input [7:0] a;
+input [7:0] b;
+output [15:0] o;
+
+wire [7:0] p1,p2,p3,p4;
+wire [15:0] s1;
+
+BCDMul1 u1 (.a(a[3:0]), .b(b[3:0]), .o(p1));	// low  x low
+BCDMul1 u2 (.a(a[7:4]), .b(b[3:0]), .o(p2));	// high x low
+BCDMul1 u3 (.a(a[3:0]), .b(b[7:4]), .o(p3));	// low  x high
+BCDMul1 u4 (.a(a[7:4]), .b(b[7:4]), .o(p4));	// high x high
+
+// The carry outputs of both adders are left unconnected, as in the
+// original positional hookup.
+BCDAdd4 u5 (.ci(1'b0), .a({p4,p1}), .b({4'h0,p2,4'h0}), .o(s1), .c());
+BCDAdd4 u6 (.ci(1'b0), .a(s1),      .b({4'h0,p3,4'h0}), .o(o),  .c());
+
+endmodule
+
+// Bench for BCDMul2: four multipliers fed with constant operands.
+// There is no stimulus and no checking; the products are meant to be
+// inspected in the simulator.
+module BCDMul_tb();
+
+wire [15:0] o1,o2,o3,o4;
+
+BCDMul2 u1 (.a(8'h00), .b(8'h00), .o(o1));	// 00 x 00
+BCDMul2 u2 (.a(8'h99), .b(8'h99), .o(o2));	// 99 x 99
+BCDMul2 u3 (.a(8'h25), .b(8'h18), .o(o3));	// 25 x 18
+BCDMul2 u4 (.a(8'h37), .b(8'h21), .o(o4));	// 37 x 21
+
+endmodule
Index: FT64v7/rtl/lib/ack_gen.v
===================================================================
--- FT64v7/rtl/lib/ack_gen.v (nonexistent)
+++ FT64v7/rtl/lib/ack_gen.v (revision 60)
@@ -0,0 +1,61 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ack_gen.v
+// - generates an acknowledge signal after a specified number of clocks.
+// - separate stages for read and write
+//
+// ============================================================================
+//
+// Acknowledge generator with independently selectable read and write
+// delays.
+//   clk_i  clock
+//   ce_i   clock enable, passed to the ready_gen stages
+//   i      request input
+//   we_i   write enable accompanying the request
+//   o      acknowledge output
+//
+// Parameters:
+//   READ_STAGES      delay stages on the read path; 0 passes i through
+//   WRITE_STAGES     delay stages on the write path; 0 passes i through
+//   ACK_LEVEL        value driven on o while i is low
+//   REGISTER_OUTPUT  when non-zero, o is registered on clk_i
+//
+// While i is high, o follows the write path if we_i is also high and the
+// read path otherwise.
+module ack_gen(clk_i, ce_i, i, we_i, o);
+input clk_i;
+input ce_i;
+input i;
+input we_i;
+output reg o;
+parameter READ_STAGES = 3;
+parameter WRITE_STAGES = 0;
+parameter ACK_LEVEL = 1'b0;
+parameter REGISTER_OUTPUT = 1'b0;
+
+wire ro, wo;
+generate begin : gRdy
+// Read path
+if (READ_STAGES==0)
+assign ro = i;
+else begin
+ready_gen #(READ_STAGES) urrdy (clk_i, ce_i, i, ro);
+end
+// Write path. The stage count is WRITE_STAGES; the original passed
+// READ_STAGES here, so a non-zero WRITE_STAGES value was ignored.
+// NOTE(review): this stage is fed we_i, whereas the zero-stage bypass
+// uses i. Confirm which input the write path should track.
+if (WRITE_STAGES==0)
+assign wo = i;
+else begin
+ready_gen #(WRITE_STAGES) uwrdy (clk_i, ce_i, we_i, wo);
+end
+// Output select, optionally registered
+if (REGISTER_OUTPUT) begin
+always @(posedge clk_i)
+	o <= (i & we_i) ? wo : i ? ro : ACK_LEVEL;
+end
+else begin
+always @*
+	o <= (i & we_i) ? wo : i ? ro : ACK_LEVEL;
+end
+end
+endgenerate
+
+endmodule
Index: FT64v7/rtl/lib/cntlz.v
===================================================================
--- FT64v7/rtl/lib/cntlz.v (nonexistent)
+++ FT64v7/rtl/lib/cntlz.v (revision 60)
@@ -0,0 +1,613 @@
+/* ===============================================================
+ (C) 2006 Robert Finch
+ All rights reserved.
+ rob@birdcomputer.ca
+
+ cntlz.v
+ - count number of leading zeros in a byte
+ - count number of leading ones in a byte
+ - simple fast approach - lookup table
+
+ This source code is free for use and modification for
+ non-commercial or evaluation purposes, provided this
+ copyright statement and disclaimer remains present in
+ the file.
+
+ If the code is modified, please state the origin and
+ note that the code has been modified.
+
+ NO WARRANTY.
+ THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
+ ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
+ the entire risk of using the Work.
+
+ IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
+ WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
+ RELATIONSHIP WITH THE AUTHOR.
+
+ IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
+ TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
+ WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
+ TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
+ OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
+ AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
+ FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
+ USE.
+
+ Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256
+ 11 LUTs / 6 slices / 12.2 ns
+
+=============================================================== */
+
+// Count the leading zeros of a byte.
+//   i  byte to examine
+//   o  number of zero bits above the most significant one bit;
+//      8 when i is zero
+// Written as a priority encoder on the highest set bit. Each casez item
+// covers every input value that shares the same leading-zero count.
+module cntlz8(
+	input [7:0] i,
+	output reg [3:0] o
+);
+
+	always @(i)
+		casez (i)
+		8'b1???????:	o = 0;
+		8'b01??????:	o = 1;
+		8'b001?????:	o = 2;
+		8'b0001????:	o = 3;
+		8'b00001???:	o = 4;
+		8'b000001??:	o = 5;
+		8'b0000001?:	o = 6;
+		8'b00000001:	o = 7;
+		8'b00000000:	o = 8;
+		endcase
+
+
+endmodule
+
+
+// Count the leading zeros of a 16-bit value (result 0..16).
+// Built from two byte counters: when the upper byte is entirely zero
+// (its count is 8, i.e. bit 3 set) the lower byte's count plus 8 is
+// returned, otherwise the upper byte's count.
+module cntlz16(
+	input [15:0] i,
+	output [4:0] o
+);
+
+	wire [3:0] lo_cnt, hi_cnt;
+
+	cntlz8 u1 (.i(i[ 7:0]), .o(lo_cnt));
+	cntlz8 u2 (.i(i[15:8]), .o(hi_cnt));
+
+	assign o = !hi_cnt[3] ? {1'b0,hi_cnt} : {1'b0,lo_cnt} + 5'd8;
+
+endmodule
+
+
+// 39 slices / 67 LUTs / 19.3ns
+// Count the leading zeros of a 24-bit value (result 0..24).
+// The first byte from the top that is not entirely zero supplies the
+// count, offset by 8 for every all-zero byte above it.
+module cntlz24(
+	input [23:0] i,
+	output [4:0] o
+);
+
+	wire [3:0] b0_cnt, b1_cnt, b2_cnt;
+
+	// cntlz8 results in faster result than cntlz16
+	cntlz8 u1 (.i(i[ 7: 0]), .o(b0_cnt));
+	cntlz8 u2 (.i(i[15: 8]), .o(b1_cnt));
+	cntlz8 u3 (.i(i[23:16]), .o(b2_cnt));
+
+	assign o =
+		b2_cnt[3] ? (b1_cnt[3] ? {1'b0,b0_cnt} + 5'd16
+		                       : {1'b0,b1_cnt} + 5'd8)
+		          : {1'b0,b2_cnt};
+
+endmodule
+
+// 39 slices / 67 LUTs / 19.3ns
+// Count the leading zeros of a 32-bit value (result 0..32).
+// The first byte from the top that is not entirely zero supplies the
+// count, offset by 8 for every all-zero byte above it.
+module cntlz32(
+	input [31:0] i,
+	output [5:0] o
+);
+
+	wire [3:0] b0_cnt, b1_cnt, b2_cnt, b3_cnt;
+
+	// cntlz8 results in faster result than cntlz16
+	cntlz8 u1 (.i(i[ 7: 0]), .o(b0_cnt));
+	cntlz8 u2 (.i(i[15: 8]), .o(b1_cnt));
+	cntlz8 u3 (.i(i[23:16]), .o(b2_cnt));
+	cntlz8 u4 (.i(i[31:24]), .o(b3_cnt));
+
+	assign o =
+		b3_cnt[3] ? (b2_cnt[3] ? (b1_cnt[3] ? {2'b0,b0_cnt} + 6'd24
+		                                    : {2'b0,b1_cnt} + 6'd16)
+		                       : {2'b0,b2_cnt} + 6'd8)
+		          : {2'b0,b3_cnt};
+
+endmodule
+
+
+// 88 slices / 154 LUTs / 22.5 ns
+// Count the leading zeros of a 48-bit value (result 0..48).
+// The first 16-bit group from the top that is not entirely zero supplies
+// the count, offset by 16 for every all-zero group above it.
+module cntlz48(
+	input [47:0] i,
+	output [5:0] o
+);
+
+	wire [4:0] w0_cnt, w1_cnt, w2_cnt;
+
+	cntlz16 u1 (.i(i[15: 0]), .o(w0_cnt));
+	cntlz16 u2 (.i(i[31:16]), .o(w1_cnt));
+	cntlz16 u3 (.i(i[47:32]), .o(w2_cnt));
+
+	assign o =
+		w2_cnt[4] ? (w1_cnt[4] ? {1'b0,w0_cnt} + 6'd32
+		                       : {1'b0,w1_cnt} + 6'd16)
+		          : {1'b0,w2_cnt};
+
+endmodule
+
+
+// 88 slices / 154 LUTs / 22.5 ns
+// Count the leading zeros of a 64-bit value (result 0..64).
+// The first 16-bit group from the top that is not entirely zero supplies
+// the count, offset by 16 for every all-zero group above it.
+module cntlz64(
+	input [63:0] i,
+	output [6:0] o
+);
+
+	wire [4:0] w0_cnt, w1_cnt, w2_cnt, w3_cnt;
+
+	cntlz16 u1 (.i(i[15: 0]), .o(w0_cnt));
+	cntlz16 u2 (.i(i[31:16]), .o(w1_cnt));
+	cntlz16 u3 (.i(i[47:32]), .o(w2_cnt));
+	cntlz16 u4 (.i(i[63:48]), .o(w3_cnt));
+
+	assign o =
+		w3_cnt[4] ? (w2_cnt[4] ? (w1_cnt[4] ? {2'b0,w0_cnt} + 7'd48
+		                                    : {2'b0,w1_cnt} + 7'd32)
+		                       : {2'b0,w2_cnt} + 7'd16)
+		          : {2'b0,w3_cnt};
+
+endmodule
+
+
+// Count the leading zeros of an 80-bit value (result 0..80).
+// The top 16 bits are examined first, then the two 32-bit groups below
+// them. The offsets 16 and 48 are the widths of the all-zero groups
+// above the one that supplies the count.
+module cntlz80(
+	input [79:0] i,
+	output [6:0] o
+);
+
+	wire [4:0] top_cnt;
+	wire [5:0] lo_cnt, mid_cnt;
+
+	cntlz32 u1 (.i(i[31: 0]), .o(lo_cnt));
+	cntlz32 u2 (.i(i[63:32]), .o(mid_cnt));
+	cntlz16 u3 (.i(i[79:64]), .o(top_cnt));
+
+	assign o =
+		top_cnt[4] ? (mid_cnt[5] ? {1'b0,lo_cnt}  + 7'd48
+		                         : {1'b0,mid_cnt} + 7'd16)
+		           : {2'b0,top_cnt};
+
+endmodule
+
+
+// Count the leading zeros of a 96-bit value (result 0..96).
+// The first 32-bit group from the top that is not entirely zero supplies
+// the count, offset by 32 for every all-zero group above it.
+module cntlz96(
+	input [95:0] i,
+	output [7:0] o
+);
+
+	wire [5:0] d0_cnt, d1_cnt, d2_cnt;
+
+	cntlz32 u1 (.i(i[31: 0]), .o(d0_cnt));
+	cntlz32 u2 (.i(i[63:32]), .o(d1_cnt));
+	cntlz32 u3 (.i(i[95:64]), .o(d2_cnt));
+
+	assign o =
+		d2_cnt[5] ? (d1_cnt[5] ? {2'b0,d0_cnt} + 8'd64
+		                       : {2'b0,d1_cnt} + 8'd32)
+		          : {2'b0,d2_cnt};
+
+endmodule
+
+
+// Count the leading zeros of a 128-bit value (result 0..128).
+// The first 32-bit group from the top that is not entirely zero supplies
+// the count, offset by 32 for every all-zero group above it.
+module cntlz128(
+	input [127:0] i,
+	output [7:0] o
+);
+
+	wire [5:0] d0_cnt, d1_cnt, d2_cnt, d3_cnt;
+
+	cntlz32 u1 (.i(i[31: 0]),  .o(d0_cnt));
+	cntlz32 u2 (.i(i[63:32]),  .o(d1_cnt));
+	cntlz32 u3 (.i(i[95:64]),  .o(d2_cnt));
+	cntlz32 u4 (.i(i[127:96]), .o(d3_cnt));
+
+	assign o =
+		d3_cnt[5] ? (d2_cnt[5] ? (d1_cnt[5] ? {2'b0,d0_cnt} + 8'd96
+		                                    : {2'b0,d1_cnt} + 8'd64)
+		                       : {2'b0,d2_cnt} + 8'd32)
+		          : {2'b0,d3_cnt};
+
+endmodule
+
+
+// Registered 32-bit leading-zero counter: o captures the count of i on
+// each rising edge of clk for which ce is high.
+module cntlz32Reg(
+	input clk,
+	input ce,
+	input [31:0] i,
+	output reg [5:0] o
+);
+
+	wire [5:0] lz;
+
+	cntlz32 u1 (.i(i), .o(lz));
+
+	always @(posedge clk)
+	begin
+		if (ce)
+			o <= lz;
+	end
+
+endmodule
+
+
+// Registered 64-bit leading-zero counter: o captures the count of i on
+// each rising edge of clk for which ce is high.
+module cntlz64Reg(
+	input clk,
+	input ce,
+	input [63:0] i,
+	output reg [6:0] o
+);
+
+	wire [6:0] lz;
+
+	cntlz64 u1 (.i(i), .o(lz));
+
+	always @(posedge clk)
+	begin
+		if (ce)
+			o <= lz;
+	end
+
+endmodule
+
+// Registered 80-bit leading-zero counter: o captures the count of i on
+// each rising edge of clk for which ce is high.
+module cntlz80Reg(
+	input clk,
+	input ce,
+	input [79:0] i,
+	output reg [6:0] o
+);
+
+	wire [6:0] lz;
+
+	cntlz80 u1 (.i(i), .o(lz));
+
+	always @(posedge clk)
+	begin
+		if (ce)
+			o <= lz;
+	end
+
+endmodule
+
+// Registered 96-bit leading-zero counter: o captures the count of i on
+// each rising edge of clk for which ce is high.
+module cntlz96Reg(
+	input clk,
+	input ce,
+	input [95:0] i,
+	output reg [7:0] o
+);
+
+	wire [7:0] lz;
+
+	cntlz96 u1 (.i(i), .o(lz));
+
+	always @(posedge clk)
+	begin
+		if (ce)
+			o <= lz;
+	end
+
+endmodule
+
+// Registered 128-bit leading-zero counter: o captures the count of i on
+// each rising edge of clk for which ce is high.
+module cntlz128Reg(
+	input clk,
+	input ce,
+	input [127:0] i,
+	output reg [7:0] o
+);
+
+	wire [7:0] lz;
+
+	cntlz128 u1 (.i(i), .o(lz));
+
+	always @(posedge clk)
+	begin
+		if (ce)
+			o <= lz;
+	end
+
+endmodule
+
+// 5 slices / 10 LUTs / 7.702 ns
+// Count the leading ones of a byte: the leading-zero count of the
+// bitwise complement of the input.
+module cntlo8(
+	input [7:0] i,
+	output [3:0] o
+);
+
+	cntlz8 u1 (.i(~i), .o(o));
+
+endmodule
+
+
+// Count the leading ones of a 16-bit value: the leading-zero count of
+// the bitwise complement of the input.
+module cntlo16(
+	input [15:0] i,
+	output [4:0] o
+);
+
+	cntlz16 u1 (.i(~i), .o(o));
+
+endmodule
+
+
+// Count the leading ones of a 32-bit value: the leading-zero count of
+// the bitwise complement of the input.
+module cntlo32(
+	input [31:0] i,
+	output [5:0] o
+);
+
+	cntlz32 u1 (.i(~i), .o(o));
+
+endmodule
+
+
+// Count the leading ones of a 48-bit value: the leading-zero count of
+// the bitwise complement of the input.
+module cntlo48(
+	input [47:0] i,
+	output [5:0] o
+);
+
+	cntlz48 u1 (.i(~i), .o(o));
+
+endmodule
+
+
+// 59 slices / 99 LUTs / 14.065 ns
+// Count the leading ones of a 64-bit value: the leading-zero count of
+// the bitwise complement of the input.
+module cntlo64(
+	input [63:0] i,
+	output [6:0] o
+);
+
+	cntlz64 u1 (.i(~i), .o(o));
+
+endmodule
+
+
Index: FT64v7/rtl/lib/cntpop.v
===================================================================
--- FT64v7/rtl/lib/cntpop.v (nonexistent)
+++ FT64v7/rtl/lib/cntpop.v (revision 60)
@@ -0,0 +1,381 @@
+/* ===============================================================
+ (C) 2006 Robert Finch
+ All rights reserved.
+ rob@birdcomputer.ca
+
+ cntpop.v
+ - count number of one bits in a byte
+ - simple fast approach - lookup table
+
+ This source code is free for use and modification for
+ non-commercial or evaluation purposes, provided this
+ copyright statement and disclaimer remains present in
+ the file.
+
+ If the code is modified, please state the origin and
+ note that the code has been modified.
+
+ NO WARRANTY.
+ THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
+ ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
+ the entire risk of using the Work.
+
+ IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
+ WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
+ RELATIONSHIP WITH THE AUTHOR.
+
+ IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
+ TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
+ WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
+ TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
+ OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
+ AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
+ FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
+ USE.
+
+ Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256
+ 11 LUTs / 6 slices / 12.2 ns
+
+=============================================================== */
+
+// cntpop8 - population count (number of one bits) of a byte.
+//
+//   i : byte to examine
+//   o : number of bits set in i, 0..8
+//
+// The count is the sum of two 16-entry nibble lookups. Every lookup has a
+// default arm, so an unknown (X/Z) input bit yields an unknown count
+// instead of leaving the output at its previous value.
+module cntpop8(
+    input [7:0] i,
+    output reg [3:0] o
+);
+
+    // number of one bits in a nibble, 0..4
+    function [2:0] ones4;
+        input [3:0] nib;
+        begin
+            case (nib)
+            4'b0000:
+                ones4 = 3'd0;
+            4'b0001, 4'b0010, 4'b0100, 4'b1000:
+                ones4 = 3'd1;
+            4'b0011, 4'b0101, 4'b0110,
+            4'b1001, 4'b1010, 4'b1100:
+                ones4 = 3'd2;
+            4'b0111, 4'b1011, 4'b1101, 4'b1110:
+                ones4 = 3'd3;
+            4'b1111:
+                ones4 = 3'd4;
+            default:
+                ones4 = 3'bxxx;     // X/Z on the input: propagate unknown
+            endcase
+        end
+    endfunction
+
+    // the sum is evaluated at the 4-bit width of o, so 4 + 4 = 8 fits
+    always @*
+        o = ones4(i[7:4]) + ones4(i[3:0]);
+
+endmodule
+
+
+// cntpop16 - population count of a 16-bit value: the sum of the counts of
+// its two bytes.
+module cntpop16(
+    input [15:0] i,
+    output [4:0] o
+);
+
+    wire [3:0] ones_lo;     // bits set in i[7:0]
+    wire [3:0] ones_hi;     // bits set in i[15:8]
+
+    cntpop8 u1 (i[ 7:0],ones_lo);
+    cntpop8 u2 (i[15:8],ones_hi);
+
+    // operands are widened to the 5 bits of o before the add
+    assign o = ones_hi + ones_lo;
+
+endmodule
+
+
+// 76 slices / 147 LUTs / 19 ns
+module cntpop32(
+    input [31:0] i,
+    output [5:0] o
+);
+
+    // Population count of a 32-bit value. Four byte counters are used
+    // directly; the original author notes this is faster than going
+    // through cntpop16.
+    wire [3:0] ones_b0, ones_b1, ones_b2, ones_b3;
+
+    cntpop8 u1 (i[ 7: 0],ones_b0);
+    cntpop8 u2 (i[15: 8],ones_b1);
+    cntpop8 u3 (i[23:16],ones_b2);
+    cntpop8 u4 (i[31:24],ones_b3);
+
+    // pairwise partial sums, wide enough (max 16) not to overflow
+    wire [4:0] ones_lo = ones_b0 + ones_b1;
+    wire [4:0] ones_hi = ones_b2 + ones_b3;
+
+    assign o = ones_lo + ones_hi;
+
+endmodule
+
+
+// 156 slices / 300 LUTs / 22.2 ns
+module cntpop64(
+    input [63:0] i,
+    output [6:0] o
+);
+
+    // Population count of a 64-bit value: the sum of the counts of its
+    // four 16-bit quarters.
+    wire [4:0] ones_q0, ones_q1, ones_q2, ones_q3;
+
+    cntpop16 u1 (i[15: 0],ones_q0);
+    cntpop16 u2 (i[31:16],ones_q1);
+    cntpop16 u3 (i[47:32],ones_q2);
+    cntpop16 u4 (i[63:48],ones_q3);
+
+    // pairwise partial sums, wide enough (max 32) not to overflow
+    wire [5:0] ones_lo = ones_q0 + ones_q1;
+    wire [5:0] ones_hi = ones_q2 + ones_q3;
+
+    assign o = ones_lo + ones_hi;
+
+endmodule
+
+
Index: FT64v7/rtl/lib/delay.v
===================================================================
--- FT64v7/rtl/lib/delay.v (nonexistent)
+++ FT64v7/rtl/lib/delay.v (revision 60)
@@ -0,0 +1,160 @@
+/* ===============================================================
+ (C) 2006 Robert Finch
+ All rights reserved.
+ rob@birdcomputer.ca
+
+ delay.v
+ - delays signals by so many clock cycles
+
+
+ This source code is free for use and modification for
+ non-commercial or evaluation purposes, provided this
+ copyright statement and disclaimer remains present in
+ the file.
+
+ If you do modify the code, please state the origin and
+ note that you have modified the code.
+
+ NO WARRANTY.
+ THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
+ ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
+ the entire risk of using the Work.
+
+ IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
+ WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
+ RELATIONSHIP WITH THE AUTHOR.
+
+ IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
+ TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
+ WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
+ TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
+ OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
+ AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
+ FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
+ USE.
+
+=============================================================== */
+
+// delay1 - delays a WID-bit bus by one enabled clock.
+//   ce low freezes the register.
+module delay1
+    #(parameter WID = 1)
+    (
+    input clk,
+    input ce,
+    input [WID:1] i,
+    output reg [WID:1] o
+    );
+
+    always @(posedge clk) begin
+        if (ce) begin
+            o <= i;
+        end
+    end
+
+endmodule
+
+
+// delay2 - delays a WID-bit bus by two enabled clocks.
+//   ce low freezes the whole pipeline.
+module delay2
+    #(parameter WID = 1)
+    (
+    input clk,
+    input ce,
+    input [WID:1] i,
+    output reg [WID:1] o
+    );
+
+    reg [WID:1] s1;     // first pipeline stage
+
+    always @(posedge clk) begin
+        if (ce) begin
+            s1 <= i;
+            o  <= s1;
+        end
+    end
+
+endmodule
+
+
+// delay3 - delays a WID-bit bus by three enabled clocks.
+//   ce low freezes the whole pipeline.
+module delay3
+    #(parameter WID = 1)
+    (
+    input clk,
+    input ce,
+    input [WID:1] i,
+    output reg [WID:1] o
+    );
+
+    reg [WID:1] s1, s2;     // intermediate pipeline stages
+
+    always @(posedge clk) begin
+        if (ce) begin
+            s1 <= i;
+            s2 <= s1;
+            o  <= s2;
+        end
+    end
+
+endmodule
+
+// delay4 - delays a WID-bit bus by four enabled clocks.
+//   ce low freezes the whole pipeline.
+module delay4
+    #(parameter WID = 1)
+    (
+    input clk,
+    input ce,
+    input [WID-1:0] i,
+    output reg [WID-1:0] o
+    );
+
+    // intermediate pipeline stages, indexed like the ports
+    reg [WID-1:0] s1, s2, s3;
+
+    always @(posedge clk) begin
+        if (ce) begin
+            s1 <= i;
+            s2 <= s1;
+            s3 <= s2;
+            o  <= s3;
+        end
+    end
+
+endmodule
+
+
+// delay5 - delays a WID-bit bus by five enabled clocks.
+//   ce low freezes the whole pipeline.
+module delay5
+#(parameter WID = 1)
+(
+    input clk,
+    input ce,
+    input [WID:1] i,
+    output reg [WID:1] o
+);
+
+    reg [WID:1] s1, s2, s3, s4;     // intermediate pipeline stages
+
+    always @(posedge clk) begin
+        if (ce) begin
+            s1 <= i;
+            s2 <= s1;
+            s3 <= s2;
+            s4 <= s3;
+            o  <= s4;
+        end
+    end
+
+endmodule
+
Index: FT64v7/rtl/lib/edge_det.v
===================================================================
--- FT64v7/rtl/lib/edge_det.v (nonexistent)
+++ FT64v7/rtl/lib/edge_det.v (revision 60)
@@ -0,0 +1,49 @@
+// ============================================================================
+// (C) 2007 Robert Finch
+// All Rights Reserved.
+//
+// edge_det.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// Notes:
+//
+// Edge detector
+// This little core detects an edge (positive, negative, and
+// either) in the input signal.
+//
+// ============================================================================
+//
+module edge_det(rst, clk, ce, i, pe, ne, ee);
+input rst;      // synchronous reset, clears the stored sample
+input clk;      // clock
+input ce;       // clock enable for taking a new sample
+input i;        // signal being watched
+output pe;      // high while i is 1 and the stored sample is 0
+output ne;      // high while i is 0 and the stored sample is 1
+output ee;      // high while i differs from the stored sample
+
+// value of i at the most recent enabled clock edge
+reg i_last;
+
+always @(posedge clk) begin
+    if (rst)
+        i_last <= 1'b0;
+    else if (ce)
+        i_last <= i;
+end
+
+// The outputs are combinational: they compare the live input with the
+// stored sample, so they follow i directly.
+assign pe = i & ~i_last;
+assign ne = ~i & i_last;
+assign ee = pe | ne;
+
+endmodule
Index: FT64v7/rtl/lib/ffo.v
===================================================================
--- FT64v7/rtl/lib/ffo.v (nonexistent)
+++ FT64v7/rtl/lib/ffo.v (revision 60)
@@ -0,0 +1,85 @@
+
+// Find first one
+// ffo6 - index (5..0) of the most significant one bit of a 6-bit value.
+//   o = 3'd7 when no bit of i is set.
+// casez with '?' wildcards is used so that an X on the input is not
+// treated as a match, and blocking assignments are used because the
+// block is combinational.
+module ffo6(i, o);
+input [5:0] i;
+output reg [2:0] o;
+always @*
+casez(i)
+6'b1?????: o = 3'd5;
+6'b01????: o = 3'd4;
+6'b001???: o = 3'd3;
+6'b0001??: o = 3'd2;
+6'b00001?: o = 3'd1;
+6'b000001: o = 3'd0;
+default: o = 3'd7;
+endcase
+endmodule
+
+// ffo12 - index (11..0) of the most significant one bit of a 12-bit value.
+//   o = 4'd15 when no bit of i is set.
+// Built from two ffo6 halves; a half reports 3'd7 when it holds no one bit.
+module ffo12(i, o);
+input [11:0] i;
+output reg [3:0] o;
+
+wire [2:0] hi, lo;
+ffo6 u1 (i[11:6],hi);
+ffo6 u2 (i[5:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 3'd7)
+    o = 4'd6 + hi;
+else if (lo != 3'd7)
+    o = lo;
+else
+    o = 4'd15;
+
+endmodule
+
+// ffo24 - index (23..0) of the most significant one bit of a 24-bit value.
+//   o = 5'd31 when no bit of i is set.
+// Built from two ffo12 halves; a half reports 4'd15 when it holds no one bit.
+module ffo24(i, o);
+input [23:0] i;
+output reg [4:0] o;
+
+wire [3:0] hi, lo;
+ffo12 u1 (i[23:12],hi);
+ffo12 u2 (i[11:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 4'd15)
+    o = 5'd12 + hi;
+else if (lo != 4'd15)
+    o = lo;
+else
+    o = 5'd31;
+
+endmodule
+
+// ffo48 - index (47..0) of the most significant one bit of a 48-bit value.
+//   o = 6'd63 when no bit of i is set.
+// Built from two ffo24 halves; a half reports 5'd31 when it holds no one bit.
+module ffo48(i, o);
+input [47:0] i;
+output reg [5:0] o;
+
+wire [4:0] hi, lo;
+ffo24 u1 (i[47:24],hi);
+ffo24 u2 (i[23:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 5'd31)
+    o = 6'd24 + hi;
+else if (lo != 5'd31)
+    o = lo;
+else
+    o = 6'd63;
+
+endmodule
+
+// ffo96 - index (95..0) of the most significant one bit of a 96-bit value.
+//   o = 7'd127 when no bit of i is set.
+// Built from two ffo48 halves; a half reports 6'd63 when it holds no one bit.
+module ffo96(i, o);
+input [95:0] i;
+output reg [6:0] o;
+
+wire [5:0] hi, lo;
+ffo48 u1 (i[95:48],hi);
+ffo48 u2 (i[47:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 6'd63)
+    o = 7'd48 + hi;
+else if (lo != 6'd63)
+    o = lo;
+else
+    o = 7'd127;
+
+endmodule
+
Index: FT64v7/rtl/lib/ffz.v
===================================================================
--- FT64v7/rtl/lib/ffz.v (nonexistent)
+++ FT64v7/rtl/lib/ffz.v (revision 60)
@@ -0,0 +1,174 @@
+
+// Find first zero
+// ffz6 - index (5..0) of the most significant zero bit of a 6-bit value.
+//   o = 3'd7 when every bit of i is set.
+// casez with '?' wildcards is used so that an X on the input is not
+// treated as a match, and blocking assignments are used because the
+// block is combinational.
+module ffz6(i, o);
+input [5:0] i;
+output reg [2:0] o;
+always @*
+casez(i)
+6'b0?????: o = 3'd5;
+6'b10????: o = 3'd4;
+6'b110???: o = 3'd3;
+6'b1110??: o = 3'd2;
+6'b11110?: o = 3'd1;
+6'b111110: o = 3'd0;
+default: o = 3'd7;
+endcase
+endmodule
+
+// ffz12 - index (11..0) of the most significant zero bit of a 12-bit value.
+//   o = 4'd15 when every bit of i is set.
+// Built from two ffz6 halves; a half reports 3'd7 when it holds no zero bit.
+module ffz12(i, o);
+input [11:0] i;
+output reg [3:0] o;
+
+wire [2:0] hi, lo;
+ffz6 u1 (i[11:6],hi);
+ffz6 u2 (i[5:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 3'd7)
+    o = 4'd6 + hi;
+else if (lo != 3'd7)
+    o = lo;
+else
+    o = 4'd15;
+
+endmodule
+
+// ffz24 - index (23..0) of the most significant zero bit of a 24-bit value.
+//   o = 5'd31 when every bit of i is set.
+// Built from two ffz12 halves; a half reports 4'd15 when it holds no zero bit.
+module ffz24(i, o);
+input [23:0] i;
+output reg [4:0] o;
+
+wire [3:0] hi, lo;
+ffz12 u1 (i[23:12],hi);
+ffz12 u2 (i[11:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 4'd15)
+    o = 5'd12 + hi;
+else if (lo != 4'd15)
+    o = lo;
+else
+    o = 5'd31;
+
+endmodule
+
+// ffz48 - index (47..0) of the most significant zero bit of a 48-bit value.
+//   o = 6'd63 when every bit of i is set.
+// Built from two ffz24 halves; a half reports 5'd31 when it holds no zero bit.
+module ffz48(i, o);
+input [47:0] i;
+output reg [5:0] o;
+
+wire [4:0] hi, lo;
+ffz24 u1 (i[47:24],hi);
+ffz24 u2 (i[23:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 5'd31)
+    o = 6'd24 + hi;
+else if (lo != 5'd31)
+    o = lo;
+else
+    o = 6'd63;
+
+endmodule
+
+// ffz96 - index (95..0) of the most significant zero bit of a 96-bit value.
+//   o = 7'd127 when every bit of i is set.
+// Built from two ffz48 halves; a half reports 6'd63 when it holds no zero bit.
+module ffz96(i, o);
+input [95:0] i;
+output reg [6:0] o;
+
+wire [5:0] hi, lo;
+ffz48 u1 (i[95:48],hi);
+ffz48 u2 (i[47:0],lo);
+
+// combinational: blocking assignments; the upper half has priority
+always @*
+if (hi != 6'd63)
+    o = 7'd48 + hi;
+else if (lo != 6'd63)
+    o = lo;
+else
+    o = 7'd127;
+
+endmodule
+
+// Find last zero
+// flz6 - index (0..5) of the least significant zero bit of a 6-bit value.
+//   o = 3'd7 when every bit of i is set.
+// casez with '?' wildcards is used so that an X on the input is not
+// treated as a match, and blocking assignments are used because the
+// block is combinational.
+module flz6(i, o);
+input [5:0] i;
+output reg [2:0] o;
+always @*
+casez(i)
+6'b?????0: o = 3'd0;
+6'b????01: o = 3'd1;
+6'b???011: o = 3'd2;
+6'b??0111: o = 3'd3;
+6'b?01111: o = 3'd4;
+6'b011111: o = 3'd5;
+default: o = 3'd7;
+endcase
+
+endmodule
+
+// flz12 - index (0..11) of the least significant zero bit of a 12-bit value.
+//   o = 4'd15 when every bit of i is set.
+// Built from two flz6 halves; a half reports 3'd7 when it holds no zero bit.
+module flz12(i, o);
+input [11:0] i;
+output reg [3:0] o;
+
+wire [2:0] hi, lo;
+flz6 u1 (i[11:6],hi);
+flz6 u2 (i[5:0],lo);
+
+// combinational: blocking assignments; the lower half has priority
+always @*
+if (lo != 3'd7)
+    o = lo;
+else if (hi != 3'd7)
+    o = 4'd6 + hi;
+else
+    o = 4'd15;
+
+endmodule
+
+// flz24 - index (0..23) of the least significant zero bit of a 24-bit value.
+//   o = 5'd31 when every bit of i is set.
+// Built from two flz12 halves; a half reports 4'd15 when it holds no zero bit.
+module flz24(i, o);
+input [23:0] i;
+output reg [4:0] o;
+
+wire [3:0] hi, lo;
+flz12 u1 (i[23:12],hi);
+flz12 u2 (i[11:0],lo);
+
+// combinational: blocking assignments; the lower half has priority
+always @*
+if (lo != 4'd15)
+    o = lo;
+else if (hi != 4'd15)
+    o = 5'd12 + hi;
+else
+    o = 5'd31;
+
+endmodule
+
+// flz48 - index (0..47) of the least significant zero bit of a 48-bit value.
+//   o = 6'd63 when every bit of i is set.
+// Built from two flz24 halves; a half reports 5'd31 when it holds no zero bit.
+module flz48(i, o);
+input [47:0] i;
+output reg [5:0] o;
+
+wire [4:0] hi, lo;
+flz24 u1 (i[47:24],hi);
+flz24 u2 (i[23:0],lo);
+
+// combinational: blocking assignments; the lower half has priority
+always @*
+if (lo != 5'd31)
+    o = lo;
+else if (hi != 5'd31)
+    o = 6'd24 + hi;
+else
+    o = 6'd63;
+
+endmodule
+
+// flz96 - index (0..95) of the least significant zero bit of a 96-bit value.
+//   o = 7'd127 when every bit of i is set.
+// Built from two flz48 halves; a half reports 6'd63 when it holds no zero bit.
+module flz96(i, o);
+input [95:0] i;
+output reg [6:0] o;
+
+wire [5:0] hi, lo;
+flz48 u1 (i[95:48],hi);
+flz48 u2 (i[47:0],lo);
+
+// combinational: blocking assignments; the lower half has priority
+always @*
+if (lo != 6'd63)
+    o = lo;
+else if (hi != 6'd63)
+    o = 7'd48 + hi;
+else
+    o = 7'd127;
+
+endmodule
+
Index: FT64v7/rtl/lib/lfsr.v
===================================================================
--- FT64v7/rtl/lib/lfsr.v (nonexistent)
+++ FT64v7/rtl/lib/lfsr.v (revision 60)
@@ -0,0 +1,88 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2003-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// lfsr.v
+// - linear feedback shift register
+// - parameterized
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// ============================================================================
+//
+// lfsr - parameterized XNOR-feedback linear feedback shift register.
+//
+// Parameters:
+//   WID     - register length in bits; tap sets are provided for 3..31
+//   RST_VAL - value loaded into c (including the spare bit 0) on reset
+// Ports:
+//   rst - synchronous reset
+//   clk - clock
+//   ce  - clock enable; the register only shifts when high
+//   cyc - XORed into the feedback (shortens the feedback cycle)
+//   o   - current register contents, c[WID:1]
+//
+// The register proper is c[WID:1]; bit 1 receives the feedback and bit WID
+// is the oldest bit. Tap numbers index c directly (1-based). c[0] is a
+// spare bit held at zero: unused tap slots are encoded as 0 and select
+// c[0], so they drop out of the XOR.
+//
+// Changes from the previous revision:
+//   - The feedback used to be shifted into c[0], which made the register
+//     effectively WID+1 stages long with every tap off by one and gave
+//     sequences far shorter than 2**WID-1.
+//   - The WID=3 and WID=4 entries named tap WID itself, which cancelled
+//     c[WID] and left the feedback a constant.
+//   - The tap word was a clocked register and so unknown until the first
+//     clock edge; it is now a constant.
+// NOTE(review): the tap table appears to follow the XNOR tap table of
+// Xilinx XAPP052 - confirm. The output sequence differs from the previous
+// revision for every WID.
+module lfsr(rst, clk, ce, cyc, o);
+    parameter WID=17;
+    parameter RST_VAL=0;
+    localparam MSB=WID-1;
+
+    input rst;
+    input clk;
+    input ce;
+    input cyc;      // shorten the feedback cycle
+    output [WID:1] o;
+
+    // Feedback taps for a given width, packed one tap per byte. The
+    // implicit tap at bit 'width' is not listed. Unsupported widths
+    // return 0, which leaves only c[WID] (and cyc) in the feedback.
+    function [23:0] lfsr_taps;
+        input integer width;
+        begin
+            case (width)
+            3:  lfsr_taps = 24'h00_0002;
+            4:  lfsr_taps = 24'h00_0003;
+            5:  lfsr_taps = 24'h00_0003;
+            6:  lfsr_taps = 24'h00_0005;
+            7:  lfsr_taps = 24'h00_0006;
+            8:  lfsr_taps = 24'h06_0504;
+            9:  lfsr_taps = 24'h00_0005;
+            10: lfsr_taps = 24'h00_0007;
+            11: lfsr_taps = 24'h00_0009;
+            12: lfsr_taps = 24'h06_0401;
+            13: lfsr_taps = 24'h04_0301;
+            14: lfsr_taps = 24'h05_0301;
+            15: lfsr_taps = 24'h00_000E;
+            16: lfsr_taps = 24'h0F_0D04;
+            17: lfsr_taps = 24'h00_000E;
+            18: lfsr_taps = 24'h00_000B;
+            19: lfsr_taps = 24'h06_0201;
+            20: lfsr_taps = 24'h00_0011;
+            21: lfsr_taps = 24'h00_0013;
+            22: lfsr_taps = 24'h00_0015;
+            23: lfsr_taps = 24'h00_0012;
+            24: lfsr_taps = 24'h17_1611;
+            25: lfsr_taps = 24'h00_0016;
+            26: lfsr_taps = 24'h06_0201;
+            27: lfsr_taps = 24'h05_0201;
+            28: lfsr_taps = 24'h00_0019;
+            29: lfsr_taps = 24'h00_001B;
+            30: lfsr_taps = 24'h06_0401;
+            31: lfsr_taps = 24'h00_001C;
+            default:
+                lfsr_taps = 24'h00_0000;
+            endcase
+        end
+    endfunction
+
+    reg [WID:0] c;
+    wire [23:0] n = lfsr_taps(WID);
+    assign o = c[WID:1];
+
+    // XNOR of the oldest bit with the selected taps
+    wire fb = ~(c[WID]^c[n[23:16]]^c[n[15:8]]^c[n[7:0]]^cyc);
+
+    always @(posedge clk)
+        if (rst)
+            c <= RST_VAL;
+        else if (ce) begin
+            // shift left by one and clear the spare bit ...
+            c <= {c[MSB:0],1'b0};
+            // ... then let the feedback replace what was shifted into
+            // bit 1 (the later nonblocking assignment wins)
+            c[1] <= fb;
+        end
+
+endmodule
+
Index: FT64v7/rtl/lib/ready_gen.v
===================================================================
--- FT64v7/rtl/lib/ready_gen.v (nonexistent)
+++ FT64v7/rtl/lib/ready_gen.v (revision 60)
@@ -0,0 +1,45 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@opencores.org
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ready_gen.v
+// - generates a ready signal after a specified number of clocks.
+// - this is not a simple delay line. The output is set low on the first
+// enabled clock edge after the input goes low.
+//
+// ============================================================================
+//
+module ready_gen(clk_i, ce_i, i, o);
+input clk_i;            // clock
+input ce_i;             // clock enable; nothing changes while low
+input i;                // request input
+output reg o = 1'd0;    // ready output
+parameter STAGES = 3;   // number of hold stages ahead of the output register
+
+integer k;
+reg [STAGES-1:0] hold;
+
+// i is ANDed into every stage as well as the output register, so a low
+// input clears the whole chain on the next enabled clock edge rather than
+// rippling through it.
+always @(posedge clk_i) begin
+    if (ce_i) begin
+        o <= hold[STAGES-1] & i;
+        for (k = STAGES-1; k > 0; k = k - 1)
+            hold[k] <= hold[k-1] & i;
+        hold[0] <= i;
+    end
+end
+
+endmodule
Index: FT64v7/rtl/lib/redor128.v
===================================================================
--- FT64v7/rtl/lib/redor128.v (nonexistent)
+++ FT64v7/rtl/lib/redor128.v (revision 60)
@@ -0,0 +1,141 @@
+// redor128 - OR-reduction of the low-order bits of b.
+//   a : index of the highest bit included (0..127)
+//   b : data
+//   o : |b[a:0]
+module redor128
+(
+    input [6:0] a,
+    input [127:0] b,
+    output reg o
+);
+
+    // ones in bit positions a+1 and above, i.e. the bits left out
+    wire [127:0] above = {{127{1'b1}}, 1'b0} << a;
+
+    always @*
+        o = |(b & ~above);
+
+endmodule
Index: FT64v7/rtl/lib/redor32.v
===================================================================
--- FT64v7/rtl/lib/redor32.v (nonexistent)
+++ FT64v7/rtl/lib/redor32.v (revision 60)
@@ -0,0 +1,44 @@
+// redor32 - OR-reduction of the low-order bits of b.
+//   a : index of the highest bit included (0..31)
+//   b : data
+//   o : |b[a:0]
+module redor32
+(
+    input [4:0] a,
+    input [31:0] b,
+    output reg o
+);
+
+    // ones in bit positions a+1 and above, i.e. the bits left out
+    wire [31:0] above = {{31{1'b1}}, 1'b0} << a;
+
+    always @*
+        o = |(b & ~above);
+
+endmodule
Index: FT64v7/rtl/lib/redor64.v
===================================================================
--- FT64v7/rtl/lib/redor64.v (nonexistent)
+++ FT64v7/rtl/lib/redor64.v (revision 60)
@@ -0,0 +1,76 @@
+// redor64 - OR-reduction of the low-order bits of b.
+//   a : index of the highest bit included (0..63)
+//   b : data
+//   o : |b[a:0]
+module redor64
+(
+    input [5:0] a,
+    input [63:0] b,
+    output reg o
+);
+
+    // ones in bit positions a+1 and above, i.e. the bits left out
+    wire [63:0] above = {{63{1'b1}}, 1'b0} << a;
+
+    always @*
+        o = |(b & ~above);
+
+endmodule
Index: FT64v7/rtl/lib/redor80.v
===================================================================
--- FT64v7/rtl/lib/redor80.v (nonexistent)
+++ FT64v7/rtl/lib/redor80.v (revision 60)
@@ -0,0 +1,93 @@
+// redor80 - OR-reduction of the low-order bits of b.
+//   a : index of the highest bit included; values above 79 include all of b
+//   b : data
+//   o : |b[a:0]
+//
+// The selector is 7 bits wide, so it can exceed the data width. Those
+// values are defined here (the whole of b is reduced); the previous case
+// statement had no arm for them, which described a latch.
+module redor80
+(
+    input [6:0] a,
+    input [79:0] b,
+    output reg o
+);
+
+    // ones in bit positions a+1 and above, i.e. the bits left out;
+    // shifting by 79 or more leaves nothing excluded
+    wire [79:0] above = {{79{1'b1}}, 1'b0} << a;
+
+    always @*
+        o = |(b & ~above);
+
+endmodule
Index: FT64v7/rtl/lib/redor96.v
===================================================================
--- FT64v7/rtl/lib/redor96.v (nonexistent)
+++ FT64v7/rtl/lib/redor96.v (revision 60)
@@ -0,0 +1,109 @@
+// redor96 - OR-reduction of the low-order bits of b.
+//   a : index of the highest bit included; values above 95 include all of b
+//   b : data
+//   o : |b[a:0]
+//
+// The selector is 7 bits wide, so it can exceed the data width. Those
+// values are defined here (the whole of b is reduced); the previous case
+// statement had no arm for them, which described a latch.
+module redor96
+(
+    input [6:0] a,
+    input [95:0] b,
+    output reg o
+);
+
+    // ones in bit positions a+1 and above, i.e. the bits left out;
+    // shifting by 95 or more leaves nothing excluded
+    wire [95:0] above = {{95{1'b1}}, 1'b0} << a;
+
+    always @*
+        o = |(b & ~above);
+
+endmodule
Index: FT64v7/rtl/lib/round_robin.v
===================================================================
--- FT64v7/rtl/lib/round_robin.v (nonexistent)
+++ FT64v7/rtl/lib/round_robin.v (revision 60)
@@ -0,0 +1,85 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2005-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+module round_robin(rst, clk, ce, req, lock, sel);
+// Arbiter that registers a grant vector (sel) chosen from the request lines
+// (req) using a rotating priority. While any bit of (lock & sel) is set, sel
+// and the rotation amount hold their values.
+parameter N=12;             // number of request/select lines
+localparam B=$clog2(N);     // width of the rotate amount
+input rst;              // synchronous reset: clears rot and sel
+input clk;              // clock
+input ce;               // clock enable
+input [N-1:0] req;      // request
+input [N-1:0] lock;     // lock selection
+output [N-1:0] sel;     // select
+
+integer n;
+reg [N-1:0] sel;
+reg [B-1:0] rot;        // rotate applied to the request lines
+reg [B-1:0] amt;        // index of the lowest set bit of rreq; added to rot
+reg [N-1:0] rgrnt;      // rotated value of grant
+wire [N-1:0] nextGrant; // unrotated value of grant
+wire [N-1:0] rreq;      // rotated request
+
+// Rotate the request lines right by rot to set priority.
+wire [2*N-1:0] rreq1 = {req,{N{1'b0}}} >> rot;
+assign rreq = rreq1[2*N-1:N]|rreq1[N-1:0];
+
+// Rotate the rotated grant value left by rot, back into place.
+wire [2*N-1:0] rgnt1 = {{N{1'b0}},rgrnt} << rot;
+assign nextGrant = rgnt1[2*N-1:N]|rgnt1[N-1:0];
+
+// Lowest-numbered active rotated request, 0 if none (combinational: blocking).
+always @*
+begin
+    amt = 0;
+    for (n = N-1; n >= 0; n = n - 1)
+        if (rreq[n])
+            amt = n;
+end
+
+// Form the rotated grant; it is zero when no request is present.
+// NOTE(review): the shift is (amt-1) % N rather than amt, and rot wraps at
+// 2**B rather than N -- confirm both are intended; they are left unchanged.
+always @*
+    rgrnt = {{N{1'b0}},|rreq} << ((amt-1) % N);
+
+// Advance the rotation unless locked (nonblocking avoids a race with sel).
+always @(posedge clk)
+if (rst)
+    rot <= 0;
+else if (ce)
+    if (!(lock & sel))
+        rot <= rot + amt;
+
+// Register the next owner unless the current selection is locked.
+always @(posedge clk)
+if (rst)
+    sel <= 0;
+else if (ce)
+    if (!(lock & sel))
+        sel <= nextGrant;
+
+endmodule
+
+
Index: FT64v7/rtl/lib/vtdl.v
===================================================================
--- FT64v7/rtl/lib/vtdl.v (nonexistent)
+++ FT64v7/rtl/lib/vtdl.v (revision 60)
@@ -0,0 +1,59 @@
+//=============================================================================
+// (C) 2007,2012 Robert Finch, Stratford
+// robfinch@opencores.org
+//
+//
+// vtdl - variable tap delay line
+// (dynamic shift register)
+//
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Notes:
+//
+// This module acts like a clocked delay line with a variable tap.
+// Miscellaneous usage in rate control circuitry such as fifo's.
+// Capable of delaying a signal bus.
+// Signal bus width is specified with the WID parameter.
+//
+// Verilog 1995
+// Ref: Webpack9.1i xc3s1000-4ft256
+// 4 slices / 8 LUTs / < 10ns
+//=============================================================================
+//
+module vtdl(clk, ce, a, d, q);
+// Clocked shift register of DEP stages, each WID bits wide, read at tap 'a'.
+parameter WID = 8;      // width of the delayed bus
+parameter DEP = 16;     // number of stages
+localparam AMSB = (DEP>64) ? 6 : (DEP>32) ? 5 : (DEP>16) ? 4 : (DEP>8) ? 3 : (DEP>4) ? 2 : (DEP>2) ? 1 : 0;
+input clk;              // stages shift on the rising edge
+input ce;               // shift enable
+input [AMSB:0] a;       // tap select
+input [WID-1:0] d;      // value shifted into stage 0
+output [WID-1:0] q;     // contents of stage a
+
+reg [WID-1:0] m [DEP-1:0];  // m[0] holds the most recently shifted-in value
+integer stage;
+
+assign q = m[a];        // unregistered read of the selected stage
+
+always @(posedge clk)
+    if (ce) begin
+        m[0] <= d;
+        for (stage = DEP-1; stage >= 1; stage = stage - 1)
+            m[stage] <= m[stage-1];
+    end
+endmodule
Index: FT64v7/rtl/twoway/FT64.v
===================================================================
--- FT64v7/rtl/twoway/FT64.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64.v (revision 60)
@@ -0,0 +1,10642 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64.v
+// Features include:
+// - 16/32/48 bit instructions
+// - vector instruction set,
+// - SIMD instructions
+// - data width of 64 bits
+// - 32 general purpose registers
+// - 32 floating point registers
+// - 32 vector registers, length 63
+// - powerful branch prediction
+// - branch target buffer (BTB)
+// - return address predictor (RSB)
+// - bus interface unit
+// - instruction and data caches
+// - fine-grained simultaneous multi-threading (SMT)
+// - bus randomizer on exceptional conditions
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// Approx 41,000 LUTs. 66,000 LC's.
+// ============================================================================
+//
+`include "FT64_config.vh"
+`include "FT64_defines.vh"
+
+module FT64(hartid, rst, clk_i, clk4x, tm_clk_i, irq_i, vec_i, bte_o, cti_o, bok_i, cyc_o, stb_o, ack_i, err_i, we_o, sel_o, adr_o, dat_o, dat_i,
+ ol_o, pcr_o, pcr2_o, pkeys_o, icl_o, sr_o, cr_o, rbi_i, signal_i);
+input [63:0] hartid;
+input rst;
+input clk_i;
+input clk4x;
+input tm_clk_i;
+input [3:0] irq_i;
+input [7:0] vec_i;
+output reg [1:0] bte_o;
+output reg [2:0] cti_o;
+input bok_i;
+output cyc_o;
+output reg stb_o;
+input ack_i;
+input err_i;
+output we_o;
+output reg [7:0] sel_o;
+output [`ABITS] adr_o;
+output reg [63:0] dat_o;
+input [63:0] dat_i;
+output reg [1:0] ol_o;
+output [31:0] pcr_o;
+output [63:0] pcr2_o;
+output [63:0] pkeys_o;
+output reg icl_o;
+output reg cr_o;
+output reg sr_o;
+input rbi_i;
+input [31:0] signal_i;
+
+parameter TM_CLKFREQ = 20000000;
+parameter QENTRIES = `QENTRIES;
+parameter RSTPC = 64'hFFFFFFFFFFFC0100;
+parameter BRKPC = 64'hFFFFFFFFFFFC0000;
+`ifdef SUPPORT_SMT
+parameter PREGS = 256; // number of physical registers - 1
+parameter AREGS = 256; // number of architectural registers
+`else
+parameter PREGS = 128;
+parameter AREGS = 128;
+`endif
+parameter RBIT = 11;
+parameter DEBUG = 1'b0;
+parameter NMAP = QENTRIES;
+parameter BRANCH_PRED = 1'b0;
+parameter SUP_TXE = 1'b0;
+`ifdef SUPPORT_VECTOR
+parameter SUP_VECTOR = 1'b1;
+`else
+parameter SUP_VECTOR = 1'b0;
+`endif
+parameter DBW = 64;
+parameter ABW = 64;
+parameter AMSB = ABW-1;
+parameter NTHREAD = 1;
+reg [7:0] i;
+integer n;
+integer j, k;
+genvar g, h;
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+// Memory access sizes
+parameter byt = 3'd0;
+parameter wyde = 3'd1;
+parameter tetra = 3'd2;
+parameter octa = 3'd3;
+// IQ states
+parameter IQS_INVALID = 3'd0;
+parameter IQS_QUEUED = 3'd1;
+parameter IQS_OUT = 3'd2;
+parameter IQS_AGEN = 3'd3;
+parameter IQS_MEM = 3'd4;
+parameter IQS_DONE = 3'd5;
+parameter IQS_CMT = 3'd6;
+
+wire clk;
+//BUFG uclkb1
+//(
+// .I(clk_i),
+// .O(clk)
+//);
+assign clk = clk_i;
+
+wire exv_i;
+wire rdv_i;
+wire wrv_i;
+reg [ABW-1:0] vadr;
+reg cyc;
+reg we;
+
+wire dc_ack;
+wire acki = ack_i|dc_ack;
+wire [RBIT:0] Ra0, Ra1, Ra2;
+wire [RBIT:0] Rb0, Rb1, Rb2;
+wire [RBIT:0] Rc0, Rc1, Rc2;
+wire [RBIT:0] Rt0, Rt1, Rt2;
+wire [63:0] rfoa0,rfob0,rfoc0,rfoc0a,rfot0;
+wire [63:0] rfoa1,rfob1,rfoc1,rfoc1a,rfot1;
+wire [63:0] rfoa2,rfob2,rfoc2,rfoc2a,rfot2;
+`ifdef SUPPORT_SMT
+wire [7:0] Ra0s = {Ra0[7:0]};
+wire [7:0] Ra1s = {Ra1[7:0]};
+wire [7:0] Ra2s = {Ra2[7:0]};
+wire [7:0] Rb0s = {Rb0[7:0]};
+wire [7:0] Rb1s = {Rb1[7:0]};
+wire [7:0] Rb2s = {Rb2[7:0]};
+wire [7:0] Rc0s = {Rc0[7:0]};
+wire [7:0] Rc1s = {Rc1[7:0]};
+wire [7:0] Rc2s = {Rc2[7:0]};
+wire [7:0] Rt0s = {Rt0[7:0]};
+wire [7:0] Rt1s = {Rt1[7:0]};
+wire [7:0] Rt2s = {Rt2[7:0]};
+`else
+wire [6:0] Ra0s = {Ra0[7],Ra0[5:0]};
+wire [6:0] Ra1s = {Ra1[7],Ra1[5:0]};
+wire [6:0] Ra2s = {Ra2[7],Ra2[5:0]};
+wire [6:0] Rb0s = {Rb0[7],Rb0[5:0]};
+wire [6:0] Rb1s = {Rb1[7],Rb1[5:0]};
+wire [6:0] Rb2s = {Rb2[7],Rb2[5:0]};
+wire [6:0] Rc0s = {Rc0[7],Rc0[5:0]};
+wire [6:0] Rc1s = {Rc1[7],Rc1[5:0]};
+wire [6:0] Rc2s = {Rc2[7],Rc2[5:0]};
+wire [6:0] Rt0s = {Rt0[7],Rt0[5:0]};
+wire [6:0] Rt1s = {Rt1[7],Rt1[5:0]};
+wire [6:0] Rt2s = {Rt2[7],Rt2[5:0]};
+/*
+wire [5:0] Ra0s = {Ra0[5:0]};
+wire [5:0] Ra1s = {Ra1[5:0]};
+wire [5:0] Rb0s = {Rb0[5:0]};
+wire [5:0] Rb1s = {Rb1[5:0]};
+wire [5:0] Rc0s = {Rc0[5:0]};
+wire [5:0] Rc1s = {Rc1[5:0]};
+wire [5:0] Rt0s = {Rt0[5:0]};
+wire [5:0] Rt1s = {Rt1[5:0]};
+*/
+`endif
+
+`ifdef SUPPORT_PREDICATION
+reg [3:0] pregs [0:1023];
+`endif
+
+reg [63:0] wbrcd;
+wire [5:0] brgs;
+`ifdef SUPPORT_SEGMENTATION
+reg [23:0] currentCSSelector;
+reg [63:0] zs_base [0:63];
+reg [63:0] ds_base [0:63];
+reg [63:0] es_base [0:63];
+reg [63:0] fs_base [0:63];
+reg [63:0] gs_base [0:63];
+reg [63:0] hs_base [0:63];
+reg [63:0] ss_base [0:63];
+reg [63:0] cs_base [0:63];
+reg [63:0] zsx_base;
+reg [63:0] dsx_base;
+reg [63:0] esx_base;
+reg [63:0] fsx_base;
+reg [63:0] gsx_base;
+reg [63:0] hsx_base;
+reg [63:0] ssx_base;
+reg [63:0] csx_base;
+reg [63:0] zs_lb [0:63];
+reg [63:0] ds_lb [0:63];
+reg [63:0] es_lb [0:63];
+reg [63:0] fs_lb [0:63];
+reg [63:0] gs_lb [0:63];
+reg [63:0] hs_lb [0:63];
+reg [63:0] ss_lb [0:63];
+reg [63:0] cs_lb [0:63];
+reg [63:0] zslb;
+reg [63:0] dslb;
+reg [63:0] eslb;
+reg [63:0] fslb;
+reg [63:0] gslb;
+reg [63:0] hslb;
+reg [63:0] sslb;
+reg [63:0] cslb;
+reg [63:0] zs_ub [0:63];
+reg [63:0] ds_ub [0:63];
+reg [63:0] es_ub [0:63];
+reg [63:0] fs_ub [0:63];
+reg [63:0] gs_ub [0:63];
+reg [63:0] hs_ub [0:63];
+reg [63:0] ss_ub [0:63];
+reg [63:0] cs_ub [0:63];
+reg [63:0] zsub;
+reg [63:0] dsub;
+reg [63:0] esub;
+reg [63:0] fsub;
+reg [63:0] gsub;
+reg [63:0] hsub;
+reg [63:0] ssub;
+reg [63:0] csub;
+reg [23:0] zs_sel [0:63];
+reg [23:0] ds_sel [0:63];
+reg [23:0] es_sel [0:63];
+reg [23:0] fs_sel [0:63];
+reg [23:0] gs_sel [0:63];
+reg [23:0] hs_sel [0:63];
+reg [23:0] ss_sel [0:63];
+reg [23:0] cs_sel [0:63];
+reg [15:0] zs_acr [0:63];
+reg [15:0] ds_acr [0:63];
+reg [15:0] es_acr [0:63];
+reg [15:0] fs_acr [0:63];
+reg [15:0] gs_acr [0:63];
+reg [15:0] hs_acr [0:63];
+reg [15:0] ss_acr [0:63];
+reg [15:0] cs_acr [0:63];
+initial begin
+ for (n = 0; n < 64; n = n + 1) begin
+ zs_base[n] <= 64'h0;
+ ds_base[n] <= 64'h0;
+ es_base[n] <= 64'h0;
+ fs_base[n] <= 64'h0;
+ gs_base[n] <= 64'h0;
+ hs_base[n] <= 64'h0;
+ ss_base[n] <= 64'h0;
+ cs_base[n] <= 64'h0;
+ zs_lb[n] <= 64'h0;
+ ds_lb[n] <= 64'h0;
+ es_lb[n] <= 64'h0;
+ fs_lb[n] <= 64'h0;
+ gs_lb[n] <= 64'h0;
+ hs_lb[n] <= 64'h0;
+ ss_lb[n] <= 64'h0;
+ cs_lb[n] <= 64'h0;
+ zs_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ ds_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ es_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ fs_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ gs_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ hs_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ ss_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ cs_ub[n] <= 64'hFFFFFFFFFFFFFFFF;
+ zs_sel[n] <= 24'h0;
+ ds_sel[n] <= 24'h0;
+ es_sel[n] <= 24'h0;
+ fs_sel[n] <= 24'h0;
+ gs_sel[n] <= 24'h0;
+ hs_sel[n] <= 24'h0;
+ ss_sel[n] <= 24'h0;
+ cs_sel[n] <= 24'h0;
+ zs_acr[n] <= 16'h8000;
+ ds_acr[n] <= 16'h9200;
+ es_acr[n] <= 16'h8000;
+ fs_acr[n] <= 16'h8000;
+ gs_acr[n] <= 16'h8000;
+ hs_acr[n] <= 16'h8000;
+ ss_acr[n] <= 16'h9600;
+ cs_acr[n] <= 16'h9A00;
+ end
+end
+always @(posedge clk_i)
+begin
+ zsx_base <= zs_base[brgs];
+ dsx_base <= ds_base[brgs];
+ esx_base <= es_base[brgs];
+ fsx_base <= fs_base[brgs];
+ gsx_base <= gs_base[brgs];
+ hsx_base <= hs_base[brgs];
+ ssx_base <= ss_base[brgs];
+ csx_base <= cs_base[brgs];
+ zsub <= zs_ub[brgs];
+ dsub <= ds_ub[brgs];
+ esub <= es_ub[brgs];
+ fsub <= fs_ub[brgs];
+ gsub <= gs_ub[brgs];
+ hsub <= hs_ub[brgs];
+ ssub <= ss_ub[brgs];
+ csub <= cs_ub[brgs];
+ zslb <= zs_lb[brgs];
+ dslb <= ds_lb[brgs];
+ eslb <= es_lb[brgs];
+ fslb <= fs_lb[brgs];
+ gslb <= gs_lb[brgs];
+ hslb <= hs_lb[brgs];
+ sslb <= ss_lb[brgs];
+ cslb <= cs_lb[brgs];
+ currentCSSelector <= cs_sel[brgs];
+end
+`endif
+`ifdef SUPPORT_BBMS
+reg [15:0] thrd_handle [0:63];
+reg [63:0] prg_base [0:63];
+reg [63:0] prg_limit [0:63];
+reg [63:0] en_barrier [0:63]; // environment bound
+reg [63:0] cl_barrier [0:63];
+reg [63:0] cu_barrier [0:63];
+reg [63:0] ro_barrier [0:63];
+reg [63:0] dl_barrier [0:63];
+reg [63:0] du_barrier [0:63];
+reg [63:0] sl_barrier [0:63];
+reg [63:0] su_barrier [0:63];
+reg [7:0] env_priv [0:63];
+reg [7:0] cod_priv [0:63];
+reg [7:0] rdo_priv [0:63];
+reg [7:0] dat_priv [0:63];
+reg [7:0] stk_priv [0:63];
+reg [15:0] th;
+reg [63:0] pb;
+reg [63:0] cbl;
+reg [63:0] cbu;
+reg [63:0] ro;
+reg [63:0] dbl;
+reg [63:0] dbu;
+reg [63:0] sbl;
+reg [63:0] sbu;
+reg [63:0] en;
+reg [7:0] env_pl;
+reg [7:0] cod_pl;
+reg [7:0] rdo_pl;
+reg [7:0] dat_pl;
+reg [7:0] stk_pl;
+initial begin
+ for (n = 0; n < 64; n = n + 1)
+ begin
+ thrd_handle[n] <= 1'd0;
+ prg_base[n] <= 1'd0;
+ cl_barrier[n] <= 1'd0;
+ cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ ro_barrier[n] <= 1'd0;
+ dl_barrier[n] <= 1'd0;
+ du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ sl_barrier[n] <= 1'd0;
+ su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ env_priv[n] <= 8'h00;
+ cod_priv[n] <= 8'h00;
+ rdo_priv[n] <= 8'h00;
+ dat_priv[n] <= 8'h00;
+ stk_priv[n] <= 8'h00;
+ end
+end
+always @(posedge clk_i)
+begin
+ th <= thrd_handle[brgs];
+ pb <= prg_base[brgs];
+ cbl <= cl_barrier[brgs];
+ cbu <= cu_barrier[brgs];
+ ro <= ro_barrier[brgs];
+ dbl <= dl_barrier[brgs];
+ dbu <= du_barrier[brgs];
+ sbl <= sl_barrier[brgs];
+ sbu <= su_barrier[brgs];
+ en <= en_barrier[brgs];
+ env_pl <= env_priv[brgs];
+ cod_pl <= cod_priv[brgs];
+ rdo_pl <= rdo_priv[brgs];
+ dat_pl <= dat_priv[brgs];
+ stk_pl <= stk_priv[brgs];
+end
+//wire [23:0] currentPrgSelector = prg_selector[brgs];
+`else
+wire [63:0] pb = 1'd0;
+wire [63:0] cbl = 1'd0;
+wire [63:0] cbu = 64'hFFFFFFFFFFFFFFFF;
+wire [63:0] ro = 1'd0;
+wire [63:0] dbl = 1'd0;
+wire [63:0] dbu = 64'hFFFFFFFFFFFFFFFF;
+wire [63:0] sbl = 1'd0;
+wire [63:0] sbu = 64'hFFFFFFFFFFFFFFFF;
+wire [63:0] en = 1'd0;
+wire [7:0] env_pl = 8'h00;
+wire [7:0] cod_pl = 8'h00;
+wire [7:0] rdo_pl = 8'h00;
+wire [7:0] dat_pl = 8'h00;
+wire [7:0] stk_pl = 8'h00;
+`endif
+
+reg [PREGS-1:0] rf_v;
+reg [`QBITSP1] rf_source[0:AREGS-1];
+reg [15:0] prf_v;
+reg [`QBITSP1] prf_source[0:15];
+initial begin
+for (n = 0; n < AREGS; n = n + 1)
+ rf_source[n] = 1'b0;
+for (n = 0; n < 16; n = n + 1)
+ prf_source[n] <= 1'b0;
+end
+wire [`ABITS] pc0a;
+wire [`ABITS] pc1a;
+wire [`ABITS] pc2a;
+`ifdef SUPPORT_SEGMENTATION
+wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {csx_base[50:0],13'd0} + pc0a[47:0];
+wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {csx_base[50:0],13'd0} + pc1a[47:0];
+wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {csx_base[50:0],13'd0} + pc2a[47:0];
+`else
+`ifdef SUPPORT_BBMS
+wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0];
+wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0];
+wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {pb[50:0],13'd0} + pc2a[47:0];
+`else
+wire [`ABITS] pc0 = pc0a;
+wire [`ABITS] pc1 = pc1a;
+wire [`ABITS] pc2 = pc2a;
+`endif
+`endif
+
+reg excmiss;
+reg [`ABITS] excmisspc;
+reg excthrd;
+reg exception_set;
+reg rdvq; // accumulated read violation
+reg errq; // accumulated err_i input status
+reg exvq;
+
+// Vector
+reg [5:0] vqe0, vqe1, vqe2; // vector element being queued
+reg [5:0] vqet0, vqet1, vqet2;
+reg [7:0] vl; // vector length
+reg [63:0] vm [0:7]; // vector mask registers
+reg [1:0] m2;
+
+reg [31:0] wb_merges;
+// CSR's
+reg [63:0] cr0;
+wire snr = cr0[17]; // sequence number reset
+wire dce = cr0[30]; // data cache enable
+wire bpe = cr0[32]; // branch predictor enable
+wire wbm = cr0[34];
+wire sple = cr0[35]; // speculative load enable
+wire ctgtxe = cr0[33];
+`ifdef SUPPORT_PREDICATION
+wire pred_on = cr0[36]; // predicated execution mode on
+`else
+wire pred_on = 1'b0;
+`endif
+reg [63:0] pmr;
+wire id1_available = pmr[0];
+wire id2_available = pmr[1];
+wire id3_available = pmr[2];
+wire alu0_available = pmr[8];
+wire alu1_available = pmr[9];
+wire fpu1_available = pmr[16];
+wire fpu2_available = pmr[17];
+wire mem1_available = pmr[24];
+wire mem2_available = pmr[25];
+wire mem3_available = pmr[26];
+wire fcu_available = pmr[32];
+// Simply setting this flag to zero should strip out almost all the logic
+// associated SMT.
+`ifdef SUPPORT_SMT
+wire thread_en = cr0[16];
+`else
+wire thread_en = 1'b0;
+`endif
+wire vechain = cr0[18];
+reg [39:0] iq_ctr;
+reg [39:0] irq_ctr; // count of number of interrupts
+reg [39:0] bm_ctr; // branch miss counter
+reg [39:0] icl_ctr; // instruction cache load counter
+
+reg [7:0] fcu_timeout;
+reg [63:0] tick;
+reg [63:0] wc_time;
+reg [31:0] pcr;
+reg [63:0] pcr2;
+assign pcr_o = pcr;
+assign pcr2_o = pcr2;
+reg [63:0] aec;
+reg [15:0] cause[0:15];
+`ifdef SUPPORT_SMT
+reg [`ABITS] epc [0:NTHREAD];
+reg [`ABITS] epc0 [0:NTHREAD];
+reg [`ABITS] epc1 [0:NTHREAD];
+reg [`ABITS] epc2 [0:NTHREAD];
+reg [`ABITS] epc3 [0:NTHREAD];
+reg [`ABITS] epc4 [0:NTHREAD];
+reg [`ABITS] epc5 [0:NTHREAD];
+reg [`ABITS] epc6 [0:NTHREAD];
+reg [`ABITS] epc7 [0:NTHREAD];
+reg [`ABITS] epc8 [0:NTHREAD]; // exception pc and stack
+reg [63:0] mstatus [0:NTHREAD]; // machine status
+wire [3:0] im = mstatus[0][3:0];
+wire [1:0] ol [0:NTHREAD];
+wire [1:0] dl [0:NTHREAD];
+assign ol[0] = mstatus[0][5:4]; // operating level
+assign dl[0] = mstatus[0][21:20];
+wire [7:0] cpl [0:NTHREAD];
+assign cpl[0] = mstatus[0][13:6]; // current privilege level
+wire [5:0] rgs [0:NTHREAD];
+assign ol[1] = mstatus[1][5:4]; // operating level
+assign cpl[1] = mstatus[1][13:6]; // current privilege level
+assign dl[1] = mstatus[1][21:20];
+wire [7:0] ASID = mstatus[0][47:40];
+reg [15:0] ol_stack [0:NTHREAD];
+reg [15:0] dl_stack [0:NTHREAD];
+reg [31:0] im_stack [0:NTHREAD];
+reg [63:0] pl_stack [0:NTHREAD];
+reg [63:0] rs_stack [0:NTHREAD];
+reg [63:0] brs_stack [0:NTHREAD];
+reg [63:0] fr_stack [0:NTHREAD];
+assign rgs[0] = rs_stack[0][5:0];
+assign rgs[1] = rs_stack[1][5:0];
+wire mprv = mstatus[0][55];
+wire [5:0] fprgs = mstatus[0][25:20];
+//assign ol_o = mprv ? ol_stack[0][2:0] : ol[0];
+wire vca = mstatus[0][32]; // vector chaining active
+`else
+reg [`ABITS] epc ;
+reg [`ABITS] epc0 ;
+reg [`ABITS] epc1 ;
+reg [`ABITS] epc2 ;
+reg [`ABITS] epc3 ;
+reg [`ABITS] epc4 ;
+reg [`ABITS] epc5 ;
+reg [`ABITS] epc6 ;
+reg [`ABITS] epc7 ;
+reg [`ABITS] epc8 ; // exception pc and stack
+reg [63:0] mstatus ; // machine status
+wire [3:0] im = mstatus[3:0];
+wire [1:0] ol ;
+wire [1:0] dl;
+assign ol = mstatus[5:4]; // operating level
+assign dl = mstatus[21:20];
+wire [7:0] cpl ;
+assign cpl = mstatus[13:6]; // current privilege level
+wire [5:0] rgs ;
+reg [15:0] ol_stack ;
+reg [15:0] dl_stack ;
+reg [31:0] im_stack ;
+reg [63:0] pl_stack ;
+reg [63:0] rs_stack ;
+reg [63:0] brs_stack ;
+reg [63:0] fr_stack ;
+assign rgs = rs_stack[5:0];
+assign brgs = brs_stack[5:0];
+wire mprv = mstatus[55];
+wire [7:0] ASID = mstatus[47:40];
+wire [5:0] fprgs = mstatus[25:20];
+//assign ol_o = mprv ? ol_stack[2:0] : ol;
+wire vca = mstatus[32]; // vector chaining active
+`endif
+reg [63:0] keys;
+assign pkeys_o = keys;
+reg [63:0] tcb;
+reg [47:0] bad_instr[0:15];
+reg [`ABITS] badaddr[0:15];
+reg [`ABITS] tvec[0:7];
+reg [63:0] sema;
+reg [63:0] vm_sema;
+reg [63:0] cas; // compare and swap
+reg [63:0] ve_hold;
+reg isCAS, isAMO, isInc, isSpt, isRMW;
+reg [`QBITS] casid;
+reg [4:0] regLR = 5'd29;
+
+
+reg [2:0] fp_rm;
+reg fp_inexe;
+reg fp_dbzxe;
+reg fp_underxe;
+reg fp_overxe;
+reg fp_invopxe;
+reg fp_giopxe;
+reg fp_nsfp = 1'b0;
+reg fp_fractie;
+reg fp_raz;
+
+reg fp_neg;
+reg fp_pos;
+reg fp_zero;
+reg fp_inf;
+
+reg fp_inex; // inexact exception
+reg fp_dbzx; // divide by zero exception
+reg fp_underx; // underflow exception
+reg fp_overx; // overflow exception
+reg fp_giopx; // global invalid operation exception
+reg fp_sx; // summary exception
+reg fp_swtx; // software triggered exception
+reg fp_gx;
+reg fp_invopx;
+
+reg fp_infzerox;
+reg fp_zerozerox;
+reg fp_subinfx;
+reg fp_infdivx;
+reg fp_NaNCmpx;
+reg fp_cvtx;
+reg fp_sqrtx;
+reg fp_snanx;
+
+wire [31:0] fp_status = {
+
+ fp_rm,
+ fp_inexe,
+ fp_dbzxe,
+ fp_underxe,
+ fp_overxe,
+ fp_invopxe,
+ fp_nsfp,
+
+ fp_fractie,
+ fp_raz,
+ 1'b0,
+ fp_neg,
+ fp_pos,
+ fp_zero,
+ fp_inf,
+
+ fp_swtx,
+ fp_inex,
+ fp_dbzx,
+ fp_underx,
+ fp_overx,
+ fp_giopx,
+ fp_gx,
+ fp_sx,
+
+ fp_cvtx,
+ fp_sqrtx,
+ fp_NaNCmpx,
+ fp_infzerox,
+ fp_zerozerox,
+ fp_infdivx,
+ fp_subinfx,
+ fp_snanx
+ };
+
+reg [63:0] fpu_csr;
+wire [5:0] fp_rgs = fpu_csr[37:32];
+
+//reg [25:0] m[0:8191];
+reg [3:0] panic; // indexes the message structure
+reg [128:0] message [0:15]; // indexed by panic
+
+wire int_commit;
+reg StatusHWI;
+(* mark_debug = "true" *)
+reg [55:0] insn0, insn1, insn2;
+wire [55:0] insn0a, insn1b, insn2b;
+reg [55:0] insn1a, insn2a;
+// Only need enough bits in the sequence number to cover the instructions in
+// the queue plus an extra count for skipping on branch misses. In this case
+// that would be four bits minimum (count 0 to 8).
+wire [63:0] rdat0,rdat1,rdat2;
+reg [63:0] xdati;
+
+reg canq1, canq2, canq3;
+(* mark_debug = "true" *)
+reg queued1;
+reg queued2;
+reg queued3;
+(* mark_debug = "true" *)
+reg queuedNop;
+
+reg [47:0] codebuf[0:63];
+reg [QENTRIES-1:0] setpred;
+
+// instruction queue (ROB)
+// State and state decodes
+reg [2:0] iqentry_state [0:QENTRIES-1];
+reg [QENTRIES-1:0] iqentry_v; // entry valid? -- this should be the first bit
+reg [QENTRIES-1:0] iqentry_done;
+reg [QENTRIES-1:0] iqentry_out;
+reg [QENTRIES-1:0] iqentry_agen;
+reg [`SNBITS] iqentry_sn [0:QENTRIES-1]; // instruction sequence number
+reg [QENTRIES-1:0] iqentry_iv; // instruction is valid
+reg [`QBITSP1] iqentry_is [0:QENTRIES-1]; // source of instruction
+reg [QENTRIES-1:0] iqentry_thrd; // which thread the instruction is in
+reg [QENTRIES-1:0] iqentry_pt; // predict taken
+reg [QENTRIES-1:0] iqentry_bt; // update branch target buffer
+reg [QENTRIES-1:0] iqentry_takb; // take branch record
+reg [QENTRIES-1:0] iqentry_jal;
+reg [2:0] iqentry_sz [0:QENTRIES-1];
+reg [QENTRIES-1:0] iqentry_alu = 8'h00; // alu type instruction
+reg [QENTRIES-1:0] iqentry_alu0; // only valid on alu #0
+reg [QENTRIES-1:0] iqentry_fpu; // floating point instruction
+reg [QENTRIES-1:0] iqentry_fc; // flow control instruction
+reg [QENTRIES-1:0] iqentry_canex = 8'h00; // true if it's an instruction that can exception
+reg [QENTRIES-1:0] iqentry_oddball = 8'h00; // writes to register file
+reg [QENTRIES-1:0] iqentry_load; // is a memory load instruction
+reg [QENTRIES-1:0] iqentry_loadv; // is a volatile memory load instruction
+reg [QENTRIES-1:0] iqentry_store; // is a memory store instruction
+reg [QENTRIES-1:0] iqentry_preload; // is a memory preload instruction
+reg [QENTRIES-1:0] iqentry_ldcmp;
+reg [QENTRIES-1:0] iqentry_mem; // touches memory: 1 if LW/SW
+reg [QENTRIES-1:0] iqentry_memndx; // indexed memory operation
+reg [2:0] iqentry_memsz [0:QENTRIES-1]; // size of memory op
+reg [QENTRIES-1:0] iqentry_rmw; // memory RMW op
+reg [QENTRIES-1:0] iqentry_push;
+reg [QENTRIES-1:0] iqentry_memdb;
+reg [QENTRIES-1:0] iqentry_memsb;
+reg [QENTRIES-1:0] iqentry_rtop;
+reg [QENTRIES-1:0] iqentry_sei;
+reg [QENTRIES-1:0] iqentry_aq; // memory aquire
+reg [QENTRIES-1:0] iqentry_rl; // memory release
+reg [QENTRIES-1:0] iqentry_shft;
+reg [QENTRIES-1:0] iqentry_jmp; // changes control flow: 1 if BEQ/JALR
+reg [QENTRIES-1:0] iqentry_br; // Bcc (for predictor)
+reg [QENTRIES-1:0] iqentry_ret;
+reg [QENTRIES-1:0] iqentry_irq;
+reg [QENTRIES-1:0] iqentry_brk;
+reg [QENTRIES-1:0] iqentry_rti;
+reg [QENTRIES-1:0] iqentry_sync; // sync instruction
+reg [QENTRIES-1:0] iqentry_fsync;
+reg [QENTRIES-1:0] iqentry_tlb;
+reg [QENTRIES-1:0] iqentry_cmp;
+reg [QENTRIES-1:0] iqentry_rfw = 1'b0; // writes to register file
+reg [QENTRIES-1:0] iqentry_prfw = 1'b0;
+reg [7:0] iqentry_we [0:QENTRIES-1]; // enable strobe
+reg [63:0] iqentry_res [0:QENTRIES-1]; // instruction result
+reg [63:0] iqentry_ares [0:QENTRIES-1]; // alternate instruction result
+reg [47:0] iqentry_instr[0:QENTRIES-1]; // instruction opcode
+reg [2:0] iqentry_insln[0:QENTRIES-1]; // instruction length
+reg [7:0] iqentry_exc [0:QENTRIES-1]; // only for branches ... indicates a HALT instruction
+reg [RBIT:0] iqentry_tgt[0:QENTRIES-1]; // Rt field or ZERO -- this is the instruction's target (if any)
+reg [7:0] iqentry_vl [0:QENTRIES-1];
+reg [5:0] iqentry_ven [0:QENTRIES-1]; // vector element number
+reg [AMSB:0] iqentry_ma [0:QENTRIES-1]; // memory address
+reg [63:0] iqentry_a0 [0:QENTRIES-1]; // argument 0 (immediate)
+reg [63:0] iqentry_a1 [0:QENTRIES-1]; // argument 1
+reg [QENTRIES-1:0] iqentry_a1_v; // arg1 valid
+reg [`QBITSP1] iqentry_a1_s [0:QENTRIES-1]; // arg1 source (iq entry # with top bit representing ALU/DRAM bus)
+reg [63:0] iqentry_a2 [0:QENTRIES-1]; // argument 2
+reg iqentry_a2_v [0:QENTRIES-1]; // arg2 valid
+reg [`QBITSP1] iqentry_a2_s [0:QENTRIES-1]; // arg2 source (iq entry # with top bit representing ALU/DRAM bus)
+reg [63:0] iqentry_a3 [0:QENTRIES-1]; // argument 3
+reg iqentry_a3_v [0:QENTRIES-1]; // arg3 valid
+reg [`QBITSP1] iqentry_a3_s [0:QENTRIES-1]; // arg3 source (iq entry # with top bit representing ALU/DRAM bus)
+reg [`ABITS] iqentry_pc [0:QENTRIES-1]; // program counter for this instruction
+reg [RBIT:0] iqentry_Ra [0:QENTRIES-1];
+reg [RBIT:0] iqentry_Rb [0:QENTRIES-1];
+reg [RBIT:0] iqentry_Rc [0:QENTRIES-1];
+
+// debugging
+//reg [4:0] iqentry_ra [0:7]; // Ra
+initial begin
+for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_a1_s[n] <= 5'd0;
+ iqentry_a2_s[n] <= 5'd0;
+ iqentry_a3_s[n] <= 5'd0;
+end
+
+reg [QENTRIES-1:0] iqentry_source = {QENTRIES{1'b0}};
+reg [QENTRIES-1:0] iqentry_imm;
+reg [QENTRIES-1:0] iqentry_memready;
+reg [QENTRIES-1:0] iqentry_memopsvalid;
+
+reg [QENTRIES-1:0] memissue = {QENTRIES{1'b0}};
+reg [1:0] missued;
+reg [7:0] last_issue0, last_issue1, last_issue2;
+reg [QENTRIES-1:0] iqentry_memissue;
+reg [QENTRIES-1:0] iqentry_stomp;
+reg [3:0] stompedOnRets;
+reg [QENTRIES-1:0] iqentry_alu0_issue;
+reg [QENTRIES-1:0] iqentry_alu1_issue;
+reg [QENTRIES-1:0] iqentry_alu2_issue;
+reg [QENTRIES-1:0] iqentry_id1issue;
+reg [QENTRIES-1:0] iqentry_id2issue;
+reg [QENTRIES-1:0] iqentry_id3issue;
+reg [1:0] iqentry_mem_islot [0:QENTRIES-1];
+reg [QENTRIES-1:0] iqentry_fcu_issue;
+reg [QENTRIES-1:0] iqentry_fpu1_issue;
+reg [QENTRIES-1:0] iqentry_fpu2_issue;
+
+reg [PREGS-1:1] livetarget;
+reg [PREGS-1:1] iqentry_livetarget [0:QENTRIES-1];
+reg [PREGS-1:1] iqentry_latestID [0:QENTRIES-1];
+reg [PREGS-1:1] iqentry_cumulative [0:QENTRIES-1];
+`ifdef SUPPORT_PREDICATION
+reg [QENTRIES-1:0] iqentry_psource = {QENTRIES{1'b0}};
+reg [15:0] plivetarget;
+reg [15:0] iqentry_plivetarget [0:QENTRIES-1];
+reg [15:0] iqentry_platestID [0:QENTRIES-1];
+reg [15:0] iqentry_pcumulative [0:QENTRIES-1];
+`endif
+wire [PREGS-1:1] iq_out [0:QENTRIES-1];
+
+reg [`QBITS] tail0;
+reg [`QBITS] tail1;
+reg [`QBITS] tail2;
+reg [`QBITS] heads[0:QENTRIES-1];
+
+// To detect a head change at time of commit. Some values need to pulsed
+// with a single pulse.
+reg [`QBITS] ohead[0:2];
+reg ocommit0_v, ocommit1_v, ocommit2_v;
+reg [11:0] cmt_timer;
+
+wire take_branch0;
+wire take_branch1;
+
+reg [3:0] nop_fetchbuf;
+wire fetchbuf; // determines which pair to read from & write to
+wire [3:0] fb_panic;
+
+wire [47:0] fetchbuf0_instr;
+wire [2:0] fetchbuf0_insln;
+wire [`ABITS] fetchbuf0_pc;
+(* mark_debug = "true" *)
+wire fetchbuf0_v;
+wire fetchbuf0_thrd;
+wire fetchbuf0_mem;
+wire fetchbuf0_rfw;
+wire [47:0] fetchbuf1_instr;
+wire [2:0] fetchbuf1_insln;
+wire [`ABITS] fetchbuf1_pc;
+wire fetchbuf1_v;
+wire fetchbuf1_thrd;
+wire fetchbuf1_mem;
+wire fetchbuf1_rfw;
+wire [47:0] fetchbuf2_instr;
+wire [2:0] fetchbuf2_insln;
+wire [`ABITS] fetchbuf2_pc;
+wire fetchbuf2_v;
+wire fetchbuf2_thrd;
+wire fetchbuf2_mem;
+wire fetchbuf2_rfw;
+`ifdef SUPPORT_PREDICATION
+wire fetchbuf0_prfw;
+wire [7:0] fetchbuf0_pbyte;
+wire fetchbuf1_prfw;
+wire [7:0] fetchbuf1_pbyte;
+wire fetchbuf2_prfw;
+wire [7:0] fetchbuf2_pbyte;
+`endif
+wire [47:0] fetchbufA_instr;
+wire [`ABITS] fetchbufA_pc;
+wire fetchbufA_v;
+wire [47:0] fetchbufB_instr;
+wire [`ABITS] fetchbufB_pc;
+wire fetchbufB_v;
+wire [47:0] fetchbufC_instr;
+wire [`ABITS] fetchbufC_pc;
+wire fetchbufC_v;
+wire [47:0] fetchbufD_instr;
+wire [`ABITS] fetchbufD_pc;
+wire fetchbufD_v;
+wire [47:0] fetchbufE_instr;
+wire [`ABITS] fetchbufE_pc;
+wire fetchbufE_v;
+wire [47:0] fetchbufF_instr;
+wire [`ABITS] fetchbufF_pc;
+wire fetchbufF_v;
+
+//reg did_branchback0;
+//reg did_branchback1;
+
+reg id1_v;
+reg [`QBITSP1] id1_id;
+reg [47:0] id1_instr;
+reg [5:0] id1_ven;
+reg [7:0] id1_vl;
+reg id1_thrd;
+reg id1_pt;
+reg [4:0] id1_Rt;
+wire [143:0] id1_bus;
+
+reg id2_v;
+reg [`QBITSP1] id2_id;
+reg [47:0] id2_instr;
+reg [5:0] id2_ven;
+reg [7:0] id2_vl;
+reg id2_thrd;
+reg id2_pt;
+reg [4:0] id2_Rt;
+wire [143:0] id2_bus;
+
+reg id3_v;
+reg [`QBITSP1] id3_id;
+reg [47:0] id3_instr;
+reg [5:0] id3_ven;
+reg [7:0] id3_vl;
+reg id3_thrd;
+reg id3_pt;
+reg [4:0] id3_Rt;
+wire [143:0] id3_bus;
+
+reg [63:0] alu0_xs = 64'd0;
+reg [63:0] alu1_xs = 64'd0;
+
+reg [3:0] alu0_pred;
+reg alu0_cmt;
+wire alu0_abort;
+reg alu0_ld;
+reg alu0_dataready;
+wire alu0_done;
+wire alu0_idle;
+reg [`QBITSP1] alu0_sourceid;
+reg [47:0] alu0_instr;
+reg alu0_tlb;
+reg alu0_mem;
+reg alu0_load;
+reg alu0_store;
+reg alu0_push;
+reg alu0_shft;
+reg [RBIT:0] alu0_Ra;
+reg [63:0] alu0_argA;
+reg [63:0] alu0_argB;
+reg [63:0] alu0_argC;
+reg [63:0] alu0_argT;
+reg [63:0] alu0_argI; // only used by BEQ
+reg [2:0] alu0_sz;
+reg [RBIT:0] alu0_tgt;
+reg [5:0] alu0_ven;
+reg alu0_thrd;
+reg [`ABITS] alu0_pc;
+reg [63:0] alu0_bus;
+wire [63:0] alu0b_bus;
+wire [63:0] alu0_out;
+wire [`QBITSP1] alu0_id;
+wire [`XBITS] alu0_exc;
+wire alu0_v;
+wire alu0_branchmiss;
+wire [`ABITS] alu0_misspc;
+
+reg [3:0] alu1_pred;
+reg alu1_cmt;
+wire alu1_abort;
+reg alu1_ld;
+reg alu1_dataready;
+wire alu1_done;
+wire alu1_idle;
+reg [`QBITSP1] alu1_sourceid;
+reg [47:0] alu1_instr;
+reg alu1_mem;
+reg alu1_load;
+reg alu1_store;
+reg alu1_push;
+reg alu1_shft;
+reg [RBIT:0] alu1_Ra;
+reg [63:0] alu1_argA;
+reg [63:0] alu1_argB;
+reg [63:0] alu1_argC;
+reg [63:0] alu1_argT;
+reg [63:0] alu1_argI; // only used by BEQ
+reg [2:0] alu1_sz;
+reg [RBIT:0] alu1_tgt;
+reg [5:0] alu1_ven;
+reg [`ABITS] alu1_pc;
+reg alu1_thrd;
+reg [63:0] alu1_bus;
+wire [63:0] alu1b_bus;
+wire [63:0] alu1_out;
+wire [`QBITSP1] alu1_id;
+wire [`XBITS] alu1_exc;
+wire alu1_v;
+wire alu1_branchmiss;
+wire [`ABITS] alu1_misspc;
+
+// FPU #1 interface signals.
+// As declared here fpu1_done is tied to 1, fpu1_out to zero and fpu1_exc to
+// 9'h000 by their net-declaration assignments; fpu1_dataready and fpu1_bus
+// are registers with initial values 1 and 0.
+// NOTE(review): a real FPU instance could not also drive these tied wires --
+// confirm how the FPU is hooked up elsewhere in the file.
+wire [`XBITS] fpu_exc;
+reg [3:0] fpu1_pred;
+reg fpu1_cmt;
+reg fpu1_ld;
+reg fpu1_dataready = 1'b1;
+wire fpu1_done = 1'b1;
+wire fpu1_idle;
+reg [`QBITSP1] fpu1_sourceid;
+reg [47:0] fpu1_instr;
+reg [63:0] fpu1_argA;
+reg [63:0] fpu1_argB;
+reg [63:0] fpu1_argC;
+reg [63:0] fpu1_argT;
+reg [63:0] fpu1_argI; // only used by BEQ
+reg [RBIT:0] fpu1_tgt;
+reg [`ABITS] fpu1_pc;
+wire [63:0] fpu1_out = 64'h0;
+reg [63:0] fpu1_bus = 64'h0;
+wire [`QBITSP1] fpu1_id;
+wire [`XBITS] fpu1_exc = 9'h000;
+wire fpu1_v;
+wire [31:0] fpu1_status;
+
+// FPU #2 interface signals: same set and same tie-offs as FPU #1.
+reg [3:0] fpu2_pred;
+reg fpu2_cmt;
+reg fpu2_ld;
+reg fpu2_dataready = 1'b1;
+wire fpu2_done = 1'b1;
+wire fpu2_idle;
+reg [`QBITSP1] fpu2_sourceid;
+reg [47:0] fpu2_instr;
+reg [63:0] fpu2_argA;
+reg [63:0] fpu2_argB;
+reg [63:0] fpu2_argC;
+reg [63:0] fpu2_argT;
+reg [63:0] fpu2_argI; // only used by BEQ
+reg [RBIT:0] fpu2_tgt;
+reg [`ABITS] fpu2_pc;
+wire [63:0] fpu2_out = 64'h0;
+reg [63:0] fpu2_bus = 64'h0;
+wire [`QBITSP1] fpu2_id;
+wire [`XBITS] fpu2_exc = 9'h000;
+wire fpu2_v;
+wire [31:0] fpu2_status;
+
+// Flow-control unit (FCU) interface signals. fcu_idle is initialised to 1.
+// NOTE(review): the one-bit fcu_branch/call/ret/jal/brk/rti registers look
+// like decoded instruction-class flags -- confirm where they are loaded.
+reg [7:0] fccnt;
+reg [47:0] waitctr;
+reg [3:0] fcu_pred;
+reg fcu_cmt;
+reg fcu_ld;
+reg fcu_dataready;
+reg fcu_done;
+reg fcu_idle = 1'b1;
+reg [`QBITSP1] fcu_sourceid;
+reg [47:0] fcu_instr;
+reg [47:0] fcu_prevInstr;
+reg [2:0] fcu_insln;
+reg fcu_pt; // predict taken
+reg fcu_branch;
+reg fcu_call;
+reg fcu_ret;
+reg fcu_jal;
+reg fcu_brk;
+reg fcu_rti;
+reg [63:0] fcu_argA;
+reg [63:0] fcu_argB;
+reg [63:0] fcu_argC;
+reg [63:0] fcu_argI; // only used by BEQ
+reg [63:0] fcu_argT;
+reg [63:0] fcu_argT2;
+reg [`ABITS] fcu_pc;
+reg [`ABITS] fcu_nextpc;
+reg [`ABITS] fcu_brdisp;
+wire [63:0] fcu_out;
+reg [63:0] fcu_bus;
+wire [`QBITSP1] fcu_id;
+reg [`XBITS] fcu_exc;
+wire fcu_v;
+reg fcu_thrd;
+reg fcu_branchmiss;
+reg fcu_clearbm;
+reg [`ABITS] fcu_misspc;
+
+// Operands, result and instruction for read-modify-write operations.
+// NOTE(review): presumably these feed the FT64_AMO_alu -- confirm at the
+// instantiation site.
+reg [63:0] rmw_argA;
+reg [63:0] rmw_argB;
+reg [63:0] rmw_argC;
+wire [63:0] rmw_res;
+reg [47:0] rmw_instr;
+
+// write buffer
+// `WB_DEPTH entries; each entry has data, address, a 2-bit wb_ol field, a
+// QENTRIES-wide wb_id vector and an 8-bit select. wb_v and wb_rmw hold one
+// bit per entry.
+reg [63:0] wb_data [0:`WB_DEPTH-1];
+reg [`ABITS] wb_addr [0:`WB_DEPTH-1];
+reg [1:0] wb_ol [0:`WB_DEPTH-1];
+reg [`WB_DEPTH-1:0] wb_v;
+reg [`WB_DEPTH-1:0] wb_rmw;
+reg [QENTRIES-1:0] wb_id [0:`WB_DEPTH-1];
+reg [QENTRIES-1:0] wbo_id;
+reg [7:0] wb_sel [0:`WB_DEPTH-1];
+reg wb_en;
+reg wb_shift;
+
+// Branch-miss state; branchmiss and branchmiss_thrd start at 0.
+reg branchmiss = 1'b0;
+reg branchmiss_thrd = 1'b0;
+reg [`ABITS] misspc;
+reg [`QBITS] missid;
+
+// Branch-taken results (driven outside this section).
+wire take_branch;
+wire take_branchA;
+wire take_branchB;
+wire take_branchC;
+wire take_branchD;
+
+// Three memory request channels (dram0..dram2). Each channel has a 3-bit
+// state register plus an identical set of per-request registers.
+wire dram_avail;
+reg [2:0] dram0; // state of the DRAM request (latency = 4; can have three in pipeline)
+reg [2:0] dram1; // state of the DRAM request (latency = 4; can have three in pipeline)
+reg [2:0] dram2; // state of the DRAM request (latency = 4; can have three in pipeline)
+// Channel 0 request registers.
+reg [63:0] dram0_data;
+reg [`ABITS] dram0_addr;
+reg [47:0] dram0_instr;
+reg dram0_rmw;
+reg dram0_preload;
+reg [RBIT:0] dram0_tgt;
+reg [`QBITSP1] dram0_id;
+reg [`XBITS] dram0_exc;
+reg dram0_unc;
+reg [2:0] dram0_memsize;
+reg dram0_load; // is a load operation
+reg dram0_store;
+reg [1:0] dram0_ol;
+// Channel 1 request registers.
+reg [63:0] dram1_data;
+reg [`ABITS] dram1_addr;
+reg [47:0] dram1_instr;
+reg dram1_rmw;
+reg dram1_preload;
+reg [RBIT:0] dram1_tgt;
+reg [`QBITSP1] dram1_id;
+reg [`XBITS] dram1_exc;
+reg dram1_unc;
+reg [2:0] dram1_memsize;
+reg dram1_load;
+reg dram1_store;
+reg [1:0] dram1_ol;
+// Channel 2 request registers.
+reg [63:0] dram2_data;
+reg [`ABITS] dram2_addr;
+reg [47:0] dram2_instr;
+reg dram2_rmw;
+reg dram2_preload;
+reg [RBIT:0] dram2_tgt;
+reg [`QBITSP1] dram2_id;
+reg [`XBITS] dram2_exc;
+reg dram2_unc;
+reg [2:0] dram2_memsize;
+reg dram2_load;
+reg dram2_store;
+reg [1:0] dram2_ol;
+
+// Three result buses (A, B, C), each with valid flag, id, data and exception.
+// NOTE(review): presumably these carry completed memory results back to the
+// issue queue -- confirm against the logic that reads them.
+reg dramA_v;
+reg [`QBITSP1] dramA_id;
+reg [63:0] dramA_bus;
+reg [`XBITS] dramA_exc;
+reg dramB_v;
+reg [`QBITSP1] dramB_id;
+reg [63:0] dramB_bus;
+reg [`XBITS] dramB_exc;
+reg dramC_v;
+reg [`QBITSP1] dramC_id;
+reg [63:0] dramC_bus;
+reg [`XBITS] dramC_exc;
+
+wire outstanding_stores;
+reg [63:0] I; // instruction count
+reg [63:0] CC; // commit count
+
+// Three commit buses. commitN_v, commitN_tgt, commitN_we and commitN_bus are
+// what the register-file instances use as write enable, write address, byte
+// write enables and write data (buses 0 and 1 only). The byte enables start
+// at 8'h00.
+reg commit0_v;
+reg [`QBITSP1] commit0_id;
+reg [RBIT:0] commit0_tgt;
+reg [7:0] commit0_we = 8'h00;
+reg [63:0] commit0_bus;
+reg commit1_v;
+reg [`QBITSP1] commit1_id;
+reg [RBIT:0] commit1_tgt;
+reg [7:0] commit1_we = 8'h00;
+reg [63:0] commit1_bus;
+reg commit2_v;
+reg [`QBITSP1] commit2_id;
+reg [RBIT:0] commit2_tgt;
+reg [7:0] commit2_we = 8'h00;
+reg [63:0] commit2_bus;
+
+// Bus state machine register and its state encodings (5-bit, values 0..26).
+// Note the encodings are not in name order: B2a/B2b/B2c are 20..22,
+// B_DCacheLoadAck is 23, and B20/B21 are 24/25.
+reg StoreAck1;
+reg [4:0] bstate;
+parameter BIDLE = 5'd0;
+parameter B_StoreAck = 5'd1;
+parameter B_DCacheLoadStart = 5'd2;
+parameter B_DCacheLoadStb = 5'd3;
+parameter B_DCacheLoadWait1 = 5'd4;
+parameter B_DCacheLoadWait2 = 5'd5;
+parameter B_DCacheLoadResetBusy = 5'd6;
+parameter B_ICacheAck = 5'd7;
+parameter B8 = 5'd8;
+parameter B_ICacheNack = 5'd9;
+parameter B_ICacheNack2 = 5'd10;
+parameter B11 = 5'd11;
+parameter B12 = 5'd12;
+parameter B_DLoadAck = 5'd13;
+parameter B14 = 5'd14;
+parameter B15 = 5'd15;
+parameter B16 = 5'd16;
+parameter B17 = 5'd17;
+parameter B18 = 5'd18;
+parameter B19 = 5'd19;
+parameter B2a = 5'd20;
+parameter B2b = 5'd21;
+parameter B2c = 5'd22;
+parameter B_DCacheLoadAck = 5'd23;
+parameter B20 = 5'd24;
+parameter B21 = 5'd25;
+parameter B_DCacheLoadWait3 = 5'd26;
+reg [1:0] bwhich;
+// Instruction-cache controller state register and its 4-bit state encodings.
+// IC3a is encoded as 11, after IC10.
+reg [3:0] icstate,picstate;
+parameter IDLE = 4'd0;
+parameter IC1 = 4'd1;
+parameter IC2 = 4'd2;
+parameter IC3 = 4'd3;
+parameter IC_WaitL2 = 4'd4;
+parameter IC5 = 4'd5;
+parameter IC6 = 4'd6;
+parameter IC7 = 4'd7;
+parameter IC_Next = 4'd8;
+parameter IC9 = 4'd9;
+parameter IC10 = 4'd10;
+parameter IC3a = 4'd11;
+reg invic, invdc;
+reg [1:0] icwhich;
+reg icnxt,L2_nxt;
+// ihit0..ihit2 are the hit outputs of the L1 instruction caches; ihit is
+// asserted only when all three report a hit.
+wire ihit0,ihit1,ihit2,ihitL2;
+wire ihit = ihit0&ihit1&ihit2;
+reg phit;
+wire threadx;
+// phit: all L1 caches hit while the cache controller is in the IDLE state.
+always @*
+ phit <= ihit&&icstate==IDLE;
+reg [2:0] iccnt;
+reg L1_wr0,L1_wr1,L1_wr2;
+reg L1_invline;
+wire [1:0] ic0_fault,ic1_fault,ic2_fault;
+reg [8:0] L1_en;
+// L1_adr is the L1 write address and alternate read address; L2_rdat is the
+// data written into the L1 caches and L2_dato is the L2 cache output.
+reg [71:0] L1_adr, L2_adr;
+reg [297:0] L2_rdat;
+wire [297:0] L2_dato;
+reg L2_xsel;
+
+// Register file instantiation, selected by `WAYS:
+//   WAYS > 2 : two write ports, nine read ports (Ra/Rb/Rc for three slots)
+//   WAYS > 1 : two write ports, six read ports (Ra/Rb/Rc for two slots)
+//   else     : one write port, four read ports (Ra0/Rb0/Rc0/Rt0)
+// Writes come from the commit buses; reads are clocked on ~clk.
+// For each slot the Rc operand is taken from vm[Rc[2:0]] when Rc[11:6] is
+// 6'h3F, otherwise from the register file's Rc read port.
+// Each rfocN mux is assigned exactly once, inside the branch that is
+// elaborated, so that rfoc0 never has more than one continuous driver.
+generate begin : gRegfileInst
+if (`WAYS > 2) begin : gb1
+FT64_regfile2w9r_oc #(.RBIT(RBIT)) urf1
+(
+ .clk(clk),
+ .clk4x(clk4x),
+ .wr0(commit0_v),
+ .wr1(commit1_v),
+ .we0(commit0_we),
+ .we1(commit1_we),
+ .wa0(commit0_tgt),
+ .wa1(commit1_tgt),
+ .i0(commit0_bus),
+ .i1(commit1_bus),
+ .rclk(~clk),
+ .ra0(Ra0),
+ .ra1(Rb0),
+ .ra2(Rc0),
+ .o0(rfoa0),
+ .o1(rfob0),
+ .o2(rfoc0a),
+ .ra3(Ra1),
+ .ra4(Rb1),
+ .ra5(Rc1),
+ .o3(rfoa1),
+ .o4(rfob1),
+ .o5(rfoc1a),
+ .ra6(Ra2),
+ .ra7(Rb2),
+ .ra8(Rc2),
+ .o6(rfoa2),
+ .o7(rfob2),
+ .o8(rfoc2a)
+);
+assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
+assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a;
+assign rfoc2 = Rc2[11:6]==6'h3F ? vm[Rc2[2:0]] : rfoc2a;
+end
+else if (`WAYS > 1) begin : gb1
+FT64_regfile2w6r_oc #(.RBIT(RBIT)) urf1
+(
+ .clk(clk),
+ .clk4x(clk4x),
+ .wr0(commit0_v),
+ .wr1(commit1_v),
+ .we0(commit0_we),
+ .we1(commit1_we),
+ .wa0(commit0_tgt),
+ .wa1(commit1_tgt),
+ .i0(commit0_bus),
+ .i1(commit1_bus),
+ .rclk(~clk),
+ .ra0(Ra0),
+ .ra1(Rb0),
+ .ra2(Rc0),
+ .o0(rfoa0),
+ .o1(rfob0),
+ .o2(rfoc0a),
+ .ra3(Ra1),
+ .ra4(Rb1),
+ .ra5(Rc1),
+ .o3(rfoa1),
+ .o4(rfob1),
+ .o5(rfoc1a)
+);
+assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
+assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a;
+end
+else begin : gb1
+// Single-way variant: all byte lanes are always written (we0 = 8'hFF) and a
+// fourth read port returns the Rt0 register on rfot0.
+FT64_regfile1w4r_oc #(.RBIT(RBIT)) urf1
+(
+ .clk(clk),
+ .wr0(commit0_v),
+ .wa0(commit0_tgt),
+ .we0(8'hFF),
+ .i0(commit0_bus),
+ .rclk(~clk),
+ .ra0(Ra0),
+ .ra1(Rb0),
+ .ra2(Rc0),
+ .ra3(Rt0),
+ .o0(rfoa0),
+ .o1(rfob0),
+ .o2(rfoc0a),
+ .o3(rfot0)
+);
+// Previously this assign sat after the if/else chain, which gave rfoc0 a
+// second (identical) driver whenever `WAYS > 1.
+assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a;
+end
+end
+endgenerate
+
+// Returns the length code of instruction "ins".
+// The base value is 2 for a compressed instruction (only recognised when
+// SUPPORT_DCI is defined); otherwise ins[7:6] selects 4 (2'd0), 6 (2'd1) or
+// 2 (anything else). The module-level pred_on signal is OR'ed into the result.
+function [3:0] fnInsLength;
+input [47:0] ins;
+reg [3:0] nbytes;
+begin
+`ifdef SUPPORT_DCI
+ if (ins[`INSTRUCTION_OP]==`CMPRSSD)
+  nbytes = 4'd2;
+ else
+`endif
+  case(ins[7:6])
+  2'd0: nbytes = 4'd4;
+  2'd1: nbytes = 4'd6;
+  default: nbytes = 4'd2;
+  endcase
+ fnInsLength = nbytes | pred_on;
+end
+endfunction
+
+// Fetch addresses used by the second and third L1 instruction caches when
+// threading is disabled.
+// NOTE(review): the names say +6 / +12 but the constants added are 7 and 14.
+// fnInsLength returns (6 | pred_on), i.e. 7 if pred_on is 1 -- confirm which
+// offset is intended and rename or correct accordingly.
+wire [`ABITS] pc0plus6 = pc0 + 32'd7;
+wire [`ABITS] pc0plus12 = pc0 + 32'd14;
+
+// Extraction of the second and third instructions from the cache outputs.
+// With threading enabled each slot takes its own cache output unchanged.
+// Otherwise the cache outputs are concatenated and shifted right by the
+// length of the preceding instruction(s); fnInsLength's result is multiplied
+// by eight by appending 3'b0.
+generate begin : gInsnVar
+ if (`WAYS > 1) begin
+ always @*
+ if (thread_en)
+ insn1a <= insn1b;
+ else
+ insn1a <= {insn1b,insn0a} >> {fnInsLength(insn0a),3'b0};
+ end
+ if (`WAYS > 2) begin
+ always @*
+ if (thread_en)
+ insn2a <= insn2b;
+ else
+ // The sum of the two lengths is formed inside a concatenation, so it is
+ // evaluated at the 4-bit width of fnInsLength's result.
+ insn2a <= {insn2b,insn1b,insn0a} >> {fnInsLength(insn0a) + fnInsLength(insn1a),3'b0};
+ end
+end
+endgenerate
+
+// L1 instruction cache for fetch slot 0 (always present).
+// The read address is {pcr[7:0],pc0} while the cache controller is in IDLE or
+// IC_Next, and L1_adr in every other state. The write address is always
+// L1_adr and the write data is L2_rdat. wr_ack is left unconnected.
+FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0
+(
+ .rst(rst),
+ .clk(clk),
+ .nxt(icnxt),
+ .wr(L1_wr0),
+ .wr_ack(),
+ .en(L1_en),
+ .adr((icstate==IDLE||icstate==IC_Next) ? {pcr[7:0],pc0} : L1_adr),
+ .wadr(L1_adr),
+ .i(L2_rdat),
+ .o(insn0a),
+ .fault(ic0_fault),
+ .hit(ihit0),
+ .invall(invic),
+ .invline(L1_invline)
+);
+// Optional L1 instruction caches for fetch slots 1 and 2.
+// When a cache is not generated its hit signal is tied high so that the
+// combined ihit depends only on the caches that exist.
+// In IDLE / IC_Next the read address is the slot's own pc when thread_en is
+// set, otherwise pc0plus6 / pc0plus12; in other states it is L1_adr.
+generate begin : gICacheInst
+if (`WAYS > 1) begin
+FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic1
+(
+ .rst(rst),
+ .clk(clk),
+ .nxt(icnxt),
+ .wr(L1_wr1),
+ .wr_ack(),
+ .en(L1_en),
+ .adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc1}: {pcr[7:0],pc0plus6} ): L1_adr),
+ .wadr(L1_adr),
+ .i(L2_rdat),
+ .o(insn1b),
+ .fault(ic1_fault),
+ .hit(ihit1),
+ .invall(invic),
+ .invline(L1_invline)
+);
+end
+else begin
+assign ihit1 = 1'b1;
+end
+if (`WAYS > 2) begin
+FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic2
+(
+ .rst(rst),
+ .clk(clk),
+ .nxt(icnxt),
+ .wr(L1_wr2),
+ .wr_ack(),
+ .en(L1_en),
+ .adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? {pcr[7:0],pc2} : {pcr[7:0],pc0plus12}) : L1_adr),
+ .wadr(L1_adr),
+ .i(L2_rdat),
+ .o(insn2b),
+ .fault(ic2_fault),
+ .hit(ihit2),
+ .invall(invic),
+ .invline(L1_invline)
+);
+end
+else
+assign ihit2 = 1'b1;
+end
+endgenerate
+
+// L2 instruction cache. It is written from the external bus (dat_i) while
+// the bus state machine is in B_ICacheAck and either ack_i or err_i is set.
+// NOTE(review): this instance is named uic2, the same name used for the third
+// L1 cache inside the gICacheInst generate block; the two live in different
+// scopes, but the shared name is easy to misread.
+// NOTE(review): invline is left unconnected here -- confirm that is intended.
+FT64_L2_icache uic2
+(
+ .rst(rst),
+ .clk(clk),
+ .nxt(L2_nxt),
+ .wr(bstate==B_ICacheAck && (ack_i|err_i)),
+ .xsel(L2_xsel),
+ .adr(L2_adr),
+ .cnt(iccnt),
+ .exv_i(exvq),
+ .i(dat_i),
+ .err_i(errq),
+ .o(L2_dato),
+ .hit(ihitL2),
+ .invall(invic),
+ .invline()
+);
+
+// Branch prediction results. predict_takenA..F correspond to fetch buffers
+// A..F and are driven by the branch predictor (or by the displacement-sign
+// fallback) in the gBPInst generate block; the remaining wires are driven
+// outside this section.
+wire predict_taken;
+wire predict_taken0;
+wire predict_taken1;
+wire predict_taken2;
+wire predict_takenA;
+wire predict_takenB;
+wire predict_takenC;
+wire predict_takenD;
+wire predict_takenE;
+wire predict_takenF;
+wire predict_takenA1;
+wire predict_takenB1;
+wire predict_takenC1;
+wire predict_takenD1;
+
+// Predicted branch targets for fetch buffers A..F (see gBTBInst).
+wire [`ABITS] btgtA, btgtB, btgtC, btgtD, btgtE, btgtF;
+// Branch-target-buffer write strobes, one per commit slot. A strobe is
+// asserted when the queue entry at that head is valid, is in the IQS_CMT
+// state and is a flow-control instruction (iqentry_fc).
+wire btbwr0 = iqentry_v[heads[0]] && iqentry_state[heads[0]]==IQS_CMT &&
+ (iqentry_fc[heads[0]]);
+// btbwr1/btbwr2 are declared at module scope so that the BTB instances in the
+// gBTBInst generate block can see them. Declared inside the generate-if
+// blocks they were local to those blocks, and the BTB port connections
+// resolved to implicit, undriven nets. Slots that are not configured are
+// tied low.
+wire btbwr1, btbwr2;
+generate begin: gbtbvar
+if (`WAYS > 1) begin
+assign btbwr1 = iqentry_v[heads[1]] && iqentry_state[heads[1]]==IQS_CMT &&
+ (iqentry_fc[heads[1]]);
+end
+else begin
+assign btbwr1 = 1'b0;
+end
+if (`WAYS > 2) begin
+assign btbwr2 = iqentry_v[heads[2]] && iqentry_state[heads[2]]==IQS_CMT &&
+ (iqentry_fc[heads[2]]);
+end
+else begin
+assign btbwr2 = 1'b0;
+end
+end
+endgenerate
+
+// Clock for the branch target buffer and branch predictor. The gated-clock
+// buffer below is commented out, so fcu_clk is simply clk_i whether or not
+// FCU_ENH is defined.
+wire fcu_clk;
+`ifdef FCU_ENH
+//BUFGCE ufcuclk
+//(
+// .I(clk_i),
+// .CE(fcu_available),
+// .O(fcu_clk)
+//);
+`endif
+assign fcu_clk = clk_i;
+
+// Branch target buffer, instantiated only when FCU_ENH is defined.
+// One variant per `WAYS setting: the write ports (wr/wadr/wdat/valid) are fed
+// from the committing queue entries at heads[0..2], and the lookup ports take
+// the fetch-buffer program counters. Write and lookup ports belonging to
+// slots that are not configured are tied off. The "valid" input is the
+// branch-taken flag for branches (iqentry_br) and iqentry_bt otherwise,
+// qualified by the entry's valid bit.
+// Without FCU_ENH the targets are constants (see the comment in each branch).
+generate begin: gBTBInst
+if (`WAYS > 2) begin
+`ifdef FCU_ENH
+FT64_BTB #(.AMSB(AMSB)) ubtb1
+(
+ .rst(rst),
+ .wclk(fcu_clk),
+ .wr0(btbwr0),
+ .wadr0(iqentry_pc[heads[0]]),
+ .wdat0(iqentry_ma[heads[0]]),
+ .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
+ .wr1(btbwr1),
+ .wadr1(iqentry_pc[heads[1]]),
+ .wdat1(iqentry_ma[heads[1]]),
+ .valid1((iqentry_br[heads[1]] ? iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]),
+ .wr2(btbwr2),
+ .wadr2(iqentry_pc[heads[2]]),
+ .wdat2(iqentry_ma[heads[2]]),
+ .valid2((iqentry_br[heads[2]] ? iqentry_takb[heads[2]] : iqentry_bt[heads[2]]) & iqentry_v[heads[2]]),
+ .rclk(~clk),
+ .pcA(fetchbufA_pc),
+ .btgtA(btgtA),
+ .pcB(fetchbufB_pc),
+ .btgtB(btgtB),
+ .pcC(fetchbufC_pc),
+ .btgtC(btgtC),
+ .pcD(fetchbufD_pc),
+ .btgtD(btgtD),
+ .pcE(fetchbufE_pc),
+ .btgtE(btgtE),
+ .pcF(fetchbufF_pc),
+ .btgtF(btgtF),
+ .npcA(BRKPC),
+ .npcB(BRKPC),
+ .npcC(BRKPC),
+ .npcD(BRKPC),
+ .npcE(BRKPC),
+ .npcF(BRKPC)
+);
+`else
+// Branch targets are picked up by fetchbuf logic and need to be present.
+// Without a target predictor they are just set to the reset address.
+// This virtually guarantees a miss.
+assign btgtA = RSTPC;
+assign btgtB = RSTPC;
+assign btgtC = RSTPC;
+assign btgtD = RSTPC;
+assign btgtE = RSTPC;
+assign btgtF = RSTPC;
+`endif
+end
+else if (`WAYS > 1) begin
+`ifdef FCU_ENH
+FT64_BTB #(.AMSB(AMSB)) ubtb1
+(
+ .rst(rst),
+ .wclk(fcu_clk),
+ .wr0(btbwr0),
+ .wadr0(iqentry_pc[heads[0]]),
+ .wdat0(iqentry_ma[heads[0]]),
+ .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
+ .wr1(btbwr1),
+ .wadr1(iqentry_pc[heads[1]]),
+ .wdat1(iqentry_ma[heads[1]]),
+ .valid1((iqentry_br[heads[1]] ? iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]),
+ // The third write port is unused with two ways; tie it off (as the
+ // single-way variant does) rather than leaving the inputs floating.
+ .wr2(1'b0),
+ .wadr2(RSTPC),
+ .wdat2(RSTPC),
+ .valid2(1'b0),
+ .rclk(~clk),
+ .pcA(fetchbufA_pc),
+ .btgtA(btgtA),
+ .pcB(fetchbufB_pc),
+ .btgtB(btgtB),
+ .pcC(fetchbufC_pc),
+ .btgtC(btgtC),
+ .pcD(fetchbufD_pc),
+ .btgtD(btgtD),
+ .pcE(32'd0),
+ .btgtE(),
+ .pcF(32'd0),
+ .btgtF(),
+ .npcA(BRKPC),
+ .npcB(BRKPC),
+ .npcC(BRKPC),
+ .npcD(BRKPC),
+ .npcE(BRKPC),
+ .npcF(BRKPC)
+);
+`else
+// Branch targets are picked up by fetchbuf logic and need to be present.
+// Without a target predictor they are just set to the reset address.
+// This virtually guarantees a miss.
+assign btgtA = RSTPC;
+assign btgtB = RSTPC;
+assign btgtC = RSTPC;
+assign btgtD = RSTPC;
+`endif
+end
+else begin
+`ifdef FCU_ENH
+FT64_BTB #(.AMSB(AMSB)) ubtb1
+(
+ .rst(rst),
+ .wclk(fcu_clk),
+ .wr0(btbwr0),
+ .wadr0(iqentry_pc[heads[0]]),
+ .wdat0(iqentry_ma[heads[0]]),
+ .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]),
+ // Unused write ports are tied off. (These two .wrN lines previously ended
+ // in ';' instead of ',', which is a syntax error in a port list.)
+ .wr1(1'b0),
+ .wadr1(RSTPC),
+ .wdat1(RSTPC),
+ .valid1(1'b0),
+ .wr2(1'b0),
+ .wadr2(RSTPC),
+ .wdat2(RSTPC),
+ .valid2(1'b0),
+ .rclk(~clk),
+ .pcA(fetchbufA_pc),
+ .btgtA(btgtA),
+ .pcB(fetchbufB_pc),
+ .btgtB(btgtB),
+ .pcC(32'd0),
+ .btgtC(),
+ .pcD(32'd0),
+ .btgtD(),
+ .pcE(32'd0),
+ .btgtE(),
+ .pcF(32'd0),
+ .btgtF(),
+ .hitA(),
+ .hitB(),
+ .hitC(),
+ .hitD(),
+ .hitE(),
+ .hitF(),
+ .npcA(BRKPC),
+ .npcB(BRKPC),
+ .npcC(BRKPC),
+ .npcD(BRKPC),
+ .npcE(BRKPC),
+ .npcF(BRKPC)
+);
+`else
+// Branch targets are picked up by fetchbuf logic and need to be present.
+// Without a target predictor they are just set to the reset address.
+// This virtually guarantees a miss.
+assign btgtA = RSTPC;
+assign btgtB = RSTPC;
+`endif
+end
+end
+endgenerate
+
+// Branch predictor, one variant per `WAYS setting.
+// With FCU_ENH defined, FT64_BranchPredictor is trained from the committing
+// queue entries (xisBranchN / xpcN / takbN, qualified by commitN_v) and looks
+// up the fetch-buffer program counters. Lookup ports for fetch buffers that
+// do not exist in a configuration are fed 32'd0 and their outputs are left
+// unconnected.
+// Without FCU_ENH the prediction is a bit taken from the instruction itself:
+// bit 47 when instr[6] is set, otherwise bit 31.
+// NOTE(review): the two-way and one-way variants still connect the slot-1 /
+// slot-2 training inputs to heads[1], heads[2], commit1_v and commit2_v --
+// confirm those signals are held inactive when the slot is not configured.
+generate begin: gBPInst
+if (`WAYS > 2) begin
+`ifdef FCU_ENH
+FT64_BranchPredictor ubp1
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .en(bpe),
+ .xisBranch0(iqentry_br[heads[0]] & commit0_v),
+ .xisBranch1(iqentry_br[heads[1]] & commit1_v),
+ .xisBranch2(iqentry_br[heads[2]] & commit2_v),
+ .pcA(fetchbufA_pc),
+ .pcB(fetchbufB_pc),
+ .pcC(fetchbufC_pc),
+ .pcD(fetchbufD_pc),
+ .pcE(fetchbufE_pc),
+ .pcF(fetchbufF_pc),
+ .xpc0(iqentry_pc[heads[0]]),
+ .xpc1(iqentry_pc[heads[1]]),
+ .xpc2(iqentry_pc[heads[2]]),
+ .takb0(commit0_v & iqentry_takb[heads[0]]),
+ .takb1(commit1_v & iqentry_takb[heads[1]]),
+ .takb2(commit2_v & iqentry_takb[heads[2]]),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_takenC(predict_takenC),
+ .predict_takenD(predict_takenD),
+ .predict_takenE(predict_takenE),
+ .predict_takenF(predict_takenF)
+);
+`else
+// Predict based on sign of displacement
+assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
+assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
+assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31];
+assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31];
+assign predict_takenE = fetchbufE_instr[6] ? fetchbufE_instr[47] : fetchbufE_instr[31];
+assign predict_takenF = fetchbufF_instr[6] ? fetchbufF_instr[47] : fetchbufF_instr[31];
+`endif
+end
+else if (`WAYS > 1) begin
+`ifdef FCU_ENH
+FT64_BranchPredictor ubp1
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .en(bpe),
+ .xisBranch0(iqentry_br[heads[0]] & commit0_v),
+ .xisBranch1(iqentry_br[heads[1]] & commit1_v),
+ .xisBranch2(iqentry_br[heads[2]] & commit2_v),
+ .pcA(fetchbufA_pc),
+ .pcB(fetchbufB_pc),
+ .pcC(fetchbufC_pc),
+ .pcD(fetchbufD_pc),
+ .pcE(32'd0),
+ .pcF(32'd0),
+ .xpc0(iqentry_pc[heads[0]]),
+ .xpc1(iqentry_pc[heads[1]]),
+ .xpc2(iqentry_pc[heads[2]]),
+ .takb0(commit0_v & iqentry_takb[heads[0]]),
+ .takb1(commit1_v & iqentry_takb[heads[1]]),
+ .takb2(commit2_v & iqentry_takb[heads[2]]),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_takenC(predict_takenC),
+ .predict_takenD(predict_takenD),
+ .predict_takenE(),
+ .predict_takenF()
+);
+`else
+// Predict based on sign of displacement
+assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
+assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
+assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31];
+assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31];
+`endif
+end
+else begin
+`ifdef FCU_ENH
+FT64_BranchPredictor ubp1
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .en(bpe),
+ .xisBranch0(iqentry_br[heads[0]] & commit0_v),
+ .xisBranch1(iqentry_br[heads[1]] & commit1_v),
+ .xisBranch2(iqentry_br[heads[2]] & commit2_v),
+ .pcA(fetchbufA_pc),
+ .pcB(fetchbufB_pc),
+ .pcC(32'd0),
+ .pcD(32'd0),
+ .pcE(32'd0),
+ .pcF(32'd0),
+ .xpc0(iqentry_pc[heads[0]]),
+ .xpc1(iqentry_pc[heads[1]]),
+ .xpc2(iqentry_pc[heads[2]]),
+ .takb0(commit0_v & iqentry_takb[heads[0]]),
+ .takb1(commit1_v & iqentry_takb[heads[1]]),
+ .takb2(commit2_v & iqentry_takb[heads[2]]),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_takenC(),
+ .predict_takenD(),
+ .predict_takenE(),
+ .predict_takenF()
+);
+`else
+// Predict based on sign of displacement
+assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
+assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
+`endif
+end
+end
+endgenerate
+
+//-----------------------------------------------------------------------------
+// Debug
+//-----------------------------------------------------------------------------
+`ifdef SUPPORT_DBG
+
+// Debug registers: a control word, a status word and four match addresses
+// (dbg_adr0..3). The dbg_imatch flags are set by the combinational blocks
+// further down when a fetch-buffer pc equals one of the match addresses:
+// the A flags compare fetchbuf0_pc, the B flags compare fetchbuf1_pc.
+wire [DBW-1:0] dbg_stat1x;
+reg [DBW-1:0] dbg_stat;
+reg [DBW-1:0] dbg_ctrl;
+reg [ABW-1:0] dbg_adr0;
+reg [ABW-1:0] dbg_adr1;
+reg [ABW-1:0] dbg_adr2;
+reg [ABW-1:0] dbg_adr3;
+reg dbg_imatchA0,dbg_imatchA1,dbg_imatchA2,dbg_imatchA3,dbg_imatchA;
+reg dbg_imatchB0,dbg_imatchB1,dbg_imatchB2,dbg_imatchB3,dbg_imatchB;
+
+// Data-address matches. dbg_lmatchRC compares debug address register R
+// (dbg_adr0..3) against memory channel C (dram0..2_addr).
+// A match requires:
+//   - the register's enable bit dbg_ctrl[R],
+//   - its 2-bit type field (dbg_ctrl[17:16], [21:20], [25:24], [29:28])
+//     equal to 2'b11,
+//   - address bits [AMSB:3] equal, and
+//   - the low address bits equal as selected by the 2-bit size field
+//     (dbg_ctrl[19:18], [23:22], [27:26], [31:30]):
+//       2'b00 compare bits [2:0], 2'b01 compare bits [2:1],
+//       2'b10 compare bit [2],    2'b11 compare no low bits.
+wire dbg_lmatch00 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram0_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram0_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram0_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram0_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_lmatch01 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram1_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram1_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram1_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram1_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_lmatch02 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram2_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram2_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram2_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram2_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_lmatch10 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram0_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram0_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram0_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram0_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_lmatch11 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram1_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram1_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram1_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram1_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_lmatch12 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram2_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram2_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram2_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram2_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_lmatch20 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram0_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram0_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram0_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram0_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_lmatch21 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram1_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram1_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram1_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram1_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_lmatch22 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram2_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram2_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram2_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram2_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_lmatch30 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram0_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram0_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram0_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram0_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+wire dbg_lmatch31 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram1_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram1_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram1_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram1_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+wire dbg_lmatch32 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram2_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram2_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram2_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram2_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+// Per-channel summaries (any debug register matched channel 0 / 1 / 2) and
+// the overall summary across all registers and channels.
+wire dbg_lmatch0 = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30;
+wire dbg_lmatch1 = dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31;
+wire dbg_lmatch2 = dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32;
+wire dbg_lmatch = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30|
+ dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31|
+ dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32
+ ;
+
+// Second set of data-address matches. dbg_smatchRC compares debug address
+// register R against memory channel C using the enable bit, type field
+// (== 2'b11) and size field of dbg_ctrl, with the size field selecting how
+// many of the low three address bits take part in the comparison.
+// NOTE(review): every term here is identical to the corresponding dbg_lmatch
+// term, including the type-field value 2'b11. If these are meant to detect
+// stores specifically, the type value or a load/store qualifier is probably
+// missing -- confirm against the debug register specification.
+wire dbg_smatch00 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram0_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram0_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram0_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram0_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_smatch01 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram1_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram1_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram1_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram1_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_smatch02 =
+ dbg_ctrl[0] && dbg_ctrl[17:16]==2'b11 && dram2_addr[AMSB:3]==dbg_adr0[AMSB:3] &&
+ ((dbg_ctrl[19:18]==2'b00 && dram2_addr[2:0]==dbg_adr0[2:0]) ||
+ (dbg_ctrl[19:18]==2'b01 && dram2_addr[2:1]==dbg_adr0[2:1]) ||
+ (dbg_ctrl[19:18]==2'b10 && dram2_addr[2]==dbg_adr0[2]) ||
+ dbg_ctrl[19:18]==2'b11)
+ ;
+wire dbg_smatch10 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram0_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram0_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram0_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram0_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_smatch11 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram1_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram1_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram1_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram1_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_smatch12 =
+ dbg_ctrl[1] && dbg_ctrl[21:20]==2'b11 && dram2_addr[AMSB:3]==dbg_adr1[AMSB:3] &&
+ ((dbg_ctrl[23:22]==2'b00 && dram2_addr[2:0]==dbg_adr1[2:0]) ||
+ (dbg_ctrl[23:22]==2'b01 && dram2_addr[2:1]==dbg_adr1[2:1]) ||
+ (dbg_ctrl[23:22]==2'b10 && dram2_addr[2]==dbg_adr1[2]) ||
+ dbg_ctrl[23:22]==2'b11)
+ ;
+wire dbg_smatch20 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram0_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram0_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram0_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram0_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_smatch21 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram1_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram1_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram1_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram1_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_smatch22 =
+ dbg_ctrl[2] && dbg_ctrl[25:24]==2'b11 && dram2_addr[AMSB:3]==dbg_adr2[AMSB:3] &&
+ ((dbg_ctrl[27:26]==2'b00 && dram2_addr[2:0]==dbg_adr2[2:0]) ||
+ (dbg_ctrl[27:26]==2'b01 && dram2_addr[2:1]==dbg_adr2[2:1]) ||
+ (dbg_ctrl[27:26]==2'b10 && dram2_addr[2]==dbg_adr2[2]) ||
+ dbg_ctrl[27:26]==2'b11)
+ ;
+wire dbg_smatch30 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram0_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram0_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram0_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram0_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+wire dbg_smatch31 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram1_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram1_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram1_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram1_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+wire dbg_smatch32 =
+ dbg_ctrl[3] && dbg_ctrl[29:28]==2'b11 && dram2_addr[AMSB:3]==dbg_adr3[AMSB:3] &&
+ ((dbg_ctrl[31:30]==2'b00 && dram2_addr[2:0]==dbg_adr3[2:0]) ||
+ (dbg_ctrl[31:30]==2'b01 && dram2_addr[2:1]==dbg_adr3[2:1]) ||
+ (dbg_ctrl[31:30]==2'b10 && dram2_addr[2]==dbg_adr3[2]) ||
+ dbg_ctrl[31:30]==2'b11)
+ ;
+// Per-channel summaries (any debug register matched channel 0 / 1 / 2).
+wire dbg_smatch0 = dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30;
+wire dbg_smatch1 = dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31;
+wire dbg_smatch2 = dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32;
+
+// Overall summary across all registers and channels.
+wire dbg_smatch = dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30|
+ dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31|
+ dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32
+ ;
+
+// dbg_statN is set when debug address register N matched anything: an
+// instruction fetch in either fetch buffer, or either kind of data-address
+// match on any of the three memory channels.
+wire dbg_stat0 = dbg_imatchA0 | dbg_imatchB0 | dbg_lmatch00 | dbg_lmatch01 | dbg_lmatch02 | dbg_smatch00 | dbg_smatch01 | dbg_smatch02;
+wire dbg_stat1 = dbg_imatchA1 | dbg_imatchB1 | dbg_lmatch10 | dbg_lmatch11 | dbg_lmatch12 | dbg_smatch10 | dbg_smatch11 | dbg_smatch12;
+wire dbg_stat2 = dbg_imatchA2 | dbg_imatchB2 | dbg_lmatch20 | dbg_lmatch21 | dbg_lmatch22 | dbg_smatch20 | dbg_smatch21 | dbg_smatch22;
+wire dbg_stat3 = dbg_imatchA3 | dbg_imatchB3 | dbg_lmatch30 | dbg_lmatch31 | dbg_lmatch32 | dbg_smatch30 | dbg_smatch31 | dbg_smatch32;
+// The four status bits are packed with register 0 in bit 0; the value is
+// zero-extended to the DBW width of dbg_stat1x.
+assign dbg_stat1x = {dbg_stat3,dbg_stat2,dbg_stat1,dbg_stat0};
+// debug_on: any of dbg_ctrl[3:0], dbg_ctrl[7] or dbg_ctrl[63] is set.
+wire debug_on = |dbg_ctrl[3:0]|dbg_ctrl[7]|dbg_ctrl[63];
+
+// Instruction-address match for fetch buffer 0.
+// dbg_imatchAn is set when debug address register n is enabled (dbg_ctrl[n]),
+// its type field is 2'b00 and fetchbuf0_pc equals dbg_adrN. dbg_imatchA is
+// the OR of the four.
+// Every flag is given a default of 0 first: without the defaults this
+// combinational block inferred latches that could be set but never cleared.
+always @*
+begin
+ dbg_imatchA0 = 1'b0;
+ dbg_imatchA1 = 1'b0;
+ dbg_imatchA2 = 1'b0;
+ dbg_imatchA3 = 1'b0;
+ dbg_imatchA = 1'b0;
+ if (dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf0_pc==dbg_adr0)
+ dbg_imatchA0 = `TRUE;
+ if (dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf0_pc==dbg_adr1)
+ dbg_imatchA1 = `TRUE;
+ if (dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf0_pc==dbg_adr2)
+ dbg_imatchA2 = `TRUE;
+ if (dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf0_pc==dbg_adr3)
+ dbg_imatchA3 = `TRUE;
+ if (dbg_imatchA0|dbg_imatchA1|dbg_imatchA2|dbg_imatchA3)
+ dbg_imatchA = `TRUE;
+end
+
+// Instruction-address match for fetch buffer 1.
+// dbg_imatchBn is set when debug address register n is enabled (dbg_ctrl[n]),
+// its type field is 2'b00 and fetchbuf1_pc equals dbg_adrN. dbg_imatchB is
+// the OR of the four.
+// Every flag is given a default of 0 first: without the defaults this
+// combinational block inferred latches that could be set but never cleared.
+always @*
+begin
+ dbg_imatchB0 = 1'b0;
+ dbg_imatchB1 = 1'b0;
+ dbg_imatchB2 = 1'b0;
+ dbg_imatchB3 = 1'b0;
+ dbg_imatchB = 1'b0;
+ if (dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf1_pc==dbg_adr0)
+ dbg_imatchB0 = `TRUE;
+ if (dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf1_pc==dbg_adr1)
+ dbg_imatchB1 = `TRUE;
+ if (dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf1_pc==dbg_adr2)
+ dbg_imatchB2 = `TRUE;
+ if (dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf1_pc==dbg_adr3)
+ dbg_imatchB3 = `TRUE;
+ if (dbg_imatchB0|dbg_imatchB1|dbg_imatchB2|dbg_imatchB3)
+ dbg_imatchB = `TRUE;
+end
+`endif
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+// freezePC squashes the pc increment if there's an irq.
+// If there is a segment prefix present then defer the freezing of the pc.
+// If a hardware interrupt instruction is encountered in the instruction stream
+// flag it as a privilege violation.
+// NOTE(review): as coded, freezePC is simply (irq_i > im) && !int_commit; no
+// segment-prefix deferral appears in this expression, and the privilege
+// check is commented out below -- the comment above may be out of date.
+wire freezePC = (irq_i > im) && !int_commit;
+// Slot-0 instruction select:
+//   freezePC : a BRK instruction is synthesised carrying irq_i and vec_i
+//   phit     : the instruction from the cache (insn0a)
+//   otherwise: a NOP
+always @*
+if (freezePC) begin
+ insn0 <= {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
+end
+else if (phit) begin
+// if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[25:21]==5'd0 && insn0a[`INSTRUCTION_L2]==2'b00)
+// insn0 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
+// else
+ insn0 <= insn0a;
+end
+else begin
+ insn0 <= {8'h00,`NOP_INSN};
+end
+// Instruction select for slots 1 and 2 (generated only when configured).
+// Same priority as the slot-0 mux, except that the interrupt BRK is only
+// injected when threading is disabled:
+//   freezePC && !thread_en : synthesised BRK carrying irq_i and vec_i
+//   phit                   : the instruction extracted from the cache
+//   otherwise              : a NOP
+// The slot-2 encodings carry the same explicit 8'h00 upper pad as slots 0 and
+// 1 so that all three slots are built identically (the value is unchanged;
+// the pad was previously supplied by implicit zero extension).
+generate begin : gInsnMux
+if (`WAYS > 1) begin
+always @*
+if (freezePC && !thread_en) begin
+ insn1 <= {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
+end
+else if (phit) begin
+// if (insn1a[`INSTRUCTION_OP]==`BRK && insn1a[25:21]==5'd0 && insn1a[`INSTRUCTION_L2]==2'b00)
+// insn1 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
+// else
+ insn1 <= insn1a;
+end
+else begin
+ insn1 <= {8'h00,`NOP_INSN};
+end
+end
+if (`WAYS > 2) begin
+always @*
+if (freezePC && !thread_en) begin
+ insn2 <= {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
+end
+else if (phit) begin
+// if (insn2a[`INSTRUCTION_OP]==`BRK && insn2a[25:21]==5'd0 && insn2a[`INSTRUCTION_L2]==2'b00)
+// insn2 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
+// else
+ insn2 <= insn2a;
+end
+else begin
+ insn2 <= {8'h00,`NOP_INSN};
+end
+end
+end
+endgenerate
+
+// Load data for each memory channel: uncached accesses (dramN_unc) take the
+// data from xdati, cached accesses take the data cache output.
+wire [63:0] dc0_out, dc1_out, dc2_out;
+assign rdat0 = dram0_unc ? xdati : dc0_out;
+assign rdat1 = dram1_unc ? xdati : dc1_out;
+assign rdat2 = dram2_unc ? xdati : dc2_out;
+
+reg preload;
+reg [1:0] dccnt;
+// dhitN is the qualified read hit for channel N; dhitNa is the raw hit output
+// of data cache N.
+wire dhit0, dhit1, dhit2;
+wire dhit0a, dhit1a, dhit2a;
+wire dhit00, dhit10, dhit20;
+wire dhit01, dhit11, dhit21;
+reg [`ABITS] dc_wadr;
+reg [63:0] dc_wdat;
+reg isStore;
+
+// If the data is in the write buffer, give the buffer a chance to
+// write out the data before trying to load from the cache.
+// wb_hitN is set when any valid write-buffer entry has the same address bits
+// [AMSB:3] as channel N's request address. Channels 1 and 2 are only checked
+// when `NUM_MEM is large enough.
+reg wb_hit0, wb_hit1, wb_hit2;
+always @*
+begin
+ wb_hit0 <= FALSE;
+ wb_hit1 <= FALSE;
+ wb_hit2 <= FALSE;
+ for (n = 0; n < `WB_DEPTH; n = n + 1) begin
+ if (wb_v[n] && wb_addr[n][AMSB:3]==dram0_addr[AMSB:3])
+ wb_hit0 <= TRUE;
+ if (`NUM_MEM > 1 && wb_v[n] && wb_addr[n][AMSB:3]==dram1_addr[AMSB:3])
+ wb_hit1 <= TRUE;
+ if (`NUM_MEM > 2 && wb_v[n] && wb_addr[n][AMSB:3]==dram2_addr[AMSB:3])
+ wb_hit2 <= TRUE;
+ end
+end
+
+// A cache read hit only counts when the address is not pending in the write
+// buffer.
+// NOTE(review): dhit1a / dhit2a are driven only by data caches that exist
+// when `NUM_MEM > 1 / > 2; nothing in this section ties them off otherwise.
+assign dhit0 = dhit0a && !wb_hit0;
+assign dhit1 = dhit1a && !wb_hit1;
+assign dhit2 = dhit2a && !wb_hit2;
+wire whit0, whit1, whit2;
+
+// Data cache write enables. Cache N is written either
+//   - during a cache line load, when bstate is B_DCacheLoadAck and ack_i is
+//     set (all caches are written together), or
+//   - on a store (bstate B_StoreAck with StoreAck1, or bstate B19 with
+//     isStore) that hits in cache N (whitN).
+wire wr_dcache0 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit0);
+wire wr_dcache1 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit1);
+wire wr_dcache2 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit2);
+
+// Data cache for memory channel 0 (always present).
+// Write side: address {pcr[7:0],vadr}, byte selects sel_o, data from the bus
+// input (dat_i) during a line load and from the bus output (dat_o) otherwise.
+// Read side: address {pcr[7:0],dram0_addr} with access size dram0_memsize.
+FT64_dcache udc0
+(
+ .rst(rst),
+ .wclk(clk),
+ .dce(dce),
+ .wr(wr_dcache0),
+ .sel(sel_o),
+ .wadr({pcr[7:0],vadr}),
+ .whit(whit0),
+ .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
+ .rclk(clk),
+ .rdsize(dram0_memsize),
+ .radr({pcr[7:0],dram0_addr}),
+ .o(dc0_out),
+ .rhit(dhit0a)
+);
+// Optional data caches for memory channels 1 and 2, generated when `NUM_MEM
+// is greater than 1 / 2. Each is connected like the channel-0 cache: the
+// write address, write data and byte selects are shared, and only the write
+// enable, write hit and read-side signals are per channel.
+generate begin : gDCacheInst
+if (`NUM_MEM > 1) begin
+FT64_dcache udc1
+(
+ .rst(rst),
+ .wclk(clk),
+ .dce(dce),
+ .wr(wr_dcache1),
+ .sel(sel_o),
+ .wadr({pcr[7:0],vadr}),
+ .whit(whit1),
+ .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
+ .rclk(clk),
+ .rdsize(dram1_memsize),
+ .radr({pcr[7:0],dram1_addr}),
+ .o(dc1_out),
+ .rhit(dhit1a)
+);
+end
+if (`NUM_MEM > 2) begin
+FT64_dcache udc2
+(
+ .rst(rst),
+ .wclk(clk),
+ .dce(dce),
+ .wr(wr_dcache2),
+ .sel(sel_o),
+ .wadr({pcr[7:0],vadr}),
+ .whit(whit2),
+ .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
+ .rclk(clk),
+ .rdsize(dram2_memsize),
+ .radr({pcr[7:0],dram2_addr}),
+ .o(dc2_out),
+ .rhit(dhit2a)
+);
+end
+end
+endgenerate
+
+`ifdef SUPPORT_SMT
+// Builds the register specifier for source operand A (SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   vli  - limit compared against the element index for VSHLV / VSHRV
+//          (presumably the vector length - confirm)
+//   thrd - thread number; selects the per-thread rgs / fp_rgs / rs_stack entry
+// The result is a concatenation of an upper field (register set or element
+// index), a flag bit that is 1'b1 for the vector forms and 1'b0 for the
+// scalar forms, and the register number field of the instruction.
+function [RBIT:0] fnRa;
+input [47:0] isn;
+input [5:0] vqei;
+input [5:0] vli;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`IVECTOR:
+ case(isn[`INSTRUCTION_S2])
+ `VCIDX,`VSCAN: fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
+ `VMxx:
+ case(isn[25:23])
+ `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST:
+ fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
+ `VMFILL:fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
+ default:fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
+ endcase
+ // Shifted element selection; zero is returned once the index runs past vli.
+ `VSHLV: fnRa = (vqei+1+isn[15:11] >= vli) ? 11'h000 : {vli-vqei-isn[15:11]-1,1'b1,isn[`INSTRUCTION_RA]};
+ `VSHRV: fnRa = (vqei+isn[15:11] >= vli) ? 11'h000 : {vqei+isn[15:11],1'b1,isn[`INSTRUCTION_RA]};
+ `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
+ default: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
+ endcase
+`R2: casez(isn[`INSTRUCTION_S2])
+ `MOV:
+ case(isn[25:23])
+ 3'd0: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd1: fnRa = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd2: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd3: fnRa = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd4: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd5: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ 3'd6: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ default:fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ endcase
+ `VMOV:
+ case (isn[`INSTRUCTION_S1])
+ 5'h0: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ 5'h1: fnRa = {6'h3F,1'b1,isn[`INSTRUCTION_RA]};
+ // Without a default the function result was left unassigned for any
+ // other sub-code. Fall back to the current general register set, as the
+ // non-SMT version of this function does.
+ default: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ endcase
+ default: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+ endcase
+`FLOAT: fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+default: fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
+endcase
+endfunction
+
+// Builds the register specifier for source operand B (SMT build).
+//   isn     - instruction word
+//   fb      - picks rfoa1i (1) or rfoa0i (0) as the upper field for VEX
+//   vqei    - element index used for the vector forms
+//   rfoa0i, rfoa1i - candidate upper fields for VEX
+//   thrd    - thread number; selects the rgs / fp_rgs entry
+// Result layout: {upper field, vector flag, register number}.
+function [RBIT:0] fnRb;
+input [47:0] isn;
+input fb;
+input [5:0] vqei;
+input [5:0] rfoa0i;
+input [5:0] rfoa1i;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`R2:
+  case(isn[`INSTRUCTION_S2])
+  `VEX:
+    fnRb = {fb ? rfoa1i : rfoa0i, 1'b1, isn[`INSTRUCTION_RB]};
+  `LVX,`SVX:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  default:
+    fnRb = {rgs[thrd], 1'b0, isn[`INSTRUCTION_RB]};
+  endcase
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VMxx:
+    // Mask-register operations take a 3-bit register number from isn[20:18].
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
+      fnRb = {6'h3F, 1'b1, 2'b0, isn[20:18]};
+    default:
+      fnRb = 12'h000;
+    endcase
+  `VXCHG,
+  `VSxx,`VSxxU:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  `VSxxS,`VSxxSU:
+    fnRb = {vqei, 1'b0, isn[`INSTRUCTION_RB]};
+  // Vector-with-scalar operations read operand B from the general registers.
+  `VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS:
+    fnRb = {rgs[thrd], 1'b0, isn[`INSTRUCTION_RB]};
+  `VSHL,`VSHR,`VASR:
+    fnRb = ({isn[25],isn[22]}==2'b00)
+         ? {rgs[thrd], 1'b0, isn[`INSTRUCTION_RB]}
+         : {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  default:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  endcase
+`FLOAT:
+  fnRb = {fp_rgs[thrd], 1'b0, isn[`INSTRUCTION_RB]};
+default:
+  fnRb = {rgs[thrd], 1'b0, isn[`INSTRUCTION_RB]};
+endcase
+endfunction
+
+// Builds the register specifier for source operand C (SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   thrd - thread number; selects the rgs / fp_rgs entry
+function [RBIT:0] fnRc;
+input [47:0] isn;
+input [5:0] vqei;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`R2,
+`MEMNDX:  // SVX not implemented
+  fnRc = {rgs[thrd], 1'b0, isn[`INSTRUCTION_RC]};
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  // Vector set operations take a 3-bit register number from isn[25:23].
+  `VSxx,`VSxxS,`VSxxU,`VSxxSU:
+    fnRc = {6'h3F, 1'b1, 2'b0, isn[25:23]};
+  default:
+    fnRc = {vqei, 1'b1, isn[`INSTRUCTION_RC]};
+  endcase
+`FLOAT:
+  fnRc = {fp_rgs[thrd], 1'b0, isn[`INSTRUCTION_RC]};
+default:
+  fnRc = {rgs[thrd], 1'b0, isn[`INSTRUCTION_RC]};
+endcase
+endfunction
+
+// Builds the register specifier for the target register (SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   vli  - limit compared against the element index for VSHLV / VSHRV
+//          (presumably the vector length - confirm)
+//   thrd - thread number; selects the per-thread rgs / fp_rgs / rs_stack entry
+// A result of zero is returned for instructions listed here as having no
+// target (branches, stores, etc.). Otherwise the result is
+// {upper field, vector flag, register number}.
+function [RBIT:0] fnRt;
+input [47:0] isn;
+input [5:0] vqei;
+input [5:0] vli;
+input thrd;
+casez(isn[`INSTRUCTION_OP])
+`IVECTOR:
+ case(isn[`INSTRUCTION_S2])
+ `VMxx:
+  case(isn[25:23])
+  `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
+   fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
+  `VMPOP: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  default:
+   fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
+  endcase
+ `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
+ `VSHLV: fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]};
+ `VSHRV: fnRt = (vqei >= vli) ? 11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]};
+ `VEINS: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; // ToDo: add element # from Ra
+ `V2BITS: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+ // The macro name is case sensitive: this line previously referenced
+ // `INSTRUCTION_Rt, which is not the name used anywhere else in this file.
+ default: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};
+ endcase
+
+`R2:
+ if (isn[`INSTRUCTION_L2]==2'b01)
+  case(isn[47:42])
+  `CMOVEZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `CMOVNZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  default: fnRt = 12'd0;
+  endcase
+ else
+  casez(isn[`INSTRUCTION_S2])
+  `MOV:
+   // isn[25:23] chooses which register set the target lives in.
+   case(isn[25:23])
+   3'd0: fnRt = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RT]};
+   3'd1: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   3'd2: fnRt = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RT]};
+   3'd3: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   3'd4: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   3'd5: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   3'd6: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   default:fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   endcase
+  `VMOV:
+   case (isn[`INSTRUCTION_S1])
+   5'h0: fnRt = {6'h3F,1'b1,isn[`INSTRUCTION_RT]};
+   5'h1: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   default: fnRt = 12'h000;
+   endcase
+  `R1:
+   case(isn[22:18])
+   `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH:
+    fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   `MEMDB,`MEMSB,`SYNC:
+    fnRt = 12'd0;
+   default: fnRt = 12'd0;
+   endcase
+  `CMOVEZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `CMOVNZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `MUX: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `MIN: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `MAX: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `LVX: fnRt = {vqei,1'b1,isn[20:16]};
+  `SHIFTR: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `SHIFT31,`SHIFT63:
+   fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `SEI: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  `WAIT,`RTI,`CHK:
+   fnRt = 12'd0;
+  default: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+  endcase
+`MEMNDX:
+ begin
+  // isn[31] clear selects the load-side decode, set selects the store side.
+  if (!isn[31])
+   case({isn[31:28],isn[22:21]})
+   `LVX,
+   `CACHEX,
+   `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+   `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
+    fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   default: fnRt = 12'd0;
+   endcase
+  else
+   case({isn[31:28],isn[17:16]})
+   `PUSH: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+   `SBX,`SCX,`SHX,`SWX,`SWCX,`CACHEX:
+    fnRt = 12'd0;
+   default: fnRt = 12'd0;
+   endcase
+ end
+`FLOAT:
+ case(isn[31:26])
+ `FTX,`FCX,`FEX,`FDX,`FRM:
+  fnRt = 12'd0;
+ `FSYNC: fnRt = 12'd0;
+ default: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+ endcase
+`BRK: fnRt = 12'd0;
+`REX: fnRt = 12'd0;
+`CHK: fnRt = 12'd0;
+`EXEC: fnRt = 12'd0;
+`Bcc: fnRt = 12'd0;
+`BBc: fnRt = 12'd0;
+`NOP: fnRt = 12'd0;
+`BEQI: fnRt = 12'd0;
+`SB,`Sx,`SWC,`CACHE:
+ fnRt = 12'd0;
+`JMP: fnRt = 12'd0;
+`CALL: fnRt = {rgs[thrd],1'b0,5'd29}; // regLR
+`LV: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]};
+`AMO: fnRt = isn[31] ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]} : {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+`AUIPC,`LUI: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+default: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
+endcase
+endfunction
+`else
+// Builds the register specifier for source operand A (non-SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   vli  - limit compared against the element index for VSHLV / VSHRV
+//   thrd - unused in this build; kept so both builds share one signature
+// Result layout: {upper field, vector flag, register number}.
+function [RBIT:0] fnRa;
+input [47:0] isn;
+input [5:0] vqei;
+input [5:0] vli;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VCIDX,`VSCAN:
+    fnRa = {6'd0, 1'b1, isn[`INSTRUCTION_RA]};
+  `VMxx:
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST:
+      fnRa = {6'h3F, 1'b1, 2'b0, isn[10:8]};
+    `VMFILL:
+      fnRa = {6'd0, 1'b1, isn[`INSTRUCTION_RA]};
+    default:
+      fnRa = {6'h3F, 1'b1, 2'b0, isn[10:8]};
+    endcase
+  // Shifted element selection; zero is returned once the index reaches vli.
+  `VSHLV:
+    fnRa = (vqei+1+isn[15:11] >= vli)
+         ? 11'h000
+         : {vli-vqei-isn[15:11]-1, 1'b1, isn[`INSTRUCTION_RA]};
+  `VSHRV:
+    fnRa = (vqei+isn[15:11] >= vli)
+         ? 11'h000
+         : {vqei+isn[15:11], 1'b1, isn[`INSTRUCTION_RA]};
+  // The vector set operations (VSxx, VSxxU, VSxxS, VSxxSU) and everything
+  // else read the current element of the vector register.
+  default:
+    fnRa = {vqei, 1'b1, isn[`INSTRUCTION_RA]};
+  endcase
+`R2:
+  casez(isn[`INSTRUCTION_S2])
+  `MOV:
+    // isn[25:23] chooses the register set the source is read from.
+    case(isn[25:23])
+    3'd1:      fnRa = {isn[26], isn[22:18], 1'b0, isn[`INSTRUCTION_RA]};
+    3'd3:      fnRa = {rs_stack[5:0], 1'b0, isn[`INSTRUCTION_RA]};
+    3'd5,3'd6: fnRa = {fp_rgs, 1'b0, isn[`INSTRUCTION_RA]};
+    default:   fnRa = {rgs, 1'b0, isn[`INSTRUCTION_RA]};
+    endcase
+  `VMOV:
+    case (isn[`INSTRUCTION_S1])
+    5'h1:    fnRa = {6'h3F, 1'b1, isn[`INSTRUCTION_RA]};
+    default: fnRa = {rgs, 1'b0, isn[`INSTRUCTION_RA]};
+    endcase
+  default:
+    fnRa = {rgs, 1'b0, isn[`INSTRUCTION_RA]};
+  endcase
+`FLOAT:
+  fnRa = {fp_rgs, 1'b0, isn[`INSTRUCTION_RA]};
+default:
+  fnRa = {rgs, 1'b0, isn[`INSTRUCTION_RA]};
+endcase
+endfunction
+
+// Builds the register specifier for source operand B (non-SMT build).
+//   isn     - instruction word
+//   fb      - picks rfoa1i (1) or rfoa0i (0) as the upper field for VEX
+//   vqei    - element index used for the vector forms
+//   rfoa0i, rfoa1i - candidate upper fields for VEX
+//   thrd    - unused in this build; kept so both builds share one signature
+// Result layout: {upper field, vector flag, register number}.
+function [RBIT:0] fnRb;
+input [47:0] isn;
+input fb;
+input [5:0] vqei;
+input [5:0] rfoa0i;
+input [5:0] rfoa1i;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`RR:
+  case(isn[`INSTRUCTION_S2])
+  `VEX:
+    fnRb = {fb ? rfoa1i : rfoa0i, 1'b1, isn[`INSTRUCTION_RB]};
+  `LVX,`SVX:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  default:
+    fnRb = {rgs, 1'b0, isn[`INSTRUCTION_RB]};
+  endcase
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VMxx:
+    // Mask-register operations take a 3-bit register number from isn[20:18].
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
+      fnRb = {6'h3F, 1'b1, 2'b0, isn[20:18]};
+    default:
+      fnRb = 12'h000;
+    endcase
+  `VXCHG,
+  `VSxx,`VSxxU:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  `VSxxS,`VSxxSU:
+    fnRb = {vqei, 1'b0, isn[`INSTRUCTION_RB]};
+  // Vector-with-scalar operations read operand B from the general registers.
+  `VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS:
+    fnRb = {rgs, 1'b0, isn[`INSTRUCTION_RB]};
+  `VSHL,`VSHR,`VASR:
+    fnRb = ({isn[25],isn[22]}==2'b00)
+         ? {rgs, 1'b0, isn[`INSTRUCTION_RB]}
+         : {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  default:
+    fnRb = {vqei, 1'b1, isn[`INSTRUCTION_RB]};
+  endcase
+`FLOAT:
+  fnRb = {fp_rgs, 1'b0, isn[`INSTRUCTION_RB]};
+default:
+  fnRb = {rgs, 1'b0, isn[`INSTRUCTION_RB]};
+endcase
+endfunction
+
+// Builds the register specifier for source operand C (non-SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   thrd - unused in this build; kept so both builds share one signature
+function [RBIT:0] fnRc;
+input [47:0] isn;
+input [5:0] vqei;
+input thrd;
+case(isn[`INSTRUCTION_OP])
+`R2,
+`MEMNDX:  // SVX not implemented
+  fnRc = {rgs, 1'b0, isn[`INSTRUCTION_RC]};
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  // Vector set operations take a 3-bit register number from isn[25:23].
+  `VSxx,`VSxxS,`VSxxU,`VSxxSU:
+    fnRc = {6'h3F, 1'b1, 2'b0, isn[25:23]};
+  default:
+    fnRc = {vqei, 1'b1, isn[`INSTRUCTION_RC]};
+  endcase
+`FLOAT:
+  fnRc = {fp_rgs, 1'b0, isn[`INSTRUCTION_RC]};
+default:
+  fnRc = {rgs, 1'b0, isn[`INSTRUCTION_RC]};
+endcase
+endfunction
+
+// Builds the register specifier for the target register (non-SMT build).
+//   isn  - instruction word
+//   vqei - element index used for the vector forms
+//   vli  - limit compared against the element index for VSHLV / VSHRV
+//   thrd - unused in this build; kept so both builds share one signature
+// Zero is returned for the instructions listed here as having no target.
+// Otherwise the result is {upper field, vector flag, register number}.
+function [RBIT:0] fnRt;
+input [47:0] isn;
+input [5:0] vqei;
+input [5:0] vli;
+input thrd;
+casez(isn[`INSTRUCTION_OP])
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VMxx:
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
+      fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+    `VMPOP:
+      fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+    default:
+      fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+    endcase
+  `VSxx,`VSxxU,`VSxxS,`VSxxSU:
+    fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+  `VSHLV:
+    fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1, 1'b1, isn[`INSTRUCTION_RT]};
+  `VSHRV:
+    fnRt = (vqei >= vli) ? 11'h000 : {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  `VEINS:  // ToDo: add element # from Ra
+    fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  `V2BITS:
+    fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+  default:
+    fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  endcase
+
+`FVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VMxx:
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
+      fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+    // NOTE(review): this uses the RB field where the IVECTOR decode above
+    // uses RT - confirm that the difference is intended.
+    `VMPOP:
+      fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RB]};
+    default:
+      fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+    endcase
+  `VSxx,`VSxxU,`VSxxS,`VSxxSU:
+    fnRt = {6'h3F, 1'b1, 2'b0, isn[15:13]};
+  `VSHLV:
+    fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1, 1'b1, isn[`INSTRUCTION_RT]};
+  `VSHRV:
+    fnRt = (vqei >= vli) ? 11'h000 : {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  `VEINS:  // ToDo: add element # from Ra
+    fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  `V2BITS:
+    fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+  default:
+    fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+  endcase
+
+`R2:
+  if (isn[`INSTRUCTION_L2]==2'b01)
+    case(isn[47:42])
+    `CMOVEZ,`CMOVNZ: fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+    default:         fnRt = 12'd0;
+    endcase
+  else
+    casez(isn[`INSTRUCTION_S2])
+    `MOV:
+      // isn[25:23] chooses the register set the target lives in.
+      case(isn[25:23])
+      3'd0:      fnRt = {isn[26], isn[22:18], 1'b0, isn[`INSTRUCTION_RT]};
+      3'd2:      fnRt = {rs_stack[5:0], 1'b0, isn[`INSTRUCTION_RT]};
+      3'd4,3'd6: fnRt = {fp_rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      default:   fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      endcase
+    `VMOV:
+      case (isn[`INSTRUCTION_S1])
+      5'h0:    fnRt = {6'h3F, 1'b1, isn[`INSTRUCTION_RT]};
+      5'h1:    fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      default: fnRt = 12'h000;
+      endcase
+    `R1:
+      case(isn[22:18])
+      `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH:
+        fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      // MEMDB, MEMSB, SYNC and any other sub-code have no target.
+      default:
+        fnRt = 12'd0;
+      endcase
+    `MUX,`MIN,`MAX:
+      fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+    `LVX:
+      fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+    `SHIFTR,
+    `SHIFT31,`SHIFT63,
+    `SEI:
+      fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+    `WAIT,`RTI,`CHK:
+      fnRt = 12'd0;
+    default:
+      fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+    endcase
+`MEMNDX:
+  begin
+    // isn[31] clear selects the load-side decode, set selects the store side.
+    if (!isn[31])
+      case({isn[31:28],isn[22:21]})
+      `LVX,
+      `CACHEX,
+      `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+      `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
+        fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      default:
+        fnRt = 12'd0;
+      endcase
+    else
+      case({isn[31:28],isn[17:16]})
+      `PUSH:
+        fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+      // SBX, SCX, SHX, SWX, SWCX, CACHEX and the rest have no target.
+      default:
+        fnRt = 12'd0;
+      endcase
+  end
+`FLOAT:
+  case(isn[31:26])
+  `FTX,`FCX,`FEX,`FDX,`FRM,
+  `FSYNC:
+    fnRt = 12'd0;
+  default:
+    fnRt = {fp_rgs, 1'b0, isn[`INSTRUCTION_RT]};
+  endcase
+// Instructions without a register target.
+`BRK,`REX,`CHK,`EXEC,
+`Bcc,`BBc,`NOP,`BEQI,
+`SB,`Sx,`SWC,`CACHE,
+`JMP:
+  fnRt = 12'd0;
+`CALL:  // regLR
+  fnRt = {rgs, 1'b0, 5'd29};
+`LV:
+  fnRt = {vqei, 1'b1, isn[`INSTRUCTION_RT]};
+// AMO selects the same target for either value of isn[31].
+`AMO,
+`AUIPC,`LUI:
+  fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+default:
+  fnRt = {rgs, 1'b0, isn[`INSTRUCTION_RT]};
+endcase
+endfunction
+`endif
+
+// Determines which lanes of the target register get updated.
+// Compare instructions (CMP and CMPI) update no lanes (8'h00); every other
+// instruction updates all of them (8'hFF).
+// Returns the lane write mask for the target register of instruction isn.
+function [7:0] fnWe;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`R2:
+  // The register-form compare writes nothing; other R2 operations write
+  // every lane.
+  if (isn[`INSTRUCTION_S2]==`CMP)
+    fnWe = 8'h00;
+  else
+    fnWe = 8'hFF;
+`CMPI:
+  fnWe = 8'h00;
+default:
+  fnWe = 8'hFF;
+endcase
+// Disabled: per-operand-size lane decode.
+/*
+casez(isn[`INSTRUCTION_OP])
+`R2:
+ case(isn[`INSTRUCTION_S2])
+ `R1:
+ case(isn[22:18])
+ `ABS,`CNTLZ,`CNTLO,`CNTPOP:
+ case(isn[25:23])
+ 3'b000: fnWe = 8'h01;
+ 3'b001: fnWe = 8'h03;
+ 3'b010: fnWe = 8'h0F;
+ 3'b011: fnWe = 8'hFF;
+ default: fnWe = 8'hFF;
+ endcase
+ default: fnWe = 8'hFF;
+ endcase
+ `SHIFT31: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
+ `SHIFT63: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
+ `SLT,`SLTU,`SLE,`SLEU,
+ `ADD,`SUB,
+ `AND,`OR,`XOR,
+ `NAND,`NOR,`XNOR,
+ `DIV,`DIVU,`DIVSU,
+ `MOD,`MODU,`MODSU,
+ `MUL,`MULU,`MULSU,
+ `MULH,`MULUH,`MULSUH,
+ `FXMUL:
+ case(isn[25:23])
+ 3'b000: fnWe = 8'h01;
+ 3'b001: fnWe = 8'h03;
+ 3'b010: fnWe = 8'h0F;
+ 3'b011: fnWe = 8'hFF;
+ default: fnWe = 8'hFF;
+ endcase
+ default: fnWe = 8'hFF;
+ endcase
+default: fnWe = 8'hFF;
+endcase
+*/
+endfunction
+
+// Detect if a source is automatically valid
+// Returns TRUE when source operand 1 of isn needs no register read: either
+// the instruction has no such operand or the RA field names register 0.
+function Source1Valid;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`BRK:
+  Source1Valid = isn[16] ? isn[`INSTRUCTION_RA]==5'd0 : TRUE;
+// All of these read RA, so the operand is only valid up front when RA is
+// register 0. Every RR sub-operation falls in this group as well.
+`Bcc,`BBc,`BEQI,`CHK,
+`RR,
+`MEMNDX,
+`ADDI,`SLTI,`SLTUI,`SGTI,`SGTUI,
+`ANDI,`ORI,`XORI,`XNORI,
+`MULI,`MULUI,`MULFI,
+`DIVI,`DIVUI,
+`AMO,
+`LB,`LBU,`Lx,`LxU,`LWR,
+`LV,`LVx,`LVxU,
+`SB,`Sx,`SWC,`SV,
+`INC,`CAS,`CACHE,
+`JAL,`RET,
+`CSRRW:
+  Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
+`BITFIELD:
+  if (isn[47:44]==`BFINSI)
+    Source1Valid = TRUE;
+  else
+    Source1Valid = isn[`INSTRUCTION_RA]==5'd0 || isn[30]==1'b0;
+`IVECTOR:
+  Source1Valid = FALSE;
+default:
+  Source1Valid = TRUE;
+endcase
+endfunction
+
+// Returns TRUE when source operand 2 of isn needs no register read: either
+// the instruction has no such operand or the RB field names register 0.
+function Source2Valid;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`BRK:
+  Source2Valid = TRUE;
+`Bcc:
+  Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`BBc,`BEQI:
+  Source2Valid = TRUE;
+`CHK:
+  Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`R2:
+  casez(isn[`INSTRUCTION_S2])
+  `TLB,`R1,`MOV:
+    Source2Valid = TRUE;
+  // With isn[25] set the shift does not read RB.
+  `SHIFTR,`SHIFT31,`SHIFT63:
+    Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
+  `LVX,`SVX:
+    Source2Valid = FALSE;
+  default:
+    Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+  endcase
+`MEMNDX:
+  begin
+    // isn[31] clear selects the load-side decode, set selects the store side.
+    if (!isn[31])
+      case({isn[31:28],isn[22:21]})
+      `LVX:
+        Source2Valid = FALSE;
+      `CACHEX,
+      `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+      `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
+        Source2Valid = TRUE;
+      default:
+        Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+      endcase
+    else if ({isn[31:28],isn[17:16]}==`SVX)
+      Source2Valid = FALSE;
+    else
+      Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+  end
+// Immediate-operand and load forms: operand 2 is always valid.
+`ADDI,`SLTI,`SLTUI,`SGTI,`SGTUI,
+`ANDI,`ORI,`XORI,`XNORI,
+`MULUI,`MULFI,
+`LB,`LBU,`Lx,`LxU,`LWR,
+`LVx,`LVxU,
+`INC:
+  Source2Valid = TRUE;
+`SB,`Sx,`SWC,`CAS:
+  Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`JAL:
+  Source2Valid = TRUE;
+`RET:
+  Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VABS:
+    Source2Valid = TRUE;
+  `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
+    Source2Valid = FALSE;
+  `VADDS,`VSUBS,`VANDS,`VORS,`VXORS:
+    Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+  `VBITS2V:
+    Source2Valid = TRUE;
+  `V2BITS:
+    Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
+  `VSHL,`VSHR,`VASR:
+    Source2Valid = isn[22:21]==2'd2;
+  default:
+    Source2Valid = FALSE;
+  endcase
+`LV:
+  Source2Valid = TRUE;
+`SV:
+  Source2Valid = FALSE;
+`AMO:
+  Source2Valid = isn[31] || isn[`INSTRUCTION_RB]==5'd0;
+`BITFIELD:
+  Source2Valid = isn[`INSTRUCTION_RB]==5'd0 || isn[31]==1'b0;
+default:
+  Source2Valid = TRUE;
+endcase
+endfunction
+
+// Returns TRUE when source operand 3 of isn needs no register read: either
+// the instruction has no such operand or the RC field names register 0.
+function Source3Valid;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`IVECTOR:
+  // Every vector sub-operation (VEX included) reports operand 3 as valid.
+  Source3Valid = TRUE;
+`CHK:
+  Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
+`R2:
+  if (isn[`INSTRUCTION_L2]==2'b01)
+    case(isn[47:42])
+    `CMOVEZ,`CMOVNZ: Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
+    default:         Source3Valid = TRUE;
+    endcase
+  else if (isn[`INSTRUCTION_S2]==`MAJ)
+    Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
+  else
+    Source3Valid = TRUE;
+`MEMNDX:
+  // isn[31] clear selects the load-side decode, set selects the store side.
+  if (!isn[31])
+    case({isn[31:28],isn[22:21]})
+    `CACHEX,
+    `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+    `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
+      Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
+    default:
+      Source3Valid = TRUE;
+    endcase
+  else
+    case({isn[31:28],isn[17:16]})
+    `PUSH:
+      Source3Valid = TRUE;
+    `SBX,`SCX,`SHX,`SWX,`SWCX,`CASX:
+      Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
+    default:
+      Source3Valid = TRUE;
+    endcase
+`BITFIELD:
+  Source3Valid = isn[`INSTRUCTION_RC]==5'd0 || isn[32]==1'b0;
+default:
+  Source3Valid = TRUE;
+endcase
+endfunction
+
+// For predication logic
+// Returns TRUE when the target register of isn does not have to be read as
+// a source: either the instruction is listed here as never needing it or the
+// RT field names register 0.
+function SourceTValid;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+// Branches, BRK and every vector sub-operation (VEX included).
+`BRK,`Bcc,`BBc,`BEQI,
+`IVECTOR:
+  SourceTValid = TRUE;
+`CHK:
+  SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
+`R2:
+  if (isn[`INSTRUCTION_L2]==2'b01)
+    case(isn[47:42])
+    `CMOVEZ,`CMOVNZ: SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
+    default:         SourceTValid = TRUE;
+    endcase
+  else if (isn[`INSTRUCTION_S2]==`MAJ)
+    SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
+  else
+    SourceTValid = TRUE;
+`MEMNDX:
+  // Only the load-side decode (isn[31] clear) ever looks at RT.
+  if (!isn[31])
+    case({isn[31:28],isn[22:21]})
+    `CACHEX,
+    `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX,
+    `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX:
+      SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
+    default:
+      SourceTValid = TRUE;
+    endcase
+  else
+    SourceTValid = TRUE;
+`SB,`Sx,`SWC,`CAS:
+  SourceTValid = TRUE;
+`BITFIELD:
+  SourceTValid = isn[`INSTRUCTION_RT]==5'd0 || isn[32]==1'b0;
+default:
+  SourceTValid = isn[`INSTRUCTION_RT]==5'd0;
+endcase
+endfunction
+
+// Used to indicate to the queue logic that the instruction needs to be
+// recycled to the queue VL number of times.
+// Returns TRUE for instructions handled as vector operations: LV, SV, the
+// MEMNDX forms LVX / SVX, and the IVECTOR group except the mask logic/pop
+// operations, VEINS and VEX.
+function IsVector;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_S2]==`LVX || isn[`INSTRUCTION_S2]==`SVX)
+    IsVector = TRUE;
+  else
+    IsVector = FALSE;
+`IVECTOR:
+  case(isn[`INSTRUCTION_S2])
+  `VMxx:
+    case(isn[25:23])
+    `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
+      IsVector = FALSE;
+    default:
+      IsVector = TRUE;
+    endcase
+  `VEINS,`VEX:
+    IsVector = FALSE;
+  default:
+    IsVector = TRUE;
+  endcase
+`LV,`SV:
+  IsVector = TRUE;
+default:
+  IsVector = FALSE;
+endcase
+endfunction
+
+// TRUE when isn is the IVECTOR sub-operation VEINS.
+function IsVeins;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`IVECTOR)
+  IsVeins = isn[`INSTRUCTION_S2]==`VEINS;
+else
+  IsVeins = FALSE;
+endfunction
+
+// TRUE when isn is the IVECTOR sub-operation VEX.
+function IsVex;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`IVECTOR)
+  IsVex = isn[`INSTRUCTION_S2]==`VEX;
+else
+  IsVex = FALSE;
+endfunction
+
+// TRUE when isn is the IVECTOR sub-operation VCMPRSS or VCIDX.
+function IsVCmprss;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`IVECTOR)
+  IsVCmprss = isn[`INSTRUCTION_S2]==`VCMPRSS
+           || isn[`INSTRUCTION_S2]==`VCIDX;
+else
+  IsVCmprss = FALSE;
+endfunction
+
+// TRUE for the IVECTOR shifts VSHL / VSHR / VASR when {isn[25],isn[22]} is
+// 2'd2 (presumably the immediate-count form, going by the name - confirm).
+function IsVShifti;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`IVECTOR)
+  case(isn[`INSTRUCTION_S2])
+  `VSHL,`VSHR,`VASR:
+    IsVShifti = {isn[25],isn[22]}==2'd2;
+  default:
+    IsVShifti = FALSE;
+  endcase
+else
+  IsVShifti = FALSE;
+endfunction
+
+// TRUE for vector loads and stores: LV, SV, and the MEMNDX forms
+// LVX / LVWS (load side) and SVX / SVWS (store side).
+function IsVLS;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  // Loads and stores keep their sub-code in different instruction bits.
+  if (IsLoad(isn)) begin
+    if ({isn[31:28],isn[22:21]}==`LVX || {isn[31:28],isn[22:21]}==`LVWS)
+      IsVLS = TRUE;
+    else
+      IsVLS = FALSE;
+  end
+  else begin
+    if ({isn[31:28],isn[17:16]}==`SVX || {isn[31:28],isn[17:16]}==`SVWS)
+      IsVLS = TRUE;
+    else
+      IsVLS = FALSE;
+  end
+`LV,`SV:
+  IsVLS = TRUE;
+default:
+  IsVLS = FALSE;
+endcase
+endfunction
+
+// Returns isn[24:23] for RR instructions and 2'b00 for everything else.
+function [1:0] fnM2;
+input [31:0] isn;
+if (isn[`INSTRUCTION_OP]==`RR)
+  fnM2 = isn[24:23];
+else
+  fnM2 = 2'b00;
+endfunction
+
+// TRUE for CMPI and for the R2 form whose isn[31:26] is CMP; the R2 form is
+// only recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsCmp;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`R2:
+  if (isn[`INSTRUCTION_L2]==2'b00 && isn[31:26]==`CMP)
+    IsCmp = TRUE;
+  else
+    IsCmp = FALSE;
+`CMPI:
+  IsCmp = TRUE;
+default:
+  IsCmp = FALSE;
+endcase
+endfunction
+
+// TRUE when the major opcode of isn is one of the memory operations below.
+function [0:0] IsMem;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX,
+`AMO,
+`LB,`LBU,`Lx,`LxU,`LWR,
+`LV,`SV,
+`INC,
+`SB,`Sx,`SWC,
+`CAS,
+`LVx,`LVxU:
+  IsMem = TRUE;
+default:
+  IsMem = FALSE;
+endcase
+endfunction
+
+// TRUE when the major opcode of isn is MEMNDX.
+function IsMemNdx;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`MEMNDX)
+  IsMemNdx = TRUE;
+else
+  IsMemNdx = FALSE;
+endfunction
+
+// TRUE for the load opcodes below, and for MEMNDX when isn[31] is clear.
+function IsLoad;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  IsLoad = !isn[31];
+`LB,`LBU,
+`Lx,`LxU,
+`LWR,
+`LV,
+`LVx,`LVxU:
+  IsLoad = TRUE;
+default:
+  IsLoad = FALSE;
+endcase
+endfunction
+
+// TRUE for the INC opcode and for the MEMNDX form whose
+// {isn[31:28],isn[17:16]} sub-code is INC; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsInc;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[17:16]}==`INC)
+    IsInc = TRUE;
+  else
+    IsInc = FALSE;
+`INC:
+  IsInc = TRUE;
+default:
+  IsInc = FALSE;
+endcase
+endfunction
+
+// TRUE for the SWC opcode and for the MEMNDX form whose
+// {isn[31:28],isn[17:16]} sub-code is SWCX; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsSWC;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[17:16]}==`SWCX)
+    IsSWC = TRUE;
+  else
+    IsSWC = FALSE;
+`SWC:
+  IsSWC = TRUE;
+default:
+  IsSWC = FALSE;
+endcase
+endfunction
+
+// Acquire / release bits are only available on indexed SWC / LWR
+// TRUE only for the MEMNDX form whose {isn[31:28],isn[17:16]} sub-code is
+// SWCX, with the INSTRUCTION_L2 field equal to 2'b00.
+function IsSWCX;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`MEMNDX) begin
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[17:16]}==`SWCX)
+    IsSWCX = TRUE;
+  else
+    IsSWCX = FALSE;
+end
+else
+  IsSWCX = FALSE;
+endfunction
+
+// TRUE for the LWR opcode and for the MEMNDX form whose
+// {isn[31:28],isn[22:21]} sub-code is LWRX; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsLWR;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[22:21]}==`LWRX)
+    IsLWR = TRUE;
+  else
+    IsLWR = FALSE;
+`LWR:
+  IsLWR = TRUE;
+default:
+  IsLWR = FALSE;
+endcase
+endfunction
+
+// TRUE only for the MEMNDX form whose {isn[31:28],isn[22:21]} sub-code is
+// LWRX, with the INSTRUCTION_L2 field equal to 2'b00.
+function IsLWRX;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`MEMNDX) begin
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[22:21]}==`LWRX)
+    IsLWRX = TRUE;
+  else
+    IsLWRX = FALSE;
+end
+else
+  IsLWRX = FALSE;
+endfunction
+
+// TRUE for the CAS opcode and for the MEMNDX form whose
+// {isn[31:28],isn[17:16]} sub-code is CASX; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsCAS;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[17:16]}==`CASX)
+    IsCAS = TRUE;
+  else
+    IsCAS = FALSE;
+`CAS:
+  IsCAS = TRUE;
+default:
+  IsCAS = FALSE;
+endcase
+endfunction
+
+// TRUE when the major opcode of isn is AMO.
+function IsAMO;
+input [47:0] isn;
+if (isn[`INSTRUCTION_OP]==`AMO)
+  IsAMO = TRUE;
+else
+  IsAMO = FALSE;
+endfunction
+
+// Really IsPredictableBranch
+// Does not include BccR's
+// TRUE when the major opcode of isn is Bcc, BBc, BEQI or CHK.
+function IsBranch;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`Bcc,`BBc,`BEQI,`CHK:
+  IsBranch = TRUE;
+default:
+  IsBranch = FALSE;
+endcase
+endfunction
+
+// TRUE when isn is the R2 sub-operation WAIT with the INSTRUCTION_L2 field
+// equal to 2'b00.
+function IsWait;
+input [47:0] isn;
+IsWait = isn[`INSTRUCTION_OP]==`R2
+      && isn[`INSTRUCTION_L2]==2'b00
+      && isn[`INSTRUCTION_S2]==`WAIT;
+endfunction
+
+// TRUE when the major opcode of isn is CALL and isn[7] is clear.
+function IsCall;
+input [47:0] isn;
+IsCall = isn[`INSTRUCTION_OP]==`CALL
+      && isn[7]==1'b0;
+endfunction
+
+// TRUE when the major opcode of isn is JMP and isn[7] is clear.
+function IsJmp;
+input [47:0] isn;
+IsJmp = isn[`INSTRUCTION_OP]==`JMP
+     && isn[7]==1'b0;
+endfunction
+
+// TRUE for instructions that change the flow of control: BRK, the R2
+// sub-operation RTI, the branches, CHK, JAL, JMP, CALL and RET.
+function IsFlowCtrl;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`BRK:
+  IsFlowCtrl = TRUE;
+`R2:
+  if (isn[`INSTRUCTION_S2]==`RTI)
+    IsFlowCtrl = TRUE;
+  else
+    IsFlowCtrl = FALSE;
+`Bcc,`BBc,`BEQI,`CHK,
+`JAL,`JMP,`CALL,`RET:
+  IsFlowCtrl = TRUE;
+default:
+  IsFlowCtrl = FALSE;
+endcase
+endfunction
+
+// TRUE for the CACHE opcode and for the MEMNDX form whose
+// {isn[31:28],isn[22:21]} sub-code is CACHEX; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsCache;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[22:21]}==`CACHEX)
+    IsCache = TRUE;
+  else
+    IsCache = FALSE;
+`CACHE:
+  IsCache = TRUE;
+default:
+  IsCache = FALSE;
+endcase
+endfunction
+
+// Extracts the 5-bit cache command field: isn[15:11] for the CACHE opcode,
+// isn[17:13] for the MEMNDX CACHEX form (INSTRUCTION_L2 field 2'b00 only),
+// and zero for anything else.
+function [4:0] CacheCmd;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[22:21]}==`CACHEX)
+    CacheCmd = isn[17:13];
+  else
+    CacheCmd = 5'd0;
+`CACHE:
+  CacheCmd = isn[15:11];
+default:
+  CacheCmd = 5'd0;
+endcase
+endfunction
+
+// TRUE when isn is the RR / R1 sub-operation MEMSB (selected by isn[22:18])
+// with the INSTRUCTION_L2 field equal to 2'b00.
+function IsMemsb;
+input [47:0] isn;
+IsMemsb = (isn[`INSTRUCTION_OP]==`RR
+        && isn[`INSTRUCTION_L2]==2'b00
+        && isn[`INSTRUCTION_S2]==`R1
+        && isn[22:18]==`MEMSB);
+endfunction
+
+// TRUE when isn is the R2 sub-operation SEI with the INSTRUCTION_L2 field
+// equal to 2'b00.
+function IsSEI;
+input [47:0] isn;
+IsSEI = (isn[`INSTRUCTION_OP]==`R2
+      && isn[`INSTRUCTION_L2]==2'b00
+      && isn[`INSTRUCTION_S2]==`SEI);
+endfunction
+
+// TRUE for the LV opcode and for the MEMNDX form whose
+// {isn[31:28],isn[22:21]} sub-code is LVX; the MEMNDX form is only
+// recognised when the INSTRUCTION_L2 field is 2'b00.
+function IsLV;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+  if (isn[`INSTRUCTION_L2]==2'b00 && {isn[31:28],isn[22:21]}==`LVX)
+    IsLV = TRUE;
+  else
+    IsLV = FALSE;
+`LV:
+  IsLV = TRUE;
+default:
+  IsLV = FALSE;
+endcase
+endfunction
+
+// IsRFW: decode whether an instruction is flagged as a register-file write.
+//   isn            - instruction bits
+//   vqei, vli, thrd - passed unchanged to fnRt()
+// Returns FALSE whenever fnRt() yields 12'd0, whatever the opcode. Otherwise
+// the result comes from the opcode table below; `R2 and `MEMNDX are further
+// decoded by their L2 and function-code fields. Opcodes not listed return
+// FALSE.
+function IsRFW;
+input [47:0] isn;
+input [5:0] vqei;
+input [5:0] vli;
+input thrd;
+if (fnRt(isn,vqei,vli,thrd)==12'd0)
+ IsRFW = FALSE;
+else
+casez(isn[`INSTRUCTION_OP])
+`IVECTOR: IsRFW = TRUE;
+`FVECTOR: IsRFW = TRUE;
+`R2:
+ // L2 == 2'b00: function code taken from the S2 field.
+ if (isn[`INSTRUCTION_L2]==2'b00)
+ casez(isn[`INSTRUCTION_S2])
+ `TLB: IsRFW = TRUE;
+ `R1:
+ // R1 sub-operations selected by isn[22:18]; the listed ones do not write.
+ case(isn[22:18])
+ `MEMDB,`MEMSB,`SYNC,`SETWB,5'h14,5'h15: IsRFW = FALSE;
+ default: IsRFW = TRUE;
+ endcase
+ `ADD: IsRFW = TRUE;
+ `SUB: IsRFW = TRUE;
+ `SLT: IsRFW = TRUE;
+ `SLTU: IsRFW = TRUE;
+ `SLE: IsRFW = TRUE;
+ `SLEU: IsRFW = TRUE;
+ `AND: IsRFW = TRUE;
+ `OR: IsRFW = TRUE;
+ `XOR: IsRFW = TRUE;
+ `NAND: IsRFW = TRUE;
+ `NOR: IsRFW = TRUE;
+ `XNOR: IsRFW = TRUE;
+ `MULU: IsRFW = TRUE;
+ `MULSU: IsRFW = TRUE;
+ `MUL: IsRFW = TRUE;
+ `MULUH: IsRFW = TRUE;
+ `MULSUH: IsRFW = TRUE;
+ `MULH: IsRFW = TRUE;
+ `MULF: IsRFW = TRUE;
+ `FXMUL: IsRFW = TRUE;
+ `DIVU: IsRFW = TRUE;
+ `DIVSU: IsRFW = TRUE;
+ `DIV:IsRFW = TRUE;
+ `MODU: IsRFW = TRUE;
+ `MODSU: IsRFW = TRUE;
+ `MOD:IsRFW = TRUE;
+ `MOV: IsRFW = TRUE;
+ `VMOV: IsRFW = TRUE;
+ `SHIFTR,`SHIFT31,`SHIFT63:
+ IsRFW = TRUE;
+ `MIN,`MAX: IsRFW = TRUE;
+ `SEI: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ // L2 == 2'b01: function code taken from isn[47:42].
+ else if (isn[`INSTRUCTION_L2]==2'b01)
+ case(isn[47:42])
+ `CMOVEZ: IsRFW = TRUE;
+ `CMOVNZ: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else if (isn[7]==1'b1)
+ // The following instructions might come from a compressed version.
+ casez(isn[`INSTRUCTION_S2])
+ `ADD: IsRFW = TRUE;
+ `SUB: IsRFW = TRUE;
+ `AND: IsRFW = TRUE;
+ `OR: IsRFW = TRUE;
+ `XOR: IsRFW = TRUE;
+ `MOV: IsRFW = TRUE;
+ `SHIFTR,`SHIFT31,`SHIFT63:
+ IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else
+ IsRFW = FALSE;
+`MEMNDX:
+ // isn[31] selects between the two function-code layouts:
+ // {isn[31:28],isn[22:21]} when clear, {isn[31:28],isn[17:16]} when set.
+ if (isn[`INSTRUCTION_L2]==2'b10) begin
+ if (!isn[31])
+ IsRFW = TRUE;
+ else
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsRFW = TRUE;
+ `CASX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ end
+ else if (isn[`INSTRUCTION_L2]==2'b00) begin
+ if (!isn[31])
+ case({isn[31:28],isn[22:21]})
+ `LBX: IsRFW = TRUE;
+ `LBUX: IsRFW = TRUE;
+ `LCX: IsRFW = TRUE;
+ `LCUX: IsRFW = TRUE;
+ `LHX: IsRFW = TRUE;
+ `LHUX: IsRFW = TRUE;
+ `LWX: IsRFW = TRUE;
+ `LVBX: IsRFW = TRUE;
+ `LVBUX: IsRFW = TRUE;
+ `LVCX: IsRFW = TRUE;
+ `LVCUX: IsRFW = TRUE;
+ `LVHX: IsRFW = TRUE;
+ `LVHUX: IsRFW = TRUE;
+ `LVWX: IsRFW = TRUE;
+ `LWRX: IsRFW = TRUE;
+ `LVX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ else
+ case({isn[31:28],isn[17:16]})
+ `PUSH: IsRFW = TRUE;
+ `CASX: IsRFW = TRUE;
+ default: IsRFW = FALSE;
+ endcase
+ end
+ else
+ IsRFW = FALSE;
+`BBc: IsRFW = FALSE;
+`BITFIELD: IsRFW = TRUE;
+`ADDI: IsRFW = TRUE;
+`SLTI: IsRFW = TRUE;
+`SLTUI: IsRFW = TRUE;
+`SGTI: IsRFW = TRUE;
+`SGTUI: IsRFW = TRUE;
+`ANDI: IsRFW = TRUE;
+`ORI: IsRFW = TRUE;
+`XORI: IsRFW = TRUE;
+`XNORI: IsRFW = TRUE;
+`MULUI: IsRFW = TRUE;
+`MULI: IsRFW = TRUE;
+`MULFI: IsRFW = TRUE;
+`DIVUI: IsRFW = TRUE;
+`DIVI: IsRFW = TRUE;
+`MODI: IsRFW = TRUE;
+`JAL: IsRFW = TRUE;
+`CALL: IsRFW = TRUE;
+`RET: IsRFW = TRUE;
+`LB: IsRFW = TRUE;
+`LBU: IsRFW = TRUE;
+`Lx: IsRFW = TRUE;
+`LxU: IsRFW = TRUE;
+`LWR: IsRFW = TRUE;
+`LV: IsRFW = TRUE;
+`LVx: IsRFW = TRUE;
+`LVxU: IsRFW = TRUE;
+`CAS: IsRFW = TRUE;
+`AMO: IsRFW = TRUE;
+`CSRRW: IsRFW = TRUE;
+`AUIPC: IsRFW = TRUE;
+`LUI: IsRFW = TRUE;
+default: IsRFW = FALSE;
+endcase
+endfunction
+
+// IsShifti: TRUE for the `SHIFT31 / `SHIFT63 function codes of an `R2
+// instruction with L2 == 2'b00; FALSE for anything else.
+function IsShifti;
+input [47:0] isn;
+begin
+	IsShifti = FALSE;
+	if (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00)
+		case(isn[`INSTRUCTION_S2])
+		`SHIFT31,`SHIFT63:	IsShifti = TRUE;
+		default:	;
+		endcase
+end
+endfunction
+
+// IsShift: TRUE for any of the `SHIFTR / `SHIFT31 / `SHIFT63 function codes
+// (read from isn[31:26]) of an `R2 instruction with L2 == 2'b00.
+function IsShift;
+input [47:0] isn;
+begin
+	IsShift = FALSE;
+	if (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00)
+		case(isn[31:26])
+		`SHIFTR,`SHIFT31,`SHIFT63:	IsShift = TRUE;
+		default:	;
+		endcase
+end
+endfunction
+
+// IsShift48: TRUE for an `R2 instruction with L2 == 2'b01 whose function
+// code in isn[47:42] is `SHIFTR; FALSE otherwise.
+function IsShift48;
+input [47:0] isn;
+begin
+	IsShift48 = FALSE;
+	if (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b01)
+		case(isn[47:42])
+		`SHIFTR:	IsShift48 = TRUE;
+		default:	;
+		endcase
+end
+endfunction
+
+// IsRtop: TRUE for an `R2 instruction with L2 == 2'b01 whose function code
+// in isn[47:42] is `RTOP; FALSE otherwise.
+function IsRtop;
+input [47:0] isn;
+begin
+	IsRtop = FALSE;
+	if (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b01)
+		case(isn[47:42])
+		`RTOP:	IsRtop = TRUE;
+		default:	;
+		endcase
+end
+endfunction
+
+// IsMul: TRUE for the multiply instructions: the `MULUI / `MULI opcodes,
+// or an `R2 instruction with L2 == 2'b00 and S2 one of
+// `MULU, `MULSU, `MUL, `MULUH, `MULSUH, `MULH.
+function IsMul;
+input [47:0] isn;
+begin
+	IsMul = FALSE;
+	case(isn[`INSTRUCTION_OP])
+	`R2:
+		if (isn[`INSTRUCTION_L2]==2'b00)
+			case(isn[`INSTRUCTION_S2])
+			`MULU,`MULSU,`MUL,
+			`MULUH,`MULSUH,`MULH:	IsMul = TRUE;
+			default:	;
+			endcase
+	`MULUI,`MULI:
+		IsMul = TRUE;
+	default:	;
+	endcase
+end
+endfunction
+
+// IsDivmod: TRUE for the divide / modulus instructions: the `DIVUI, `DIVI,
+// `MODI opcodes, or an `R2 instruction with L2 == 2'b00 and S2 one of
+// `DIVU, `DIVSU, `DIV, `MODU, `MODSU, `MOD.
+function IsDivmod;
+input [47:0] isn;
+begin
+	IsDivmod = FALSE;
+	case(isn[`INSTRUCTION_OP])
+	`R2:
+		if (isn[`INSTRUCTION_L2]==2'b00)
+			case(isn[`INSTRUCTION_S2])
+			`DIVU,`DIVSU,`DIV,
+			`MODU,`MODSU,`MOD:	IsDivmod = TRUE;
+			default:	;
+			endcase
+	`DIVUI,`DIVI,`MODI:
+		IsDivmod = TRUE;
+	default:	;
+	endcase
+end
+endfunction
+
+// IsExec: TRUE only when the opcode field of isn is `EXEC.
+function IsExec;
+input [47:0] isn;
+begin
+	if (isn[`INSTRUCTION_OP]==`EXEC)
+		IsExec = TRUE;
+	else
+		IsExec = FALSE;
+end
+endfunction
+
+// fnSelect: builds an 8-bit select mask for a memory instruction from the
+// instruction encoding and the low three address bits (presumably one bit
+// per byte lane of the 64-bit data bus -- confirm against the bus interface).
+//   ins - instruction bits
+//   adr - address; only adr[2:0] is examined
+// One-byte accesses set a single bit chosen by adr[2:0], two-byte accesses a
+// pair chosen by adr[2:1], four-byte accesses a nibble chosen by adr[2], and
+// full-width accesses return 8'hFF. Instructions not decoded here return
+// 8'h00, except the non-store indexed group (ins[31]==0) whose default is
+// 8'hFF.
+function [7:0] fnSelect;
+input [47:0] ins;
+input [`ABITS] adr;
+begin
+ case(ins[`INSTRUCTION_OP])
+ `MEMNDX:
+ if (ins[`INSTRUCTION_L2]==2'b10) begin
+ if (ins[31]) begin
+ case({ins[31:28],ins[17:16]})
+ `PUSH: fnSelect = 8'hFF;
+ default: fnSelect = 8'h00;
+ endcase
+ end
+ else
+ fnSelect = 8'h00;
+ end
+ else if (ins[`INSTRUCTION_L2]==2'b00) begin
+ // ins[31]==0: function code is {ins[31:28],ins[22:21]} (load group).
+ if (!ins[31])
+ case({ins[31:28],ins[22:21]})
+ `LBX,`LBUX,`LVBX,`LVBUX:
+ case(adr[2:0])
+ 3'd0: fnSelect = 8'h01;
+ 3'd1: fnSelect = 8'h02;
+ 3'd2: fnSelect = 8'h04;
+ 3'd3: fnSelect = 8'h08;
+ 3'd4: fnSelect = 8'h10;
+ 3'd5: fnSelect = 8'h20;
+ 3'd6: fnSelect = 8'h40;
+ 3'd7: fnSelect = 8'h80;
+ endcase
+ `LCX,`LCUX,`LVCX,`LVCUX:
+ case(adr[2:1])
+ 2'd0: fnSelect = 8'h03;
+ 2'd1: fnSelect = 8'h0C;
+ 2'd2: fnSelect = 8'h30;
+ 2'd3: fnSelect = 8'hC0;
+ endcase
+ `LHX,`LHUX,`LVHX,`LVHUX:
+ case(adr[2])
+ 1'b0: fnSelect = 8'h0F;
+ 1'b1: fnSelect = 8'hF0;
+ endcase
+ `INC,`LVWX,
+ `LWX,`LWRX,`LVX:
+ fnSelect = 8'hFF;
+ default:fnSelect = 8'hFF;
+ endcase
+ // ins[31]==1: function code is {ins[31:28],ins[17:16]} (store group).
+ else
+ case({ins[31:28],ins[17:16]})
+ `SBX:
+ case(adr[2:0])
+ 3'd0: fnSelect = 8'h01;
+ 3'd1: fnSelect = 8'h02;
+ 3'd2: fnSelect = 8'h04;
+ 3'd3: fnSelect = 8'h08;
+ 3'd4: fnSelect = 8'h10;
+ 3'd5: fnSelect = 8'h20;
+ 3'd6: fnSelect = 8'h40;
+ 3'd7: fnSelect = 8'h80;
+ endcase
+ `SCX:
+ case(adr[2:1])
+ 2'd0: fnSelect = 8'h03;
+ 2'd1: fnSelect = 8'h0C;
+ 2'd2: fnSelect = 8'h30;
+ 2'd3: fnSelect = 8'hC0;
+ endcase
+ `SHX:
+ case(adr[2])
+ 1'b0: fnSelect = 8'h0F;
+ 1'b1: fnSelect = 8'hF0;
+ endcase
+ `INC,
+ `SWX,`SWCX,`SVX,`CASX,`PUSH:
+ fnSelect = 8'hFF;
+ default: fnSelect = 8'h00;
+ endcase
+ end
+ else
+ fnSelect = 8'h00;
+ `LB,`LBU,`SB:
+ case(adr[2:0])
+ 3'd0: fnSelect = 8'h01;
+ 3'd1: fnSelect = 8'h02;
+ 3'd2: fnSelect = 8'h04;
+ 3'd3: fnSelect = 8'h08;
+ 3'd4: fnSelect = 8'h10;
+ 3'd5: fnSelect = 8'h20;
+ 3'd6: fnSelect = 8'h40;
+ 3'd7: fnSelect = 8'h80;
+ endcase
+ // Size-encoded loads: ins[20:18] selects the access width. The casez items
+ // are order-sensitive (3'b100 must be tested before the wildcard patterns).
+ `Lx,`LxU,`LVx,`LVxU:
+ casez(ins[20:18])
+ 3'b100: fnSelect = 8'hFF;
+ 3'b?10: fnSelect = adr[2] ? 8'hF0 : 8'h0F;
+ 3'b??1:
+ case(adr[2:1])
+ 2'd0: fnSelect = 8'h03;
+ 2'd1: fnSelect = 8'h0C;
+ 2'd2: fnSelect = 8'h30;
+ 2'd3: fnSelect = 8'hC0;
+ endcase
+ default: fnSelect = 8'h00;
+ endcase
+ // Size-encoded stores: same scheme, width field in ins[15:13].
+ `Sx:
+ casez(ins[15:13])
+ 3'b100: fnSelect = 8'hFF;
+ 3'b?10: fnSelect = adr[2] ? 8'hF0 : 8'h0F;
+ 3'b??1:
+ case(adr[2:1])
+ 2'd0: fnSelect = 8'h03;
+ 2'd1: fnSelect = 8'h0C;
+ 2'd2: fnSelect = 8'h30;
+ 2'd3: fnSelect = 8'hC0;
+ endcase
+ default: fnSelect = 8'h00;
+ endcase
+ `INC,
+ `LWR,`SWC,`CAS: fnSelect = 8'hFF;
+ `LV,`SV: fnSelect = 8'hFF;
+ // AMO: ins[23:21] is the size code; the mask is shifted into place by the
+ // naturally aligned part of the address.
+ `AMO:
+ case(ins[23:21])
+ 3'd0: fnSelect = {8'h01 << adr[2:0]};
+ 3'd1: fnSelect = {8'h03 << {adr[2:1],1'b0}};
+ 3'd2: fnSelect = {8'h0F << {adr[2],2'b00}};
+ 3'd3: fnSelect = 8'hFF;
+ default: fnSelect = 8'hFF;
+ endcase
+ default: fnSelect = 8'h00;
+ endcase
+end
+endfunction
+/*
+function [63:0] fnDatc;
+input [47:0] ins;
+input [63:0] dat;
+case(ins[`INSTRUCTION_OP])
+`R2:
+ if (isn[`INSTRUCTION_L2]==2'b01)
+ case(ins[47:42])
+ `FINDB: fnDatc = dat[7:0];
+ `FINDC: fnDatc = dat[15:0];
+ `FINDH: fnDatc = dat[31:0];
+ `FINDW: fnDatc = dat[63:0];
+ default: fnDatc = dat[63:0];
+ endcase
+ else
+ fnDatc = dat[63:0];
+default: fnDatc = dat[63:0];
+endcase
+endfunction
+*/
+/*
+function [63:0] fnMemInc;
+input [47:0] ins;
+case(ins[`INSTRUCTION_OP])
+`R2:
+ if (isn[`INSTRUCTION_L2]==2'b01)
+ case(ins[47:42])
+ `FINDB: fnMemInc = 32'd1;
+ `FINDC: fnMemInc = 32'd2;
+ `FINDH: fnMemInc = 32'd4;
+ `FINDW: fnMemInc = 32'd8;
+ default: fnMemInc = 32'd8;
+ endcase
+ else
+ fnMemInc = 32'd8;
+default: fnMemInc = 32'd8;
+endcase
+endfunction
+*/
+// fnDatiAlign: aligns and extends incoming load data.
+//   ins - instruction bits (select access width and signedness)
+//   adr - address; adr[2:0] selects which part of dat is extracted
+//   dat - 64-bit data word
+// Returns the addressed byte / 16-bit / 32-bit field moved down to bit 0 and
+// sign-extended (LB, Lx, LVx and the signed indexed forms) or zero-extended
+// (LBU, LxU, LVxU and the unsigned indexed forms) to 64 bits. Full-width and
+// undecoded instructions return dat unchanged.
+//
+// The zero-extension of the top byte (adr[2:0]==7) previously used
+// {56{2'b0}}, a 112-bit field that only gave the right answer through
+// truncation; it is now the intended 56-bit {56{1'b0}}.
+function [63:0] fnDatiAlign;
+input [47:0] ins;
+input [`ABITS] adr;
+input [63:0] dat;
+case(ins[`INSTRUCTION_OP])
+`MEMNDX:
+ if (ins[`INSTRUCTION_L2]==2'b00)
+ case({ins[31:28],ins[22:21]})
+ `LBX,`LVBX:
+ case(adr[2:0])
+ 3'd0: fnDatiAlign = {{56{dat[7]}},dat[7:0]};
+ 3'd1: fnDatiAlign = {{56{dat[15]}},dat[15:8]};
+ 3'd2: fnDatiAlign = {{56{dat[23]}},dat[23:16]};
+ 3'd3: fnDatiAlign = {{56{dat[31]}},dat[31:24]};
+ 3'd4: fnDatiAlign = {{56{dat[39]}},dat[39:32]};
+ 3'd5: fnDatiAlign = {{56{dat[47]}},dat[47:40]};
+ 3'd6: fnDatiAlign = {{56{dat[55]}},dat[55:48]};
+ 3'd7: fnDatiAlign = {{56{dat[63]}},dat[63:56]};
+ endcase
+ `LBUX,`LVBUX:
+ case(adr[2:0])
+ 3'd0: fnDatiAlign = {{56{1'b0}},dat[7:0]};
+ 3'd1: fnDatiAlign = {{56{1'b0}},dat[15:8]};
+ 3'd2: fnDatiAlign = {{56{1'b0}},dat[23:16]};
+ 3'd3: fnDatiAlign = {{56{1'b0}},dat[31:24]};
+ 3'd4: fnDatiAlign = {{56{1'b0}},dat[39:32]};
+ 3'd5: fnDatiAlign = {{56{1'b0}},dat[47:40]};
+ 3'd6: fnDatiAlign = {{56{1'b0}},dat[55:48]};
+ 3'd7: fnDatiAlign = {{56{1'b0}},dat[63:56]};
+ endcase
+ `LCX,`LVCX:
+ case(adr[2:1])
+ 2'd0: fnDatiAlign = {{48{dat[15]}},dat[15:0]};
+ 2'd1: fnDatiAlign = {{48{dat[31]}},dat[31:16]};
+ 2'd2: fnDatiAlign = {{48{dat[47]}},dat[47:32]};
+ 2'd3: fnDatiAlign = {{48{dat[63]}},dat[63:48]};
+ endcase
+ `LCUX,`LVCUX:
+ case(adr[2:1])
+ 2'd0: fnDatiAlign = {{48{1'b0}},dat[15:0]};
+ 2'd1: fnDatiAlign = {{48{1'b0}},dat[31:16]};
+ 2'd2: fnDatiAlign = {{48{1'b0}},dat[47:32]};
+ 2'd3: fnDatiAlign = {{48{1'b0}},dat[63:48]};
+ endcase
+ `LHX,`LVHX:
+ case(adr[2])
+ 1'b0: fnDatiAlign = {{32{dat[31]}},dat[31:0]};
+ 1'b1: fnDatiAlign = {{32{dat[63]}},dat[63:32]};
+ endcase
+ `LHUX,`LVHUX:
+ case(adr[2])
+ 1'b0: fnDatiAlign = {{32{1'b0}},dat[31:0]};
+ 1'b1: fnDatiAlign = {{32{1'b0}},dat[63:32]};
+ endcase
+ `LWX,`LWRX,`LVX,`CAS,`LVWX: fnDatiAlign = dat;
+ default: fnDatiAlign = dat;
+ endcase
+ else
+ fnDatiAlign = dat;
+`LB:
+ case(adr[2:0])
+ 3'd0: fnDatiAlign = {{56{dat[7]}},dat[7:0]};
+ 3'd1: fnDatiAlign = {{56{dat[15]}},dat[15:8]};
+ 3'd2: fnDatiAlign = {{56{dat[23]}},dat[23:16]};
+ 3'd3: fnDatiAlign = {{56{dat[31]}},dat[31:24]};
+ 3'd4: fnDatiAlign = {{56{dat[39]}},dat[39:32]};
+ 3'd5: fnDatiAlign = {{56{dat[47]}},dat[47:40]};
+ 3'd6: fnDatiAlign = {{56{dat[55]}},dat[55:48]};
+ 3'd7: fnDatiAlign = {{56{dat[63]}},dat[63:56]};
+ endcase
+`LBU:
+ case(adr[2:0])
+ 3'd0: fnDatiAlign = {{56{1'b0}},dat[7:0]};
+ 3'd1: fnDatiAlign = {{56{1'b0}},dat[15:8]};
+ 3'd2: fnDatiAlign = {{56{1'b0}},dat[23:16]};
+ 3'd3: fnDatiAlign = {{56{1'b0}},dat[31:24]};
+ 3'd4: fnDatiAlign = {{56{1'b0}},dat[39:32]};
+ 3'd5: fnDatiAlign = {{56{1'b0}},dat[47:40]};
+ 3'd6: fnDatiAlign = {{56{1'b0}},dat[55:48]};
+ 3'd7: fnDatiAlign = {{56{1'b0}},dat[63:56]};
+ endcase
+// Size-encoded signed loads: ins[20:18] selects the width. The casez items
+// are order-sensitive.
+`Lx,`LVx:
+ casez(ins[20:18])
+ 3'b100: fnDatiAlign = dat;
+ 3'b?10:
+ case(adr[2])
+ 1'b0: fnDatiAlign = {{32{dat[31]}},dat[31:0]};
+ 1'b1: fnDatiAlign = {{32{dat[63]}},dat[63:32]};
+ endcase
+ 3'b??1:
+ case(adr[2:1])
+ 2'd0: fnDatiAlign = {{48{dat[15]}},dat[15:0]};
+ 2'd1: fnDatiAlign = {{48{dat[31]}},dat[31:16]};
+ 2'd2: fnDatiAlign = {{48{dat[47]}},dat[47:32]};
+ 2'd3: fnDatiAlign = {{48{dat[63]}},dat[63:48]};
+ endcase
+ default: fnDatiAlign = dat;
+ endcase
+// Size-encoded unsigned loads: same decode, zero-extended.
+`LxU,`LVxU:
+ casez(ins[20:18])
+ 3'b100: fnDatiAlign = dat;
+ 3'b?10:
+ case(adr[2])
+ 1'b0: fnDatiAlign = {{32{1'b0}},dat[31:0]};
+ 1'b1: fnDatiAlign = {{32{1'b0}},dat[63:32]};
+ endcase
+ 3'b??1:
+ case(adr[2:1])
+ 2'd0: fnDatiAlign = {{48{1'b0}},dat[15:0]};
+ 2'd1: fnDatiAlign = {{48{1'b0}},dat[31:16]};
+ 2'd2: fnDatiAlign = {{48{1'b0}},dat[47:32]};
+ 2'd3: fnDatiAlign = {{48{1'b0}},dat[63:48]};
+ endcase
+ default: fnDatiAlign = dat;
+ endcase
+`LWR,`LV,`CAS,`AMO: fnDatiAlign = dat;
+default: fnDatiAlign = dat;
+endcase
+endfunction
+
+// fnDato: formats outgoing store data.
+//   isn - instruction bits (select the store width)
+//   dat - 64-bit value to store
+// Sub-word stores replicate the low byte / 16 bits / 32 bits of dat across
+// the whole 64-bit result; full-width and undecoded instructions return dat
+// unchanged.
+function [63:0] fnDato;
+input [47:0] isn;
+input [63:0] dat;
+begin
+	// Pass-through unless a narrower store is decoded below.
+	fnDato = dat;
+	case(isn[`INSTRUCTION_OP])
+	`MEMNDX:
+		if (isn[`INSTRUCTION_L2]==2'b00)
+			case({isn[31:28],isn[17:16]})
+			`SBX:	fnDato = {8{dat[7:0]}};
+			`SCX:	fnDato = {4{dat[15:0]}};
+			`SHX:	fnDato = {2{dat[31:0]}};
+			default:	;
+			endcase
+	`SB:
+		fnDato = {8{dat[7:0]}};
+	`Sx:
+		// Width field isn[15:13]; 3'b100 must be matched first.
+		casez(isn[15:13])
+		3'b100:	;
+		3'b?10:	fnDato = {2{dat[31:0]}};
+		3'b??1:	fnDato = {4{dat[15:0]}};
+		default:	;
+		endcase
+	`AMO:
+		case(isn[23:21])
+		3'd0:	fnDato = {8{dat[7:0]}};
+		3'd1:	fnDato = {4{dat[15:0]}};
+		3'd2:	fnDato = {2{dat[31:0]}};
+		default:	;
+		endcase
+	default:	;
+	endcase
+end
+endfunction
+
+// IsTLB: TRUE when the opcode is `R2 and the S2 field is `TLB. The L2 field
+// is not examined here.
+function IsTLB;
+input [47:0] isn;
+begin
+	IsTLB = FALSE;
+	if (isn[`INSTRUCTION_OP]==`R2)
+		if (isn[`INSTRUCTION_S2]==`TLB)
+			IsTLB = TRUE;
+end
+endfunction
+
+// An ALU instruction is single cycle (result valid immediately) when it is
+// none of: multiply, divide/modulus, TLB operation, 48-bit shift.
+function IsSingleCycle;
+input [47:0] isn;
+begin
+	IsSingleCycle = !IsMul(isn) && !IsDivmod(isn) && !IsTLB(isn) && !IsShift48(isn);
+end
+endfunction
+
+
+// One decoder per instruction-queue entry: expands the entry's target
+// register number (iqentry_tgt[g]) into the vector iq_out[g]. With
+// SUPPORT_SMT the decoder input is one bit wider (bits [8:7] instead of
+// bit [7] are prepended to bits [5:0]); bit 6 is skipped in both cases.
+generate begin : gDecocderInst
+for (g = 0; g < QENTRIES; g = g + 1) begin
+`ifdef SUPPORT_SMT
+decoder8 iq0(.num({iqentry_tgt[g][8:7],iqentry_tgt[g][5:0]}), .out(iq_out[g]));
+`else
+decoder7 iq0(.num({iqentry_tgt[g][7],iqentry_tgt[g][5:0]}), .out(iq_out[g]));
+`endif
+end
+end
+endgenerate
+
+// Simulation-time initialisation: fills the panic message table (indexed by
+// the `PANIC_* codes) and clears all 64 codebuf entries.
+initial begin: Init
+ //
+ //
+ // set up panic messages
+ message[ `PANIC_NONE ] = "NONE ";
+ message[ `PANIC_FETCHBUFBEQ ] = "FETCHBUFBEQ ";
+ message[ `PANIC_INVALIDISLOT ] = "INVALIDISLOT ";
+ message[ `PANIC_IDENTICALDRAMS ] = "IDENTICALDRAMS ";
+ message[ `PANIC_OVERRUN ] = "OVERRUN ";
+ message[ `PANIC_HALTINSTRUCTION ] = "HALTINSTRUCTION ";
+ message[ `PANIC_INVALIDMEMOP ] = "INVALIDMEMOP ";
+ message[ `PANIC_INVALIDFBSTATE ] = "INVALIDFBSTATE ";
+ message[ `PANIC_INVALIDIQSTATE ] = "INVALIDIQSTATE ";
+ message[ `PANIC_BRANCHBACK ] = "BRANCHBACK ";
+ message[ `PANIC_MEMORYRACE ] = "MEMORYRACE ";
+ message[ `PANIC_ALU0ONLY ] = "ALU0 Only ";
+
+ // Zero every entry of the code buffer.
+ for (n = 0; n < 64; n = n + 1)
+ codebuf[n] <= 48'h0;
+
+end
+
+// ---------------------------------------------------------------------------
+// FETCH
+// ---------------------------------------------------------------------------
+//
+// Pre-decode of the instructions sitting in the fetch buffer outputs:
+//   fetchbufN_mem  - instruction is a memory operation (IsMem)
+//   fetchbufN_rfw  - instruction writes the register file (IsRFW)
+//   fetchbufN_prfw - instruction is a compare (IsCmp); only with
+//                    SUPPORT_PREDICATION
+// Slot 0 is always decoded; slots 1 and 2 only exist when `WAYS is greater
+// than 1 and 2 respectively.
+assign fetchbuf0_mem = IsMem(fetchbuf0_instr);// & IsLoad(fetchbuf0_instr);
+assign fetchbuf0_rfw = IsRFW(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd);
+`ifdef SUPPORT_PREDICATION
+assign fetchbuf0_prfw = IsCmp(fetchbuf0_instr);
+`endif
+
+generate begin: gFetchbufDec
+if (`WAYS > 1) begin
+assign fetchbuf1_mem = IsMem(fetchbuf1_instr);// & IsLoad(fetchbuf1_instr);
+assign fetchbuf1_rfw = IsRFW(fetchbuf1_instr,vqe1,vl,fetchbuf1_thrd);
+`ifdef SUPPORT_PREDICATION
+assign fetchbuf1_prfw = IsCmp(fetchbuf1_instr);
+`endif
+end
+if (`WAYS > 2) begin
+assign fetchbuf2_mem = IsMem(fetchbuf2_instr);// & IsLoad(fetchbuf2_instr);
+assign fetchbuf2_rfw = IsRFW(fetchbuf2_instr,vqe2,vl,fetchbuf2_thrd);
+`ifdef SUPPORT_PREDICATION
+assign fetchbuf2_prfw = IsCmp(fetchbuf2_instr);
+`endif
+end
+end
+endgenerate
+
+// Fetch buffer instantiation. Exactly one variant is elaborated depending on
+// `WAYS: FT64_fetchbuf_x3 (3-way), FT64_fetchbuf (2-way) or FT64_fetchbuf_x1
+// (1-way). All variants are parameterised with (AMSB,RSTPC) and share the
+// instance name ufb1 inside the block gb1.
+//
+// The 3-way instance used to connect several named ports more than once
+// (.insn1, .queued2, .fetchbufD_v, .fetchbufD_pc), which is illegal and left
+// the insn2 / queued3 / fetchbufE_* / fetchbufF_* signals unconnected. Each
+// signal is now bound to the port of the same name, and fetchbuf2_instr
+// (consumed by the fetchbuf2_mem / fetchbuf2_rfw decode) is connected as well.
+// NOTE(review): the corrected port names follow the naming pattern of the
+// neighbouring connections -- confirm against the FT64_fetchbuf_x3 port list.
+// NOTE(review): the 3-way instance connects only pc0/pc1; check whether the
+// module also expects a third PC port.
+generate begin : gFetchbufInst
+if (`WAYS > 2) begin : gb1
+FT64_fetchbuf_x3 #(AMSB,RSTPC) ufb1
+(
+ .rst(rst),
+ .clk4x(clk4x),
+ .clk(clk),
+ .fcu_clk(fcu_clk),
+ .cs_i(vadr[31:16]==16'hFFFF),
+ .cyc_i(cyc),
+ .stb_i(stb_o),
+ .ack_o(dc_ack),
+ .we_i(we),
+ .adr_i(vadr[15:0]),
+ .dat_i(dat_o[47:0]),
+ .cmpgrp(cr0[10:8]),
+ .freezePC(freezePC),
+ .regLR(regLR),
+ .thread_en(thread_en),
+ .insn0(insn0),
+ .insn1(insn1),
+ .insn2(insn2),
+ .phit(phit),
+ .threadx(threadx),
+ .branchmiss(branchmiss),
+ .misspc(misspc),
+ .branchmiss_thrd(branchmiss_thrd),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_takenC(predict_takenC),
+ .predict_takenD(predict_takenD),
+ .predict_takenE(predict_takenE),
+ .predict_takenF(predict_takenF),
+ .predict_taken0(predict_taken0),
+ .predict_taken1(predict_taken1),
+ .predict_taken2(predict_taken2),
+ .queued1(queued1),
+ .queued2(queued2),
+ .queued3(queued3),
+ .queuedNop(queuedNop),
+ .pc0(pc0a),
+ .pc1(pc1a),
+ .fetchbuf(fetchbuf),
+ .fetchbufA_v(fetchbufA_v),
+ .fetchbufB_v(fetchbufB_v),
+ .fetchbufC_v(fetchbufC_v),
+ .fetchbufD_v(fetchbufD_v),
+ .fetchbufE_v(fetchbufE_v),
+ .fetchbufF_v(fetchbufF_v),
+ .fetchbufA_pc(fetchbufA_pc),
+ .fetchbufB_pc(fetchbufB_pc),
+ .fetchbufC_pc(fetchbufC_pc),
+ .fetchbufD_pc(fetchbufD_pc),
+ .fetchbufE_pc(fetchbufE_pc),
+ .fetchbufF_pc(fetchbufF_pc),
+ .fetchbufA_instr(fetchbufA_instr),
+ .fetchbufB_instr(fetchbufB_instr),
+ .fetchbufC_instr(fetchbufC_instr),
+ .fetchbufD_instr(fetchbufD_instr),
+ .fetchbufE_instr(fetchbufE_instr),
+ .fetchbufF_instr(fetchbufF_instr),
+ .fetchbuf0_instr(fetchbuf0_instr),
+ .fetchbuf1_instr(fetchbuf1_instr),
+ .fetchbuf2_instr(fetchbuf2_instr),
+ .fetchbuf0_thrd(fetchbuf0_thrd),
+ .fetchbuf1_thrd(fetchbuf1_thrd),
+ .fetchbuf2_thrd(fetchbuf2_thrd),
+ .fetchbuf0_pc(fetchbuf0_pc),
+ .fetchbuf1_pc(fetchbuf1_pc),
+ .fetchbuf2_pc(fetchbuf2_pc),
+ .fetchbuf0_v(fetchbuf0_v),
+ .fetchbuf1_v(fetchbuf1_v),
+ .fetchbuf2_v(fetchbuf2_v),
+ .fetchbuf0_insln(fetchbuf0_insln),
+ .fetchbuf1_insln(fetchbuf1_insln),
+ .fetchbuf2_insln(fetchbuf2_insln),
+ .codebuf0(codebuf[insn0[21:16]]),
+ .codebuf1(codebuf[insn1[21:16]]),
+ .codebuf2(codebuf[insn2[21:16]]),
+ .btgtA(btgtA),
+ .btgtB(btgtB),
+ .btgtC(btgtC),
+ .btgtD(btgtD),
+ .btgtE(btgtE),
+ .btgtF(btgtF),
+ .nop_fetchbuf(nop_fetchbuf),
+ .take_branch0(take_branch0),
+ .take_branch1(take_branch1),
+ .take_branch2(take_branch2),
+ .stompedRets(stompedOnRets),
+ .pred_on(pred_on),
+ .panic(fb_panic)
+);
+end
+else if (`WAYS > 1) begin : gb1
+FT64_fetchbuf #(AMSB,RSTPC) ufb1
+(
+ .rst(rst),
+ .clk4x(clk4x),
+ .clk(clk),
+ .fcu_clk(fcu_clk),
+ .cs_i(vadr[31:16]==16'hFFFF),
+ .cyc_i(cyc),
+ .stb_i(stb_o),
+ .ack_o(dc_ack),
+ .we_i(we),
+ .adr_i(vadr[15:0]),
+ .dat_i(dat_o[47:0]),
+ .cmpgrp(cr0[10:8]),
+ .freezePC(freezePC),
+ .regLR(regLR),
+ .thread_en(thread_en),
+ .insn0(insn0),
+ .insn1(insn1),
+ .phit(phit),
+ .threadx(threadx),
+ .branchmiss(branchmiss),
+ .misspc(misspc),
+ .branchmiss_thrd(branchmiss_thrd),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_takenC(predict_takenC),
+ .predict_takenD(predict_takenD),
+ .predict_taken0(predict_taken0),
+ .predict_taken1(predict_taken1),
+ .queued1(queued1),
+ .queued2(queued2),
+ .queuedNop(queuedNop),
+ .pc0(pc0a),
+ .pc1(pc1a),
+ .fetchbuf(fetchbuf),
+ .fetchbufA_v(fetchbufA_v),
+ .fetchbufB_v(fetchbufB_v),
+ .fetchbufC_v(fetchbufC_v),
+ .fetchbufD_v(fetchbufD_v),
+ .fetchbufA_pc(fetchbufA_pc),
+ .fetchbufB_pc(fetchbufB_pc),
+ .fetchbufC_pc(fetchbufC_pc),
+ .fetchbufD_pc(fetchbufD_pc),
+ .fetchbufA_instr(fetchbufA_instr),
+ .fetchbufB_instr(fetchbufB_instr),
+ .fetchbufC_instr(fetchbufC_instr),
+ .fetchbufD_instr(fetchbufD_instr),
+ .fetchbuf0_instr(fetchbuf0_instr),
+ .fetchbuf1_instr(fetchbuf1_instr),
+ .fetchbuf0_thrd(fetchbuf0_thrd),
+ .fetchbuf1_thrd(fetchbuf1_thrd),
+ .fetchbuf0_pc(fetchbuf0_pc),
+ .fetchbuf1_pc(fetchbuf1_pc),
+ .fetchbuf0_v(fetchbuf0_v),
+ .fetchbuf1_v(fetchbuf1_v),
+ .fetchbuf0_insln(fetchbuf0_insln),
+ .fetchbuf1_insln(fetchbuf1_insln),
+ .codebuf0(codebuf[insn0[21:16]]),
+ .codebuf1(codebuf[insn1[21:16]]),
+ .btgtA(btgtA),
+ .btgtB(btgtB),
+ .btgtC(btgtC),
+ .btgtD(btgtD),
+ .nop_fetchbuf(nop_fetchbuf),
+ .take_branch0(take_branch0),
+ .take_branch1(take_branch1),
+ .stompedRets(stompedOnRets),
+ .pred_on(pred_on),
+ .panic(fb_panic)
+);
+end
+else begin : gb1
+FT64_fetchbuf_x1 #(AMSB,RSTPC) ufb1
+(
+ .rst(rst),
+ .clk4x(clk4x),
+ .clk(clk),
+ .fcu_clk(fcu_clk),
+ .cs_i(vadr[31:16]==16'hFFFF),
+ .cyc_i(cyc),
+ .stb_i(stb_o),
+ .ack_o(dc_ack),
+ .we_i(we),
+ .adr_i(vadr[15:0]),
+ .dat_i(dat_o[47:0]),
+ .cmpgrp(cr0[10:8]),
+ .freezePC(freezePC),
+ .regLR(regLR),
+ .thread_en(thread_en),
+ .insn0(insn0),
+ .phit(phit),
+ .threadx(threadx),
+ .branchmiss(branchmiss),
+ .misspc(misspc),
+ .branchmiss_thrd(branchmiss_thrd),
+ .predict_takenA(predict_takenA),
+ .predict_takenB(predict_takenB),
+ .predict_taken0(predict_taken0),
+ .queued1(queued1),
+ .queuedNop(queuedNop),
+ .pc0(pc0a),
+ .fetchbuf(fetchbuf),
+ .fetchbufA_v(fetchbufA_v),
+ .fetchbufB_v(fetchbufB_v),
+ .fetchbufA_pc(fetchbufA_pc),
+ .fetchbufB_pc(fetchbufB_pc),
+ .fetchbufA_instr(fetchbufA_instr),
+ .fetchbufB_instr(fetchbufB_instr),
+ .fetchbuf0_instr(fetchbuf0_instr),
+ .fetchbuf0_thrd(fetchbuf0_thrd),
+ .fetchbuf0_pc(fetchbuf0_pc),
+ .fetchbuf0_v(fetchbuf0_v),
+ .fetchbuf0_insln(fetchbuf0_insln),
+ .fetchbuf0_pbyte(fetchbuf0_pbyte),
+ .codebuf0(codebuf[insn0[21:16]]),
+ .btgtA(btgtA),
+ .btgtB(btgtB),
+ .nop_fetchbuf(nop_fetchbuf),
+ .take_branch0(take_branch0),
+ .stompedRets(stompedOnRets),
+ .pred_on(pred_on),
+ .panic(fb_panic)
+);
+// Single-way build: the second fetch slot never holds a valid instruction.
+assign fetchbuf1_v = `INV;
+end
+end
+endgenerate
+
+// cmt_headN: the entry at heads[N] does not write the register file, is not
+// an oddball and carries no exception code. The table below uses these to
+// let such an entry retire even when `NUM_CMT alone would not allow it.
+wire cmt_head1 = (!iqentry_rfw[heads[1]] && !iqentry_oddball[heads[1]] && ~|iqentry_exc[heads[1]]);
+wire cmt_head2 = (!iqentry_rfw[heads[2]] && !iqentry_oddball[heads[2]] && ~|iqentry_exc[heads[2]]);
+
+// Determine the head increment amount, this must match code later on.
+//
+// hi_amt (0..3) is how many queue entries the head pointers advance by this
+// cycle. The casez selector is, for heads[0], heads[1], heads[2] in turn:
+//   { valid, state==IQS_CMT }
+// An invalid entry can be skipped as long as it is not the tail (tail0); a
+// valid entry can be passed only when it is in the commit state. `NUM_CMT
+// and cmt_head1/cmt_head2 limit how many valid entries retire together.
+//
+// This is purely combinational logic, so blocking assignments are used and
+// the default is a correctly sized 3-bit literal.
+reg [2:0] hi_amt;
+always @*
+begin
+ hi_amt = 3'd0;
+ casez ({ iqentry_v[heads[0]],
+ iqentry_state[heads[0]]==IQS_CMT,
+ iqentry_v[heads[1]],
+ iqentry_state[heads[1]]==IQS_CMT,
+ iqentry_v[heads[2]],
+ iqentry_state[heads[2]]==IQS_CMT})
+
+ // retire 3
+ 6'b0?_0?_0?:
+ if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0)
+ hi_amt = 3'd3;
+ else if (heads[0] != tail0 && heads[1] != tail0)
+ hi_amt = 3'd2;
+ else if (heads[0] != tail0)
+ hi_amt = 3'd1;
+ 6'b0?_0?_10:
+ if (heads[0] != tail0 && heads[1] != tail0)
+ hi_amt = 3'd2;
+ else if (heads[0] != tail0)
+ hi_amt = 3'd1;
+ else
+ hi_amt = 3'd0;
+ 6'b0?_0?_11:
+ if (`NUM_CMT > 2 || cmt_head2)
+ hi_amt = 3'd3;
+ else
+ hi_amt = 3'd2;
+
+ // retire 1 (wait for regfile for heads[1])
+ 6'b0?_10_??:
+ hi_amt = 3'd1;
+
+ // retire 2
+ 6'b0?_11_0?,
+ 6'b0?_11_10:
+ if (`NUM_CMT > 1 || cmt_head1)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ 6'b0?_11_11:
+ if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
+ hi_amt = 3'd3;
+ else if (`NUM_CMT > 1 || cmt_head1)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ // heads[0] valid but not yet committable: nothing retires (default 0).
+ 6'b10_??_??: ;
+ 6'b11_0?_0?:
+ if (heads[1] != tail0 && heads[2] != tail0)
+ hi_amt = 3'd3;
+ else if (heads[1] != tail0)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ 6'b11_0?_10:
+ if (heads[1] != tail0)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ 6'b11_0?_11:
+ if (heads[1] != tail0) begin
+ if (`NUM_CMT > 2 || cmt_head2)
+ hi_amt = 3'd3;
+ else
+ hi_amt = 3'd2;
+ end
+ else
+ hi_amt = 3'd1;
+ 6'b11_10_??:
+ hi_amt = 3'd1;
+ 6'b11_11_0?:
+ if (`NUM_CMT > 1 && heads[2] != tail0)
+ hi_amt = 3'd3;
+ else if (cmt_head1 && heads[2] != tail0)
+ hi_amt = 3'd3;
+ else if (`NUM_CMT > 1 || cmt_head1)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ 6'b11_11_10:
+ if (`NUM_CMT > 1 || cmt_head1)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ 6'b11_11_11:
+ if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
+ hi_amt = 3'd3;
+ else if (`NUM_CMT > 1 || cmt_head1)
+ hi_amt = 3'd2;
+ else
+ hi_amt = 3'd1;
+ default:
+ begin
+ hi_amt = 3'd0;
+ $display("hi_amt: Uncoded case %h",{ iqentry_v[heads[0]],
+ iqentry_state[heads[0]],
+ iqentry_v[heads[1]],
+ iqentry_state[heads[1]],
+ iqentry_v[heads[2]],
+ iqentry_state[heads[2]]});
+ end
+ endcase
+end
+
+// Amount subtracted from sequence numbers
+//
+// Among the heads covered by hi_amt, takes the sequence number of the
+// highest-indexed one that is valid (heads[2] before heads[1] before
+// heads[0]); zero when none of them is valid or hi_amt is 0.
+// Combinational logic, so blocking assignments are used.
+reg [`SNBITS] tosub;
+always @*
+case(hi_amt)
+3'd3: tosub = (iqentry_v[heads[2]] ? iqentry_sn[heads[2]]
+ : iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+ : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+ : 4'b0);
+3'd2: tosub = (iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+ : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+ : 4'b0);
+3'd1: tosub = (iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+ : 4'b0);
+default: tosub = 4'd0;
+endcase
+
+//initial begin: stop_at
+//#1000000; panic <= `PANIC_OVERRUN;
+//end
+
+//
+// BRANCH-MISS LOGIC: livetarget
+//
+// livetarget implies that there is a not-to-be-stomped instruction that targets the register in question
+// therefore, if it is zero it implies the rf_v value should become VALID on a branchmiss
+//
+
+// livetarget[j] is the OR over all queue entries of iqentry_livetarget[n][j].
+// NOTE(review): the loop starts at j = 1, so livetarget[0] is never assigned
+// here -- presumably register 0 is never a live target; confirm.
+always @*
+for (j = 1; j < PREGS; j = j + 1) begin
+ livetarget[j] = 1'b0;
+ for (n = 0; n < QENTRIES; n = n + 1)
+ livetarget[j] = livetarget[j] | iqentry_livetarget[n][j];
+end
+
+// Per-entry live-target vector: the decoded target (iq_out[n]) of entry n,
+// kept only when the entry is valid, not stomped and belongs to the thread
+// of the branch miss. With SUPPORT_PREDICATION, entries flagged iqentry_cmp
+// are excluded.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+`ifdef SUPPORT_PREDICATION
+ iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & ~{PREGS{iqentry_cmp[n]}};
+`else
+ iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n];
+`endif
+
+// Predicate-register counterpart of the livetarget logic, built only with
+// SUPPORT_PREDICATION. Uses 16-bit vectors and keeps only entries flagged
+// iqentry_cmp.
+`ifdef SUPPORT_PREDICATION
+// plivetarget[j]: OR of iqentry_plivetarget[n][j] over all queue entries
+// (index 0 is not assigned by this loop).
+always @*
+for (j = 1; j < 16; j = j + 1) begin
+ plivetarget[j] = 1'b0;
+ for (n = 0; n < QENTRIES; n = n + 1)
+ plivetarget[j] = plivetarget[j] | iqentry_plivetarget[n][j];
+end
+
+// Per-entry vector: iq_out[n] masked by valid, not stomped, same thread as
+// the branch miss, and iqentry_cmp.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_plivetarget[n] = {16 {iqentry_v[n]}} & {16 {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & {16{iqentry_cmp[n]}};
+`endif
+
+//
+// BRANCH-MISS LOGIC: latestID
+//
+// latestID is the instruction queue ID of the newest instruction (latest) that targets
+// a particular register. looks a lot like scheduling logic, but in reverse.
+//
+// iqentry_cumulative[n]: OR of the live-target vectors of entries n, n+1, ...
+// (modulo QENTRIES) up to and including the entry whose index equals missid.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ iqentry_cumulative[n] = 1'b0;
+ for (j = n; j < n + QENTRIES; j = j + 1) begin
+ // j walks forward from n with wrap-around until it reaches missid.
+ if (missid==(j % QENTRIES))
+ for (k = n; k <= j; k = k + 1)
+ iqentry_cumulative[n] = iqentry_cumulative[n] | iqentry_livetarget[k % QENTRIES];
+ end
+ end
+
+// iqentry_latestID[n]: entry n's live-target vector if n is the missid entry
+// or none of its targets appears in the cumulative vector starting at the
+// next entry; all zeros otherwise.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_latestID[n] = (missid == n || ((iqentry_livetarget[n] & iqentry_cumulative[(n+1)%QENTRIES]) == {PREGS{1'b0}}))
+ ? iqentry_livetarget[n]
+ : {PREGS{1'b0}};
+
+// iqentry_source[n]: set when entry n is the latest writer of any register.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_source[n] = | iqentry_latestID[n];
+
+// Predicate-register counterpart of the latestID logic (16-bit vectors),
+// built only with SUPPORT_PREDICATION.
+`ifdef SUPPORT_PREDICATION
+// iqentry_pcumulative[n]: OR of iqentry_plivetarget for entries n .. missid
+// (modulo QENTRIES).
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ iqentry_pcumulative[n] = 1'b0;
+ for (j = n; j < n + QENTRIES; j = j + 1) begin
+ if (missid==(j % QENTRIES))
+ for (k = n; k <= j; k = k + 1)
+ iqentry_pcumulative[n] = iqentry_pcumulative[n] | iqentry_plivetarget[k % QENTRIES];
+ end
+ end
+
+// iqentry_platestID[n]: entry n's vector if n is the missid entry or none of
+// its targets appears in the cumulative vector of the next entry.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_platestID[n] = (missid == n || ((iqentry_plivetarget[n] & iqentry_pcumulative[(n+1)%QENTRIES]) == {16{1'b0}}))
+ ? iqentry_plivetarget[n]
+ : {16{1'b0}};
+
+// iqentry_psource[n]: set when iqentry_platestID[n] is non-zero.
+always @*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ iqentry_psource[n] = | iqentry_platestID[n];
+
+`endif
+
+// Register specifiers for the two fetch-buffer slots. Each is the decoded
+// register number from fnRa/fnRb/fnRc/fnRt OR-ed with the slot's thread bit
+// placed above a 7-bit field ({thrd,7'b0}).
+// For slot 1, when vqueued2 is set, the slot-0 element counters plus one
+// (vqe0 + 1, vqet0 + 1) are used instead of vqe1 / vqet1.
+reg vqueued2;
+assign Ra0 = fnRa(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
+assign Rb0 = fnRb(fetchbuf0_instr,1'b0,vqe0,rfoa0[5:0],rfoa1[5:0],fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
+assign Rc0 = fnRc(fetchbuf0_instr,vqe0,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
+assign Rt0 = fnRt(fetchbuf0_instr,vqet0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0};
+assign Ra1 = fnRa(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
+assign Rb1 = fnRb(fetchbuf1_instr,1'b1,vqueued2 ? vqe0 + 1 : vqe1,rfoa0[5:0],rfoa1[5:0],fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
+assign Rc1 = fnRc(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
+assign Rt1 = fnRt(fetchbuf1_instr,vqueued2 ? vqet0 + 1 : vqet1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0};
+
+//
+// additional logic for ISSUE
+//
+// for the moment, we look at ALU-input buffers to allow back-to-back issue of
+// dependent instructions ... we do not, however, look ahead for DRAM requests
+// that will become valid in the next cycle. instead, these have to propagate
+// their results into the IQ entry directly, at which point it becomes issue-able
+//
+
+// note that, for all intents & purposes, iqentry_done == iqentry_agen ... no need to duplicate
+
+// Per-entry issue qualification:
+//   args_valid[g]    - all three operands of entry g are usable
+//   could_issue[g]   - entry g may be issued to a functional unit
+//   could_issueid[g] - entry g may be issued to an instruction decoder
+wire [QENTRIES-1:0] args_valid;
+wire [QENTRIES-1:0] could_issue;
+wire [QENTRIES-1:0] could_issueid;
+
+// Note that bypassing is provided only from the first fpu.
+generate begin : issue_logic
+for (g = 0; g < QENTRIES; g = g + 1)
+begin
+// An operand is usable when its valid bit is set or, with FU_BYPASS, when
+// its source id matches a functional unit whose dataready flag is set
+// (ALU results only when the ALU op is not a memory op, or is a push).
+// Operand 3 has no FPU bypass term.
+assign args_valid[g] =
+ (iqentry_a1_v[g]
+`ifdef FU_BYPASS
+ || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
+ || ((iqentry_a1_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
+ || ((iqentry_a1_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
+`endif
+ )
+ && (iqentry_a2_v[g] || iqentry_mem[g] // a2 does not need to be valid immediately for a mem op (agen), it is checked by iqentry_memready logic
+`ifdef FU_BYPASS
+ || (iqentry_a2_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
+ || ((iqentry_a2_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
+ || ((iqentry_a2_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0))
+`endif
+ )
+ && (iqentry_a3_v[g]
+ || (iqentry_mem[g] & ~iqentry_agen[g] & ~iqentry_memndx[g]) // a3 needs to be valid for indexed instruction
+// || (iqentry_mem[g] & ~iqentry_agen[g])
+`ifdef FU_BYPASS
+ || (iqentry_a3_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push))
+ || ((iqentry_a3_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1))
+`endif
+ )
+ ;
+
+// Issue to a functional unit requires a valid entry in the queued state with
+// usable operands and the iqentry_iv flag set.
+assign could_issue[g] = iqentry_v[g] && iqentry_state[g]==IQS_QUEUED
+ && args_valid[g]
+ && iqentry_iv[g];
+ //&& (iqentry_mem[g] ? !iqentry_agen[g] : 1'b1);
+
+// Issue to a decoder requires a valid entry whose iqentry_iv flag is clear.
+assign could_issueid[g] = (iqentry_v[g])// || (g==tail0 && canq1))// || (g==tail1 && canq2))
+ && !iqentry_iv[g];
+// && (iqentry_a1_v[g]
+// || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready)
+// || (iqentry_a1_s[g] == alu1_sourceid && alu1_dataready));
+
+end
+end
+endgenerate
+
+// The (old) simulator didn't handle the asynchronous race loop properly in the
+// original code. It would issue two instructions to the same islot. So the
+// issue logic has been re-written to eliminate the asynchronous loop.
+// Can't issue to the ALU if it's busy doing a long running operation like a
+// divide.
+// ToDo: fix the memory synchronization, see fp_issue below
+// Instruction-decoder issue selection (omitted when INLINE_DECODE is
+// defined). Each decoder that is available picks at most one entry: the
+// first one, scanning from heads[0], that could_issueid allows and that no
+// lower-numbered decoder has already taken. Decoders 2 and 3 exist only
+// when `NUM_IDU is large enough.
+`ifndef INLINE_DECODE
+always @*
+begin
+ iqentry_id1issue = {QENTRIES{1'b0}};
+ if (id1_available) begin
+ for (n = 0; n < QENTRIES; n = n + 1)
+ // The all-zero test stops further grants once one bit has been set.
+ if (could_issueid[heads[n]] && iqentry_id1issue=={QENTRIES{1'b0}})
+ iqentry_id1issue[heads[n]] = `TRUE;
+ end
+end
+generate begin : gIDUIssue
+ if (`NUM_IDU > 1) begin
+ always @*
+ begin
+ iqentry_id2issue = {QENTRIES{1'b0}};
+ if (id2_available) begin
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && iqentry_id2issue=={QENTRIES{1'b0}})
+ iqentry_id2issue[heads[n]] = `TRUE;
+ end
+ end
+ end
+ if (`NUM_IDU > 2) begin
+ always @*
+ begin
+ iqentry_id3issue = {QENTRIES{1'b0}};
+ if (id3_available) begin
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (could_issueid[heads[n]]
+ && !iqentry_id1issue[heads[n]]
+ && !iqentry_id2issue[heads[n]]
+ && iqentry_id3issue=={QENTRIES{1'b0}})
+ iqentry_id3issue[heads[n]] = `TRUE;
+ end
+ end
+ end
+end
+endgenerate
+`endif // not INLINE_DECODE
+
+// Detect if there are any valid queue entries prior to the given queue entry.
+// prior_valid is indexed by queue slot (heads[j]); "prior" means any of
+// heads[0] .. heads[j-1]. The entry at heads[0] always gets 0.
+reg [QENTRIES-1:0] prior_valid;
+//generate begin : gPriorValid
+always @*
+for (j = 0; j < QENTRIES; j = j + 1)
+begin
+ prior_valid[heads[j]] = 1'b0;
+ if (j > 0)
+ // OR together the valid bits of every earlier head position.
+ for (n = j-1; n >= 0; n = n - 1)
+ prior_valid[heads[j]] = prior_valid[heads[j]]|iqentry_v[heads[n]];
+end
+//end
+//endgenerate
+
+// Detect if there are any valid sync instructions prior to the given queue
+// entry.
+// prior_sync is indexed by queue slot (heads[j]); it is set when any of
+// heads[0] .. heads[j-1] is both valid and flagged iqentry_sync.
+reg [QENTRIES-1:0] prior_sync;
+//generate begin : gPriorSync
+always @*
+for (j = 0; j < QENTRIES; j = j + 1)
+begin
+ prior_sync[heads[j]] = 1'b0;
+ if (j > 0)
+ for (n = j-1; n >= 0; n = n - 1)
+ prior_sync[heads[j]] = prior_sync[heads[j]]|(iqentry_v[heads[n]] & iqentry_sync[heads[n]]);
+end
+//end
+//endgenerate
+
+// Detect if there are any valid fsync instructions prior to the given queue
+// entry.
+reg [QENTRIES-1:0] prior_fsync;
+//generate begin : gPriorFsync
+// prior_fsync[q] is set when at least one entry ahead of q in heads[] order is
+// both valid and flagged in iqentry_fsync.  Always clear for heads[0].
+always @*
+begin
+    for (j = 0; j < QENTRIES; j = j + 1)
+    begin
+        prior_fsync[heads[j]] = 1'b0;
+        if (j > 0) begin
+            // Accumulate (valid & fsync) over heads[j-1] down to heads[0].
+            for (n = j-1; n >= 0; n = n - 1)
+                prior_fsync[heads[j]] = (iqentry_fsync[heads[n]] & iqentry_v[heads[n]]) | prior_fsync[heads[j]];
+        end
+    end
+end
+//end
+//endgenerate
+
+// Start search for instructions to process at head of queue (oldest instruction).
+// ALU issue select.
+// Each of iqentry_alu0_issue / iqentry_alu1_issue ends up with at most one bit
+// set: the first entry in heads[] order that satisfies that ALU's conditions.
+always @*
+begin
+    iqentry_alu0_issue = {QENTRIES{1'b0}};
+    iqentry_alu1_issue = {QENTRIES{1'b0}};
+
+    // ---- ALU 0 ----
+    if (alu0_available & alu0_idle) begin
+        for (n = 0; n < QENTRIES; n = n + 1) begin
+            if ((~|iqentry_alu0_issue)                  // nothing selected yet
+                && iqentry_alu[heads[n]]
+                && could_issue[heads[n]]
+                // A prior sync blocks issue only while a prior entry is
+                // still valid.
+                && !(prior_sync[heads[n]] && prior_valid[heads[n]])
+               )
+                iqentry_alu0_issue[heads[n]] = `TRUE;
+        end
+    end
+
+    // ---- ALU 1 (only when more than one ALU is configured) ----
+    if (alu1_available && alu1_idle && (`NUM_ALU > 1)) begin
+        for (n = 0; n < QENTRIES; n = n + 1) begin
+            if ((~|iqentry_alu1_issue)                  // nothing selected yet
+                && iqentry_alu[heads[n]]
+                && could_issue[heads[n]]
+                && !iqentry_alu0[heads[n]]              // skip entries marked alu0 only
+                && !iqentry_alu0_issue[heads[n]]        // skip the entry ALU 0 just took
+                && !(prior_sync[heads[n]] && prior_valid[heads[n]])
+               )
+                iqentry_alu1_issue[heads[n]] = `TRUE;
+        end
+    end
+end
+
+
+// Start search for instructions to process at head of queue (oldest instruction).
+// FPU issue select.
+// Each of iqentry_fpu1_issue / iqentry_fpu2_issue ends up with at most one bit
+// set: the first entry in heads[] order that satisfies that FPU's conditions.
+always @*
+begin
+    iqentry_fpu1_issue = {QENTRIES{1'b0}};
+    iqentry_fpu2_issue = {QENTRIES{1'b0}};
+
+    // ---- FPU 1 (only when at least one FPU is configured) ----
+    if (fpu1_available && fpu1_idle && (`NUM_FPU > 0)) begin
+        for (n = 0; n < QENTRIES; n = n + 1) begin
+            if ((~|iqentry_fpu1_issue)                  // nothing selected yet
+                && iqentry_fpu[heads[n]]
+                && could_issue[heads[n]]
+                // A prior sync or fsync blocks issue only while a prior
+                // entry is still valid.
+                && !((prior_sync[heads[n]]|prior_fsync[heads[n]]) && prior_valid[heads[n]])
+               )
+                iqentry_fpu1_issue[heads[n]] = `TRUE;
+        end
+    end
+
+    // ---- FPU 2 (only when more than one FPU is configured) ----
+    if (fpu2_available && fpu2_idle && (`NUM_FPU > 1)) begin
+        for (n = 0; n < QENTRIES; n = n + 1) begin
+            if ((~|iqentry_fpu2_issue)                  // nothing selected yet
+                && iqentry_fpu[heads[n]]
+                && could_issue[heads[n]]
+                && !iqentry_fpu1_issue[heads[n]]        // skip the entry FPU 1 just took
+                && !((prior_sync[heads[n]]|prior_fsync[heads[n]]) && prior_valid[heads[n]])
+               )
+                iqentry_fpu2_issue[heads[n]] = `TRUE;
+        end
+    end
+end
+
+reg [QENTRIES-1:0] nextqd;
+// Next queue id
+
+/*
+reg [`QBITS] nids [0:QENTRIES-1];
+always @*
+for (n = 0; n < QENTRIES; n = n + 1)
+begin
+ nids[n] = n[`QBITS];
+ for (j = n; j != (n+1) % QENTRIES; j = (j - 1) % QENTRIES)
+ if (iqentry_thrd[(j+1)%QENTRIES]==iqentry_thrd[n])
+ nids[n] = (j + 1) % QENTRIES;
+ // Add one more compare and set
+end
+*/
+
+// nids[j]: index of the nearest queue slot after j (searching forward with
+// wrap-around) whose iqentry_thrd value equals that of slot j.  When no other
+// slot matches, nids[j] is j itself.  Validity of the slots is not examined
+// here; only the thread fields are compared.
+reg [`QBITS] nids [0:QENTRIES-1];
+always @*
+for (j = 0; j < QENTRIES; j = j + 1) begin
+    // Explicit default: guarantees nids[j] is assigned on every path, so no
+    // latch is inferred and no stale value survives in simulation when the
+    // thread compare below evaluates to X.
+    nids[j] = j;
+    // We can't both start and stop at j, so walk backwards from j and stop
+    // before reaching j+1.  Later matches overwrite earlier ones, which makes
+    // the match closest after j win.
+    for (n = j; n != (j+1)%QENTRIES; n = (n + (QENTRIES-1)) % QENTRIES)
+        if (iqentry_thrd[n]==iqentry_thrd[j])
+            nids[j] = n;
+    // The loop above never visits slot j+1; test it last so that it takes
+    // priority over every other match.
+    if (iqentry_thrd[(j+1)%QENTRIES]==iqentry_thrd[j])
+        nids[j] = (j+1)%QENTRIES;
+end
+/*
+assign nids[0] = nid0;
+assign nids[1] = nid1;
+assign nids[2] = nid2;
+assign nids[3] = nid3;
+assign nids[4] = nid4;
+assign nids[5] = nid5;
+assign nids[6] = nid6;
+assign nids[7] = nid7;
+assign nids[8] = nid8;
+assign nids[9] = nid9;
+*/
+// Search the queue for the next entry on the same thread.
+// nid: nearest queue slot after fcu_id (wrapping around the queue) whose
+// iqentry_thrd value equals fcu_thrd; falls back to fcu_id when none matches.
+reg [`QBITS] nid;
+always @*
+begin
+    nid = fcu_id;
+    // Scan from the farthest offset down to offset 1 so that the closest
+    // matching slot is the last one assigned.
+    for (n = QENTRIES-1; n > 0; n = n - 1) begin
+        if (fcu_thrd == iqentry_thrd[(fcu_id + n) % QENTRIES])
+            nid = (fcu_id + n) % QENTRIES;
+    end
+end
+/*
+always @*
+if (iqentry_thrd[idp1(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp1(fcu_id);
+else if (iqentry_thrd[idp2(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp2(fcu_id);
+else if (iqentry_thrd[idp3(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp3(fcu_id);
+else if (iqentry_thrd[idp4(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp4(fcu_id);
+else if (iqentry_thrd[idp5(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp5(fcu_id);
+else if (iqentry_thrd[idp6(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp6(fcu_id);
+else if (iqentry_thrd[idp7(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp7(fcu_id);
+else if (iqentry_thrd[idp8(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp8(fcu_id);
+else if (iqentry_thrd[idp9(fcu_id)]==iqentry_thrd[fcu_id[`QBITS]])
+ nid = idp9(fcu_id);
+else
+ nid = fcu_id;
+*/
+// nextqd[n] is set when the slot selected by nids[n] carries a larger sequence
+// number (iqentry_sn) than slot n, or when slot n itself is valid.
+// Blocking assignment is used because this is purely combinational logic;
+// a nonblocking assignment here would delay the update seen by other
+// always @* blocks that read nextqd.
+always @*
+for (n = 0; n < QENTRIES; n = n + 1)
+    nextqd[n] = (iqentry_sn[nids[n]] > iqentry_sn[n]) || iqentry_v[n];
+
+//assign nextqd = 8'hFF;
+
+// Don't issue to the fcu until the following instruction is enqueued.
+// However, if the queue is full then issue anyway. A branch miss will likely occur.
+// Start search for instructions at head of queue (oldest instruction).
+// Flow-control unit issue select.
+// iqentry_fcu_issue ends up with at most one bit set: the first entry in
+// heads[] order that is a flow-control op, can issue, and either has nextqd
+// set or is flagged in iqentry_br.  Nothing is selected unless fcu_done is set
+// and branchmiss is clear.
+always @*
+begin
+    iqentry_fcu_issue = {QENTRIES{1'b0}};
+
+    if (fcu_done & ~branchmiss) begin
+        for (n = 0; n < QENTRIES; n = n + 1) begin
+            if ((~|iqentry_fcu_issue)                   // nothing selected yet
+                && iqentry_fc[heads[n]]
+                && could_issue[heads[n]]
+                && (iqentry_br[heads[n]] || nextqd[heads[n]])
+                // A prior sync blocks issue only while a prior entry is
+                // still valid.
+                && !(prior_sync[heads[n]] && prior_valid[heads[n]])
+               )
+                iqentry_fcu_issue[heads[n]] = `TRUE;
+        end
+    end
+end
+
+
+// Test if a given address is in the write buffer. This is done only for the
+// first two queue slots to save logic on comparators.
+// inwb0: set when the memory address of the entry at heads[0] matches a valid
+// write-buffer entry.  Without HAS_WB it is constantly FALSE.
+reg inwb0;
+always @*
+begin
+    inwb0 = FALSE;
+`ifdef HAS_WB
+    for (n = 0; n < `WB_DEPTH; n = n + 1) begin
+        // Addresses are compared on bits [AMSB:3]; the low three bits are ignored.
+        if (wb_v[n] && wb_addr[n][AMSB:3] == iqentry_ma[heads[0]][AMSB:3])
+            inwb0 = TRUE;
+    end
+`endif
+end
+
+// inwb1: set when the memory address of the entry at heads[1] matches a valid
+// write-buffer entry.  Without HAS_WB it is constantly FALSE.
+reg inwb1;
+always @*
+begin
+    inwb1 = FALSE;
+`ifdef HAS_WB
+    for (n = 0; n < `WB_DEPTH; n = n + 1) begin
+        // Addresses are compared on bits [AMSB:3]; the low three bits are ignored.
+        if (wb_v[n] && wb_addr[n][AMSB:3] == iqentry_ma[heads[1]][AMSB:3])
+            inwb1 = TRUE;
+    end
+`endif
+end
+
+// Decode the per-entry state (iqentry_state) into individual status flags:
+//   iqentry_v    - state is anything other than IQS_INVALID
+//   iqentry_done - state is IQS_DONE or IQS_CMT
+//   iqentry_out  - state is IQS_OUT
+//   iqentry_agen - state is IQS_AGEN
+// Blocking assignments are used because this is purely combinational logic;
+// nonblocking assignments here would delay the update seen by the other
+// always @* blocks that read these flags.
+always @*
+begin
+    for (n = 0; n < QENTRIES; n = n + 1) begin
+        iqentry_v[n]    = iqentry_state[n] != IQS_INVALID;
+        iqentry_done[n] = iqentry_state[n]==IQS_DONE || iqentry_state[n]==IQS_CMT;
+        iqentry_out[n]  = iqentry_state[n]==IQS_OUT;
+        iqentry_agen[n] = iqentry_state[n]==IQS_AGEN;
+    end
+end
+
+//
+// determine if the instructions ready to issue can, in fact, issue.
+// "ready" means that the instruction has valid operands but has not gone yet
+reg [1:0] issue_count, missue_count;
+generate begin : gMemIssue
+always @*
+begin
+ issue_count = 0;
+ memissue[ heads[0] ] = iqentry_memready[ heads[0] ] && !(iqentry_load[heads[0]] && inwb0); // first in line ... go as soon as ready
+ if (memissue[heads[0]])
+ issue_count = issue_count + 1;
+
+ memissue[ heads[1] ] = ~iqentry_stomp[heads[1]] && iqentry_memready[ heads[1] ] // addr and data are valid
+ && issue_count < `NUM_MEM
+ // ... and no preceding instruction is ready to go
+ //&& ~iqentry_memready[heads[0]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[1]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ // ... if a release, any prior memory ops must be done before this one
+ && (iqentry_rl[heads[1]] ? iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]] : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[1]] && (inwb1 || iqentry_store[heads[0]]))
+ // ... and, if it is a store, there is no chance of it being undone
+ && ((iqentry_load[heads[1]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]));
+ if (memissue[heads[1]])
+ issue_count = issue_count + 1;
+
+ memissue[ heads[2] ] = ~iqentry_stomp[heads[2]] && iqentry_memready[ heads[2] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ // ... if a release, any prior memory ops must be done before this one
+ && (iqentry_rl[heads[2]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[2]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[2]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]));
+ if (memissue[heads[2]])
+ issue_count = issue_count + 1;
+
+ memissue[ heads[3] ] = ~iqentry_stomp[heads[3]] && iqentry_memready[ heads[3] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ // ... if a release, any prior memory ops must be done before this one
+ && (iqentry_rl[heads[3]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[3]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[3]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]));
+ if (memissue[heads[3]])
+ issue_count = issue_count + 1;
+
+ if (QENTRIES > 4) begin
+ memissue[ heads[4] ] = ~iqentry_stomp[heads[4]] && iqentry_memready[ heads[4] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]])))
+ // ... if a release, any prior memory ops must be done before this one
+ && (iqentry_rl[heads[4]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[4]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_v[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[4]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]));
+ if (memissue[heads[4]])
+ issue_count = issue_count + 1;
+ end
+
+ if (QENTRIES > 5) begin
+ memissue[ heads[5] ] = ~iqentry_stomp[heads[5]] && iqentry_memready[ heads[5] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ //&& ~iqentry_memready[heads[4]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]])))
+ && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]]
+ || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]])))
+ // ... if a release, any prior memory ops must be done before this one
+ && (iqentry_rl[heads[5]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[5]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]
+ || iqentry_store[heads[4]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[5]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])
+ && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]));
+ if (memissue[heads[5]])
+ issue_count = issue_count + 1;
+ end
+
+`ifdef FULL_ISSUE_LOGIC
+if (QENTRIES > 6) begin
+ memissue[ heads[6] ] = ~iqentry_stomp[heads[6]] && iqentry_memready[ heads[6] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ //&& ~iqentry_memready[heads[4]]
+ //&& ~iqentry_memready[heads[5]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3])))
+ && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3])))
+ && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]]
+ || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3])))
+ && (iqentry_rl[heads[6]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]])
+ && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[6]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]
+ || iqentry_store[heads[4]] || iqentry_store[heads[5]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[6]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])
+ && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])
+ && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]));
+ if (memissue[heads[6]])
+ issue_count = issue_count + 1;
+ end
+
+ if (QENTRIES > 7) begin
+ memissue[ heads[7] ] = ~iqentry_stomp[heads[7]] && iqentry_memready[ heads[7] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ //&& ~iqentry_memready[heads[4]]
+ //&& ~iqentry_memready[heads[5]]
+ //&& ~iqentry_memready[heads[6]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]])))
+ && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]])))
+ && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]])))
+ && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]]
+ || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]])))
+ && (iqentry_rl[heads[7]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]])
+ : 1'b1)
+ // ... if a previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]])
+ && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]])
+ && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[7]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]
+ || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[7]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])
+ && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])
+ && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]])
+ && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]]));
+ if (memissue[heads[7]])
+ issue_count = issue_count + 1;
+ end
+
+ if (QENTRIES > 8) begin
+ memissue[ heads[8] ] = ~iqentry_stomp[heads[8]] && iqentry_memready[ heads[8] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ //&& ~iqentry_memready[heads[4]]
+ //&& ~iqentry_memready[heads[5]]
+ //&& ~iqentry_memready[heads[6]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]])))
+ && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]])))
+ && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]])))
+ && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]])))
+ && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]]
+ || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]])))
+ && (iqentry_rl[heads[8]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]])
+ && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]])
+ : 1'b1)
+ // ... and no valid previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]])
+ && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]])
+ && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]])
+ && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[8]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]
+ || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]])
+ )
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]])
+ )
+ )
+ // ... and, if it is a SW, there is no chance of it being undone
+ && ((iqentry_load[heads[8]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])
+ && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])
+ && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]])
+ && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]])
+ && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]])
+ );
+ if (memissue[heads[8]])
+ issue_count = issue_count + 1;
+ end
+
+ if (QENTRIES > 9) begin
+ memissue[ heads[9] ] = ~iqentry_stomp[heads[9]] && iqentry_memready[ heads[9] ] // addr and data are valid
+ // ... and no preceding instruction is ready to go
+ && issue_count < `NUM_MEM
+ //&& ~iqentry_memready[heads[0]]
+ //&& ~iqentry_memready[heads[1]]
+ //&& ~iqentry_memready[heads[2]]
+ //&& ~iqentry_memready[heads[3]]
+ //&& ~iqentry_memready[heads[4]]
+ //&& ~iqentry_memready[heads[5]]
+ //&& ~iqentry_memready[heads[6]]
+ // ... and there is no address-overlap with any preceding instruction
+ && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]])))
+ && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]])))
+ && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]])))
+ && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]])))
+ && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]])))
+ && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]])))
+ && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]])))
+ && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]])))
+ && (!iqentry_mem[heads[8]] || (iqentry_agen[heads[8]] & iqentry_out[heads[8]]) || iqentry_done[heads[8]]
+ || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[8]][AMSB:3] || iqentry_out[heads[8]] || iqentry_done[heads[8]])))
+ && (iqentry_rl[heads[9]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]])
+ && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]])
+ && (iqentry_done[heads[8]] || !iqentry_v[heads[8]] || !iqentry_mem[heads[8]])
+ : 1'b1)
+ // ... and no valid previous op has the acquire bit set
+ && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]])
+ && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]])
+ && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]])
+ && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]])
+ && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]])
+ && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]])
+ && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]])
+ && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]])
+ && !(iqentry_aq[heads[8]] && iqentry_v[heads[8]])
+ // ... and there's nothing in the write buffer during a load
+ && !(iqentry_load[heads[9]] && (wb_v!=1'b0
+ || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]]
+ || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]]
+ || iqentry_store[heads[8]]))
+ // ... and there isn't a barrier, or everything before the barrier is done or invalid
+ && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]]))
+ )
+ && (!(iqentry_iv[heads[8]] && iqentry_memsb[heads[8]]) ||
+ ((iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (iqentry_done[heads[6]] || !iqentry_v[heads[6]])
+ && (iqentry_done[heads[7]] || !iqentry_v[heads[7]])
+ )
+ )
+ && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]))
+ && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]))
+ )
+ && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]))
+ )
+ && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]))
+ )
+ && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]))
+ )
+ && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]))
+ )
+ && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]]))
+ )
+ && (!(iqentry_iv[heads[8]] && iqentry_memdb[heads[8]]) ||
+ ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])
+ && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])
+ && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])
+ && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])
+ && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])
+ && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])
+ && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]])
+ && (!iqentry_mem[heads[7]] || iqentry_done[heads[7]] || !iqentry_v[heads[7]])
+ )
+ )
+ // ... and, if it is a store, there is no chance of it being undone
+ && ((iqentry_load[heads[9]] && sple) ||
+ !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])
+ && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])
+ && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])
+ && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])
+ && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])
+ && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]])
+ && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]])
+ && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]])
+ && !(iqentry_fc[heads[8]]||iqentry_canex[heads[8]])
+ );
+ if (memissue[heads[9]])
+ issue_count = issue_count + 1;
+ end
+end
+end
+endgenerate
+`endif
+
+// Memory issue slot selection (combinational).
+// For each of the three candidate slots the queue is walked in heads[]
+// order, starting at heads[0]. An entry qualifies when it is flagged in
+// iqentry_memissue, is not being stomped, and the slot's memory unit and
+// DRAM channel are available. The loops have no early exit, so the index
+// left in last_issueN is the one found at the highest qualifying n.
+// A value of QENTRIES means nothing was selected for that slot.
+// NOTE(review): the previous comment stated that the oldest instruction is
+// selected first - confirm that against the no-early-exit behaviour above.
+always @*
+begin
+	last_issue0 = QENTRIES;
+	last_issue1 = QENTRIES;
+	last_issue2 = QENTRIES;
+	// Slot 0: needs memory unit 1 and DRAM channel 0.
+	for (n = 0; n < QENTRIES; n = n + 1) begin
+		if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]
+			&& mem1_available && (dram0 == `DRAMSLOT_AVAIL))
+			last_issue0 = heads[n];
+	end
+	// Slot 1: only when more than one memory unit is configured; must not
+	// pick the entry already chosen for slot 0.
+	for (n = 0; n < QENTRIES; n = n + 1) begin
+		if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]
+			&& mem2_available && (heads[n] != last_issue0) && (`NUM_MEM > 1)
+			&& (dram1 == `DRAMSLOT_AVAIL))
+			last_issue1 = heads[n];
+	end
+	// Slot 2: only when more than two memory units are configured; must not
+	// pick an entry already chosen for slot 0 or slot 1.
+	for (n = 0; n < QENTRIES; n = n + 1) begin
+		if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]
+			&& mem3_available && (heads[n] != last_issue0)
+			&& (heads[n] != last_issue1) && (`NUM_MEM > 2)
+			&& (dram2 == `DRAMSLOT_AVAIL))
+			last_issue2 = heads[n];
+	end
+end
+
+reg [2:0] wbptr;
+// Stomp logic for branch miss.
+/*
+FT64_stomp #(QENTRIES) ustmp1
+(
+ .branchmiss(branchmiss),
+ .branchmiss_thrd(branchmiss_thrd),
+ .missid(missid),
+ .head0(heads[0]),
+ .thrd(iqentry_thrd),
+ .iqentry_v(iqentry_v),
+ .stomp(iqentry_stomp)
+);
+*/
+// Branch-miss stomp vector (combinational).
+// On a branch miss, every valid queue entry that belongs to the missing
+// thread and whose sequence number (iqentry_sn) is greater than that of the
+// entry indexed by missid is marked in iqentry_stomp. With no branch miss
+// the vector is all zeros.
+// Blocking assignments are used because this is combinational logic;
+// non-blocking assignments in an always @* block defer the update to the
+// NBA region and are flagged by lint tools.
+always @*
+begin
+	iqentry_stomp = 1'b0;	// default: nothing stomped
+	if (branchmiss) begin
+		for (n = 0; n < QENTRIES; n = n + 1) begin
+			if (iqentry_v[n] && iqentry_thrd[n]==branchmiss_thrd) begin
+				if (iqentry_sn[n] > iqentry_sn[missid[`QBITS]])
+					iqentry_stomp[n] = `TRUE;
+			end
+		end
+	end
+	/*
+	if (fcu_branchmiss) begin
+		for (n = 0; n < QENTRIES; n = n + 1) begin
+			if (iqentry_v[n] && iqentry_thrd[n]==fcu_thrd) begin
+				if (iqentry_sn[n] > iqentry_sn[fcu_id[`QBITS]])
+					iqentry_stomp[n] = `TRUE;
+			end
+		end
+	end
+	*/
+end
+
+// Counts the queue entries that have both their iqentry_stomp and
+// iqentry_ret bits set. The count is rebuilt from zero on every evaluation.
+always @*
+begin
+	stompedOnRets = 1'b0;
+	for (n = 0; n < QENTRIES; n = n + 1) begin
+		if (iqentry_ret[n] && iqentry_stomp[n]) begin
+			stompedOnRets = stompedOnRets + 4'd1;
+		end
+	end
+end
+
+// Signals for up to three instruction decoder instances. The idN_vi,
+// idN_ido, idN_vo and idN_clk signals connect to the idv_i, id_o, idv_o and
+// clk ports of decoder N respectively.
+reg id1_vi, id2_vi, id3_vi;
+wire [4:0] id1_ido, id2_ido, id3_ido;
+wire id1_vo, id2_vo, id3_vo;
+wire id1_clk, id2_clk, id3_clk;
+
+// Always at least one decoder
+// The decoder is clocked directly from clk_i; the gated-clock buffer below
+// (enable = id1_available) is disabled.
+assign id1_clk = clk_i;
+//BUFGCE uclkb2
+//(
+// .I(clk_i),
+// .CE(id1_available),
+// .O(id1_clk)
+//);
+
+// First instruction decoder. With INLINE_DECODE defined it is fed straight
+// from fetch buffer 0 (fetchbuf0_instr, Rt0, predict_taken0, fetchbuf0_thrd
+// and the global vl); otherwise it is fed from the id1_* signals.
+FT64_idecoder uid1
+(
+ .clk(id1_clk),
+ .idv_i(id1_vi),
+ .id_i(id1_id),
+`ifdef INLINE_DECODE
+ .instr(fetchbuf0_instr),
+ .Rt(Rt0[4:0]),
+ .predict_taken(predict_taken0),
+ .thrd(fetchbuf0_thrd),
+ .vl(vl),
+`else
+ .instr(id1_instr),
+ .Rt(id1_Rt),
+ .predict_taken(id1_pt),
+ .thrd(id1_thrd),
+ .vl(id1_vl),
+`endif
+//ToDo: fix for vectors length and element number
+ .ven(id1_ven),
+ .bus(id1_bus),
+ .id_o(id1_ido),
+ .idv_o(id1_vo),
+ .debug_on(debug_on),
+ .pred_on(pred_on)
+);
+/*
+`ifdef INLINE_DECODE
+ id1_Rt <= Rt0[4:0];
+ id1_vl <= vl;
+ id1_ven <= venno;
+ id1_id <= tail;
+ id1_pt <= predict_taken0;
+ id1_thrd <= fetchbuf0_thrd;
+ setinsn1(tail,id1_bus);
+`endif
+*/
+// Optional second and third instruction decoders, elaborated only when
+// `NUM_IDU is greater than 1 and 2 respectively. Each mirrors uid1 but is
+// wired to fetch buffer 1 / 2 (INLINE_DECODE) or to the id2_* / id3_*
+// signals. The third decoder is named uid3; it was previously also called
+// uid2, duplicating the instance name of the second decoder.
+generate begin : gIDUInst
+if (`NUM_IDU > 1) begin
+// Decoder 2 is clocked directly from clk_i; the gated-clock buffer
+// (enable = id2_available) is disabled.
+//BUFGCE uclkb3
+//(
+// .I(clk_i),
+// .CE(id2_available),
+// .O(id2_clk)
+//);
+assign id2_clk = clk_i;
+
+FT64_idecoder uid2
+(
+ .clk(id2_clk),
+ .idv_i(id2_vi),
+ .id_i(id2_id),
+`ifdef INLINE_DECODE
+ .instr(fetchbuf1_instr),
+ .Rt(Rt1[4:0]),
+ .predict_taken(predict_taken1),
+ .thrd(fetchbuf1_thrd),
+ .vl(vl),
+`else
+ .instr(id2_instr),
+ .Rt(id2_Rt),
+ .predict_taken(id2_pt),
+ .thrd(id2_thrd),
+ .vl(id2_vl),
+`endif
+ .ven(id2_ven),
+ .bus(id2_bus),
+ .id_o(id2_ido),
+ .idv_o(id2_vo),
+ .debug_on(debug_on),
+ .pred_on(pred_on)
+);
+end
+if (`NUM_IDU > 2) begin
+// Decoder 3 is clocked directly from clk_i; the gated-clock buffer
+// (enable = id3_available) is disabled.
+//BUFGCE uclkb4
+//(
+// .I(clk_i),
+// .CE(id3_available),
+// .O(id3_clk)
+//);
+assign id3_clk = clk_i;
+
+FT64_idecoder uid3
+(
+ .clk(id3_clk),
+ .idv_i(id3_vi),
+ .id_i(id3_id),
+`ifdef INLINE_DECODE
+ .instr(fetchbuf2_instr),
+ .Rt(Rt2[4:0]),
+ .predict_taken(predict_taken2),
+ .thrd(fetchbuf2_thrd),
+ .vl(vl),
+`else
+ .instr(id3_instr),
+ .Rt(id3_Rt),
+ .predict_taken(id3_pt),
+ .thrd(id3_thrd),
+ .vl(id3_vl),
+`endif
+ .ven(id3_ven),
+ .bus(id3_bus),
+ .id_o(id3_ido),
+ .idv_o(id3_vo),
+ .debug_on(debug_on),
+ .pred_on(pred_on)
+);
+end
+end
+endgenerate
+
+//
+// EXECUTE
+//
+// 16-bit output of the lfsr instance below. The instance is parameterised
+// with (16, 16'hACE4) and its ports are connected by position.
+// NOTE(review): the two parameters look like width and seed/taps, and the
+// constant 1'b1 / 1'b0 ports like enable / clear - confirm against the lfsr
+// module definition.
+wire [15:0] lfsro;
+lfsr #(16,16'hACE4) u1 (rst, clk, 1'b1, 1'b0, lfsro);
+
+// CSR read multiplexer (combinational).
+// csrno is taken from bits [29:18] of the instruction in ALU 0. When
+// csrno[11:10] is greater than or equal to ol (ol[alu0_thrd] under
+// SUPPORT_SMT) the register selected by csrno[9:0] is driven onto csr_r;
+// otherwise csr_r reads as zero. An unrecognised number prints a message
+// and reads as 64'hEEEEEEEEEEEEEEEE.
+// NOTE(review): the csrno[11:10] >= ol comparison presumably is a
+// privilege / operating-level check - confirm.
+// The casez items are kept in their original order because some of the
+// `CSR_* patterns may contain wildcard bits.
+// Blocking assignments are used because this is combinational logic;
+// non-blocking assignments in an always @* block are flagged by lint tools.
+reg [63:0] csr_r;
+wire [11:0] csrno = alu0_instr[29:18];
+always @*
+begin
+`ifdef SUPPORT_SMT
+ if (csrno[11:10] >= ol[alu0_thrd])
+`else
+ if (csrno[11:10] >= ol)
+`endif
+ casez(csrno[9:0])
+ `CSR_CR0: csr_r = cr0;
+ `CSR_HARTID: csr_r = hartid;
+ `CSR_TICK: csr_r = tick;
+ `CSR_PCR: csr_r = pcr;
+ `CSR_PCR2: csr_r = pcr2;
+ `CSR_PMR: csr_r = pmr;
+ `CSR_WBRCD: csr_r = wbrcd;
+ `CSR_SEMA: csr_r = sema;
+ `CSR_KEYS: csr_r = keys;
+ `CSR_TCB: csr_r = tcb;
+ `CSR_FSTAT: csr_r = {fp_rgs,fp_status};
+`ifdef SUPPORT_DBG
+ `CSR_DBAD0: csr_r = dbg_adr0;
+ `CSR_DBAD1: csr_r = dbg_adr1;
+ `CSR_DBAD2: csr_r = dbg_adr2;
+ `CSR_DBAD3: csr_r = dbg_adr3;
+ `CSR_DBCTRL: csr_r = dbg_ctrl;
+ `CSR_DBSTAT: csr_r = dbg_stat;
+`endif
+ `CSR_CAS: csr_r = cas;
+ // Low bits of the CSR number select the element of the array.
+ `CSR_TVEC: csr_r = tvec[csrno[2:0]];
+ // These three are indexed by thread and by csrno[11:10].
+ `CSR_BADADR: csr_r = badaddr[{alu0_thrd,csrno[11:10]}];
+ `CSR_BADINSTR: csr_r = bad_instr[{alu0_thrd,csrno[11:10]}];
+ `CSR_CAUSE: csr_r = {48'd0,cause[{alu0_thrd,csrno[11:10]}]};
+`ifdef SUPPORT_SMT
+ // Per-thread copies, selected by the thread of the ALU 0 instruction.
+ `CSR_IM_STACK: csr_r = im_stack[alu0_thrd];
+ `CSR_OL_STACK: csr_r = {dl_stack[alu0_thrd],ol_stack[alu0_thrd]};
+ `CSR_PL_STACK: csr_r = pl_stack[alu0_thrd];
+ `CSR_RS_STACK: csr_r = rs_stack[alu0_thrd];
+ `CSR_STATUS: csr_r = mstatus[alu0_thrd][63:0];
+ `CSR_BRS_STACK: csr_r = brs_stack[alu0_thrd];
+ `CSR_EPC0: csr_r = epc0[alu0_thrd];
+ `CSR_EPC1: csr_r = epc1[alu0_thrd];
+ `CSR_EPC2: csr_r = epc2[alu0_thrd];
+ `CSR_EPC3: csr_r = epc3[alu0_thrd];
+ `CSR_EPC4: csr_r = epc4[alu0_thrd];
+ `CSR_EPC5: csr_r = epc5[alu0_thrd];
+ `CSR_EPC6: csr_r = epc6[alu0_thrd];
+ `CSR_EPC7: csr_r = epc7[alu0_thrd];
+`else
+ `CSR_IM_STACK: csr_r = im_stack;
+ `CSR_OL_STACK: csr_r = {dl_stack,ol_stack};
+ `CSR_PL_STACK: csr_r = pl_stack;
+ `CSR_RS_STACK: csr_r = rs_stack;
+ `CSR_STATUS: csr_r = mstatus[63:0];
+ `CSR_BRS_STACK: csr_r = brs_stack;
+ `CSR_EPC0: csr_r = epc0;
+ `CSR_EPC1: csr_r = epc1;
+ `CSR_EPC2: csr_r = epc2;
+ `CSR_EPC3: csr_r = epc3;
+ `CSR_EPC4: csr_r = epc4;
+ `CSR_EPC5: csr_r = epc5;
+ `CSR_EPC6: csr_r = epc6;
+ `CSR_EPC7: csr_r = epc7;
+`endif
+ `CSR_CODEBUF: csr_r = codebuf[csrno[5:0]];
+`ifdef SUPPORT_BBMS
+ `CSR_TB: csr_r = tb;
+ `CSR_CBL: csr_r = cbl;
+ `CSR_CBU: csr_r = cbu;
+ `CSR_RO: csr_r = ro;
+ `CSR_DBL: csr_r = dbl;
+ `CSR_DBU: csr_r = dbu;
+ `CSR_SBL: csr_r = sbl;
+ `CSR_SBU: csr_r = sbu;
+ `CSR_ENU: csr_r = en;
+`endif
+`ifdef SUPPORT_PREDICATION
+ `CSR_PREGS: read_pregs(csr_r);
+`endif
+ `CSR_Q_CTR: csr_r = iq_ctr;
+ `CSR_BM_CTR: csr_r = bm_ctr;
+ `CSR_ICL_CTR: csr_r = icl_ctr;
+ `CSR_IRQ_CTR: csr_r = irq_ctr;
+ `CSR_TIME: csr_r = wc_times;
+ // Identification words, selected by the low four bits of the number.
+ `CSR_INFO:
+ case(csrno[3:0])
+ 4'd0: csr_r = "Finitron"; // manufacturer
+ 4'd1: csr_r = " ";
+ 4'd2: csr_r = "64 bit "; // CPU class
+ 4'd3: csr_r = " ";
+ 4'd4: csr_r = "FT64 "; // Name
+ 4'd5: csr_r = " ";
+ 4'd6: csr_r = 64'd1; // model #
+ 4'd7: csr_r = 64'd1; // serial number
+ 4'd8: csr_r = {32'd16384,32'd16384}; // cache sizes instruction,csr_ra
+ 4'd9: csr_r = 64'd0;
+ default: csr_r = 64'd0;
+ endcase
+ default: begin
+ $display("Unsupported CSR:%h",csrno[10:0]);
+ csr_r = 64'hEEEEEEEEEEEEEEEE;
+ end
+ endcase
+ else
+ csr_r = 64'h0;
+end
+
+// alu0_xu / alu1_xu are initialised to zero at declaration.
+reg [63:0] alu0_xu = 1'd0, alu1_xu = 1'd0;
+
+`ifdef SUPPORT_BBMS
+
+`else
+// Without SUPPORT_BBMS, alu0_xs and alu1_xs are tied to zero.
+// Author's note: these always blocks did not work in simulation and left
+// the signals as X's, so the signals are zeroed at their reg declaration
+// instead. The cause is that the body reads no signals, so the implicit
+// sensitivity list of @* is empty and the block is never triggered.
+// NOTE(review): the declaration just above initialises alu0_xu / alu1_xu,
+// whereas these blocks assign alu0_xs / alu1_xs - confirm that the _xs
+// declarations (not visible here) carry the zero initialiser.
+always @*
+ alu0_xs <= 64'd0;
+always @*
+ alu1_xs <= 64'd0;
+`endif
+
+//always @*
+// read_csr(alu0_instr[29:18],csr_r,alu0_thrd);
+// Primary ALU (BIG = 1). It receives the CSR read value csr_r and the
+// TLB / address-translation connections (cyc/we/vadr in, cyc_o/we_o/adr_o
+// out). The segmentation and BBMS port groups are optional; each group
+// starts with its own separating comma so that the port list stays valid
+// for every combination of SUPPORT_SEGMENTATION and SUPPORT_BBMS.
+FT64_alu #(.BIG(1'b1),.SUP_VECTOR(SUP_VECTOR)) ualu0 (
+ .rst(rst),
+ .clk(clk),
+ .ld(alu0_ld),
+ .abort(alu0_abort),
+ .instr(alu0_instr),
+ .sz(alu0_sz),
+ .tlb(alu0_tlb),
+ .store(alu0_store),
+ .a(alu0_argA),
+ .b(alu0_argB),
+ .c(alu0_argC),
+ .pc(alu0_pc),
+// .imm(alu0_argI),
+ .tgt(alu0_tgt),
+ .ven(alu0_ven),
+ .vm(vm[alu0_instr[25:23]]),
+ .csr(csr_r),
+ .o(alu0_out),
+ .ob(alu0b_bus),
+ .done(alu0_done),
+ .idle(alu0_idle),
+ .excen(aec[4:0]),
+ .exc(alu0_exc),
+ .thrd(alu0_thrd),
+ .mem(alu0_mem),
+ .shift(alu0_shft), // 48 bit shift inst.
+ .ol(ol),
+ .ASID(ASID),
+ .icl_i(icl_o),
+ .cyc_i(cyc),
+ .we_i(we),
+ .vadr_i(vadr),
+ .cyc_o(cyc_o),
+ .we_o(we_o),
+ .padr_o(adr_o),
+ .uncached(),
+ .tlb_miss(tlb_miss),
+ .exv_o(exv_i),
+ .wrv_o(wrv_i),
+ .rdv_o(rdv_i)
+`ifdef SUPPORT_SEGMENTATION
+ ,
+ .zs_base(zsx_base),
+ .ds_base(dsx_base),
+ .es_base(esx_base),
+ .fs_base(fsx_base),
+ .gs_base(gsx_base),
+ .hs_base(hsx_base),
+ .ss_base(ssx_base),
+ .cs_base(csx_base),
+ .zsub(zsub),
+ .dsub(dsub),
+ .esub(esub),
+ .fsub(fsub),
+ .gsub(gsub),
+ .hsub(hsub),
+ .ssub(ssub),
+ .csub(csub),
+ .zslb(zslb),
+ .dslb(dslb),
+ .eslb(eslb),
+ .fslb(fslb),
+ .gslb(gslb),
+ .hslb(hslb),
+ .sslb(sslb),
+ .cslb(cslb)
+`endif
+`ifdef SUPPORT_BBMS
+ ,	// separates this group from the preceding port connection
+ .pb(dl==2'b00 ? 64'd0 : pb),
+ .cbl(cbl),
+ .cbu(cbu),
+ .ro(ro),
+ .dbl(dbl),
+ .dbu(dbu),
+ .sbl(sbl),
+ .sbu(sbu),
+ .en(en)
+`endif
+);
+// Optional second ALU, only generated when `NUM_ALU > 1. It is instantiated
+// with BIG=0 and has its CSR, TLB, thread, operating-level, ASID and bus
+// inputs tied to constants; the corresponding outputs are left open.
+// The optional port groups below each begin with their own separating comma
+// so the port list is well formed for every combination of
+// SUPPORT_SEGMENTATION and SUPPORT_BBMS.
+generate begin : gAluInst
+if (`NUM_ALU > 1) begin
+FT64_alu #(.BIG(1'b0),.SUP_VECTOR(SUP_VECTOR)) ualu1 (
+ .rst(rst),
+ .clk(clk),
+ .ld(alu1_ld),
+ .abort(alu1_abort),
+ .instr(alu1_instr),
+ .sz(alu1_sz),
+ .tlb(1'b0),
+ .store(alu1_store),
+ .a(alu1_argA),
+ .b(alu1_argB),
+ .c(alu1_argC),
+ .pc(alu1_pc),
+ //.imm(alu1_argI),
+ .tgt(alu1_tgt),
+ .ven(alu1_ven),
+ .vm(vm[alu1_instr[25:23]]),	// vector mask register selected by instr[25:23]
+ .csr(64'd0),
+ .o(alu1_out),
+ .ob(alu1b_bus),
+ .done(alu1_done),
+ .idle(alu1_idle),
+ .excen(aec[4:0]),
+ .exc(alu1_exc),
+ .thrd(1'b0),
+ .mem(alu1_mem),
+ .shift(alu1_shft),
+ .ol(2'b0),
+ .ASID(8'h0),
+ .cyc_i(1'b0),
+ .we_i(1'b0),
+ .vadr_i(64'd0),
+ .cyc_o(),
+ .we_o(),
+ .padr_o(),
+ .uncached(),
+ .tlb_miss(),
+ .exv_o(),
+ .wrv_o(),
+ .rdv_o()
+`ifdef SUPPORT_SEGMENTATION
+ ,
+ .zs_base(zsx_base),
+ .ds_base(dsx_base),
+ .es_base(esx_base),
+ .fs_base(fsx_base),
+ .gs_base(gsx_base),
+ .hs_base(hsx_base),
+ .ss_base(ssx_base),
+ .cs_base(csx_base),
+ .zsub(zsub),
+ .dsub(dsub),
+ .esub(esub),
+ .fsub(fsub),
+ .gsub(gsub),
+ .hsub(hsub),
+ .ssub(ssub),
+ .csub(csub),
+ .zslb(zslb),
+ .dslb(dslb),
+ .eslb(eslb),
+ .fslb(fslb),
+ .gslb(gslb),
+ .hslb(hslb),
+ .sslb(sslb),
+ .cslb(cslb)
+`endif
+`ifdef SUPPORT_BBMS
+ ,	// separator was missing: the preceding connection never ends with a comma
+ .pb(dl==2'b00 ? 64'd0 : pb),
+ .cbl(cbl),
+ .cbu(cbu),
+ .ro(ro),
+ .dbl(dbl),
+ .dbu(dbu),
+ .sbl(sbl),
+ .sbu(sbu),
+ .en(en)
+`endif
+);
+end
+end
+endgenerate
+
+// Result staging for every functional unit: the unit's raw output is copied
+// onto its result bus and its commit flag is held at 1.
+always @*
+begin
+ // ALU 0
+ alu0_bus <= alu0_out;
+ alu0_cmt <= 1'b1;
+
+ // ALU 1
+ alu1_bus <= alu1_out;
+ alu1_cmt <= 1'b1;
+
+ // FPU 1
+ fpu1_bus <= fpu1_out;
+ fpu1_cmt <= 1'b1;
+
+ // FPU 2
+ fpu2_bus <= fpu2_out;
+ fpu2_cmt <= 1'b1;
+
+ // Flow control unit
+ fcu_bus <= fcu_out;
+ fcu_cmt <= 1'b1;
+end
+
+// Neither ALU is ever aborted: both abort inputs are tied low.
+assign alu0_abort = 1'b0,
+ alu1_abort = 1'b0;
+
+// Floating-point units. One fpUnit is generated when `NUM_FPU > 0 and a
+// second one when `NUM_FPU > 1. Each unit is clocked straight from clk_i
+// (the gated-clock BUFGCE alternative is kept commented out) with its
+// clock enable tied high.
+// The second instance is named ufp2: it previously reused the name ufp1,
+// which clashes with the first instance on tools that do not give each
+// unnamed if-generate block its own scope.
+generate begin : gFPUInst
+if (`NUM_FPU > 0) begin
+wire fpu1_clk;
+//BUFGCE ufpc1
+//(
+// .I(clk_i),
+// .CE(fpu1_available),
+// .O(fpu1_clk)
+//);
+assign fpu1_clk = clk_i;
+
+fpUnit ufp1
+(
+ .rst(rst),
+ .clk(fpu1_clk),
+ .clk4x(clk4x),
+ .ce(1'b1),
+ .ir(fpu1_instr),
+ .ld(fpu1_ld),
+ .a(fpu1_argA),
+ .b(fpu1_argB),
+ .imm(fpu1_argI),
+ .o(fpu1_out),
+ .csr_i(),		// left unconnected
+ .status(fpu1_status),
+ .exception(),
+ .done(fpu1_done)
+);
+end
+if (`NUM_FPU > 1) begin
+wire fpu2_clk;
+//BUFGCE ufpc2
+//(
+// .I(clk_i),
+// .CE(fpu2_available),
+// .O(fpu2_clk)
+//);
+assign fpu2_clk = clk_i;
+fpUnit ufp2
+(
+ .rst(rst),
+ .clk(fpu2_clk),
+ .clk4x(clk4x),
+ .ce(1'b1),
+ .ir(fpu2_instr),
+ .ld(fpu2_ld),
+ .a(fpu2_argA),
+ .b(fpu2_argB),
+ .imm(fpu2_argI),
+ .o(fpu2_out),
+ .csr_i(),		// left unconnected
+ .status(fpu2_status),
+ .exception(),
+ .done(fpu2_done)
+);
+end
+end
+endgenerate
+
+// FPU exception code: `FLT_UNIMP when the unit is not available; otherwise
+// `FLT_FLT if any of the low 16 status bits is set, else `FLT_NONE.
+assign fpu1_exc = !fpu1_available ? `FLT_UNIMP :
+ ((|fpu1_status[15:0]) ? `FLT_FLT : `FLT_NONE);
+assign fpu2_exc = !fpu2_available ? `FLT_UNIMP :
+ ((|fpu2_status[15:0]) ? `FLT_FLT : `FLT_NONE);
+
+// Result-valid flags mirror each unit's dataready register ...
+assign alu0_v = alu0_dataready;
+assign alu1_v = alu1_dataready;
+assign fpu1_v = fpu1_dataready;
+assign fpu2_v = fpu2_dataready;
+// ... and the result ids mirror the matching sourceid register.
+assign alu0_id = alu0_sourceid;
+assign alu1_id = alu1_sourceid;
+assign fpu1_id = fpu1_sourceid;
+assign fpu2_id = fpu2_sourceid;
+
+// olm: the operating level seen by the flow control unit. With SMT support
+// it is the entry of ol indexed by the FCU's thread, otherwise ol itself.
+wire [1:0] olm;
+`ifdef SUPPORT_SMT
+assign olm = ol[fcu_thrd];
+`else
+assign olm = ol;
+`endif
+
+// maxsn[j]: for each way/thread j, the largest sequence number among the
+// valid queue entries that belong to thread j, minus tosub.
+// Blocking assignments are used so the running maximum is visible to the
+// comparison on the next loop iteration.
+reg [`SNBITS] maxsn [0:`WAYS-1];
+always @*
+begin
+ for (j = 0; j < `WAYS; j = j + 1) begin
+ // Start the running maximum at zero.
+ // NOTE(review): the literal is 8 bits wide; confirm it matches `SNBITS.
+ maxsn[j] = 8'd0;
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (iqentry_sn[n] > maxsn[j] && iqentry_thrd[n]==j && iqentry_v[n])
+ maxsn[j] = iqentry_sn[n];
+ // tosub is not defined in this part of the file.
+ maxsn[j] = maxsn[j] - tosub;
+ end
+end
+
+// Flow control unit: result-valid and result-id mirror its dataready and
+// sourceid registers.
+assign fcu_v = fcu_dataready,
+ fcu_id = fcu_sourceid;
+
+// Floating-point comparator fed with the flow control unit's A and B
+// operands. Ports are connected by position, so the roles of fcmpo and
+// fnanx cannot be confirmed from this file -- presumably the compare result
+// flags and a NaN indicator; verify against fp_cmp_unit.
+wire [4:0] fcmpo;
+wire fnanx;
+fp_cmp_unit #(64) ufcmp1 (fcu_argA, fcu_argB, fcmpo, fnanx);
+
+// Branch-taken result; driven by the FT64_EvalBranch instance (.takb).
+wire fcu_takb;
+
+// Exception code raised by the flow control unit for the instruction it
+// currently holds. Defaults to `FLT_NONE; individual opcodes override it.
+always @*
+begin
+ fcu_exc <= `FLT_NONE;
+ casez(fcu_instr[`INSTRUCTION_OP])
+`ifdef SUPPORT_SEGMENTATION
+ // Fault when the selector field / return address upper bits differ from
+ // bits [63:40] of the current pc.
+ `LDCS: fcu_exc <= fcu_instr[31:8] != fcu_pc[63:40] ? `FLT_CS : `FLT_NONE;
+ `RET: fcu_exc <= fcu_argB[63:40] != fcu_pc[63:40] ? `FLT_RET : `FLT_NONE;
+`endif
+`ifdef SUPPORT_BBMS
+ // Same checks, but against currentCSSelector.
+ // NOTE(review): if SUPPORT_SEGMENTATION is also defined, `RET appears
+ // twice and only the first matching item is used.
+ `LFCS: fcu_exc <= currentCSSelector != fcu_instr[31:8] ? `FLT_CS : `FLT_NONE;
+ `RET: fcu_exc <= fcu_argB[63:40] != currentCSSelector ? `FLT_RET : `FLT_NONE;
+`endif
+ // Bounds check (only when instr[21] is set): argB <= argA < argC passes.
+ `CHK: begin
+ if (fcu_instr[21])
+ fcu_exc <= fcu_argA >= fcu_argB && fcu_argA < fcu_argC ? `FLT_NONE : `FLT_CHK;
+ end
+ // REX is a privilege violation when executed at user operating level.
+ `REX:
+ case(olm)
+ `OL_USER: fcu_exc <= `FLT_PRIV;
+ default: ;
+ endcase
+ default: fcu_exc <= `FLT_NONE;
+ endcase
+end
+
+// Branch condition evaluator: produces fcu_takb from the FCU instruction
+// and its three operands.
+FT64_EvalBranch ube1
+(
+ .takb(fcu_takb),
+ .instr(fcu_instr),
+ .c(fcu_argC),
+ .b(fcu_argB),
+ .a(fcu_argA)
+);
+
+// Flow control result calculator; its result drives fcu_out.
+FT64_FCU_Calc #(.AMSB(AMSB)) ufcuc1
+(
+ .bus(fcu_out),
+ .instr(fcu_instr),
+ .ol(olm),
+ .im(im),
+ .a(fcu_argA),
+ .pc(fcu_pc),
+ .nextpc(fcu_nextpc),
+ .tvec(tvec[fcu_instr[14:13]]),	// trap vector selected by instr[14:13]
+ .waitctr(waitctr)
+);
+
+// High while a branch miss is pending and one of the valid fetch buffers
+// already holds the instruction located at the miss target (misspc).
+wire will_clear_branchmiss =
+ branchmiss &&
+ ( (fetchbuf1_v && fetchbuf1_pc==misspc)
+ || (fetchbuf0_v && fetchbuf0_pc==misspc) );
+
+// Address to restart fetching from when the flow control instruction turns
+// out to be a miss. Selected by opcode; bit 0 of the result is always
+// cleared by the final blocking assignment.
+always @*
+begin
+case(fcu_instr[`INSTRUCTION_OP])
+`R2: fcu_misspc = fcu_argB; // RTI (we don't bother fully decoding this as it's the only R2)
+`RET: fcu_misspc = fcu_argB;
+`REX: fcu_misspc = fcu_bus;
+// BRK vectors through tvec[0], with the operating level placed in bits [6:5].
+`BRK: fcu_misspc = {tvec[0][AMSB:8], 1'b0, olm, 5'h0};
+`JAL: fcu_misspc = fcu_argA + fcu_argI;
+//`CHK: fcu_misspc = fcu_nextpc + fcu_argI; // Handled as an instruction exception
+// Default: branch
+// Taken: pc[31:8] + displacement[31:8] with displacement[7:0] as the low byte;
+// not taken: the next sequential pc.
+default: fcu_misspc = fcu_takb ? {fcu_pc[31:8] + fcu_brdisp[31:8],fcu_brdisp[7:0]} : fcu_nextpc;
+endcase
+fcu_misspc[0] = 1'b0;
+end
+
+// To avoid false branch mispredicts the branch isn't evaluated until the
+// following instruction queues. The address of the next instruction is
+// looked at to see if the BTB predicted correctly.
+
+// BRK and RTI always count as a miss.
+wire fcu_brk_miss = fcu_brk || fcu_rti;
+`ifdef FCU_ENH
+// Enhanced mode: RET / JAL only miss when the computed target differs from
+// the pc of queue entry nid (nid is defined elsewhere in the file).
+wire fcu_ret_miss = fcu_ret && (fcu_argB != iqentry_pc[nid]);
+wire fcu_jal_miss = fcu_jal && (fcu_argA + fcu_argI != iqentry_pc[nid]);
+wire fcu_followed = iqentry_sn[nid] > iqentry_sn[fcu_id[`QBITS]];
+`else
+// Basic mode: every RET and JAL is treated as a miss.
+wire fcu_ret_miss = fcu_ret;
+wire fcu_jal_miss = fcu_jal;
+wire fcu_followed = `TRUE;
+`endif
+// Combinational branch-miss flag from the flow control unit. It can only be
+// raised while the FCU result is valid (fcu_v); the conditions below are
+// tested in priority order.
+always @*
+if (fcu_v) begin
+ // Break and RTI switch register sets, and so are always treated as a branch miss in order to
+ // flush the pipeline. Hardware interrupts also stream break instructions so they need to be
+ // flushed from the queue so the interrupt is recognized only once.
+ // BRK and RTI are handled as excmiss types which are processed during the commit stage.
+ if (fcu_brk_miss)
+ fcu_branchmiss = TRUE;
+ // Conditional branch whose outcome differs from the prediction.
+ else if (fcu_branch && (fcu_takb ^ fcu_pt))
+ fcu_branchmiss = TRUE;
+ else
+ // REX: miss when im is below the inverted operating level.
+`ifdef SUPPORT_SMT
+ if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol[fcu_thrd]))
+`else
+ if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol))
+`endif
+ fcu_branchmiss = TRUE;
+ else if (fcu_ret_miss)
+ fcu_branchmiss = TRUE;
+ else if (fcu_jal_miss)
+ fcu_branchmiss = TRUE;
+ // CHK: miss when the branch evaluator reports not-taken.
+ else if (fcu_instr[`INSTRUCTION_OP] == `CHK && ~fcu_takb)
+ fcu_branchmiss = TRUE;
+ else
+ fcu_branchmiss = FALSE;
+end
+else
+ fcu_branchmiss = FALSE;
+
+// ALU used for read-modify-write memory operations. Ports are connected by
+// position: instruction, three operands, then (presumably) the result --
+// confirm the order against FT64_RMW_alu.
+FT64_RMW_alu urmwalu0 (rmw_instr, rmw_argA, rmw_argB, rmw_argC, rmw_res);
+
+
+//
+// Additional DRAM-enqueue logic
+//
+
+// At least one of the three DRAM slots is free.
+assign dram_avail = (dram2 == `DRAMSLOT_AVAIL || dram1 == `DRAMSLOT_AVAIL || dram0 == `DRAMSLOT_AVAIL);
+
+// A memory op has its operands when it is in the AGEN state and, for a
+// store, the store data (a2) is valid as well.
+always @*
+begin
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ iqentry_memopsvalid[n] <= (iqentry_mem[n]
+ && (iqentry_store[n] ? iqentry_a2_v[n] : 1'b1)
+ && iqentry_state[n]==IQS_AGEN);
+ end
+end
+
+// A memory op is ready when the entry is valid, decoded, has its operands,
+// has not already been issued to memory and is not being stomped on.
+always @*
+begin
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ iqentry_memready[n] <= (iqentry_v[n]
+ & iqentry_iv[n]
+ & iqentry_memopsvalid[n]
+ & ~iqentry_memissue[n]
+ & ~iqentry_stomp[n]);
+ end
+end
+
+// A store is outstanding when any non-idle DRAM slot holds a store.
+assign outstanding_stores =
+ (dram0 && dram0_store)
+ || (dram1 && dram1_store)
+ || (dram2 && dram2_store);
+
+//
+// additional COMMIT logic
+//
+// Builds the commit buses from the entries at the head of the queue.
+// commit0 always comes from heads[0]. commit1/commit2 depend on `NUM_CMT:
+// when fewer commit ports are configured, the extra "commit" only reports
+// that an instruction which writes no register has finished (tgt and we are
+// forced to zero).
+// NOTE(review): when `NUM_CMT > 2 none of the commit2_* signals is assigned
+// here, and when `NUM_CMT == 2 commit2_id and commit2_bus are not assigned.
+// Confirm they are driven elsewhere or are unused in those configurations.
+always @*
+begin
+ // Head entry commits when it has reached the commit state and no panic is flagged.
+ commit0_v <= (iqentry_state[heads[0]] == IQS_CMT && ~|panic);
+ commit0_id <= {iqentry_mem[heads[0]], heads[0]}; // if a memory op, it has a DRAM-bus id
+ commit0_tgt <= iqentry_tgt[heads[0]];
+ commit0_we <= iqentry_we[heads[0]];
+ commit0_bus <= iqentry_res[heads[0]];
+ if (`NUM_CMT > 1) begin
+ // Second entry commits unless heads[0] is valid but not yet ready to commit.
+ commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
+ && iqentry_state[heads[1]] == IQS_CMT
+ && ~|panic);
+ commit1_id <= {iqentry_mem[heads[1]], heads[1]};
+ commit1_tgt <= iqentry_tgt[heads[1]];
+ commit1_we <= iqentry_we[heads[1]];
+ commit1_bus <= iqentry_res[heads[1]];
+ // Need to set commit1, and commit2 valid bits for the branch predictor.
+ if (`NUM_CMT > 2) begin
+ end
+ else begin
+ // Third entry: only a branch with target register 0 may retire here.
+ commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
+ && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10
+ && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111
+ && iqentry_tgt[heads[2]][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne
+ commit2_tgt <= 12'h000;
+ commit2_we <= 8'h00;
+ end
+ end
+ else begin
+ // Single commit port: heads[1] may retire only if it writes no register.
+ commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
+ && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} == 2'b11
+ && !iqentry_rfw[heads[1]] && ~|panic); // watch out for dbnz and ibne
+ commit1_id <= {iqentry_mem[heads[1]], heads[1]}; // if a memory op, it has a DRAM-bus id
+ commit1_tgt <= 12'h000;
+ commit1_we <= 8'h00;
+ // We don't really need the bus value since nothing is being written.
+ commit1_bus <= iqentry_res[heads[1]];
+ // heads[2] may retire only if it is a branch that writes no register.
+ commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10
+ && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10
+ && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111
+ && !iqentry_rfw[heads[2]] && ~|panic); // watch out for dbnz and ibne
+ commit2_id <= {iqentry_mem[heads[2]], heads[2]}; // if a memory op, it has a DRAM-bus id
+ commit2_tgt <= 12'h000;
+ commit2_we <= 8'h00;
+ commit2_bus <= iqentry_res[heads[2]];
+ end
+end
+
+// An interrupt instruction is committing: one of the entries retiring this
+// cycle (limited to the configured number of commit ports) has its irq flag
+// set. A later slot only counts when all earlier slots commit too.
+assign int_commit =
+ (commit0_v && iqentry_irq[heads[0]])
+ || ((`NUM_CMT > 1) && commit0_v && commit1_v && iqentry_irq[heads[1]])
+ || ((`NUM_CMT > 2) && commit0_v && commit1_v && commit2_v && iqentry_irq[heads[2]]);
+
+// Detect if a given register will become valid during the current cycle.
+// We want a signal that is active during the current clock cycle for the
+// read-through register file, which trims a cycle off register access for
+// every instruction. But two different kinds of assignment statements can't
+// be placed under the same always block; it's bad practice and may not work.
+// So a signal is created here with its own always block.
+reg [AREGS-1:0] regIsValid;
+always @*
+begin
+ for (n = 1; n < AREGS; n = n + 1)
+ begin
+ // Start from the registered valid bit.
+ regIsValid[n] = rf_v[n];
+ // On a branch miss, registers that are not live targets become valid
+ // again, but only in the half of the register file that belongs to the
+ // missing thread (thread 1: n >= 128, thread 0: n < 128).
+ if (branchmiss)
+ if (~livetarget[n]) begin
+ if (branchmiss_thrd) begin
+ if (n >= 128)
+ regIsValid[n] = `VAL;
+ end
+ else begin
+ if (n < 128)
+ regIsValid[n] = `VAL;
+ end
+ end
+ // A committing instruction validates its target register when it is
+ // still the recorded source of that register, or when a branch miss in
+ // its own thread is in progress and the entry is flagged in iqentry_source.
+ if (commit0_v && n=={commit0_tgt[7:0]})
+ regIsValid[n] = regIsValid[n] | ((rf_source[ {commit0_tgt[7:0]} ] == commit0_id)
+ || (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ]));
+ if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1)
+ regIsValid[n] = regIsValid[n] | ((rf_source[ {commit1_tgt[7:0]} ] == commit1_id)
+ || (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
+ if (commit2_v && n=={commit2_tgt[7:0]} && `NUM_CMT > 2)
+ regIsValid[n] = regIsValid[n] | ((rf_source[ {commit2_tgt[7:0]} ] == commit2_id)
+ || (branchmiss && branchmiss_thrd == iqentry_thrd[commit2_id[`QBITS]] && iqentry_source[ commit2_id[`QBITS] ]));
+ end
+ // Register 0 of every 32-register set is always valid.
+ regIsValid[0] = `VAL;
+ regIsValid[32] = `VAL;
+ regIsValid[64] = `VAL;
+ regIsValid[96] = `VAL;
+ // NOTE(review): this guard tests SMT while the rest of the file tests
+ // SUPPORT_SMT -- confirm which macro is intended.
+`ifdef SMT
+ regIsValid[128] = `VAL;
+ regIsValid[160] = `VAL;
+ regIsValid[192] = `VAL;
+ regIsValid[224] = `VAL;
+`endif
+end
+
+// Wait until the cycle after Ra becomes valid to give time to read
+// the vector element from the register file.
+// rf_vra0 / rf_vra1 are registered copies of regIsValid[Ra0s] and
+// regIsValid[Ra1s]; the standalone clocked blocks below are disabled, so
+// the registers are expected to be loaded elsewhere in the file.
+reg rf_vra0, rf_vra1;
+/*always @(posedge clk)
+ rf_vra0 <= regIsValid[Ra0s];
+always @(posedge clk)
+ rf_vra1 <= regIsValid[Ra1s];
+*/
+// Check how many instructions can be queued. This might be fewer than the
+// number ready to queue from the fetch stage if queue slots aren't
+// available or if there are no more physical registers left for remapping.
+// The fetch stage needs to know how many instructions will queue so this
+// logic is placed here.
+// NOPs are filtered out and do not enter the instruction queue. The core
+// will stream NOPs on a cache miss and they would mess up the queue order
+// if there are immediate prefixes in the queue.
+// For the VEX instruction, the instruction can't queue until register Ra
+// is valid, because register Ra is used to specify the vector element to
+// read.
+// q2open: the queue slots at tail0 and tail1 are both free.
+// q3open: additionally, the slot following tail1 (modulo the queue size) is free.
+wire q2open = (iqentry_v[tail0]==`INV) && (iqentry_v[tail1]==`INV);
+wire q3open = q2open && (iqentry_v[(tail1 + 2'd1) % QENTRIES]==`INV);
+// Decide how many fetched instructions can enter the instruction queue
+// this cycle.
+//   canq1 / canq2     - a queue slot can be written for the first / second
+//                       instruction
+//   queued1 / queued2 - the first / second fetch buffer is considered
+//                       consumed (a vector instruction stays in the fetch
+//                       buffer until all of its elements are queued)
+//   queuedNop         - the fetch buffers held only NOPs, which are dropped
+//   vqueued2          - the second slot takes another element of the same
+//                       vector instruction
+// All outputs default to FALSE and are overridden case by case.
+always @*
+begin
+ canq1 <= FALSE;
+ canq2 <= FALSE;
+ queued1 <= FALSE;
+ queued2 <= FALSE;
+ queuedNop <= FALSE;
+ vqueued2 <= FALSE;
+ if (!branchmiss) begin
+ // Two available
+ if (fetchbuf1_v & fetchbuf0_v) begin
+ // Is there a pair of NOPs ? (cache miss)
+ if ((fetchbuf0_instr[`INSTRUCTION_OP]==`NOP) && (fetchbuf1_instr[`INSTRUCTION_OP]==`NOP))
+ queuedNop <= TRUE;
+ else begin
+ // If it's a predicted branch queue only the first instruction, the second
+ // instruction will be stomped on.
+ if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= TRUE;
+ queued1 <= TRUE;
+ end
+ end
+ // This is where a single NOP is allowed through to simplify the code. A
+ // single NOP can't be a cache miss. Otherwise it would be necessary to queue
+ // fetchbuf1 on tail0 it would add a nightmare to the enqueue code.
+ // Not a branch and there are two instructions fetched, see whether or not
+ // both instructions can be queued.
+ else begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
+ queued1 <= (
+ ((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
+ if (iqentry_v[tail1]==`INV) begin
+ canq2 <= ((!IsVex(fetchbuf1_instr) || rf_vra1)) || !SUP_VECTOR;
+ queued2 <= (
+ (!IsVector(fetchbuf1_instr) && (!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
+ vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
+ end
+ end
+ // If an irq is active during a vector instruction fetch, claim the vector instruction
+ // is finished queueing even though it may not be. It'll pick up where it left off after
+ // the exception is processed.
+ if (freezePC) begin
+ if (IsVector(fetchbuf0_instr) && IsVector(fetchbuf1_instr) && vechain) begin
+ queued1 <= TRUE;
+ queued2 <= TRUE;
+ end
+ else if (IsVector(fetchbuf0_instr)) begin
+ queued1 <= TRUE;
+ if (vqe0 < vl-2)
+ queued2 <= TRUE;
+ else
+ queued2 <= iqentry_v[tail1]==`INV;
+ end
+ end
+ end
+ end
+ end
+ // One available
+ else if (fetchbuf0_v) begin
+ if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
+ queued1 <=
+ (((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
+ end
+ if (iqentry_v[tail1]==`INV) begin
+ canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
+ vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
+ end
+ if (freezePC) begin
+ if (IsVector(fetchbuf0_instr)) begin
+ queued1 <= TRUE;
+ if (vqe0 < vl-2)
+ queued2 <= iqentry_v[tail1]==`INV;
+ end
+ end
+ end
+ else
+ queuedNop <= TRUE;
+ end
+ else if (fetchbuf1_v) begin
+ if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
+ queued1 <= (
+ ((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
+ end
+ if (iqentry_v[tail1]==`INV) begin
+ canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
+ vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2;
+ end
+ if (freezePC) begin
+ if (IsVector(fetchbuf1_instr)) begin
+ queued1 <= TRUE;
+ if (vqe1 < vl-2)
+ queued2 <= iqentry_v[tail1]==`INV;
+ end
+ end
+ end
+ else
+ queuedNop <= TRUE;
+ end
+ //else no instructions available to queue
+ end
+ else begin
+ // A branch miss is being processed: only an instruction that belongs to
+ // the other thread may still be queued.
+ // One available
+ if (fetchbuf0_v && fetchbuf0_thrd != branchmiss_thrd) begin
+ if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
+ queued1 <= (
+ ((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
+ end
+ if (iqentry_v[tail1]==`INV) begin
+ canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
+ vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
+ end
+ end
+ else
+ queuedNop <= TRUE;
+ end
+ else if (fetchbuf1_v && fetchbuf1_thrd != branchmiss_thrd) begin
+ if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
+ if (iqentry_v[tail0]==`INV) begin
+ canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
+ queued1 <= (
+ ((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
+ end
+ if (iqentry_v[tail1]==`INV) begin
+ canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
+ // The instruction being queued here comes from fetchbuf1, so the vector
+ // test must look at fetchbuf1 / vqe1 (this line previously tested
+ // fetchbuf0 / vqe0). Same expression as the other fetchbuf1-only path.
+ vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2;
+ end
+ end
+ else
+ queuedNop <= TRUE;
+ end
+// else
+// queuedNop <= TRUE;
+ end
+end
+
+//
+// Branchmiss seems to be sticky sometimes during simulation, for instance
+// when a branch miss and a cache miss happen at the same time. The
+// branchmiss should clear before the core continues, so its positive edge
+// is detected to avoid incrementing the sequence number too many times.
+wire pebm;
+edge_det uedbm (
+ .rst(rst),
+ .clk(clk),
+ .ce(1'b1),
+ .i(branchmiss),
+ .pe(pebm),	// positive-edge output
+ .ne(),
+ .ee()
+);
+
+// Wall-clock time counter, clocked by tm_clk_i.
+//   wc_time[31:0]  counts tm_clk_i ticks and wraps to zero once it reaches
+//                  TM_CLKFREQ-1
+//   wc_time[63:32] increments on each wrap of the low half
+// While any bit of ld_time is set the counter is loaded from wc_time_dat
+// instead of counting.
+// NOTE(review): ld_time and wc_times are declared here but not written in
+// this block; if they are updated on a different clock, confirm the clock
+// domain crossing is handled.
+reg [5:0] ld_time;
+reg [63:0] wc_time_dat;
+reg [63:0] wc_times;
+always @(posedge tm_clk_i)
+begin
+ if (|ld_time)
+ wc_time <= wc_time_dat;
+ else begin
+ wc_time[31:0] <= wc_time[31:0] + 32'd1;
+ // On wrap, this later assignment to the low half overrides the increment above.
+ if (wc_time[31:0] >= TM_CLKFREQ-1) begin
+ wc_time[31:0] <= 32'd0;
+ wc_time[63:32] <= wc_time[63:32] + 32'd1;
+ end
+ end
+end
+
+// A store is being moved into the write buffer this cycle: the write buffer
+// pointer is below its last slot and at least one available, configured
+// memory channel holds a busy, un-stomped store.
+wire writing_wb =
+ (wbptr<`WB_DEPTH-1)
+ && ( (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]])
+ || (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1)
+ || (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2)
+ );
+
+// Monster clock domain.
+// Like to move some of this to clocking under different always blocks in order
+// to help out the toolset's synthesis, but it ain't gonna be easy.
+// Simulation doesn't like it if things are under separate always blocks.
+// Synthesis doesn't like it if things are under the same always block.
+
+//always @(posedge clk)
+//begin
+// branchmiss <= excmiss|fcu_branchmiss;
+// misspc <= excmiss ? excmisspc : fcu_misspc;
+// missid <= excmiss ? (|iqentry_exc[heads[0]] ? heads[0] : heads[1]) : fcu_sourceid;
+// branchmiss_thrd <= excmiss ? excthrd : fcu_thrd;
+//end
+// Positive-edge detectors for the ALU done flags and for the wait condition
+// (wait counter reaching 1, or the signal_i bit selected by argA|argI set).
+// The reset input is connected, as on the other edge_det instance in this
+// file, so the detectors do not run with a floating reset.
+wire alu0_done_pe, alu1_done_pe, pe_wait;
+edge_det uedalu0d (.rst(rst), .clk(clk), .ce(1'b1), .i(alu0_done), .pe(alu0_done_pe), .ne(), .ee());
+edge_det uedalu1d (.rst(rst), .clk(clk), .ce(1'b1), .i(alu1_done), .pe(alu1_done_pe), .ne(), .ee());
+edge_det uedwait1 (.rst(rst), .clk(clk), .ce(1'b1), .i((waitctr==48'd1) || signal_i[fcu_argA[4:0]|fcu_argI[4:0]]), .pe(pe_wait), .ne(), .ee());
+
+// Bus randomization to mitigate meltdown attacks: whenever a unit reports
+// an exception, its result bus is replaced by four copies of lfsro instead
+// of the real result.
+wire [63:0] ralu0_bus, ralu1_bus;
+wire [63:0] rfpu1_bus, rfpu2_bus;
+wire [63:0] rfcu_bus;
+wire [63:0] rdramA_bus, rdramB_bus, rdramC_bus;
+
+assign ralu0_bus = |alu0_exc ? {4{lfsro}} : alu0_bus;
+assign ralu1_bus = |alu1_exc ? {4{lfsro}} : alu1_bus;
+assign rfpu1_bus = |fpu1_exc ? {4{lfsro}} : fpu1_bus;
+assign rfpu2_bus = |fpu2_exc ? {4{lfsro}} : fpu2_bus;
+assign rfcu_bus = |fcu_exc ? {4{lfsro}} : fcu_bus;
+assign rdramA_bus = |dramA_exc ? {4{lfsro}} : dramA_bus;
+assign rdramB_bus = |dramB_exc ? {4{lfsro}} : dramB_bus;
+assign rdramC_bus = |dramC_exc ? {4{lfsro}} : dramC_bus;
+
+always @(posedge clk)
+if (rst) begin
+`ifdef SUPPORT_SMT
+ mstatus[0] <= 64'h4000F; // select register set #16 for thread 0
+ mstatus[1] <= 64'h4800F; // select register set #18 for thread 1
+ rs_stack[0] <= 64'd16;
+ brs_stack[0] <= 64'd16;
+ rs_stack[1] <= 64'd18;
+ brs_stack[1] <= 64'd18;
+`else
+ mstatus <= 64'h4000F; // select register set #16 for thread 0
+ rs_stack <= 64'd16;
+ brs_stack <= 64'd16;
+`endif
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ iqentry_state[n] <= IQS_INVALID;
+ iqentry_iv[n] <= `INV;
+ iqentry_is[n] <= 3'b00;
+ iqentry_sn[n] <= 4'd0;
+ iqentry_pt[n] <= FALSE;
+ iqentry_bt[n] <= FALSE;
+ iqentry_br[n] <= FALSE;
+ iqentry_aq[n] <= FALSE;
+ iqentry_rl[n] <= FALSE;
+ iqentry_alu0[n] <= FALSE;
+ iqentry_alu[n] <= FALSE;
+ iqentry_fpu[n] <= FALSE;
+ iqentry_fsync[n] <= FALSE;
+ iqentry_fc[n] <= FALSE;
+ iqentry_takb[n] <= FALSE;
+ iqentry_jmp[n] <= FALSE;
+ iqentry_jal[n] <= FALSE;
+ iqentry_ret[n] <= FALSE;
+ iqentry_brk[n] <= FALSE;
+ iqentry_irq[n] <= FALSE;
+ iqentry_rti[n] <= FALSE;
+ iqentry_ldcmp[n] <= FALSE;
+ iqentry_load[n] <= FALSE;
+ iqentry_rtop[n] <= FALSE;
+ iqentry_sei[n] <= FALSE;
+ iqentry_shft[n] <= FALSE;
+ iqentry_sync[n] <= FALSE;
+ iqentry_ven[n] <= 6'd0;
+ iqentry_vl[n] <= 8'd0;
+ iqentry_we[n] <= 8'h00;
+ iqentry_rfw[n] <= FALSE;
+ iqentry_rmw[n] <= FALSE;
+ iqentry_pc[n] <= RSTPC;
+ iqentry_instr[n] <= `NOP_INSN;
+ iqentry_insln[n] <= 3'd4;
+ iqentry_preload[n] <= FALSE;
+ iqentry_mem[n] <= FALSE;
+ iqentry_memndx[n] <= FALSE;
+ iqentry_memissue[n] <= FALSE;
+ iqentry_mem_islot[n] <= 3'd0;
+ iqentry_memdb[n] <= FALSE;
+ iqentry_memsb[n] <= FALSE;
+ iqentry_tgt[n] <= 6'd0;
+ iqentry_imm[n] <= 1'b0;
+ iqentry_ma[n] <= 1'b0;
+ iqentry_a0[n] <= 64'd0;
+ iqentry_a1[n] <= 64'd0;
+ iqentry_a2[n] <= 64'd0;
+ iqentry_a3[n] <= 64'd0;
+ iqentry_a1_v[n] <= `INV;
+ iqentry_a2_v[n] <= `INV;
+ iqentry_a3_v[n] <= `INV;
+ iqentry_a1_s[n] <= 5'd0;
+ iqentry_a2_s[n] <= 5'd0;
+ iqentry_a3_s[n] <= 5'd0;
+`ifdef SUPPORT_PREDICATION
+ iqentry_aT[n] <= 64'd0;
+ iqentry_aT_s[n] <= 1'd0;
+`endif
+ iqentry_canex[n] <= FALSE;
+ end
+ bwhich <= 2'b00;
+ dram0 <= `DRAMSLOT_AVAIL;
+ dram1 <= `DRAMSLOT_AVAIL;
+ dram2 <= `DRAMSLOT_AVAIL;
+ dram0_instr <= `NOP_INSN;
+ dram1_instr <= `NOP_INSN;
+ dram2_instr <= `NOP_INSN;
+ dram0_addr <= 32'h0;
+ dram1_addr <= 32'h0;
+ dram2_addr <= 32'h0;
+ dram0_id <= 1'b0;
+ dram1_id <= 1'b0;
+ dram2_id <= 1'b0;
+ L1_adr <= RSTPC;
+ invic <= FALSE;
+ tail0 <= 3'd0;
+ tail1 <= 3'd1;
+ for (n = 0; n < QENTRIES; n = n + 1)
+ heads[n] <= n;
+ panic = `PANIC_NONE;
+ alu0_dataready <= 1'b0;
+ alu1_dataready <= 1'b0;
+ alu0_sourceid <= 5'd0;
+ alu1_sourceid <= 5'd0;
+`define SIM_
+`ifdef SIM_
+ alu0_pc <= RSTPC;
+ alu0_instr <= `NOP_INSN;
+ alu0_argA <= 64'h0;
+ alu0_argB <= 64'h0;
+ alu0_argC <= 64'h0;
+ alu0_argI <= 64'h0;
+ alu0_mem <= 1'b0;
+ alu0_shft <= 1'b0;
+ alu0_thrd <= 1'b0;
+ alu0_tgt <= 6'h00;
+ alu0_ven <= 6'd0;
+ alu1_pc <= RSTPC;
+ alu1_instr <= `NOP_INSN;
+ alu1_argA <= 64'h0;
+ alu1_argB <= 64'h0;
+ alu1_argC <= 64'h0;
+ alu1_argI <= 64'h0;
+ alu1_mem <= 1'b0;
+ alu1_shft <= 1'b0;
+ alu1_thrd <= 1'b0;
+ alu1_tgt <= 6'h00;
+ alu1_ven <= 6'd0;
+`endif
+ fcu_dataready <= 0;
+ fcu_instr <= `NOP_INSN;
+ dramA_v <= 0;
+ dramB_v <= 0;
+ dramC_v <= 0;
+ I <= 0;
+ CC <= 0;
+ icstate <= IDLE;
+ bstate <= BIDLE;
+ tick <= 64'd0;
+ ol_o <= 2'b0;
+ bte_o <= 2'b00;
+ cti_o <= 3'b000;
+ cyc <= `LOW;
+ stb_o <= `LOW;
+ we <= `LOW;
+ sel_o <= 8'h00;
+ dat_o <= 64'hFFFFFFFFFFFFFFFF;
+ sr_o <= `LOW;
+ cr_o <= `LOW;
+ vadr <= RSTPC;
+ icl_o <= `LOW; // instruction cache load
+ cr0 <= 64'd0;
+ cr0[13:8] <= 6'd0; // select compressed instruction group #0
+ cr0[30] <= TRUE; // enable data caching
+ cr0[32] <= TRUE; // enable branch predictor
+ cr0[16] <= 1'b0; // disable SMT
+ cr0[17] <= 1'b0; // sequence number reset = 1
+ cr0[34] <= FALSE; // write buffer merging enable
+ cr0[35] <= TRUE; // load speculation enable
+ pcr <= 32'd0;
+ pcr2 <= 64'd0;
+ for (n = 0; n < PREGS; n = n + 1) begin
+ rf_v[n] <= `VAL;
+ rf_source[n] <= {`QBIT{1'b1}};
+ end
+ fp_rm <= 3'd0; // round nearest even - default rounding mode
+ fpu_csr[37:32] <= 5'd31; // register set #31
+ waitctr <= 48'd0;
+ for (n = 0; n < 16; n = n + 1) begin
+ badaddr[n] <= 64'd0;
+ bad_instr[n] <= `NOP_INSN;
+ end
+ // Vector
+ vqe0 <= 6'd0;
+ vqet0 <= 6'd0;
+ vqe1 <= 6'd0;
+ vqet1 <= 6'd0;
+ vl <= 7'd62;
+ for (n = 0; n < 8; n = n + 1)
+ vm[n] <= 64'h7FFFFFFFFFFFFFFF;
+ nop_fetchbuf <= 4'h0;
+ fcu_done <= `TRUE;
+ sema <= 64'h0;
+ tvec[0] <= RSTPC;
+ pmr <= 64'hFFFFFFFFFFFFFFFF;
+ pmr[0] <= `ID1_AVAIL;
+ pmr[1] <= `ID2_AVAIL;
+ pmr[2] <= `ID3_AVAIL;
+ pmr[8] <= `ALU0_AVAIL;
+ pmr[9] <= `ALU1_AVAIL;
+ pmr[16] <= `FPU1_AVAIL;
+ pmr[17] <= `FPU2_AVAIL;
+ pmr[24] <= `MEM1_AVAIL;
+ pmr[25] <= `MEM2_AVAIL;
+ pmr[26] <= `MEM3_AVAIL;
+ pmr[32] <= `FCU_AVAIL;
+ for (n = 0; n < `WB_DEPTH; n = n + 1) begin
+ wb_v[n] <= 1'b0;
+ wb_rmw[n] <= 1'b0;
+ wb_id[n] <= {QENTRIES{1'b0}};
+ wb_ol[n] <= 2'b00;
+ wb_sel[n] <= 8'h00;
+ wb_addr[n] <= 32'd0;
+ wb_data[n] <= 64'd0;
+ end
+ wb_en <= `TRUE;
+ wbo_id <= {QENTRIES{1'b0}};
+ wbptr <= 2'd0;
+`ifdef SIM
+ wb_merges <= 32'd0;
+`endif
+ iq_ctr <= 40'd0;
+ icl_ctr <= 40'd0;
+ bm_ctr <= 40'd0;
+ irq_ctr <= 40'd0;
+ cmt_timer <= 9'd0;
+ StoreAck1 <= `FALSE;
+ keys <= 64'h0;
+`ifdef SUPPORT_DBG
+ dbg_ctrl <= 64'h0;
+`endif
+/* Initialized with initial begin above
+`ifdef SUPPORT_BBMS
+ for (n = 0; n < 64; n = n + 1) begin
+ thrd_handle[n] <= 16'h0;
+ prg_base[n] <= 64'h0;
+ cl_barrier[n] <= 64'h0;
+ cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ ro_barrier[n] <= 64'h0;
+ dl_barrier[n] <= 64'h0;
+ du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ sl_barrier[n] <= 64'h0;
+ su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF;
+ end
+`endif
+*/
+end
+else begin
+ if (|fb_panic)
+ panic <= fb_panic;
+
+ // Only one branchmiss is allowed to be processed at a time. If a second
+ // branchmiss occurs while the first is being processed, it would have
+ // to of occurred as a speculation in the branch shadow of the first.
+ // The second instruction would be stomped on by the first branchmiss so
+ // there is no need to process it.
+ // The branchmiss has to be latched, then cleared later as there could
+ // be a cache miss at the same time meaning the switch to the new pc
+ // does not take place immediately.
+ if (!branchmiss) begin
+ if (excmiss) begin
+ branchmiss <= `TRUE;
+ misspc <= excmisspc;
+ missid <= (|iqentry_exc[heads[0]] ? heads[0] : heads[1]);
+ branchmiss_thrd <= excthrd;
+ end
+ else if (fcu_branchmiss) begin
+ branchmiss <= `TRUE;
+ misspc <= fcu_misspc;
+ missid <= fcu_sourceid;
+ branchmiss_thrd <= fcu_thrd;
+ end
+ end
+ // Clear a branch miss when target instruction is fetched.
+ if (will_clear_branchmiss) begin
+ branchmiss <= `FALSE;
+ end
+
+ // The following signals only pulse
+
+ // Instruction decode output should only pulse once for a queue entry. We
+ // want the decode to be invalidated after a clock cycle so that it isn't
+ // inadvertently used to update the queue at a later point.
+ dramA_v <= `INV;
+ dramB_v <= `INV;
+ dramC_v <= `INV;
+ id1_vi <= `INV;
+ if (`NUM_IDU > 1)
+ id2_vi <= `INV;
+ if (`NUM_IDU > 2)
+ id3_vi <= `INV;
+ wb_shift <= FALSE;
+ ld_time <= {ld_time[4:0],1'b0};
+ wc_times <= wc_time;
+ rf_vra0 <= regIsValid[Ra0s];
+ rf_vra1 <= regIsValid[Ra1s];
+ if (vqe0 >= vl) begin
+ vqe0 <= 6'd0;
+ vqet0 <= 6'h0;
+ end
+ if (vqe1 >= vl) begin
+ vqe1 <= 6'd0;
+ vqet1 <= 6'h0;
+ end
+ // Turn off vector chaining indicator when chained instructions are done.
+ if ((vqe0 >= vl || vqe0==6'd0) && (vqe1 >= vl || vqe1==6'd0))
+`ifdef SUPPORT_SMT
+ mstatus[0][32] <= 1'b0;
+`else
+ mstatus[32] <= 1'b0;
+`endif
+
+ nop_fetchbuf <= 4'h0;
+ excmiss <= FALSE;
+ invic <= FALSE;
+ tick <= tick + 64'd1;
+ alu0_ld <= FALSE;
+ alu1_ld <= FALSE;
+ fpu1_ld <= FALSE;
+ fpu2_ld <= FALSE;
+ fcu_ld <= FALSE;
+ cr0[17] <= 1'b0;
+ if (waitctr != 48'd0)
+ waitctr <= waitctr - 4'd1;
+
+
+ if (iqentry_fc[fcu_id[`QBITS]] && iqentry_v[fcu_id[`QBITS]] && !iqentry_done[fcu_id[`QBITS]] && iqentry_out[fcu_id[`QBITS]])
+ fcu_timeout <= fcu_timeout + 8'd1;
+
+ if (branchmiss) begin
+ for (n = 1; n < PREGS; n = n + 1)
+ if (~livetarget[n]) begin
+ if (branchmiss_thrd) begin
+ if (n >= 128)
+ rf_v[n] <= `VAL;
+ end
+ else begin
+ if (n < 128)
+ rf_v[n] <= `VAL;
+ end
+ end
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (|iqentry_latestID[n])
+ if (iqentry_thrd[n]==branchmiss_thrd) rf_source[ {iqentry_tgt[n][7:0]} ] <= { 1'b0, iqentry_mem[n], n[`QBITS] };
+ end
+
+ // The source for the register file data might have changed since it was
+ // placed on the commit bus. So it's needed to check that the source is
+ // still as expected to validate the register.
+ if (commit0_v) begin
+ if (!rf_v[ {commit0_tgt[7:0]} ]) begin
+// rf_v[ {commit0_tgt[7:0]} ] <= rf_source[ commit0_tgt[7:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]);
+ rf_v[ {commit0_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}];//rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]);
+ if (regIsValid[{commit0_tgt[7:0]}])
+ rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}};
+ end
+ if (commit0_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit0_tgt, commit0_bus, regIsValid[commit0_tgt[5:0]],
+ rf_source[ {commit0_tgt[7:0]} ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]));
+ if (commit0_tgt[5:0]==6'd30 && commit0_bus==64'd0)
+ $display("FP <= 0");
+ end
+ if (commit1_v && `NUM_CMT > 1) begin
+ if (!rf_v[ {commit1_tgt[7:0]} ]) begin
+ if ({commit1_tgt[7:0]}=={commit0_tgt[7:0]}) begin
+ rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}];
+ if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}])
+ rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
+ /*
+ (rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ])) ||
+ (rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]));
+ */
+ end
+ else begin
+ rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]);
+ if (regIsValid[{commit1_tgt[7:0]}])
+ rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
+ end
+ end
+ if (commit1_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit1_tgt, commit1_bus, regIsValid[commit1_tgt[5:0]],
+ rf_source[ {commit1_tgt[7:0]} ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]));
+ if (commit1_tgt[5:0]==6'd30 && commit1_bus==64'd0)
+ $display("FP <= 0");
+ end
+ if (commit2_v && `NUM_CMT > 2) begin // third commit port (used only when `NUM_CMT > 2): refresh rf_v / rf_source for its target register
+ if (!rf_v[ {commit2_tgt[7:0]} ]) begin // only act when the target is currently flagged invalid in rf_v
+ if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]} && {commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin // all three commit ports share one target
+ rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
+ if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
+ rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}}; // indexed by commit0_tgt, which equals commit2_tgt in this branch
+ end
+ else if ({commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin // commit2 shares its target with commit0 only
+ rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
+ if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
+ rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}};
+ end
+ else if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]}) begin // commit2 shares its target with commit1 only
+ rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}];
+ if (regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}])
+ rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}};
+ end
+ else begin // commit2's target is unique this cycle
+ rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit2_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]);
+ if (regIsValid[{commit2_tgt[7:0]}])
+ rf_source[{commit2_tgt[7:0]}] <= {`QBIT{1'b1}};
+ end
+ end
+ if (commit2_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit2_tgt, commit2_bus, regIsValid[commit2_tgt[5:0]], // simulation trace, skipped when the low six target bits are zero
+ rf_source[ {commit2_tgt[7:0]} ] == commit2_id || (branchmiss && iqentry_source[ commit2_id[`QBITS] ]));
+ if (commit2_tgt[5:0]==6'd30 && commit2_bus==64'd0) // debug aid: report a zero being written to register 30
+ $display("FP <= 0");
+ end
+ rf_v[0] <= 1; // register 0 is marked valid every cycle; being last, this overrides any rf_v[0] write made above
+
+ //
+ // ENQUEUE
+ //
+ // place up to two instructions from the fetch buffer into slots in the IQ.
+ // note: they are placed in-order, and they are expected to be executed
+ // 0, 1, or 2 of the fetch buffers may have valid data
+ // 0, 1, or 2 slots in the instruction queue may be available.
+ // if we notice that one of the instructions in the fetch buffer is a predicted branch,
+ // (set branchback/backpc and delete any instructions after it in fetchbuf)
+ //
+
+ // enqueue fetchbuf0 and fetchbuf1, but only if there is room,
+ // and ignore fetchbuf1 if fetchbuf0 has a backwards branch in it.
+ //
+ // also, do some instruction-decode ... set the operand_valid bits in the IQ
+ // appropriately so that the DATAINCOMING stage does not have to look at the opcode
+ //
+ if (!branchmiss) // don't bother doing anything if there's been a branch miss
+
+ case ({fetchbuf0_v, fetchbuf1_v})
+
+ 2'b00: ; // do nothing
+
+ 2'b01:
+   // Only fetchbuf1 holds a valid instruction. Queue it at tail0 when canq1
+   // reports room; a vector instruction may additionally queue its next
+   // element at tail1 when canq2 reports room for a second entry.
+   if (canq1) begin
+     if (fetchbuf1_rfw) begin
+       rf_source[ Rt1s ] <= { 1'b0, fetchbuf1_mem, tail0 }; // top bit indicates ALU/MEM bus
+       rf_v [Rt1s] <= `INV;
+     end
+     if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin
+       vqe1 <= vqe1 + 4'd1;
+       if (IsVCmprss(fetchbuf1_instr)) begin
+         if (vm[fetchbuf1_instr[25:23]][vqe1])
+           vqet1 <= vqet1 + 4'd1;
+       end
+       else
+         vqet1 <= vqet1 + 4'd1;
+       if (vqe1 >= vl-2)
+         nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001;
+       enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe1);
+       iq_ctr = iq_ctr + 4'd1;
+       if (canq2 && vqe1 < vl-2) begin
+         // Nonblocking updates below replace the +1 updates above (last one wins).
+         vqe1 <= vqe1 + 4'd2;
+         // NOTE(review): for compress, vqet1 advances by 2 whenever the second
+         // mask bit is set, regardless of the first mask bit - confirm.
+         if (IsVCmprss(fetchbuf1_instr)) begin
+           if (vm[fetchbuf1_instr[25:23]][vqe1+6'd1])
+             vqet1 <= vqet1 + 4'd2;
+         end
+         else
+           vqet1 <= vqet1 + 4'd2;
+         enque1(tail1, fetchbuf1_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe1 + 6'd1);
+         // iq_ctr is updated with blocking assignments, so this adds to the +1
+         // already applied for the first entry. Count the second entry as one
+         // more (the previous +2 here made two enqueues count as three).
+         iq_ctr = iq_ctr + 4'd1;
+       end
+     end
+     else begin
+       enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0);
+       iq_ctr = iq_ctr + 4'd1;
+     end
+   end
+
+ 2'b10: // only fetchbuf0 holds a valid instruction
+ if (canq1) begin // queue it through the enque0x task when there is room for one entry
+ enque0x();
+ end
+
+ 2'b11:
+   // Both fetch buffers hold valid instructions. Queue fetchbuf0 at tail0 and,
+   // when canq2 reports room, a second entry at tail1 (either the next vector
+   // element of fetchbuf0 or the instruction in fetchbuf1).
+   if (canq1) begin
+     //
+     // if the first instruction is a predicted branch, enqueue it & stomp on all following instructions
+     // but only if the following instruction is in the same thread. Otherwise we want to queue two.
+     //
+     if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin
+       enque0x();
+     end
+
+     else begin // fetchbuf0 doesn't contain a predicted branch
+       //
+       // so -- we can enqueue 1 or 2 instructions, depending on space in the IQ
+       // update the rf_v and rf_source bits separately (at end)
+       // the problem is that if we do have two instructions,
+       // they may interact with each other, so we have to be
+       // careful about where things point.
+       //
+       // enqueue the first instruction ...
+       //
+       if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin
+         vqe0 <= vqe0 + 4'd1;
+         if (IsVCmprss(fetchbuf0_instr)) begin
+           if (vm[fetchbuf0_instr[25:23]][vqe0])
+             vqet0 <= vqet0 + 4'd1;
+         end
+         else
+           vqet0 <= vqet0 + 4'd1;
+         if (vqe0 >= vl-2)
+           nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
+       end
+       if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin
+         enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0);
+         iq_ctr = iq_ctr + 4'd1;
+         //
+         // if there is room for a second instruction, enqueue it
+         //
+         // iq_ctr is updated with blocking assignments, so every second-entry
+         // path below adds ONE more on top of the +1 above (the previous +2
+         // made two enqueues count as three).
+         //
+         if (canq2) begin
+           if (vechain && IsVector(fetchbuf1_instr)
+             && Ra1s != Rt0s // And there is no dependency
+             && Rb1s != Rt0s
+             && Rc1s != Rt0s
+           ) begin
+`ifdef SUPPORT_SMT
+             mstatus[0][32] <= 1'b1;
+`else
+             mstatus[32] <= 1'b1;
+`endif
+             vqe1 <= vqe1 + 4'd1;
+             if (IsVCmprss(fetchbuf1_instr)) begin
+               if (vm[fetchbuf1_instr[25:23]][vqe1])
+                 vqet1 <= vqet1 + 4'd1;
+             end
+             else
+               vqet1 <= vqet1 + 4'd1;
+             if (vqe1 >= vl-2)
+               nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001;
+             enque1(tail1,
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
+               fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0);
+             iq_ctr = iq_ctr + 4'd1;
+
+             // SOURCE 1 ...
+             a1_vs();
+
+             // SOURCE 2 ...
+             a2_vs();
+
+             // SOURCE 3 ...
+             a3_vs();
+
+             // if the two instructions enqueued target the same register,
+             // make sure only the second writes to rf_v and rf_source.
+             // first is allowed to update rf_v and rf_source only if the
+             // second has no target
+             //
+             if (fetchbuf0_rfw) begin
+               rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
+               rf_v [ Rt0s] <= `INV;
+             end
+             if (fetchbuf1_rfw) begin
+               rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
+               rf_v [ Rt1s ] <= `INV;
+             end
+           end
+           // If there was a vector instruction in fetchbuf0, we really
+           // want to queue the next vector element, not the next
+           // instruction waiting in fetchbuf1.
+           else if (IsVector(fetchbuf0_instr) && SUP_VECTOR && vqe0 < vl-1) begin
+             vqe0 <= vqe0 + 4'd2;
+             if (IsVCmprss(fetchbuf0_instr)) begin
+               if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1])
+                 vqet0 <= vqet0 + 4'd2;
+             end
+             else
+               vqet0 <= vqet0 + 4'd2;
+             if (vqe0 >= vl-3)
+               nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
+             if (vqe0 < vl-1) begin
+               enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe0 + 6'd1);
+               iq_ctr = iq_ctr + 4'd1;
+
+               // SOURCE 1 ...
+               iqentry_a1_v [tail1] <= regIsValid[Ra0s];
+               iqentry_a1_s [tail1] <= rf_source [Ra0s];
+
+               // SOURCE 2 ...
+               iqentry_a2_v [tail1] <= regIsValid[Rb0s];
+               iqentry_a2_s [tail1] <= rf_source[ Rb0s ];
+
+               // SOURCE 3 ...
+               iqentry_a3_v [tail1] <= regIsValid[Rc0s];
+               iqentry_a3_s [tail1] <= rf_source[ Rc0s ];
+
+
+               // if the two instructions enqueued target the same register,
+               // make sure only the second writes to rf_v and rf_source.
+               // first is allowed to update rf_v and rf_source only if the
+               // second has no target (BEQ or SW)
+               //
+               if (fetchbuf0_rfw) begin
+                 rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 };
+                 rf_v [ Rt0s ] <= `INV;
+               end
+             end
+           end
+           else if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin
+             vqe1 <= 6'd1;
+             if (IsVCmprss(fetchbuf1_instr)) begin
+               if (vm[fetchbuf1_instr[25:23]][IsVector(fetchbuf0_instr)? 6'd0:vqe1+6'd1])
+                 vqet1 <= 6'd1;
+               else
+                 vqet1 <= 6'd0;
+             end
+             else
+               vqet1 <= 6'd1;
+             if (IsVector(fetchbuf0_instr) && SUP_VECTOR)
+               nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
+             enque1(tail1,
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
+               fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0);
+             iq_ctr = iq_ctr + 4'd1;
+
+             // SOURCE 1 ...
+             a1_vs();
+
+             // SOURCE 2 ..
+             a2_vs();
+
+             // SOURCE 3 ...
+             a3_vs();
+
+             // if the two instructions enqueued target the same register,
+             // make sure only the second writes to rf_v and rf_source.
+             // first is allowed to update rf_v and rf_source only if the
+             // second has no target
+             //
+             if (fetchbuf0_rfw) begin
+               rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
+               rf_v [ Rt0s] <= `INV;
+             end
+             if (fetchbuf1_rfw) begin
+               rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
+               rf_v [ Rt1s ] <= `INV;
+             end
+           end
+           else begin
+//           enque1(tail1, seq_num + 5'd1, 6'd0);
+             // NOTE(review): when the two threads differ this path uses
+             // maxsn[x]+1, while the two vector paths above use maxsn[x]+2
+             // for the same situation - confirm which is intended.
+             enque1(tail1,
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 :
+               fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 :
+               fetchbuf1_thrd ? maxsn[1] + 4'd1: maxsn[0]+4'd1, 6'd0);
+             iq_ctr = iq_ctr + 4'd1;
+
+             // SOURCE 1 ...
+             a1_vs();
+
+             // SOURCE 2 ...
+             a2_vs();
+
+             // SOURCE 3 ...
+             a3_vs();
+
+
+             // if the two instructions enqueued target the same register,
+             // make sure only the second writes to regIsValid and rf_source.
+             // first is allowed to update regIsValid and rf_source only if the
+             // second has no target (BEQ or SW)
+             //
+             if (fetchbuf0_rfw) begin
+               rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 };
+               rf_v [ Rt0s] <= `INV;
+               $display("r%dx (%d) Invalidated", Rt0s, Rt0s[4:0]);
+             end
+             else
+               $display("No rfw");
+             if (fetchbuf1_rfw) begin
+               rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 };
+               $display("r%dx (%d) Invalidated", Rt1s, Rt1s[4:0]);
+               rf_v [ Rt1s ] <= `INV;
+             end
+             else
+               $display("No rfw");
+           end
+
+         end // ends the "if IQ[tail1] is available" clause
+         else begin // only first instruction was enqueued
+           if (fetchbuf0_rfw) begin
+             $display("r%dx (%d) Invalidated 1", Rt0s, Rt0s[4:0]);
+             rf_source[ Rt0s ] <= {1'b0,fetchbuf0_mem, tail0};
+             rf_v [ Rt0s ] <= `INV;
+           end
+         end
+       end
+
+     end // ends the "else fetchbuf0 doesn't have a backwards branch" clause
+   end
+ endcase
+ if (pebm) begin // bm_ctr advances by one in every cycle where pebm is set (presumably a branch-miss edge pulse - confirm)
+ bm_ctr <= bm_ctr + 40'd1;
+ end
+
+//
+// DATAINCOMING
+//
+// Wait for operands to appear on the ALU busses and put them into
+// the iqentry_a1 and iqentry_a2 slots (if appropriate),
+// as well as into the appropriate iqentry_res slots (setting valid bits).
+//
+// Results are written into the queue entry of the instruction that produced them.
+//
+// NOTE(review): the original note here said this chunk has to come before the
+// enqueue stage so that enqueue can reset the agen bit to zero; in this file the
+// ENQUEUE case statement appears above this code - confirm the ordering is intended.
+//
+if (IsMul(alu0_instr)|IsDivmod(alu0_instr)|alu0_shft|alu0_tlb) begin // multiply, divide/mod, shift and TLB operations on alu0
+ if (alu0_done_pe) begin // data is flagged ready only when alu0_done_pe is seen
+ alu0_dataready <= TRUE;
+ end
+end
+if (alu1_shft) begin // alu1: only shift operations use the alu1_done_pe handshake here
+ if (alu1_done_pe) begin
+ alu1_dataready <= TRUE;
+ end
+end
+
+if (alu0_v) begin // alu0 has a valid result: record it in the queue entry selected by alu0_id
+ iqentry_tgt [ alu0_id[`QBITS] ] <= alu0_tgt;
+ iqentry_res [ alu0_id[`QBITS] ] <= ralu0_bus;
+ iqentry_exc [ alu0_id[`QBITS] ] <= alu0_exc;
+ if (!iqentry_mem[ alu0_id[`QBITS] ] && alu0_done) begin // non-memory operation that has finished: entry moves to IQS_CMT
+// iqentry_done[ alu0_id[`QBITS] ] <= `TRUE;
+ iqentry_state[alu0_id[`QBITS]] <= IQS_CMT;
+ end
+// if (alu0_done)
+// iqentry_cmt [ alu0_id[`QBITS] ] <= `TRUE;
+// iqentry_out [ alu0_id[`QBITS] ] <= `INV;
+// iqentry_agen[ alu0_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET
+ if (iqentry_mem[alu0_id[`QBITS]]) // memory operation: entry moves to IQS_AGEN
+ iqentry_state[alu0_id[`QBITS]] <= IQS_AGEN;
+ if (iqentry_mem[ alu0_id[`QBITS] ] && !iqentry_agen[ alu0_id[`QBITS] ]) begin // alu0_bus is stored as the memory address while iqentry_agen is still clear
+ iqentry_ma[ alu0_id[`QBITS] ] <= alu0_bus;
+ end
+ if (|alu0_exc) begin // any exception: cancel the store and go to IQS_CMT (later assignment, so it overrides the states set above)
+// iqentry_done[alu0_id[`QBITS]] <= `VAL;
+ iqentry_store[alu0_id[`QBITS]] <= `INV;
+ iqentry_state[alu0_id[`QBITS]] <= IQS_CMT;
+ end
+ alu0_dataready <= FALSE; // result consumed
+end
+
+if (alu1_v && `NUM_ALU > 1) begin // alu1 has a valid result (only when a second ALU is configured): record it in the entry selected by alu1_id
+ iqentry_tgt [ alu1_id[`QBITS] ] <= alu1_tgt;
+ iqentry_res [ alu1_id[`QBITS] ] <= ralu1_bus;
+ iqentry_exc [ alu1_id[`QBITS] ] <= alu1_exc;
+ if (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done) begin // non-memory operation that has finished: entry moves to IQS_CMT
+// iqentry_done[ alu1_id[`QBITS] ] <= `TRUE;
+ iqentry_state[alu1_id[`QBITS]] <= IQS_CMT;
+ end
+// iqentry_done[ alu1_id[`QBITS] ] <= (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done);
+// if (alu1_done)
+// iqentry_cmt [ alu1_id[`QBITS] ] <= `TRUE;
+// iqentry_out [ alu1_id[`QBITS] ] <= `INV;
+ if (iqentry_mem[alu1_id[`QBITS]]) // memory operation: entry moves to IQS_AGEN
+ iqentry_state[alu1_id[`QBITS]] <= IQS_AGEN;
+// iqentry_agen[ alu1_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET
+ if (iqentry_mem[ alu1_id[`QBITS] ] && !iqentry_agen[ alu1_id[`QBITS] ]) begin // alu1_bus is stored as the memory address while iqentry_agen is still clear
+ iqentry_ma[ alu1_id[`QBITS] ] <= alu1_bus;
+ end
+ if (|alu1_exc) begin // any exception: cancel the store and go to IQS_CMT (later assignment, so it overrides the states set above)
+// iqentry_done[alu1_id[`QBITS]] <= `VAL;
+ iqentry_store[alu1_id[`QBITS]] <= `INV;
+ iqentry_state[alu1_id[`QBITS]] <= IQS_CMT;
+ end
+ alu1_dataready <= FALSE; // result consumed
+end
+
+if (fpu1_v && `NUM_FPU > 0) begin // fpu1 has a valid result: store result, status and exception in the entry selected by fpu1_id
+ iqentry_res [ fpu1_id[`QBITS] ] <= rfpu1_bus;
+ iqentry_ares[ fpu1_id[`QBITS] ] <= fpu1_status;
+ iqentry_exc [ fpu1_id[`QBITS] ] <= fpu1_exc;
+// iqentry_done[ fpu1_id[`QBITS] ] <= fpu1_done;
+// iqentry_out [ fpu1_id[`QBITS] ] <= `INV;
+ iqentry_state[fpu1_id[`QBITS]] <= IQS_CMT; // entry moves to IQS_CMT unconditionally
+ fpu1_dataready <= FALSE;
+end
+
+if (fpu2_v && `NUM_FPU > 1) begin // same handling for the second FPU when it is configured
+ iqentry_res [ fpu2_id[`QBITS] ] <= rfpu2_bus;
+ iqentry_ares[ fpu2_id[`QBITS] ] <= fpu2_status;
+ iqentry_exc [ fpu2_id[`QBITS] ] <= fpu2_exc;
+// iqentry_done[ fpu2_id[`QBITS] ] <= fpu2_done;
+// iqentry_out [ fpu2_id[`QBITS] ] <= `INV;
+ iqentry_state[fpu2_id[`QBITS]] <= IQS_CMT;
+ //iqentry_agen[ fpu_id[`QBITS] ] <= `VAL; // RET
+ fpu2_dataready <= FALSE;
+end
+
+if (IsWait(fcu_instr)) begin // a wait instruction in the flow-control unit becomes ready only when pe_wait is seen
+ if (pe_wait)
+ fcu_dataready <= `TRUE;
+end
+
+if (fcu_v) begin // flow-control unit has a valid result: record it in the entry selected by fcu_id
+ fcu_done <= `TRUE;
+ iqentry_ma [ fcu_id[`QBITS] ] <= fcu_misspc; // the miss PC is kept in the entry's ma field
+ iqentry_res [ fcu_id[`QBITS] ] <= rfcu_bus;
+ iqentry_exc [ fcu_id[`QBITS] ] <= fcu_exc;
+// iqentry_done[ fcu_id[`QBITS] ] <= `TRUE;
+// iqentry_out [ fcu_id[`QBITS] ] <= `INV;
+ iqentry_state[fcu_id[`QBITS] ] <= IQS_CMT;
+ // takb is looked at only for branches to update the predictor. Here it is
+ // unconditionally set, the value will be ignored if it's not a branch.
+ iqentry_takb[ fcu_id[`QBITS] ] <= fcu_takb;
+ fcu_dataready <= `INV;
+end
+
+// dramX_v is only set on a load; each block below stores the returned data and exception in the still-valid queue entry
+if (mem1_available && dramA_v && iqentry_v[ dramA_id[`QBITS] ]) begin // memory channel A
+ iqentry_res [ dramA_id[`QBITS] ] <= rdramA_bus;
+ iqentry_exc [ dramA_id[`QBITS] ] <= dramA_exc;
+// iqentry_done[ dramA_id[`QBITS] ] <= `VAL;
+// iqentry_out [ dramA_id[`QBITS] ] <= `INV;
+ iqentry_state[dramA_id[`QBITS] ] <= IQS_CMT;
+ iqentry_aq [ dramA_id[`QBITS] ] <= `INV; // aq flag cleared on completion (presumably "acquire" - confirm)
+end
+if (mem2_available && `NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ]) begin // memory channel B, only when configured
+ iqentry_res [ dramB_id[`QBITS] ] <= rdramB_bus;
+ iqentry_exc [ dramB_id[`QBITS] ] <= dramB_exc;
+// iqentry_done[ dramB_id[`QBITS] ] <= `VAL;
+ iqentry_state[dramB_id[`QBITS] ] <= IQS_CMT;
+// iqentry_out [ dramB_id[`QBITS] ] <= `INV;
+ iqentry_aq [ dramB_id[`QBITS] ] <= `INV;
+end
+if (mem3_available && `NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ]) begin // memory channel C, only when configured
+ iqentry_res [ dramC_id[`QBITS] ] <= rdramC_bus;
+ iqentry_exc [ dramC_id[`QBITS] ] <= dramC_exc;
+// iqentry_done[ dramC_id[`QBITS] ] <= `VAL;
+ iqentry_state[dramC_id[`QBITS] ] <= IQS_CMT;
+// iqentry_out [ dramC_id[`QBITS] ] <= `INV;
+ iqentry_aq [ dramC_id[`QBITS] ] <= `INV;
+// if (iqentry_lptr[dram2_id[`QBITS]])
+// wbrcd[pcr[5:0]] <= 1'b1;
+end
+
+//
+// See if any queue entry is waiting on these results ... look at lots of buses:
+// - fpu busses (rfpu1_bus, rfpu2_bus)
+// - ralu0_bus
+// - ralu1_bus
+// - rfcu_bus
+// - dram busses (rdramA_bus, rdramB_bus, rdramC_bus)
+// - commit0_bus
+// - commit1_bus (and commit2_bus when a third commit port is configured)
+//
+
+for (n = 0; n < QENTRIES; n = n + 1) // offer every result bus to every queue entry through the setargs task
+begin
+ if (`NUM_FPU > 0)
+ setargs(n,{1'b0,fpu1_id},fpu1_v,rfpu1_bus);
+ if (`NUM_FPU > 1)
+ setargs(n,{1'b0,fpu2_id},fpu2_v,rfpu2_bus);
+
+ // The memory address generated by the ALU should not be posted to be
+ // received into waiting argument registers. The arguments will be waiting
+ // for the result of the memory load, picked up from the dram busses. The
+ // only mem operation requiring the alu result bus is the push operation.
+ setargs(n,{1'b0,alu0_id},alu0_v & (~alu0_mem | alu0_push),ralu0_bus);
+ if (`NUM_ALU > 1)
+ setargs(n,{1'b0,alu1_id},alu1_v & (~alu1_mem | alu1_push),ralu1_bus);
+
+ setargs(n,{1'b0,fcu_id},fcu_v,rfcu_bus);
+
+ setargs(n,{1'b0,dramA_id},dramA_v,rdramA_bus);
+ if (`NUM_MEM > 1)
+ setargs(n,{1'b0,dramB_id},dramB_v,rdramB_bus);
+ if (`NUM_MEM > 2)
+ setargs(n,{1'b0,dramC_id},dramC_v,rdramC_bus);
+
+ setargs(n,commit0_id,commit0_v,commit0_bus); // commit ids are passed as-is, without the 1'b0 prefix used for the functional units
+ if (`NUM_CMT > 1)
+ setargs(n,commit1_id,commit1_v,commit1_bus);
+ if (`NUM_CMT > 2)
+ setargs(n,commit2_id,commit2_v,commit2_bus);
+`ifndef INLINE_DECODE
+ setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus); // decoder outputs are offered to the entry through the setinsn task
+ if (`NUM_IDU > 1)
+ setinsn(n[`QBITS],id2_ido,id2_available&id2_vo,id2_bus);
+ if (`NUM_IDU > 2)
+ setinsn(n[`QBITS],id3_ido,id3_available&id3_vo,id3_bus);
+`endif
+end
+
+
+//
+// ISSUE
+//
+// Determines which instructions are ready to go, then loads them
+// into the input registers of the decoders and functional units.
+// Entries being stomped (iqentry_v && iqentry_stomp) are never issued.
+//
+`ifndef INLINE_DECODE
+for (n = 0; n < QENTRIES; n = n + 1) // decoder 1: take entries selected by iqentry_id1issue whose iqentry_iv bit is still clear
+if (id1_available) begin
+if (iqentry_id1issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
+ id1_vi <= `VAL;
+ id1_id <= n[4:0]; // NOTE(review): fixed 5-bit slice here, other units use n[`QBITS] - confirm
+ id1_instr <= iqentry_rtop[n] ? ( // when iqentry_rtop is set the instruction word is taken from operand 3 instead of iqentry_instr
+ iqentry_a3_v[n] ? iqentry_a3[n]
+`ifdef FU_BYPASS
+ : (iqentry_a3_s[n] == alu0_id) ? alu0_bus
+ : (iqentry_a3_s[n] == alu1_id) ? alu1_bus
+`endif
+ : `NOP_INSN)
+ : iqentry_instr[n];
+ id1_ven <= iqentry_ven[n];
+ id1_vl <= iqentry_vl[n];
+ id1_thrd <= iqentry_thrd[n];
+ id1_Rt <= iqentry_tgt[n][4:0];
+ id1_pt <= iqentry_pt[n];
+ end
+end
+if (`NUM_IDU > 1) begin // decoder 2, same loading sequence, only when configured
+for (n = 0; n < QENTRIES; n = n + 1)
+ if (id2_available) begin
+ if (iqentry_id2issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
+ id2_vi <= `VAL;
+ id2_id <= n[4:0];
+ id2_instr <= iqentry_rtop[n] ? (
+ iqentry_a3_v[n] ? iqentry_a3[n]
+`ifdef FU_BYPASS
+ : (iqentry_a3_s[n] == alu0_id) ? alu0_bus
+ : (iqentry_a3_s[n] == alu1_id) ? alu1_bus
+`endif
+ : `NOP_INSN)
+ : iqentry_instr[n];
+ id2_ven <= iqentry_ven[n];
+ id2_vl <= iqentry_vl[n];
+ id2_thrd <= iqentry_thrd[n];
+ id2_Rt <= iqentry_tgt[n][4:0];
+ id2_pt <= iqentry_pt[n];
+ end
+ end
+end
+if (`NUM_IDU > 2) begin // decoder 3, same loading sequence, only when configured
+for (n = 0; n < QENTRIES; n = n + 1)
+ if (id3_available) begin
+ if (iqentry_id3issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
+ id3_vi <= `VAL;
+ id3_id <= n[4:0];
+ id3_instr <= iqentry_rtop[n] ? (
+ iqentry_a3_v[n] ? iqentry_a3[n]
+`ifdef FU_BYPASS
+ : (iqentry_a3_s[n] == alu0_id) ? alu0_bus
+ : (iqentry_a3_s[n] == alu1_id) ? alu1_bus
+`endif
+ : `NOP_INSN)
+ : iqentry_instr[n];
+ id3_ven <= iqentry_ven[n];
+ id3_vl <= iqentry_vl[n];
+ id3_thrd <= iqentry_thrd[n];
+ id3_Rt <= iqentry_tgt[n][4:0];
+ id3_pt <= iqentry_pt[n];
+ end
+ end
+end
+`endif // not INLINE_DECODE
+
+// ALU0 issue. X's on unused busses cause problems in SIM, so bypass selections end in a constant or a bus rather than X.
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (iqentry_alu0_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin // entry selected for alu0 and not being stomped
+ if (alu0_available & alu0_done) begin // alu0 can accept a new operation
+ alu0_sourceid <= {iqentry_push[n],n[`QBITS]}; // source id is the queue index with the push flag as its top bit
+ alu0_instr <= iqentry_rtop[n] ? ( // when iqentry_rtop is set the instruction word comes from operand 3
+`ifdef FU_BYPASS
+ iqentry_a3_v[n] ? iqentry_a3[n]
+ : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a3_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a3_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : `NOP_INSN)
+`else
+ iqentry_a3[n])
+`endif
+ : iqentry_instr[n];
+ alu0_sz <= iqentry_sz[n];
+ alu0_tlb <= iqentry_tlb[n];
+ alu0_mem <= iqentry_mem[n];
+ alu0_load <= iqentry_load[n];
+ alu0_store <= iqentry_store[n];
+ alu0_push <= iqentry_push[n];
+ alu0_shft <= iqentry_shft[n];
+ alu0_pc <= iqentry_pc[n];
+ alu0_argA <= // operand A: stored value when valid, otherwise (FU_BYPASS) the result bus whose id matches the operand's source tag
+`ifdef FU_BYPASS
+ iqentry_a1_v[n] ? iqentry_a1[n]
+ : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD;
+`else
+ iqentry_a1[n];
+`endif
+ alu0_argB <= iqentry_imm[n] // operand B: iqentry_a0 when iqentry_imm is set, otherwise operand 2
+ ? iqentry_a0[n]
+`ifdef FU_BYPASS
+ : (iqentry_a2_v[n] ? iqentry_a2[n]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD);
+`else
+ : iqentry_a2[n];
+`endif
+ alu0_argC <= // operand C: bypass falls back to ralu1_bus without comparing against alu1_id
+`ifdef FU_BYPASS
+ iqentry_a3_v[n] ? iqentry_a3[n]
+ : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_a3[n];
+`endif
+ alu0_argI <= iqentry_a0[n];
+ alu0_tgt <= IsVeins(iqentry_instr[n]) ? // for IsVeins instructions the target is rebuilt with the low six bits of operand 2 placed above bit 5
+ {6'h0,1'b1,iqentry_tgt[n][4:0]} | ((
+ iqentry_a2_v[n] ? iqentry_a2[n][5:0]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0]
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0]
+ : {4{16'h0000}})) << 6 :
+ iqentry_tgt[n];
+ alu0_ven <= iqentry_ven[n];
+ alu0_thrd <= iqentry_thrd[n];
+ alu0_dataready <= IsSingleCycle(iqentry_instr[n]); // multi-cycle operations get dataready later, in the DATAINCOMING section
+ alu0_ld <= TRUE;
+ iqentry_state[n] <= IQS_OUT; // entry is now out for execution
+ end
+ end
+ if (`NUM_ALU > 1) begin // ALU1 issue, only when a second ALU is configured
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (iqentry_alu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin // entry selected for alu1 and not being stomped
+ if (alu1_available && alu1_done) begin // alu1 can accept a new operation
+ if (iqentry_alu0[n]) // an entry flagged iqentry_alu0 must not be issued to alu1: raise a panic
+ panic <= `PANIC_ALU0ONLY;
+ alu1_sourceid <= {iqentry_push[n],n[`QBITS]};
+ alu1_instr <= iqentry_instr[n]; // NOTE(review): no iqentry_rtop substitution here, unlike alu0 - confirm rtop entries never reach alu1
+ alu1_sz <= iqentry_sz[n];
+ alu1_mem <= iqentry_mem[n];
+ alu1_load <= iqentry_load[n];
+ alu1_store <= iqentry_store[n];
+ alu1_push <= iqentry_push[n];
+ alu1_shft <= iqentry_shft[n];
+ alu1_pc <= iqentry_pc[n];
+ alu1_argA <= // operand A: stored value when valid, otherwise (FU_BYPASS) the result bus whose id matches the operand's source tag
+`ifdef FU_BYPASS
+ iqentry_a1_v[n] ? iqentry_a1[n]
+ : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD;
+`else
+ iqentry_a1[n];
+`endif
+ alu1_argB <= iqentry_imm[n] // operand B: iqentry_a0 when iqentry_imm is set, otherwise operand 2
+ ? iqentry_a0[n]
+`ifdef FU_BYPASS
+ : (iqentry_a2_v[n] ? iqentry_a2[n]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD);
+`else
+ : iqentry_a2[n];
+`endif
+ alu1_argC <= // operand C: bypass falls back to ralu1_bus without comparing against alu1_id
+`ifdef FU_BYPASS
+ iqentry_a3_v[n] ? iqentry_a3[n]
+ : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_a3[n];
+`endif
+ alu1_argI <= iqentry_a0[n];
+ alu1_tgt <= IsVeins(iqentry_instr[n]) ? // same target rebuild as used for alu0
+ {6'h0,1'b1,iqentry_tgt[n][4:0]} | ((iqentry_a2_v[n] ? iqentry_a2[n][5:0]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0]
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0]
+ : {4{16'h0000}})) << 6 :
+ iqentry_tgt[n];
+ alu1_ven <= iqentry_ven[n]; // NOTE(review): alu0 also loads alu0_thrd at this point; no alu1_thrd is loaded here - confirm
+ alu1_dataready <= IsSingleCycle(iqentry_instr[n]);
+ alu1_ld <= TRUE;
+ iqentry_state[n] <= IQS_OUT; // entry is now out for execution
+ end
+ end
+ end
+
+ for (n = 0; n < QENTRIES; n = n + 1) // FPU1 issue: load fpu1's input registers from the selected queue entry
+ if (iqentry_fpu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin // entry selected for fpu1 and not being stomped
+ if (fpu1_available & fpu1_done) begin // fpu1 can accept a new operation
+ fpu1_sourceid <= n[`QBITS];
+ fpu1_instr <= iqentry_instr[n];
+ fpu1_pc <= iqentry_pc[n];
+ fpu1_argA <= // operand A: stored value when valid, otherwise (FU_BYPASS) the result bus whose id matches the source tag
+`ifdef FU_BYPASS
+ iqentry_a1_v[n] ? iqentry_a1[n]
+ : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD;
+`else
+ iqentry_a1[n];
+`endif
+ fpu1_argB <= // operand B: no immediate alternative here, unlike the ALUs
+`ifdef FU_BYPASS
+ (iqentry_a2_v[n] ? iqentry_a2[n]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD);
+`else
+ iqentry_a2[n];
+`endif
+ fpu1_argC <= // operand C: bypass falls back to ralu1_bus without comparing against alu1_id
+`ifdef FU_BYPASS
+ iqentry_a3_v[n] ? iqentry_a3[n]
+ : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_a3[n];
+`endif
+`ifdef SUPPORT_PREDICATION
+ fpu1_pred <= iqentry_p_v[n] ? iqentry_pred[n] : // predicate: stored value when valid, else bypassed nybble, else 4'h0
+`ifdef FU_BYPASS
+ (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
+ (iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
+`endif
+ 4'h0;
+ fpu1_argT <=
+`ifdef FU_BYPASS
+ iqentry_aT_v[n] ? iqentry_aT[n]
+ : (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_aT[n];
+`endif
+`endif
+ fpu1_argI <= iqentry_a0[n];
+ fpu1_dataready <= `VAL;
+ fpu1_ld <= TRUE;
+ iqentry_state[n] <= IQS_OUT; // entry is now out for execution
+ end
+ end
+
+ for (n = 0; n < QENTRIES; n = n + 1) // FPU2 issue: same loading sequence as fpu1, active only when `NUM_FPU > 1
+ if (`NUM_FPU > 1 && iqentry_fpu2_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin // entry selected for fpu2 and not being stomped
+ if (fpu2_available & fpu2_done) begin // fpu2 can accept a new operation
+ fpu2_sourceid <= n[`QBITS];
+ fpu2_instr <= iqentry_instr[n];
+ fpu2_pc <= iqentry_pc[n];
+ fpu2_argA <= // operand A: stored value when valid, otherwise (FU_BYPASS) the result bus whose id matches the source tag
+`ifdef FU_BYPASS
+ iqentry_a1_v[n] ? iqentry_a1[n]
+ : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD;
+`else
+ iqentry_a1[n];
+`endif
+ fpu2_argB <= // NOTE(review): the bypass lists alu0, alu1 and fpu1 but not fpu2's own result bus - confirm
+`ifdef FU_BYPASS
+ (iqentry_a2_v[n] ? iqentry_a2[n]
+ : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
+ : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus
+ : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+ : 64'hDEADDEADDEADDEAD);
+`else
+ iqentry_a2[n];
+`endif
+ fpu2_argC <= // operand C: bypass falls back to ralu1_bus without comparing against alu1_id
+`ifdef FU_BYPASS
+ iqentry_a3_v[n] ? iqentry_a3[n]
+ : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_a3[n];
+`endif
+`ifdef SUPPORT_PREDICATION
+ fpu2_pred <= iqentry_p_v[n] ? iqentry_pred[n] : // predicate: stored value when valid, else bypassed nybble, else 4'h0
+`ifdef FU_BYPASS
+ (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
+ (iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
+`endif
+ 4'h0;
+ fpu2_argT <=
+`ifdef FU_BYPASS
+ iqentry_aT_v[n] ? iqentry_aT[n]
+ : (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+ iqentry_aT[n];
+`endif
+`endif
+ fpu2_argI <= iqentry_a0[n];
+ fpu2_dataready <= `VAL;
+ fpu2_ld <= TRUE;
+ iqentry_state[n] <= IQS_OUT; // entry is now out for execution
+ end
+ end
+
+ // Flow-control unit issue: when an entry is selected by iqentry_fcu_issue,
+ // is not being stomped, and the FCU has finished its previous operation
+ // (fcu_done), load the FCU input registers from that entry and mark the
+ // entry IQS_OUT.
+ for (n = 0; n < QENTRIES; n = n + 1)
+   if (iqentry_fcu_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin
+     if (fcu_done) begin
+       fcu_sourceid <= n[`QBITS];
+       fcu_prevInstr <= fcu_instr;
+       fcu_instr <= iqentry_instr[n];
+       fcu_insln <= iqentry_insln[n];
+       // fcu_pc is loaded once here; the original repeated this identical
+       // assignment a few lines further down.
+       fcu_pc <= iqentry_pc[n];
+       fcu_nextpc <= iqentry_pc[n] + iqentry_insln[n];
+       fcu_pt <= iqentry_pt[n];
+       // Displacement is sign-extended to 64 bits; instruction bit 6 selects
+       // between the wide field (bits 47:23) and the narrow field (bits 31:23).
+       fcu_brdisp <= iqentry_instr[n][6] ? {{37{iqentry_instr[n][47]}},iqentry_instr[n][47:23],iqentry_instr[n][17:16]}
+                   : {{53{iqentry_instr[n][31]}},iqentry_instr[n][31:23],iqentry_instr[n][17:16]};
+       fcu_branch <= iqentry_br[n];
+       fcu_call <= IsCall(iqentry_instr[n])|iqentry_jal[n];
+       fcu_jal <= iqentry_jal[n];
+       fcu_ret <= iqentry_ret[n];
+       fcu_brk <= iqentry_brk[n];
+       fcu_rti <= iqentry_rti[n];
+       // Operand bypass here is not conditional on FU_BYPASS and falls back
+       // to ralu1_bus when no other source tag matches.
+       fcu_argA <= iqentry_a1_v[n] ? iqentry_a1[n]
+                 : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus
+                 : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+                 : ralu1_bus;
+       // For an RTI, argB carries epc0 instead of operand 2.
+`ifdef SUPPORT_SMT
+       fcu_argB <= iqentry_rti[n] ? epc0[iqentry_thrd[n]]
+`else
+       fcu_argB <= iqentry_rti[n] ? epc0
+`endif
+                 : (iqentry_a2_v[n] ? iqentry_a2[n]
+                 : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus
+                 : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus
+                 : ralu1_bus);
+       // argB
+       waitctr <= (iqentry_a2_v[n] ? iqentry_a2[n][47:0]
+                 : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[47:0]
+                 : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus[47:0]
+                 : ralu1_bus[47:0]);
+       fcu_argC <= iqentry_a3_v[n] ? iqentry_a3[n]
+                 : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`ifdef SUPPORT_PREDICATION
+       fcu_pred <= iqentry_p_v[n] ? iqentry_pred[n] :
+`ifdef FU_BYPASS
+                 (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] :
+                 (iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] :
+`endif
+                 4'h0;
+       fcu_argT <=
+`ifdef FU_BYPASS
+                 iqentry_aT_v[n] ? iqentry_aT[n]
+                 : (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus;
+`else
+                 iqentry_aT[n];
+`endif
+`endif
+       fcu_argI <= iqentry_a0[n];
+       fcu_thrd <= iqentry_thrd[n];
+       // A wait instruction is not ready at issue; it is flagged ready later.
+       fcu_dataready <= !IsWait(iqentry_instr[n]);
+       fcu_clearbm <= `FALSE;
+       fcu_ld <= TRUE;
+       fcu_timeout <= 8'h00;
+       iqentry_state[n] <= IQS_OUT;
+       fcu_done <= `FALSE;
+     end
+   end
+//
+// MEMORY
+//
+// update the memory queues and put data out on bus if appropriate
+//
+
+//
+// dram0, dram1, dram2 are the "state machines" that keep track
+// of three pipelined DRAM requests. if any has the value "000",
+// then it can accept a request (which bumps it up to the value "001"
+// at the end of the cycle). once it hits the value "111" the request
+// is finished and the dram_bus takes the value. if it is a store, the
+// dram_bus value is not used, but the dram_v value along with the
+// dram_id value signals the waiting memq entry that the store is
+// completed and the instruction can commit.
+//
+
+// if (dram0 != `DRAMSLOT_AVAIL) dram0 <= dram0 + 2'd1;
+// if (dram1 != `DRAMSLOT_AVAIL) dram1 <= dram1 + 2'd1;
+// if (dram2 != `DRAMSLOT_AVAIL) dram2 <= dram2 + 2'd1;
+
+// Flip the ready status to available. Used for loads or stores.
+
+if (dram0 == `DRAMREQ_READY) // a finished request frees its slot for the next cycle
+ dram0 <= `DRAMSLOT_AVAIL;
+if (dram1 == `DRAMREQ_READY && `NUM_MEM > 1)
+ dram1 <= `DRAMSLOT_AVAIL;
+if (dram2 == `DRAMREQ_READY && `NUM_MEM > 2)
+ dram2 <= `DRAMSLOT_AVAIL;
+
+// Grab loads that have finished and put them on the matching dram bus (A, B or C).
+
+if (dram0 == `DRAMREQ_READY && dram0_load) begin
+ dramA_v <= `VAL;//!iqentry_stomp[dram0_id[`QBITS]];
+ dramA_id <= dram0_id;
+ dramA_exc <= dram0_exc;
+ dramA_bus <= fnDatiAlign(dram0_instr,dram0_addr,rdat0); // read data is passed through fnDatiAlign with the instruction and address
+end
+if (dram1 == `DRAMREQ_READY && dram1_load && `NUM_MEM > 1) begin
+ dramB_v <= `VAL;//!iqentry_stomp[dram1_id[`QBITS]];
+ dramB_id <= dram1_id;
+ dramB_exc <= dram1_exc;
+ dramB_bus <= fnDatiAlign(dram1_instr,dram1_addr,rdat1);
+end
+if (dram2 == `DRAMREQ_READY && dram2_load && `NUM_MEM > 2) begin
+ dramC_v <= `VAL;//!iqentry_stomp[dram2_id[`QBITS]];
+ dramC_id <= dram2_id;
+ dramC_exc <= dram2_exc;
+ dramC_bus <= fnDatiAlign(dram2_instr,dram2_addr,rdat2);
+end
+
+if (dram0 == `DRAMREQ_READY && dram0_store) // simulation trace of completed stores
+ $display("m[%h] <- %h", dram0_addr, dram0_data);
+if (dram1 == `DRAMREQ_READY && dram1_store && `NUM_MEM > 1)
+ $display("m[%h] <- %h", dram1_addr, dram1_data);
+if (dram2 == `DRAMREQ_READY && dram2_store && `NUM_MEM > 2)
+ $display("m[%h] <- %h", dram2_addr, dram2_data);
+
+//
+// Determine if the instructions ready to issue can, in fact, issue.
+// "Ready" means that the instruction has valid operands but has not gone yet.
+iqentry_memissue <= memissue; // register memissue for use in the next cycle
+missue_count <= issue_count;
+
+if (dram0 == `DRAMSLOT_AVAIL) dram0_exc <= `FLT_NONE; // an idle slot carries no exception
+if (dram1 == `DRAMSLOT_AVAIL) dram1_exc <= `FLT_NONE;
+if (dram2 == `DRAMSLOT_AVAIL) dram2_exc <= `FLT_NONE;
+
+for (n = 0; n < QENTRIES; n = n + 1) // invalidate every valid entry that is being stomped and release any dram slot working on it
+ if (iqentry_v[n] && iqentry_stomp[n]) begin
+ iqentry_iv[n] <= `INV;
+ iqentry_mem[n] <= `INV;
+ iqentry_load[n] <= `INV;
+ iqentry_store[n] <= `INV;
+ iqentry_state[n] <= IQS_INVALID; // later assignment, so it overrides state changes made for this entry earlier in the block
+// iqentry_agen[n] <= `INV;
+// iqentry_out[n] <= `INV;
+// iqentry_done[n] <= `INV;
+// iqentry_cmt[n] <= `INV;
+ if (dram0_id[`QBITS] == n[`QBITS]) begin // slot 0 is working on the stomped entry
+ if (dram0==`DRAMSLOT_HASBUS) // the slot owns the bus: call the wb_nack task first
+ wb_nack();
+ dram0_load <= `FALSE;
+ dram0_store <= `FALSE;
+ dram0_rmw <= `FALSE;
+ dram0 <= `DRAMSLOT_AVAIL;
+ end
+ if (dram1_id[`QBITS] == n[`QBITS]) begin // same for slot 1 (not qualified by `NUM_MEM here)
+ if (dram1==`DRAMSLOT_HASBUS)
+ wb_nack();
+ dram1_load <= `FALSE;
+ dram1_store <= `FALSE;
+ dram1_rmw <= `FALSE;
+ dram1 <= `DRAMSLOT_AVAIL;
+ end
+ if (dram2_id[`QBITS] == n[`QBITS]) begin // same for slot 2 (not qualified by `NUM_MEM here)
+ if (dram2==`DRAMSLOT_HASBUS)
+ wb_nack();
+ dram2_load <= `FALSE;
+ dram2_store <= `FALSE;
+ dram2_rmw <= `FALSE;
+ dram2 <= `DRAMSLOT_AVAIL;
+ end
+ end
+
+if (last_issue0 < QENTRIES) // a value below QENTRIES names a queue entry; it is passed to the tDram0Issue task
+ tDram0Issue(last_issue0);
+if (last_issue1 < QENTRIES)
+ tDram1Issue(last_issue1);
+if (last_issue2 < QENTRIES)
+ tDram2Issue(last_issue2);
+
+
+//for (n = 0; n < QENTRIES; n = n + 1)
+//begin
+// if (!iqentry_v[n])
+// iqentry_done[n] <= FALSE;
+//end
+
+if (ohead[0]==heads[0]) // head pointer unchanged since the previous cycle: count the stall
+ cmt_timer <= cmt_timer + 12'd1;
+else
+ cmt_timer <= 12'd0;
+
+if (cmt_timer==12'd1000) begin // head stuck for 1000 counts: force the head entry to IQS_CMT with a `FLT_CMT exception
+ iqentry_state[heads[0]] <= IQS_CMT;
+ iqentry_exc[heads[0]] <= `FLT_CMT;
+ cmt_timer <= 12'd0; // later assignment, so it overrides the increment above
+end
+
+//
+// COMMIT PHASE (dequeue only ... not register-file update)
+//
+// Look at heads[0], heads[1] and heads[2]; the register-file update for them is handled elsewhere.
+//
+// always @(posedge clk) begin: commit_phase
+ohead[0] <= heads[0]; // remember this cycle's head pointers and commit valids for comparison next cycle
+ohead[1] <= heads[1];
+ohead[2] <= heads[2];
+ocommit0_v <= commit0_v;
+ocommit1_v <= commit1_v;
+ocommit2_v <= commit2_v;
+
+oddball_commit(commit0_v, heads[0], 2'd0); // oddball_commit task is invoked once per configured commit port
+if (`NUM_CMT > 1)
+ oddball_commit(commit1_v, heads[1], 2'd1);
+if (`NUM_CMT > 2)
+ oddball_commit(commit2_v, heads[2], 2'd2);
+
+// Head-pointer advance. The case selector is {v0, cmt0, v1, cmt1, v2, cmt2} for heads[0..2],
+// where cmtN means iqentry_state[heads[N]] == IQS_CMT. NOTE(review): the earlier wording said retiring is
+// limited to two instructions per cycle, but the cases below call head_inc(3) - confirm which is intended.
+if (~|panic) // the head does not advance while any panic bit is set
+ casez ({ iqentry_v[heads[0]],
+ iqentry_state[heads[0]] == IQS_CMT,
+ iqentry_v[heads[1]],
+ iqentry_state[heads[1]] == IQS_CMT,
+ iqentry_v[heads[2]],
+ iqentry_state[heads[2]] == IQS_CMT})
+
+ // all three head slots invalid: skip over empty slots, but never past tail0
+ 6'b0?_0?_0?:
+ if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0)
+ head_inc(3);
+ else if (heads[0] != tail0 && heads[1] != tail0)
+ head_inc(2);
+ else if (heads[0] != tail0)
+ head_inc(1);
+ 6'b0?_0?_10: // heads[2] valid but not yet committed: advance up to it only
+ if (heads[0] != tail0 && heads[1] != tail0)
+ head_inc(2);
+ else if (heads[0] != tail0)
+ head_inc(1);
+ 6'b0?_0?_11: // heads[2] valid and committed
+ if (`NUM_CMT > 2 || cmt_head2) // and it's not an oddball?
+ head_inc(3);
+ else
+ head_inc(2);
+
+ // heads[0] invalid, heads[1] valid but not committed: advance by one and wait for heads[1]
+ 6'b0?_10_??:
+ head_inc(1);
+
+ // heads[0] invalid, heads[1] committed, heads[2] invalid or not committed
+ 6'b0?_11_0?,
+ 6'b0?_11_10:
+ if (`NUM_CMT > 1 || cmt_head1)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b0?_11_11: // heads[0] invalid, heads[1] and heads[2] both committed
+ if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
+ head_inc(3);
+ else if (`NUM_CMT > 1 || cmt_head1)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b10_??_??: ; // heads[0] valid but not committed: nothing can retire
+ 6'b11_0?_0?: // heads[0] committed, the next two slots invalid
+ if (heads[1] != tail0 && heads[2] != tail0)
+ head_inc(3);
+ else if (heads[1] != tail0)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b11_0?_10: // heads[0] committed, heads[1] invalid, heads[2] not committed
+ if (heads[1] != tail0)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b11_0?_11: // heads[0] committed, heads[1] invalid, heads[2] committed
+ if (heads[1] != tail0) begin
+ if (`NUM_CMT > 2 || cmt_head2)
+ head_inc(3);
+ else
+ head_inc(2);
+ end
+ else
+ head_inc(1);
+ 6'b11_10_??: // heads[0] committed, heads[1] valid but not committed
+ head_inc(1);
+ 6'b11_11_0?: // heads[0] and heads[1] committed, heads[2] invalid
+ if (`NUM_CMT > 1 && heads[2] != tail0)
+ head_inc(3);
+ else if (cmt_head1 && heads[2] != tail0)
+ head_inc(3);
+ else if (`NUM_CMT > 1 || cmt_head1)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b11_11_10: // heads[0] and heads[1] committed, heads[2] not committed
+ if (`NUM_CMT > 1 || cmt_head1)
+ head_inc(2);
+ else
+ head_inc(1);
+ 6'b11_11_11: // all three committed
+ if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2))
+ head_inc(3);
+ else if (`NUM_CMT > 1 || cmt_head1)
+ head_inc(2);
+ else
+ head_inc(1);
+ default: // any selector not matched above (for example X bits in simulation): report and stop
+ begin
+ $display("head_inc: Uncoded case %h",{ iqentry_v[heads[0]],
+ iqentry_state[heads[0]],
+ iqentry_v[heads[1]],
+ iqentry_state[heads[1]],
+ iqentry_v[heads[2]],
+ iqentry_state[heads[2]]});
+ $stop;
+ end
+ endcase
+
+
+rf_source[0] <= 0;
+L1_wr0 <= FALSE;
+L1_wr1 <= FALSE;
+L1_wr2 <= FALSE;
+L1_invline <= FALSE;
+icnxt <= FALSE;
+L2_nxt <= FALSE;
+// Instruction cache state machine.
+// On a miss first see if the instruction is in the L2 cache. No need to go to
+// the BIU on an L1 miss.
+// If not the machine will wait until the BIU loads the L2 cache.
+
+// Capture the previous ic state, used to determine how long to wait in
+// icstate #4.
+picstate <= icstate;
+case(icstate)
+IDLE:
+ // If the bus unit is busy doing an update involving L1_adr or L2_adr
+ // we have to wait.
+ if (bstate != B_ICacheAck && bstate != B_ICacheNack && bstate != B_ICacheNack2) begin
+ if (!ihit0) begin
+ L1_adr <= {pcr[7:0],pc0[AMSB:5],5'h0};
+ L2_adr <= {pcr[7:0],pc0[AMSB:5],5'h0};
+ L1_invline <= TRUE;
+ icwhich <= 2'b00;
+ iccnt <= 3'b00;
+ icstate <= IC2;
+ end
+ else if (!ihit1 && `WAYS > 1) begin
+ if (thread_en) begin
+ L1_adr <= {pcr[7:0],pc1[AMSB:5],5'h0};
+ L2_adr <= {pcr[7:0],pc1[AMSB:5],5'h0};
+ end
+ else begin
+ L1_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0};
+ L2_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0};
+ end
+ L1_invline <= TRUE;
+ icwhich <= 2'b01;
+ iccnt <= 3'b00;
+ icstate <= IC2;
+ end
+ else if (!ihit2 && `WAYS > 2) begin
+ if (thread_en) begin
+ L1_adr <= {pcr[7:0],pc2[AMSB:5],5'h0};
+ L2_adr <= {pcr[7:0],pc2[AMSB:5],5'h0};
+ end
+ else begin
+ L1_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0};
+ L2_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0};
+ end
+ L1_invline <= TRUE;
+ icwhich <= 2'b10;
+ iccnt <= 3'b00;
+ icstate <= IC2;
+ end
+ end
+IC2: icstate <= IC3;
+IC3: icstate <= IC3a;
+IC3a: icstate <= IC_WaitL2;
+// If data was in the L2 cache already there's no need to wait on the
+// BIU to retrieve data. It can be determined if the hit signal was
+// already active when this state was entered in which case waiting
+// will do no good.
+// The IC machine will stall in this state until the BIU has loaded the
+// L2 cache.
+IC_WaitL2:
+ if (ihitL2 && picstate==IC3a) begin
+ L1_en <= 9'h1FF;
+ L1_wr0 <= TRUE;
+ L1_wr1 <= TRUE && `WAYS > 1;
+ L1_wr2 <= TRUE && `WAYS > 2;
+ L1_adr <= L2_adr;
+ L2_rdat <= L2_dato;
+ icstate <= IC5;
+ end
+ else if (bstate!=B_ICacheNack)
+ ;
+ else begin
+ L1_en <= 9'h1FF;
+ L1_wr0 <= TRUE;
+ L1_wr1 <= TRUE && `WAYS > 1;
+ L1_wr2 <= TRUE && `WAYS > 2;
+ L1_adr <= L2_adr;
+ // L2_rdat set below while loading cache line
+ //L2_rdat <= L2_dato;
+ icstate <= IC5;
+ end
+IC5:
+ begin
+ L1_en <= 9'h000;
+ L1_wr0 <= FALSE;
+ L1_wr1 <= FALSE;
+ L1_wr2 <= FALSE;
+ icstate <= IC6;
+ end
+IC6: icstate <= IC7;
+IC7: icstate <= IC_Next;
+IC_Next:
+ begin
+ icstate <= IDLE;
+ icnxt <= TRUE;
+ end
+default: icstate <= IDLE;
+endcase
+
+// Load sequencer for memory channel 0.
+// Active only while mem1_available and dram0_load are both set. Each pass
+// moves the dram0 slot state one step:
+//   AVAIL   - nothing to do.
+//   BUSY    - advances by one unless the access is uncached (dram0_unc set),
+//             in which case the state holds at BUSY.
+//   2, 3    - unconditional single-step advance (presumably covering the data
+//             cache lookup latency - TODO confirm).
+//   4       - if the owning queue entry is still valid and not stomped, go to
+//             READY on a data cache hit (dhit0), otherwise request the bus;
+//             if the entry is gone, release the slot and clear dram0_load.
+//   REQBUS / HASBUS - hold; nothing is assigned to dram0 here in these states.
+//   READY   - release the slot.
+if (mem1_available && dram0_load)
+case(dram0)
+`DRAMSLOT_AVAIL: ;
+`DRAMSLOT_BUSY:
+// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
+ // !dram0_unc is 1 for a cached access and 0 for an uncached one
+ dram0 <= dram0 + !dram0_unc;
+// else begin
+// dram0 <= `DRAMSLOT_AVAIL;
+// dram0_load <= `FALSE;
+// end
+3'd2:
+// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
+ dram0 <= dram0 + 3'd1;
+// else begin
+// dram0 <= `DRAMSLOT_AVAIL;
+// dram0_load <= `FALSE;
+// end
+3'd3:
+// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]])
+ dram0 <= dram0 + 3'd1;
+// else begin
+// dram0 <= `DRAMSLOT_AVAIL;
+// dram0_load <= `FALSE;
+// end
+3'd4:
+ // Only this step checks that the queue entry is still live.
+ if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin
+ if (dhit0)
+ dram0 <= `DRAMREQ_READY;
+ else
+ dram0 <= `DRAMSLOT_REQBUS;
+ end
+ else begin
+ dram0 <= `DRAMSLOT_AVAIL;
+ dram0_load <= `FALSE;
+ end
+`DRAMSLOT_REQBUS: ;
+`DRAMSLOT_HASBUS: ;
+`DRAMREQ_READY: dram0 <= `DRAMSLOT_AVAIL;
+endcase
+
+if (mem2_available && dram1_load && `NUM_MEM > 1)
+case(dram1)
+`DRAMSLOT_AVAIL: ;
+`DRAMSLOT_BUSY:
+ dram1 <= dram1 + !dram1_unc;
+3'd2:
+ dram1 <= dram1 + 3'd1;
+3'd3:
+ dram1 <= dram1 + 3'd1;
+3'd4:
+ if (iqentry_v[dram1_id[`QBITS]] && !iqentry_stomp[dram1_id[`QBITS]]) begin
+ if (dhit1)
+ dram1 <= `DRAMREQ_READY;
+ else
+ dram1 <= `DRAMSLOT_REQBUS;
+ end
+ else begin
+ dram1 <= `DRAMSLOT_AVAIL;
+ dram1_load <= `FALSE;
+ end
+`DRAMSLOT_REQBUS: ;
+`DRAMSLOT_HASBUS: ;
+`DRAMREQ_READY: dram1 <= `DRAMSLOT_AVAIL;
+endcase
+
+if (mem3_available && dram2_load && `NUM_MEM > 2)
+case(dram2)
+`DRAMSLOT_AVAIL: ;
+`DRAMSLOT_BUSY:
+ dram2 <= dram2 + !dram2_unc;
+3'd2:
+ dram2 <= dram2 + 3'd1;
+3'd3:
+ dram2 <= dram2 + 3'd1;
+3'd4:
+ if (iqentry_v[dram2_id[`QBITS]] && !iqentry_stomp[dram2_id[`QBITS]]) begin
+ if (dhit2)
+ dram2 <= `DRAMREQ_READY;
+ else
+ dram2 <= `DRAMSLOT_REQBUS;
+ end
+ else begin
+ dram2 <= `DRAMSLOT_AVAIL;
+ dram2_load <= `FALSE;
+ end
+`DRAMSLOT_REQBUS: ;
+`DRAMSLOT_HASBUS: ;
+`DRAMREQ_READY: dram2 <= `DRAMSLOT_AVAIL;
+endcase
+
+
+// Bus Interface Unit (BIU)
+// Interfaces to the external bus which is WISHBONE compatible.
+// Stores take precedence over other operations.
+// Next data cache read misses are serviced.
+// Uncached data reads are serviced.
+// Finally L2 instruction cache misses are serviced.
+//
+// set the IQ entry == DONE as soon as the SW is let loose to the memory system
+//
+`ifndef HAS_WB
+if (mem1_available && dram0 == `DRAMSLOT_BUSY && dram0_store) begin
+ if ((alu0_v && (dram0_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram0_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
+// iqentry_done[ dram0_id[`QBITS] ] <= `VAL;
+// iqentry_out[ dram0_id[`QBITS] ] <= `INV;
+ iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE;
+end
+if (mem2_available && `NUM_MEM > 1 && dram1 == `DRAMSLOT_BUSY && dram1_store) begin
+ if ((alu0_v && (dram1_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram1_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
+// iqentry_done[ dram1_id[`QBITS] ] <= `VAL;
+// iqentry_out[ dram1_id[`QBITS] ] <= `INV;
+ iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE;
+end
+if (mem3_available && `NUM_MEM > 2 && dram2 == `DRAMSLOT_BUSY && dram2_store) begin
+ if ((alu0_v && (dram2_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram2_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE;
+// iqentry_done[ dram2_id[`QBITS] ] <= `VAL;
+// iqentry_out[ dram2_id[`QBITS] ] <= `INV;
+ iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE;
+end
+`endif
+
+`ifdef HAS_WB
+ if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]]) begin
+ if (wbptr<`WB_DEPTH-1) begin
+ dram0 <= `DRAMSLOT_AVAIL;
+ dram0_instr[`INSTRUCTION_OP] <= `NOP;
+ wb_update(
+ dram0_id,
+ `FALSE,
+ fnSelect(dram0_instr,dram0_addr),
+ dram0_ol,
+ dram0_addr,
+ fnDato(dram0_instr,dram0_data)
+ );
+// iqentry_done[ dram0_id[`QBITS] ] <= `VAL;
+// iqentry_out[ dram0_id[`QBITS] ] <= `INV;
+ iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE;
+ end
+ end
+ else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1) begin
+ if (wbptr<`WB_DEPTH-1) begin
+ dram1 <= `DRAMSLOT_AVAIL;
+ dram1_instr[`INSTRUCTION_OP] <= `NOP;
+ wb_update(
+ dram1_id,
+ `FALSE,
+ fnSelect(dram1_instr,dram1_addr),
+ dram1_ol,
+ dram1_addr,
+ fnDato(dram1_instr,dram1_data)
+ );
+ iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE;
+ end
+ end
+ else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2) begin
+ if (wbptr<`WB_DEPTH-1) begin
+ dram2 <= `DRAMSLOT_AVAIL;
+ dram2_instr[`INSTRUCTION_OP] <= `NOP;
+ wb_update(
+ dram2_id,
+ `FALSE,
+ fnSelect(dram2_instr,dram2_addr),
+ dram2_ol,
+ dram2_addr,
+ fnDato(dram2_instr,dram2_data)
+ );
+ iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE;
+ end
+ end
+`endif
+
+case(bstate)
+BIDLE:
+ begin
+ isCAS <= FALSE;
+ isAMO <= FALSE;
+ isInc <= FALSE;
+ isSpt <= FALSE;
+ isRMW <= FALSE;
+ rdvq <= 1'b0;
+ errq <= 1'b0;
+ exvq <= 1'b0;
+ bwhich <= 2'b00;
+ preload <= FALSE;
+`ifdef HAS_WB
+ if (wb_v[0] & wb_en & ~acki & ~cyc) begin
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ we <= `HIGH;
+ sel_o <= wb_sel[0];
+ vadr <= wb_addr[0];
+ dat_o <= wb_data[0];
+ ol_o <= wb_ol[0];
+ wbo_id <= wb_id[0];
+ isStore <= TRUE;
+ bstate <= wb_rmw[0] ? B12 : B_StoreAck;
+ wb_v[0] <= `INV;
+ end
+ if (wb_v[0]==`INV && !writing_wb) begin
+ for (j = 1; j < `WB_DEPTH; j = j + 1) begin
+ wb_v[j-1] <= wb_v[j];
+ wb_id[j-1] <= wb_id[j];
+ wb_rmw[j-1] <= wb_rmw[j];
+ wb_sel[j-1] <= wb_sel[j];
+ wb_addr[j-1] <= wb_addr[j];
+ wb_data[j-1] <= wb_data[j];
+ wb_ol[j-1] <= wb_ol[j];
+ if (wbptr > 2'd0)
+ wbptr <= wbptr - 2'd1;
+ end
+ wb_v[`WB_DEPTH-1] <= `INV;
+ wb_rmw[`WB_DEPTH-1] <= `FALSE;
+ end
+
+`endif
+ if (~|wb_v && mem1_available && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch0|dbg_lmatch0) begin
+ dramA_v <= `TRUE;
+ dramA_id <= dram0_id;
+ dramA_exc <= `FLT_DBG;
+ dramA_bus <= 64'h0;
+ dram0 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ if (!acki) begin
+ isRMW <= dram0_rmw;
+ isCAS <= IsCAS(dram0_instr);
+ isAMO <= IsAMO(dram0_instr);
+ isInc <= IsInc(dram0_instr);
+ casid <= dram0_id;
+ bwhich <= 2'b00;
+ dram0 <= `DRAMSLOT_HASBUS;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram0_instr,dram0_addr);
+ vadr <= dram0_addr;
+ dat_o <= fnDato(dram0_instr,dram0_data);
+ ol_o <= dram0_ol;
+ bstate <= B12;
+ end
+ end
+ else if (~|wb_v && mem2_available && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch1|dbg_lmatch1) begin
+ dramB_v <= `TRUE;
+ dramB_id <= dram1_id;
+ dramB_exc <= `FLT_DBG;
+ dramB_bus <= 64'h0;
+ dram1 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ if (!acki) begin
+ isRMW <= dram1_rmw;
+ isCAS <= IsCAS(dram1_instr);
+ isAMO <= IsAMO(dram1_instr);
+ isInc <= IsInc(dram1_instr);
+ casid <= dram1_id;
+ bwhich <= 2'b01;
+ dram1 <= `DRAMSLOT_HASBUS;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram1_instr,dram1_addr);
+ vadr <= dram1_addr;
+ dat_o <= fnDato(dram1_instr,dram1_data);
+ ol_o <= dram1_ol;
+ bstate <= B12;
+ end
+ end
+ else if (~|wb_v && mem3_available && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch2|dbg_lmatch2) begin
+ dramC_v <= `TRUE;
+ dramC_id <= dram2_id;
+ dramC_exc <= `FLT_DBG;
+ dramC_bus <= 64'h0;
+ dram2 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ if (!acki) begin
+ isRMW <= dram2_rmw;
+ isCAS <= IsCAS(dram2_instr);
+ isAMO <= IsAMO(dram2_instr);
+ isInc <= IsInc(dram2_instr);
+ casid <= dram2_id;
+ bwhich <= 2'b10;
+ dram2 <= `DRAMSLOT_HASBUS;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram2_instr,dram2_addr);
+ vadr <= dram2_addr;
+ dat_o <= fnDato(dram2_instr,dram2_data);
+ ol_o <= dram2_ol;
+ bstate <= B12;
+ end
+ end
+`ifndef HAS_WB
+ // Check write buffer enable ?
+ else if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch0) begin
+ dramA_v <= `TRUE;
+ dramA_id <= dram0_id;
+ dramA_exc <= `FLT_DBG;
+ dramA_bus <= 64'h0;
+ dram0 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ bwhich <= 2'b00;
+ if (!acki) begin
+ dram0 <= `DRAMSLOT_HASBUS;
+ dram0_instr[`INSTRUCTION_OP] <= `NOP;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram0_instr,dram0_addr);
+ vadr <= dram0_addr;
+ dat_o <= fnDato(dram0_instr,dram0_data);
+ ol_o <= dram0_ol;
+ isStore <= TRUE;
+ bstate <= B_StoreAck;
+ end
+// cr_o <= IsSWC(dram0_instr);
+ end
+ end
+ else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch1) begin
+ dramB_v <= `TRUE;
+ dramB_id <= dram1_id;
+ dramB_exc <= `FLT_DBG;
+ dramB_bus <= 64'h0;
+ dram1 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ bwhich <= 2'b01;
+ if (!acki) begin
+ dram1 <= `DRAMSLOT_HASBUS;
+ dram1_instr[`INSTRUCTION_OP] <= `NOP;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram1_instr,dram1_addr);
+ vadr <= dram1_addr;
+ dat_o <= fnDato(dram1_instr,dram1_data);
+ ol_o <= dram1_ol;
+ isStore <= TRUE;
+ bstate <= B_StoreAck;
+ end
+// cr_o <= IsSWC(dram0_instr);
+ end
+ end
+ else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin
+`ifdef SUPPORT_DBG
+ if (dbg_smatch2) begin
+ dramC_v <= `TRUE;
+ dramC_id <= dram2_id;
+ dramC_exc <= `FLT_DBG;
+ dramC_bus <= 64'h0;
+ dram2 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ bwhich <= 2'b10;
+ if (!acki) begin
+ dram2 <= `DRAMSLOT_HASBUS;
+ dram2_instr[`INSTRUCTION_OP] <= `NOP;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram2_instr,dram2_addr);
+ vadr <= dram2_addr;
+ dat_o <= fnDato(dram2_instr,dram2_data);
+ ol_o <= dram2_ol;
+ isStore <= TRUE;
+ bstate <= B_StoreAck;
+ end
+// cr_o <= IsSWC(dram0_instr);
+ end
+ end
+`endif
+ // Check for read misses on the data cache
+ else if (~|wb_v && mem1_available && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin
+`ifdef SUPPORT_DBG
+ if (dbg_lmatch0) begin
+ dramA_v <= `TRUE;
+ dramA_id <= dram0_id;
+ dramA_exc <= `FLT_DBG;
+ dramA_bus <= 64'h0;
+ dram0 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ dram0 <= `DRAMSLOT_HASBUS;
+ bwhich <= 2'b00;
+ preload <= dram0_preload;
+ bstate <= B_DCacheLoadStart;
+ end
+ end
+ else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin
+`ifdef SUPPORT_DBG
+ if (dbg_lmatch1) begin
+ dramB_v <= `TRUE;
+ dramB_id <= dram1_id;
+ dramB_exc <= `FLT_DBG;
+ dramB_bus <= 64'h0;
+ dram1 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ dram1 <= `DRAMSLOT_HASBUS;
+ bwhich <= 2'b01;
+ preload <= dram1_preload;
+ bstate <= B_DCacheLoadStart;
+ end
+ end
+ else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin
+`ifdef SUPPORT_DBG
+ if (dbg_lmatch2) begin
+ dramC_v <= `TRUE;
+ dramC_id <= dram2_id;
+ dramC_exc <= `FLT_DBG;
+ dramC_bus <= 64'h0;
+ dram2 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ begin
+ dram2 <= `DRAMSLOT_HASBUS;
+ preload <= dram2_preload;
+ bwhich <= 2'b10;
+ bstate <= B_DCacheLoadStart;
+ end
+ end
+ else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin
+`ifdef SUPPORT_DBG
+ if (dbg_lmatch0) begin
+ dramA_v <= `TRUE;
+ dramA_id <= dram0_id;
+ dramA_exc <= `FLT_DBG;
+ dramA_bus <= 64'h0;
+ dram0 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ if (!acki) begin
+ bwhich <= 2'b00;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram0_instr,dram0_addr);
+ vadr <= {dram0_addr[AMSB:3],3'b0};
+ sr_o <= IsLWR(dram0_instr);
+ ol_o <= dram0_ol;
+ bstate <= B_DLoadAck;
+ end
+ end
+ else if (~|wb_v && mem2_available && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin
+`ifdef SUPPORT_DBG
+ if (dbg_lmatch1) begin
+ dramB_v <= `TRUE;
+ dramB_id <= dram1_id;
+ dramB_exc <= `FLT_DBG;
+ dramB_bus <= 64'h0;
+ dram1 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ if (!acki) begin
+ bwhich <= 2'b01;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram1_instr,dram1_addr);
+ vadr <= {dram1_addr[AMSB:3],3'b0};
+ sr_o <= IsLWR(dram1_instr);
+ ol_o <= dram1_ol;
+ bstate <= B_DLoadAck;
+ end
+ end
+ // Uncached load on memory channel 2 (only when `NUM_MEM > 2).
+ // Taken when the write buffer is empty (~|wb_v), the channel is available,
+ // the access is marked uncached and the slot is in the BUSY state.
+ else if (~|wb_v && mem3_available && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin
+`ifdef SUPPORT_DBG
+ // Debug load-address match: report `FLT_DBG on result bus C instead of
+ // starting a bus cycle, and release the slot.
+ if (dbg_lmatch2) begin
+ dramC_v <= `TRUE;
+ dramC_id <= dram2_id;
+ dramC_exc <= `FLT_DBG;
+ dramC_bus <= 64'h0;
+ // Use the named slot state, as the channel 0 and 1 arms do, rather than
+ // a 2-bit literal on a register that is compared against 3-bit values.
+ dram2 <= `DRAMSLOT_AVAIL;
+ end
+ else
+`endif
+ // Start the read only when no acknowledge is still pending (acki low).
+ if (!acki) begin
+ bwhich <= 2'b10;
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= fnSelect(dram2_instr,dram2_addr);
+ // Address is aligned down to an 8-byte boundary; sel_o picks the lanes.
+ vadr <= {dram2_addr[AMSB:3],3'b0};
+ sr_o <= IsLWR(dram2_instr);
+ ol_o <= dram2_ol;
+ bstate <= B_DLoadAck;
+ end
+ end
+ // Check for L2 cache miss
+ else if (~|wb_v && !ihitL2 && !acki) begin
+ cti_o <= 3'b001;
+ bte_o <= 2'b00;//2'b01; // 4 beat burst wrap
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ sel_o <= 8'hFF;
+ icl_o <= `HIGH;
+ iccnt <= 3'd0;
+// adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
+// L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0};
+ vadr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
+ ol_o <= ol[0];
+ L2_adr <= {pcr[7:0],L1_adr[AMSB:5],5'h0};
+ L2_xsel <= 1'b0;
+ bstate <= B_ICacheAck;
+ end
+ end
+
+// Terminal state for a store operation.
+// Note that if only a single memory channel is selected, bwhich will be a
+// constant 0. This should cause the extra code to be removed.
+B_StoreAck:
+ begin
+ StoreAck1 <= `TRUE;
+ isStore <= `TRUE;
+ if (acki|err_i|tlb_miss|wrv_i) begin
+ wb_nack();
+ cr_o <= 1'b0;
+ // This isn't a good way of doing things; the state should be propagated
+ // to the commit stage, however since this is a store we know there will
+ // be no change of program flow. So the reservation status bit is set
+ // here. The author wanted to avoid the complexity of propagating the
+ // input signal to the commit stage. It does mean that the SWC
+ // instruction should be surrounded by SYNC's.
+ if (cr_o)
+ sema[0] <= rbi_i;
+`ifdef HAS_WB
+ for (n = 0; n < QENTRIES; n = n + 1) begin
+ if (wbo_id[n]) begin
+ iqentry_exc[n] <= tlb_miss ? `FLT_TLB : wrv_i ? `FLT_DWF : err_i ? `FLT_IBE : `FLT_NONE;
+ if (err_i|wrv_i) begin
+ wb_v <= 1'b0; // Invalidate write buffer if there is a problem with the store
+ wb_en <= `FALSE; // and disable write buffer
+ end
+ iqentry_state[n] <= IQS_CMT;
+ iqentry_aq[n] <= `INV;
+ end
+ end
+`else
+ case(bwhich)
+ 2'd0: begin
+ dram0 <= `DRAMSLOT_AVAIL;
+ iqentry_exc[dram0_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+ iqentry_state[dram0_id[`QBITS]] <= IQS_CMT;
+ iqentry_aq[ dram0_id[`QBITS] ] <= `INV;
+ //iqentry_out[ dram0_id[`QBITS] ] <= `INV;
+ end
+ 2'd1: if (`NUM_MEM > 1) begin
+ dram1 <= `DRAMSLOT_AVAIL;
+ iqentry_exc[dram1_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+ iqentry_state[dram1_id[`QBITS]] <= IQS_CMT;
+ iqentry_aq[ dram1_id[`QBITS] ] <= `INV;
+ //iqentry_out[ dram1_id[`QBITS] ] <= `INV;
+ end
+ 2'd2: if (`NUM_MEM > 2) begin
+ dram2 <= `DRAMSLOT_AVAIL;
+ iqentry_exc[dram2_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+ iqentry_state[dram2_id[`QBITS]] <= IQS_CMT;
+ iqentry_aq[ dram2_id[`QBITS] ] <= `INV;
+ //iqentry_out[ dram2_id[`QBITS] ] <= `INV;
+ end
+ default: ;
+ endcase
+`endif
+ bstate <= B19;
+ end
+ end
+
+B_DCacheLoadStart:
+ if (~acki & ~cyc) begin // check for idle bus - it should be
+ dccnt <= 2'd0;
+ bstate <= B_DCacheLoadAck;
+ cti_o <= 3'b001; // constant address burst
+ bte_o <= 2'b00; // linear burst, non-wrapping
+ cyc <= `HIGH;
+ stb_o <= `HIGH;
+ // Select should be selecting all byte lanes for a cache load
+ sel_o <= 8'hFF;
+ // bwhich should always be one of the three channels.
+ case(bwhich)
+ 2'd0: begin
+ vadr <= {dram0_addr[AMSB:5],5'b0};
+ ol_o <= dram0_ol;
+ end
+ 2'd1: if (`NUM_MEM > 1) begin
+ vadr <= {dram1_addr[AMSB:5],5'b0};
+ ol_o <= dram1_ol;
+ end
+ 2'd2: if (`NUM_MEM > 2) begin
+ vadr <= {dram2_addr[AMSB:5],5'b0};
+ ol_o <= dram2_ol;
+ end
+ default:
+ begin
+ $display("Invalid memory channel selection");
+ $stop;
+ wb_nack();
+ bstate <= BIDLE;
+ end
+ endcase
+ end
+
+// Data cache line load: acknowledge state.
+// Entered from B_DCacheLoadStart. Each acknowledged (or faulted) beat bumps
+// the beat counter dccnt and bits [4:3] of the address. After the beat with
+// dccnt==3 the bus is released and the machine moves on to the wait states.
+//
+// bstate is deliberately NOT reassigned to B_DCacheLoadAck inside this
+// branch: the machine is already in that state, and an unconditional
+// self-assignment placed after the !bok_i branch would, as the later
+// non-blocking assignment, cancel the transition to B_DCacheLoadStb. The
+// strobe would then be dropped without ever being raised again.
+B_DCacheLoadAck:
+ if (ack_i|err_i|tlb_miss|rdv_i) begin
+ // bok_i low: drop the strobe for this beat and let B_DCacheLoadStb raise
+ // it again (presumably bok_i means the slave accepts bursts - TODO confirm).
+ if (!bok_i) begin
+ stb_o <= `LOW;
+ bstate <= B_DCacheLoadStb;
+ end
+ // Accumulate error / read-violation indications across the beats.
+ errq <= errq | err_i;
+ rdvq <= rdvq | rdv_i;
+ if (!preload) // A preload instruction ignores any error
+ case(bwhich)
+ 2'd0: if (err_i|rdv_i|tlb_miss) begin
+ iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+ end
+ 2'd1: if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 1) begin
+ iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+ end
+ 2'd2: if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 2) begin
+ iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+ end
+ default: ;
+ endcase
+ dccnt <= dccnt + 2'd1;
+ vadr[4:3] <= vadr[4:3] + 2'd1;
+ // Change the cycle type identifier one beat before the last one.
+ if (dccnt==2'd2)
+ cti_o <= 3'b111;
+ // Final beat: release the bus. This assignment comes last, so it also
+ // takes priority over the B_DCacheLoadStb transition above.
+ if (dccnt==2'd3) begin
+ wb_nack();
+ bstate <= B_DCacheLoadWait1;
+ end
+ end
+
+B_DCacheLoadStb:
+ begin
+ stb_o <= `HIGH;
+ bstate <= B_DCacheLoadAck;
+ end
+B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2;
+B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy;
+//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy;
+B_DCacheLoadResetBusy: begin
+ // There could be more than one memory cycle active. We reset the state
+ // of all the machines to retest for a hit because otherwise sequential
+ // loading of memory will cause successive machines to miss resulting in
+ // multiple dcache loads that aren't needed.
+ if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY; // causes retest of dhit
+ if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
+ if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
+ if (~ack_i) bstate <= BIDLE;
+ end
+
+// Ack state for instruction cache load
+B_ICacheAck:
+ if (ack_i|err_i|tlb_miss|exv_i) begin
+ if (!bok_i) begin
+ stb_o <= `LOW;
+ bstate <= B_ICacheNack2;
+ end
+ errq <= errq | err_i;
+ exvq <= exvq | exv_i;
+// L1_en <= 9'h3 << {L2_xsel,L2_adr[4:3],1'b0};
+// L1_wr0 <= `TRUE;
+// L1_wr1 <= `TRUE;
+// L1_adr <= L2_adr;
+ if (tlb_miss) begin
+ L2_rdat <= {18{`INSN_FLT_TLB}};
+ wb_nack();
+ icl_o <= `LOW;
+ bstate <= B_ICacheNack;
+ end
+ else if (exv_i) begin
+ L2_rdat <= {18{`INSN_FLT_EXF}};
+ wb_nack();
+ icl_o <= `LOW;
+ bstate <= B_ICacheNack;
+ end
+ else if (err_i) begin
+ L2_rdat <= {18{`INSN_FLT_IBE}};
+ wb_nack();
+ icl_o <= `LOW;
+ bstate <= B_ICacheNack;
+ end
+ else
+ case(iccnt)
+ 3'd0: L2_rdat[63:0] <= dat_i;
+ 3'd1: L2_rdat[127:64] <= dat_i;
+ 3'd2: L2_rdat[191:128] <= dat_i;
+ 3'd3: L2_rdat[255:192] <= dat_i;
+ 3'd4: L2_rdat[297:256] <= {2'b00,dat_i[39:0]};
+ default: ;
+ endcase
+ //L2_rdat <= {dat_i[31:0],{4{dat_i}}};
+ iccnt <= iccnt + 3'd1;
+ //stb_o <= `LOW;
+ if (iccnt==3'd3)
+ cti_o <= 3'b111;
+ if (iccnt==3'd4) begin
+ wb_nack();
+ icl_o <= `LOW;
+ bstate <= B_ICacheNack;
+ end
+ else begin
+ L2_adr[4:3] <= L2_adr[4:3] + 2'd1;
+ if (L2_adr[4:3]==2'b11)
+ L2_xsel <= 1'b1;
+ end
+ end
+B_ICacheNack2:
+ if (~acki) begin
+ stb_o <= `HIGH;
+ vadr[AMSB:3] <= vadr[AMSB:3] + 2'd1;
+ bstate <= B_ICacheAck;
+ end
+B_ICacheNack:
+ begin
+ L1_wr0 <= `FALSE;
+ L1_wr1 <= `FALSE;
+ L1_wr2 <= `FALSE;
+ L1_en <= 9'h1FF;
+ L2_xsel <= 1'b0;
+ if (~ack_i) begin
+ icl_ctr <= icl_ctr + 40'd1;
+ bstate <= BIDLE;
+ L2_nxt <= TRUE;
+ end
+ end
+// B12: read-phase acknowledge for compare-and-swap and read-modify-write
+// accesses. BIDLE enters this state after starting the read for a slot whose
+// dramN_rmw flag is set; casid identifies the owning queue entry and bwhich
+// the memory channel.
+B12:
+ if (ack_i|err_i|tlb_miss|rdv_i) begin
+ if (isCAS) begin
+ // Result is 1 when the memory value equals the cas register.
+ iqentry_res [ casid[`QBITS] ] <= (dat_i == cas);
+ iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+// iqentry_done[ casid[`QBITS] ] <= `VAL;
+// iqentry_out [ casid[`QBITS] ] <= `INV;
+ iqentry_state [ casid[`QBITS] ] <= IQS_DONE;
+ iqentry_instr[ casid[`QBITS]] <= `NOP_INSN;
+ // Record the faulting address on a bus error or read violation.
+ if (err_i | rdv_i) iqentry_ma[casid[`QBITS]] <= vadr;
+ if (dat_i == cas) begin
+ // Match: keep the cycle open and switch to write.
+ // NOTE(review): no B15 item appears in this case(bstate) statement, so
+ // unless B15 shares an encoding with another state this falls into the
+ // default item (back to BIDLE) - confirm where the write is performed.
+ stb_o <= `LOW;
+ we <= `TRUE;
+ bstate <= B15;
+ end
+ else begin
+ // Mismatch: capture the value that was read, end the bus cycle and
+ // mark the channel's slot as ready.
+ cas <= dat_i;
+ cyc <= `LOW;
+ stb_o <= `LOW;
+ case(bwhich)
+ 2'b00: dram0 <= `DRAMREQ_READY;
+ 2'b01: dram1 <= `DRAMREQ_READY;
+ 2'b10: dram2 <= `DRAMREQ_READY;
+ default: ;
+ endcase
+ bstate <= B19;
+ end
+ end
+ else if (isRMW) begin
+ // Latch the instruction and the memory operand, then pick the second
+ // operand according to the kind of operation.
+ rmw_instr <= iqentry_instr[casid[`QBITS]];
+ rmw_argA <= dat_i;
+ if (isSpt) begin
+ // Single-bit mask and bit value positioned by bits [63:58] of a1.
+ rmw_argB <= 64'd1 << iqentry_a1[casid[`QBITS]][63:58];
+ rmw_argC <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
+ iqentry_a3[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58] :
+ iqentry_a2[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58];
+ end
+ else if (isInc) begin
+ // Sign-extended 5-bit increment; field position depends on the format.
+ rmw_argB <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ? {{59{iqentry_instr[casid[`QBITS]][22]}},iqentry_instr[casid[`QBITS]][22:18]} :
+ {{59{iqentry_instr[casid[`QBITS]][17]}},iqentry_instr[casid[`QBITS]][17:13]};
+ end
+ else begin // isAMO
+ // The value read from memory becomes the instruction result.
+ iqentry_res [ casid[`QBITS] ] <= dat_i;
+ // Instruction bit 31 selects a 5-bit immediate in bits [20:16] over the
+ // a2 operand. Sign extension replicates only the field's top bit (bit
+ // 20); replicating the whole 5-bit field would tile its pattern across
+ // the 64-bit operand instead of extending the sign.
+ rmw_argB <= iqentry_instr[casid[`QBITS]][31] ? {{59{iqentry_instr[casid[`QBITS]][20]}},iqentry_instr[casid[`QBITS]][20:16]} : iqentry_a2[casid[`QBITS]];
+ end
+ iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+ // Drop the strobe; B20 raises it again for the write phase.
+ stb_o <= `LOW;
+ bstate <= B20;
+ end
+ end
+
+// Regular load
+B_DLoadAck:
+ if (ack_i|err_i|tlb_miss|rdv_i) begin
+ wb_nack();
+ sr_o <= `LOW;
+ xdati <= dat_i;
+ case(bwhich)
+ 2'b00: begin
+ dram0 <= `DRAMREQ_READY;
+ iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+ end
+ 2'b01: if (`NUM_MEM > 1) begin
+ dram1 <= `DRAMREQ_READY;
+ iqentry_exc [ dram1_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+ end
+ 2'b10: if (`NUM_MEM > 2) begin
+ dram2 <= `DRAMREQ_READY;
+ iqentry_exc [ dram2_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+ end
+ default: ;
+ endcase
+ bstate <= B19;
+ end
+
+// Three cycles to determine if there's a cache hit during a store.
+B16: begin
+ case(bwhich)
+ 2'd0: if (dhit0) begin dram0 <= `DRAMREQ_READY; bstate <= B17; end
+ 2'd1: if (dhit1) begin dram1 <= `DRAMREQ_READY; bstate <= B17; end
+ 2'd2: if (dhit2) begin dram2 <= `DRAMREQ_READY; bstate <= B17; end
+ default: bstate <= BIDLE;
+ endcase
+ end
+B17: bstate <= B18;
+B18: bstate <= B19;
+B19: if (~acki) begin
+ sel_o <= 8'h00;
+ bstate <= BIDLE;
+ StoreAck1 <= `FALSE;
+ isStore <= `FALSE;
+ end
+B20:
+ if (~ack_i) begin
+ stb_o <= `HIGH;
+ we <= `HIGH;
+ dat_o <= fnDato(rmw_instr,rmw_res);
+ bstate <= B_StoreAck;
+ end
+B21:
+ if (~ack_i) begin
+ stb_o <= `HIGH;
+ bstate <= B12;
+ end
+default: bstate <= BIDLE;
+endcase
+
+// Queue tail pointer maintenance.
+// Without a branch miss the tails advance by the number of instructions
+// accepted from the fetch buffers this cycle (0, 1 or 2), modulo QENTRIES.
+// On a branch miss with threading disabled the tails are pulled back to the
+// first stomped queue entry.
+if (!branchmiss) begin
+ case({fetchbuf0_v, fetchbuf1_v})
+ 2'b00: ;
+ // Exactly one fetch buffer valid: advance by one if there is room.
+ 2'b01:
+ if (canq1) begin
+ tail0 <= (tail0+2'd1) % QENTRIES;
+ tail1 <= (tail1+2'd1) % QENTRIES;
+ end
+ 2'b10:
+ if (canq1) begin
+ tail0 <= (tail0+2'd1) % QENTRIES;
+ tail1 <= (tail1+2'd1) % QENTRIES;
+ end
+ // Both fetch buffers valid.
+ 2'b11:
+ if (canq1) begin
+ // A predicted-taken branch in slot 0 on the same thread as slot 1:
+ // advance by one only.
+ if (IsBranch(fetchbuf0_instr) && predict_taken0 && fetchbuf0_thrd==fetchbuf1_thrd) begin
+ tail0 <= (tail0+2'd1) % QENTRIES;
+ tail1 <= (tail1+2'd1) % QENTRIES;
+ end
+ else begin
+ // No tail movement when slot 0 holds a vector instruction and vqe0
+ // has reached vl.
+ if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin
+ if (canq2) begin
+ tail0 <= (tail0 + 3'd2) % QENTRIES;
+ tail1 <= (tail1 + 3'd2) % QENTRIES;
+ end
+ else begin // queued1 will be true
+ tail0 <= (tail0+2'd1) % QENTRIES;
+ tail1 <= (tail1+2'd1) % QENTRIES;
+ end
+ end
+ end
+ end
+ endcase
+end
+else if (!thread_en) begin // if branchmiss
+ // Scan for an entry that is stomped while its predecessor (index n-1,
+ // wrapping) is not; that entry becomes the new tail0 and the one after it
+ // tail1. The loop counts down, so if several entries qualify the lowest
+ // index is assigned last and is the one that takes effect.
+ for (n = QENTRIES-1; n >= 0; n = n - 1)
+ // (QENTRIES-1) is added instead of subtracting 1 so that the operand of
+ // the modulus is never negative.
+ if (iqentry_stomp[n] & ~iqentry_stomp[(n+(QENTRIES-1))%QENTRIES]) begin
+ tail0 <= n;
+ tail1 <= (n + 1) % QENTRIES;
+ end
+ // otherwise, it is the last instruction in the queue that has been mispredicted ... do nothing
+end
+
+// #5 rf[0] = 0; rf_v[0] = 1; rf_source[0] = 0;
+`ifdef SIM
+ $display("\n\n\n\n\n\n\n\n");
+ $display("TIME %0d", $time);
+ $display("%h #", pc0);
+`ifdef SUPPORT_SMT
+ $display ("Regfile: %d", rgs[0]);
+ for (n=0; n < 32; n=n+4) begin
+ $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#",
+ n[4:0]+0, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0],
+ n[4:0]+1, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1],
+ n[4:0]+2, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2],
+ n[4:0]+3, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3]
+ );
+ end
+ $display ("Regfile: %d", rgs[1]);
+ for (n=128; n < 160; n=n+4) begin
+ $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#",
+ n[4:0]+0, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0],
+ n[4:0]+1, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1],
+ n[4:0]+2, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2],
+ n[4:0]+3, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3]
+ );
+ end
+`else
+ $display ("Regfile: %d", rgs);
+ for (n=0; n < 32; n=n+4) begin
+ $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#",
+ n[4:0]+0, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0],
+ n[4:0]+1, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1],
+ n[4:0]+2, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2],
+ n[4:0]+3, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3]
+ );
+ end
+`endif
+`ifdef FCU_ENH
+ $display("Call Stack:");
+ for (n = 0; n < 16; n = n + 4)
+ $display("%c%d: %h %c%d: %h %c%d: %h %c%d: %h",
+ gFetchbufInst.gb1.ufb1.ursb1.rasp==n+0 ?">" : " ", n[4:0]+0, gFetchbufInst.gb1.ufb1.ursb1.ras[n+0],
+ gFetchbufInst.gb1.ufb1.ursb1.rasp==n+1 ?">" : " ", n[4:0]+1, gFetchbufInst.gb1.ufb1.ursb1.ras[n+1],
+ gFetchbufInst.gb1.ufb1.ursb1.rasp==n+2 ?">" : " ", n[4:0]+2, gFetchbufInst.gb1.ufb1.ursb1.ras[n+2],
+ gFetchbufInst.gb1.ufb1.ursb1.rasp==n+3 ?">" : " ", n[4:0]+3, gFetchbufInst.gb1.ufb1.ursb1.ras[n+3]
+ );
+ $display("\n");
+`endif
+// $display("Return address stack:");
+// for (n = 0; n < 16; n = n + 1)
+// $display("%d %h", rasp+n[3:0], ras[rasp+n[3:0]]);
+ $display("TakeBr:%d #", take_branch);//, backpc);
+ $display("Insn%d: %h", 0, insn0);
+ // Simulation-only dump of the fetch buffers. The second character of each
+ // line is '-' (45) or '>' (62), selected by fetchbuf.
+ if (`WAYS==1) begin
+ $display("%c%c A: %d %h %h #",
+ 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc);
+ $display("%c%c B: %d %h %h #",
+ 45, fetchbuf?62:45, fetchbufB_v, fetchbufB_instr, fetchbufB_pc);
+ end
+ else if (`WAYS > 1) begin
+ $display("Insn%d: %h", 1, insn1);
+ $display("%c%c A: %d %h %h #",
+ 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc);
+ $display("%c%c B: %d %h %h #",
+ 45, fetchbuf?45:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc);
+ // The C/D dump is nested here because `WAYS > 2 implies `WAYS > 1; as a
+ // separate trailing "else if" arm it could never be reached.
+ if (`WAYS > 2) begin
+ $display("%c%c C: %d %h %h #",
+ 45, fetchbuf?62:45, fetchbufC_v, fetchbufC_instr, fetchbufC_pc);
+ $display("%c%c D: %d %h %h #",
+ 45, fetchbuf?62:45, fetchbufD_v, fetchbufD_instr, fetchbufD_pc);
+ end
+ end
+ for (i=0; i 1)
+ $display("%d %h %h %c%h %o #",
+ dram1, dram1_addr, dram1_data, (IsFlowCtrl(dram1_instr) ? 98 : (IsMem(dram1_instr)) ? 109 : 97),
+ dram1_instr, dram1_id);
+ if (`NUM_MEM > 2)
+ $display("%d %h %h %c%h %o #",
+ dram2, dram2_addr, dram2_data, (IsFlowCtrl(dram2_instr) ? 98 : (IsMem(dram2_instr)) ? 109 : 97),
+ dram2_instr, dram2_id);
+ $display("%d %h %o %h #", dramA_v, dramA_bus, dramA_id, dramA_exc);
+ if (`NUM_MEM > 1)
+ $display("%d %h %o %h #", dramB_v, dramB_bus, dramB_id, dramB_exc);
+ if (`NUM_MEM > 2)
+ $display("%d %h %o %h #", dramC_v, dramC_bus, dramC_id, dramC_exc);
+ $display("ALU");
+ $display("%d %h %h %h %c%h %o %h #",
+ alu0_dataready, alu0_argI, alu0_argA, alu0_argB,
+ (IsFlowCtrl(alu0_instr) ? 98 : IsMem(alu0_instr) ? 109 : 97),
+ alu0_instr, alu0_sourceid, alu0_pc);
+ $display("%d %h %o 0 #", alu0_v, alu0_bus, alu0_id);
+ if (`NUM_ALU > 1) begin
+ $display("%d %h %h %h %c%h %o %h #",
+ alu1_dataready, alu1_argI, alu1_argA, alu1_argB,
+ (IsFlowCtrl(alu1_instr) ? 98 : IsMem(alu1_instr) ? 109 : 97),
+ alu1_instr, alu1_sourceid, alu1_pc);
+ $display("%d %h %o 0 #", alu1_v, alu1_bus, alu1_id);
+ end
+ $display("FCU");
+ $display("%d %h %h %h %h %c%c #", fcu_v, fcu_bus, fcu_argI, fcu_argA, fcu_argB, fcu_takb?"T":"-", fcu_pt?"T":"-");
+ $display("%c %h %h %h %h #", fcu_branchmiss?"m":" ", fcu_sourceid, fcu_misspc, fcu_nextpc, fcu_brdisp);
+ $display("Commit");
+ $display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]);
+ $display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]);
+ $display("instructions committed: %d valid committed: %d ticks: %d ", CC, I, tick);
+ $display("Write Buffer:");
+ for (n = `WB_DEPTH-1; n >= 0; n = n - 1)
+ $display("%c adr: %h dat: %h", wb_v[n]?" ":"*", wb_addr[n], wb_data[n]);
+ $display("Write merges: %d", wb_merges);
+`endif // SIM
+
+//
+// $display("\n\n\n\n\n\n\n\n");
+// $display("TIME %0d", $time);
+// $display(" pc0=%h", pc0);
+// $display(" pc1=%h", pc1);
+// $display(" reg0=%h, v=%d, src=%o", rf[0], rf_v[0], rf_source[0]);
+// $display(" reg1=%h, v=%d, src=%o", rf[1], rf_v[1], rf_source[1]);
+// $display(" reg2=%h, v=%d, src=%o", rf[2], rf_v[2], rf_source[2]);
+// $display(" reg3=%h, v=%d, src=%o", rf[3], rf_v[3], rf_source[3]);
+// $display(" reg4=%h, v=%d, src=%o", rf[4], rf_v[4], rf_source[4]);
+// $display(" reg5=%h, v=%d, src=%o", rf[5], rf_v[5], rf_source[5]);
+// $display(" reg6=%h, v=%d, src=%o", rf[6], rf_v[6], rf_source[6]);
+// $display(" reg7=%h, v=%d, src=%o", rf[7], rf_v[7], rf_source[7]);
+
+// $display("Fetch Buffers:");
+// $display(" %c%c fbA: v=%d instr=%h pc=%h %c%c fbC: v=%d instr=%h pc=%h",
+// fetchbuf?32:45, fetchbuf?32:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc,
+// fetchbuf?45:32, fetchbuf?62:32, fetchbufC_v, fetchbufC_instr, fetchbufC_pc);
+// $display(" %c%c fbB: v=%d instr=%h pc=%h %c%c fbD: v=%d instr=%h pc=%h",
+// fetchbuf?32:45, fetchbuf?32:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc,
+// fetchbuf?45:32, fetchbuf?62:32, fetchbufD_v, fetchbufD_instr, fetchbufD_pc);
+// $display(" branchback=%d backpc=%h", branchback, backpc);
+
+// $display("Instruction Queue:");
+// for (i=0; i<8; i=i+1)
+// $display(" %c%c%d: v=%d done=%d out=%d agen=%d res=%h op=%d bt=%d tgt=%d a1=%h (v=%d/s=%o) a2=%h (v=%d/s=%o) im=%h pc=%h exc=%h",
+// (i[`QBITS]==heads[0])?72:32, (i[`QBITS]==tail0)?84:32, i,
+// iqentry_v[i], iqentry_done[i], iqentry_out[i], iqentry_agen[i], iqentry_res[i], iqentry_op[i],
+// iqentry_bt[i], iqentry_tgt[i], iqentry_a1[i], iqentry_a1_v[i], iqentry_a1_s[i], iqentry_a2[i], iqentry_a2_v[i],
+// iqentry_a2_s[i], iqentry_a0[i], iqentry_pc[i], iqentry_exc[i]);
+
+// $display("Scheduling Status:");
+// $display(" iqentry0 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_0_issue, iqentry_0_islot, iqentry_stomp[0], iqentry_source[0], iqentry_memready[0], iqentry_memissue[0]);
+// $display(" iqentry1 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_1_issue, iqentry_1_islot, iqentry_stomp[1], iqentry_source[1], iqentry_memready[1], iqentry_memissue[1]);
+// $display(" iqentry2 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_2_issue, iqentry_2_islot, iqentry_stomp[2], iqentry_source[2], iqentry_memready[2], iqentry_memissue[2]);
+// $display(" iqentry3 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_3_issue, iqentry_3_islot, iqentry_stomp[3], iqentry_source[3], iqentry_memready[3], iqentry_memissue[3]);
+// $display(" iqentry4 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_4_issue, iqentry_4_islot, iqentry_stomp[4], iqentry_source[4], iqentry_memready[4], iqentry_memissue[4]);
+// $display(" iqentry5 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_5_issue, iqentry_5_islot, iqentry_stomp[5], iqentry_source[5], iqentry_memready[5], iqentry_memissue[5]);
+// $display(" iqentry6 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_6_issue, iqentry_6_islot, iqentry_stomp[6], iqentry_source[6], iqentry_memready[6], iqentry_memissue[6]);
+// $display(" iqentry7 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b",
+// iqentry_7_issue, iqentry_7_islot, iqentry_stomp[7], iqentry_source[7], iqentry_memready[7], iqentry_memissue[7]);
+
+// $display("ALU Inputs:");
+// $display(" 0: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d",
+// alu0_available, alu0_dataready, alu0_sourceid, alu0_op, alu0_argA,
+// alu0_argB, alu0_argI, alu0_bt);
+// $display(" 1: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d",
+// alu1_available, alu1_dataready, alu1_sourceid, alu1_op, alu1_argA,
+// alu1_argB, alu1_argI, alu1_bt);
+
+// $display("ALU Outputs:");
+// $display(" 0: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o",
+// alu0_v, alu0_bus, alu0_id, alu0_branchmiss, alu0_misspc, alu0_sourceid);
+// $display(" 1: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o",
+// alu1_v, alu1_bus, alu1_id, alu1_branchmiss, alu1_misspc, alu1_sourceid);
+
+// $display("DRAM Status:");
+// $display(" OUT: v=%d data=%h tgt=%d id=%o", dram_v, dram_bus, dram_tgt, dram_id);
+// $display(" dram0: status=%h addr=%h data=%h op=%d tgt=%d id=%o",
+// dram0, dram0_addr, dram0_data, dram0_op, dram0_tgt, dram0_id);
+// $display(" dram1: status=%h addr=%h data=%h op=%d tgt=%d id=%o",
+// dram1, dram1_addr, dram1_data, dram1_op, dram1_tgt, dram1_id);
+// $display(" dram2: status=%h addr=%h data=%h op=%d tgt=%d id=%o",
+// dram2, dram2_addr, dram2_data, dram2_op, dram2_tgt, dram2_id);
+
+// $display("Commit Buses:");
+// $display(" 0: v=%d id=%o data=%h", commit0_v, commit0_id, commit0_bus);
+// $display(" 1: v=%d id=%o data=%h", commit1_v, commit1_id, commit1_bus);
+
+//
+// $display("Memory Contents:");
+// for (j=0; j<64; j=j+16)
+// $display(" %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h",
+// m[j+0], m[j+1], m[j+2], m[j+3], m[j+4], m[j+5], m[j+6], m[j+7],
+// m[j+8], m[j+9], m[j+10], m[j+11], m[j+12], m[j+13], m[j+14], m[j+15]);
+
+ $display("");
+
+ if (|panic) begin
+ $display("");
+ $display("-----------------------------------------------------------------");
+ $display("-----------------------------------------------------------------");
+ $display("--------------- PANIC:%s -----------------", message[panic]);
+ $display("-----------------------------------------------------------------");
+ $display("-----------------------------------------------------------------");
+ $display("");
+ $display("instructions committed: %d", I);
+ $display("total execution cycles: %d", $time / 10);
+ $display("");
+ end
+ if (|panic && ~outstanding_stores) begin
+ $finish;
+ end
+/*
+ for (n = 0; n < QENTRIES; n = n + 1)
+ if (branchmiss) begin
+ if (!setpred[n]) begin
+ iqentry_instr[n][`INSTRUCTION_OP] <= `NOP;
+ iqentry_done[n] <= iqentry_v[n];
+ iqentry_cmt[n] <= iqentry_v[n];
+ end
+ end
+*/
+ rf_source[ 0] <= {`QBIT{1'b1}};
+ rf_source[32] <= {`QBIT{1'b1}};
+ rf_source[64] <= {`QBIT{1'b1}};
+ rf_source[96] <= {`QBIT{1'b1}};
+`ifdef SUPPORTSMT
+ rf_source[128] <= {`QBIT{1'b1}};
+ rf_source[160] <= {`QBIT{1'b1}};
+ rf_source[192] <= {`QBIT{1'b1}};
+ rf_source[224] <= {`QBIT{1'b1}};
+`endif
+
+end // clock domain
+/*
+always @(posedge clk)
+if (rst) begin
+ tail0 <= 3'd0;
+ tail1 <= 3'd1;
+end
+else begin
+if (!branchmiss) begin
+ case({fetchbuf0_v, fetchbuf1_v})
+ 2'b00: ;
+ 2'b01:
+ if (canq1) begin
+ tail0 <= idp1(tail0);
+ tail1 <= idp1(tail1);
+ end
+ 2'b10:
+ if (canq1) begin
+ tail0 <= idp1(tail0);
+ tail1 <= idp1(tail1);
+ end
+ 2'b11:
+ if (canq1) begin
+ if (IsBranch(fetchbuf0_instr) && predict_taken0) begin
+ tail0 <= idp1(tail0);
+ tail1 <= idp1(tail1);
+ end
+ else begin
+ if (vqe < vl || !IsVector(fetchbuf0_instr)) begin
+ if (canq2) begin
+ tail0 <= idp2(tail0);
+ tail1 <= idp2(tail1);
+ end
+ else begin // queued1 will be true
+ tail0 <= idp1(tail0);
+ tail1 <= idp1(tail1);
+ end
+ end
+ end
+ end
+ endcase
+end
+else begin // if branchmiss
+ if (iqentry_stomp[0] & ~iqentry_stomp[7]) begin
+ tail0 <= 3'd0;
+ tail1 <= 3'd1;
+ end
+ else if (iqentry_stomp[1] & ~iqentry_stomp[0]) begin
+ tail0 <= 3'd1;
+ tail1 <= 3'd2;
+ end
+ else if (iqentry_stomp[2] & ~iqentry_stomp[1]) begin
+ tail0 <= 3'd2;
+ tail1 <= 3'd3;
+ end
+ else if (iqentry_stomp[3] & ~iqentry_stomp[2]) begin
+ tail0 <= 3'd3;
+ tail1 <= 3'd4;
+ end
+ else if (iqentry_stomp[4] & ~iqentry_stomp[3]) begin
+ tail0 <= 3'd4;
+ tail1 <= 3'd5;
+ end
+ else if (iqentry_stomp[5] & ~iqentry_stomp[4]) begin
+ tail0 <= 3'd5;
+ tail1 <= 3'd6;
+ end
+ else if (iqentry_stomp[6] & ~iqentry_stomp[5]) begin
+ tail0 <= 3'd6;
+ tail1 <= 3'd7;
+ end
+ else if (iqentry_stomp[7] & ~iqentry_stomp[6]) begin
+ tail0 <= 3'd7;
+ tail1 <= 3'd0;
+ end
+ // otherwise, it is the last instruction in the queue that has been mispredicted ... do nothing
+end
+end
+*/
+
+// Update the write buffer.
+//
+// Queues one store into the write buffer, or merges it into the entry that
+// was queued immediately before it (slot wbptr-1).
+//
+// Inputs:
+//   id   - queue index of the store; recorded as bit (1 << id) in wb_id
+//   rmw  - stored in wb_rmw; must match the previous entry for a merge
+//   sel  - byte-lane select, one bit per byte of data
+//   ol   - stored in wb_ol; must match the previous entry for a merge
+//          (presumably the operating level of the store -- TODO confirm)
+//   addr - store address; only bits [AMSB:3] are kept, the low three bits
+//          are written as zero
+//   data - 64-bit store data
+//
+// A merge is attempted only when wbm is set, more than one slot is in use
+// (wbptr > 1) and the previous entry is valid and refers to the same 8-byte
+// aligned address with the same ol and rmw values.
+task wb_update;
+input [`QBITS] id;
+input rmw;
+input [7:0] sel;
+input [1:0] ol;
+input [`ABITS] addr;
+input [63:0] data;
+begin
+ if (wbm && wbptr > 1 && wb_addr[wbptr-1][AMSB:3]==addr[AMSB:3]
+ && wb_ol[wbptr-1]==ol && wb_rmw[wbptr-1]==rmw && wb_v[wbptr-1]) begin
+ // The write buffer is always shifted during the bus IDLE state. That means
+ // the data is out of place by a slot. The slot the data is moved from is
+ // invalidated.
+ // NOTE(review): the shift itself is not performed in this task; the
+ // wbptr-2 / wbptr-1 indexing below relies on it happening elsewhere.
+ wb_v[wbptr-2] <= `INV;
+ wb_v[wbptr-1] <= wb_en;
+ // Accumulate the ids and byte lanes of every store merged into this slot.
+ wb_id[wbptr-1] <= wb_id[wbptr-1] | (16'd1 << id);
+ wb_rmw[wbptr-1] <= rmw;
+ wb_ol[wbptr-1] <= ol;
+ wb_sel[wbptr-1] <= wb_sel[wbptr-1] | sel;
+ // The address and data are first re-assigned to themselves; the per-lane
+ // assignments that follow are executed later and therefore take
+ // precedence for every byte lane selected by sel.
+ wb_addr[wbptr-1] <= wb_addr[wbptr-1];
+ wb_data[wbptr-1] <= wb_data[wbptr-1];
+ if (sel[0]) wb_data[wbptr-1][ 7: 0] <= data[ 7: 0];
+ if (sel[1]) wb_data[wbptr-1][15: 8] <= data[15: 8];
+ if (sel[2]) wb_data[wbptr-1][23:16] <= data[23:16];
+ if (sel[3]) wb_data[wbptr-1][31:24] <= data[31:24];
+ if (sel[4]) wb_data[wbptr-1][39:32] <= data[39:32];
+ if (sel[5]) wb_data[wbptr-1][47:40] <= data[47:40];
+ if (sel[6]) wb_data[wbptr-1][55:48] <= data[55:48];
+ if (sel[7]) wb_data[wbptr-1][63:56] <= data[63:56];
+ // Statistics only: reported by the "Write merges" $display.
+ wb_merges <= wb_merges + 32'd1;
+ end
+ else begin
+ // No merge: fill the slot at wbptr and advance the pointer.
+ wb_v[wbptr] <= wb_en;
+ wb_id[wbptr] <= (16'd1 << id);
+ wb_rmw[wbptr] <= rmw;
+ wb_ol[wbptr] <= ol;
+ wb_sel[wbptr] <= sel;
+ wb_addr[wbptr] <= {addr[AMSB:3],3'b0};
+ wb_data[wbptr] <= data;
+ wbptr <= wbptr + 2'd1;
+ end
+end
+endtask
+
+// Retire <amt> slots from the head of the instruction queue.
+//
+//  - every head pointer advances by amt, wrapping modulo QENTRIES
+//  - CC grows by amt; I grows by the number of retired slots whose
+//    iqentry_v bit is set
+//  - each retired slot is marked IQS_INVALID and has its mem, iv and alu
+//    flags cleared
+//  - the sequence number of every valid entry is reduced by the sequence
+//    number of the highest-numbered retiring head that is valid (or by
+//    zero when none of them is valid)
+//
+// Amounts other than 1, 2 or 3 only move the heads and update CC.
+// The module-level loop variable n is used by this task.
+task head_inc;
+input [`QBITS] amt;
+begin
+    for (n = 0; n < QENTRIES; n = n + 1)
+        heads[n] <= (heads[n] + amt) % QENTRIES;
+    CC <= CC + amt;
+
+    case (amt)
+    3'd3:
+        begin
+            I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]] + iqentry_v[heads[2]];
+            // first retired slot
+            iqentry_state[heads[0]] <= IQS_INVALID;
+            iqentry_mem  [heads[0]] <= `FALSE;
+            iqentry_iv   [heads[0]] <= `INV;
+            iqentry_alu  [heads[0]] <= `FALSE;
+            // second retired slot
+            iqentry_state[heads[1]] <= IQS_INVALID;
+            iqentry_mem  [heads[1]] <= `FALSE;
+            iqentry_iv   [heads[1]] <= `INV;
+            iqentry_alu  [heads[1]] <= `FALSE;
+            // third retired slot
+            iqentry_state[heads[2]] <= IQS_INVALID;
+            iqentry_mem  [heads[2]] <= `FALSE;
+            iqentry_iv   [heads[2]] <= `INV;
+            iqentry_alu  [heads[2]] <= `FALSE;
+            // rebase the sequence numbers of the entries that remain valid
+            for (n = 0; n < QENTRIES; n = n + 1)
+                if (iqentry_v[n])
+                    iqentry_sn[n] <= iqentry_sn[n] -
+                        (iqentry_v[heads[2]] ? iqentry_sn[heads[2]]
+                        : iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+                        : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+                        : 4'b0);
+        end
+    3'd2:
+        begin
+            I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]];
+            // first retired slot
+            iqentry_state[heads[0]] <= IQS_INVALID;
+            iqentry_mem  [heads[0]] <= `FALSE;
+            iqentry_iv   [heads[0]] <= `INV;
+            iqentry_alu  [heads[0]] <= `FALSE;
+            // second retired slot
+            iqentry_state[heads[1]] <= IQS_INVALID;
+            iqentry_mem  [heads[1]] <= `FALSE;
+            iqentry_iv   [heads[1]] <= `INV;
+            iqentry_alu  [heads[1]] <= `FALSE;
+            for (n = 0; n < QENTRIES; n = n + 1)
+                if (iqentry_v[n])
+                    iqentry_sn[n] <= iqentry_sn[n] -
+                        (iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+                        : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+                        : 4'b0);
+        end
+    3'd1:
+        begin
+            I = I + iqentry_v[heads[0]];
+            iqentry_state[heads[0]] <= IQS_INVALID;
+            iqentry_mem  [heads[0]] <= `FALSE;
+            iqentry_iv   [heads[0]] <= `INV;
+            iqentry_alu  [heads[0]] <= `FALSE;
+            for (n = 0; n < QENTRIES; n = n + 1)
+                if (iqentry_v[n])
+                    iqentry_sn[n] <= iqentry_sn[n] -
+                        (iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+                        : 4'b0);
+        end
+    default: ;
+    endcase
+end
+endtask
+
+// Capture a result bus into the operands of queue entry nn.
+//
+//   nn  - queue entry to update
+//   id  - source tag carried by the result bus
+//   v   - bus valid flag
+//   bus - 64-bit result value
+//
+// Nothing happens unless both the bus and the queue entry are valid. Each of
+// the three operands (a1, a2, a3) is loaded independently when it is still
+// marked invalid and its recorded source tag equals id.
+task setargs;
+input [`QBITS] nn;
+input [`QBITSP1] id;
+input v;
+input [63:0] bus;
+begin
+    if (v == `VAL && iqentry_v[nn] == `VAL) begin
+        if (iqentry_a1_s[nn] == id && iqentry_a1_v[nn] == `INV) begin
+            iqentry_a1_v[nn] <= `VAL;
+            iqentry_a1[nn] <= bus;
+        end
+        if (iqentry_a2_s[nn] == id && iqentry_a2_v[nn] == `INV) begin
+            iqentry_a2_v[nn] <= `VAL;
+            iqentry_a2[nn] <= bus;
+        end
+        if (iqentry_a3_s[nn] == id && iqentry_a3_v[nn] == `INV) begin
+            iqentry_a3_v[nn] <= `VAL;
+            iqentry_a3[nn] <= bus;
+        end
+    end
+end
+endtask
+
+// Load the decoded instruction fields carried on <bus> into queue entry nn
+// and mark the entry's decode as valid (iqentry_iv).
+//
+//   nn  - queue entry to update
+//   bus - 144-bit decode bus; fields are extracted with the `IB_* macros
+//
+// Every assignment targets a different array, so their order is not
+// significant; the grouping below follows the field names only.
+// The Rt / Rc / Ra register fields and the instruction length are not
+// loaded by this task. Unless INLINE_DECODE is defined, the length already
+// stored in the entry is checked against the bus and simulation stops on a
+// mismatch.
+task setinsn1;
+input [`QBITS] nn;
+input [143:0] bus;
+begin
+`ifndef INLINE_DECODE
+    if (iqentry_insln[nn] != bus[`IB_LN]) begin
+        $display("Insn length mismatch.");
+        $stop;
+    end
+`endif
+    iqentry_iv     [nn] <= `VAL;
+
+    // constants and size
+    iqentry_a0     [nn] <= bus[`IB_CONST];
+    iqentry_imm    [nn] <= bus[`IB_IMM];
+    iqentry_sz     [nn] <= bus[`IB_SZ];
+
+    // alu / fpu / fc / mem class bits
+    iqentry_alu    [nn] <= bus[`IB_ALU];
+    iqentry_alu0   [nn] <= bus[`IB_ALU0];
+    iqentry_fpu    [nn] <= bus[`IB_FPU];
+    iqentry_fc     [nn] <= bus[`IB_FC];
+    iqentry_mem    [nn] <= bus[`IB_MEM];
+    iqentry_canex  [nn] <= bus[`IB_CANEX];
+
+    // jump / branch / interrupt bits
+    iqentry_jal    [nn] <= bus[`IB_JAL];
+    iqentry_ret    [nn] <= bus[`IB_RET];
+    iqentry_jmp    [nn] <= bus[`IB_JMP];
+    iqentry_br     [nn] <= bus[`IB_BR];
+    iqentry_bt     [nn] <= bus[`IB_BT];
+    iqentry_brk    [nn] <= bus[`IB_BRK];
+    iqentry_irq    [nn] <= bus[`IB_IRQ];
+    iqentry_rti    [nn] <= bus[`IB_RTI];
+
+    // load / store bits
+    iqentry_load   [nn] <= bus[`IB_LOAD];
+    iqentry_loadv  [nn] <= bus[`IB_LOADV];
+    iqentry_preload[nn] <= bus[`IB_PRELOAD];
+    iqentry_store  [nn] <= bus[`IB_STORE];
+    iqentry_push   [nn] <= bus[`IB_PUSH];
+    iqentry_rmw    [nn] <= bus[`IB_RMW];
+    iqentry_memsz  [nn] <= bus[`IB_MEMSZ];
+    iqentry_memndx [nn] <= bus[`IB_MEMNDX];
+    iqentry_memdb  [nn] <= bus[`IB_MEMDB];
+    iqentry_memsb  [nn] <= bus[`IB_MEMSB];
+    iqentry_aq     [nn] <= bus[`IB_AQ];
+    iqentry_rl     [nn] <= bus[`IB_RL];
+    iqentry_tlb    [nn] <= bus[`IB_TLB];
+
+    // remaining single-purpose bits
+    iqentry_cmp    [nn] <= bus[`IB_CMP];
+    iqentry_shft   [nn] <= bus[`IB_SHFT];    // 48 bit shift instructions
+    iqentry_sei    [nn] <= bus[`IB_SEI];
+    iqentry_sync   [nn] <= bus[`IB_SYNC];
+    iqentry_fsync  [nn] <= bus[`IB_FSYNC];
+    iqentry_oddball[nn] <= bus[`IB_ODDBALL];
+
+    // write-enable bits
+    iqentry_rfw    [nn] <= bus[`IB_RFW];
+`ifdef SUPPORT_PREDICATION
+    iqentry_prfw   [nn] <= bus[`IB_PRFW];
+`endif
+    iqentry_we     [nn] <= bus[`IB_WE];
+end
+endtask
+
+// Conditionally load decoded instruction fields into queue entry nn.
+//
+//   nn  - queue entry to update
+//   id  - tag of the decode result on the bus
+//   v   - bus valid flag
+//   bus - 144-bit decode bus, passed on to setinsn1
+//
+// The fields are loaded only when the bus and the entry are both valid, the
+// entry has not been decoded yet (iqentry_iv is `INV) and the entry's
+// iqentry_is tag equals id.
+task setinsn;
+input [`QBITS] nn;
+input [4:0] id;
+input v;
+input [143:0] bus;
+begin
+    if (v == `VAL && iqentry_v[nn] == `VAL) begin
+        if (iqentry_is[nn] == id && iqentry_iv[nn] == `INV) begin
+            setinsn1(nn,bus);
+        end
+    end
+end
+endtask
+
+// Set the valid flag and source tag of operand 1 for the entry at tail1.
+//
+// When fetchbuf0 writes a register (fetchbuf0_rfw) and that register (Rt0s)
+// is the one read as Ra1s, the operand is marked invalid and tagged with the
+// entry at tail0. Otherwise the current register-file status for Ra1s is
+// copied.
+task a1_vs;
+begin
+    if (!(fetchbuf0_rfw && Ra1s == Rt0s)) begin
+        // no overlapping register write from fetchbuf0
+        iqentry_a1_s [tail1] <= rf_source [Ra1s];
+        iqentry_a1_v [tail1] <= regIsValid[Ra1s];
+    end
+    else begin
+        // value will come from the instruction queued at tail0
+        iqentry_a1_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+        iqentry_a1_v [tail1] <= `INV;
+    end
+end
+endtask
+
+// Set the valid flag and source tag of operand 2 for the entry at tail1.
+//
+// When fetchbuf0 writes a register (fetchbuf0_rfw) and that register (Rt0s)
+// is the one read as Rb1s, the operand is marked invalid and tagged with the
+// entry at tail0. Otherwise the current register-file status for Rb1s is
+// copied.
+task a2_vs;
+begin
+    if (!(fetchbuf0_rfw && Rb1s == Rt0s)) begin
+        // no overlapping register write from fetchbuf0
+        iqentry_a2_s [tail1] <= rf_source [Rb1s];
+        iqentry_a2_v [tail1] <= regIsValid[Rb1s];
+    end
+    else begin
+        // value will come from the instruction queued at tail0
+        iqentry_a2_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+        iqentry_a2_v [tail1] <= `INV;
+    end
+end
+endtask
+
+// Set the valid flag and source tag of operand 3 for the entry at tail1.
+//
+// When fetchbuf0 writes a register (fetchbuf0_rfw) and that register (Rt0s)
+// is the one read as Rc1s, the operand is marked invalid and tagged with the
+// entry at tail0. Otherwise the current register-file status for Rc1s is
+// copied.
+task a3_vs;
+begin
+    if (!(fetchbuf0_rfw && Rc1s == Rt0s)) begin
+        // no overlapping register write from fetchbuf0
+        iqentry_a3_s [tail1] <= rf_source [Rc1s];
+        iqentry_a3_v [tail1] <= regIsValid[Rc1s];
+    end
+    else begin
+        // value will come from the instruction queued at tail0
+        iqentry_a3_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+        iqentry_a3_v [tail1] <= `INV;
+    end
+end
+endtask
+
+// Queue the instruction held in fetchbuf0.
+//
+// Non-vector instructions (or any instruction when SUP_VECTOR is false) are
+// queued once at tail0. Vector instructions are queued one element at a
+// time: one element at tail0 and, when canq2 is set and at least two more
+// elements remain (vqe0 < vl-2), a second element at tail1.
+//
+// For every queued instruction that writes a register (fetchbuf0_rfw) the
+// target register Rt0s is marked invalid and its source tag is pointed at
+// the queue slot that will produce it.
+//
+// Several registers are assigned more than once with non-blocking
+// assignments; the assignment executed last is the one that takes effect.
+task enque0x;
+begin
+ if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin
+ // One element is queued: advance the element counter by one.
+ vqe0 <= vqe0 + 4'd1;
+ // vqet0 advances for every element, except that for instructions matched
+ // by IsVCmprss it advances only when the mask bit for this element is set
+ // (mask register selected by instruction bits 25:23).
+ if (IsVCmprss(fetchbuf0_instr)) begin
+ if (vm[fetchbuf0_instr[25:23]][vqe0])
+ vqet0 <= vqet0 + 4'd1;
+ end
+ else
+ vqet0 <= vqet0 + 4'd1;
+ // Fewer than two elements remain after this one.
+ // NOTE(review): nop_fetchbuf presumably cancels the fetch buffer once the
+ // last elements are queued -- confirm against its consumer.
+ if (vqe0 >= vl-2)
+ nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
+ // Sequence number is one past the current maximum for the thread.
+ enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0);
+ iq_ctr = iq_ctr + 4'd1;
+ if (fetchbuf0_rfw) begin
+ rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 }; // top bit indicates ALU/MEM bus
+ rf_v[Rt0s] <= `INV;
+ end
+ if (canq2) begin
+ if (vqe0 < vl-2) begin
+ // A second element is queued in the same cycle; these assignments
+ // replace the +1 updates made above.
+ vqe0 <= vqe0 + 4'd2;
+ if (IsVCmprss(fetchbuf0_instr)) begin
+ if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1])
+ vqet0 <= vqet0 + 4'd2;
+ end
+ else
+ vqet0 <= vqet0 + 4'd2;
+ enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0]+4'd2, vqe0 + 6'd1);
+ // NOTE(review): iq_ctr is updated with blocking assignments, so on this
+ // path it has already been increased by 1 above and is increased by a
+ // further 2 here (3 in total for two queued elements) -- confirm that
+ // this is intended.
+ iq_ctr = iq_ctr + 4'd2;
+ if (fetchbuf0_rfw) begin
+ // The later element becomes the producer of Rt0s.
+ rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 }; // top bit indicates ALU/MEM bus
+ rf_v[Rt0s] <= `INV;
+ end
+ end
+ end
+ end
+ else begin
+ // Scalar path: a single entry at tail0 with element number zero.
+ enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0);
+ iq_ctr = iq_ctr + 4'd1;
+ if (fetchbuf0_rfw) begin
+ rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 }; // top bit indicates ALU/MEM bus
+ rf_v[Rt0s] <= `INV;
+ end
+ end
+end
+endtask
+
+// Enqueue fetchbuf0 onto the tail of the instruction queue
+//
+//   tail   - queue slot to fill
+//   seqnum - sequence number to record for the entry
+//   venno  - vector element number to record for the entry
+//
+// The entry is placed in the IQS_QUEUED state with its decode marked invalid
+// (iqentry_iv); the fc / mem / alu / fpu / load class bits are cleared until
+// decode information is supplied. When INLINE_DECODE is defined the decode
+// fields are loaded immediately from id1_bus by setinsn1, which is called
+// last so that its assignments take precedence over the clears made here.
+task enque0;
+input [`QBITS] tail;
+input [`SNBITS] seqnum;
+input [5:0] venno;
+begin
+    // Exception status. The unconditional clear must be executed BEFORE the
+    // debug checks: with non-blocking assignments the last one executed
+    // wins, and a later clear would discard the debug fault.
+    iqentry_exc  [tail] <= `EXC_NONE;
+`ifdef SUPPORT_DBG
+    if (dbg_imatchA)
+        iqentry_exc[tail] <= `FLT_DBG;
+    else if (dbg_ctrl[63])
+        iqentry_exc[tail] <= `FLT_SSM;
+`endif
+    iqentry_state[tail] <= IQS_QUEUED;
+    iqentry_sn   [tail] <= seqnum;
+    iqentry_iv   [tail] <= `INV;
+    iqentry_is   [tail] <= tail;
+    iqentry_thrd [tail] <= fetchbuf0_thrd;
+    iqentry_res  [tail] <= `ZERO;
+    // An instruction matched by IsVLS is replaced by a NOP when the vm
+    // entry selected by fnM2() is zero.
+    iqentry_instr[tail] <= IsVLS(fetchbuf0_instr) ? (vm[fnM2(fetchbuf0_instr)] ? fetchbuf0_instr : `NOP_INSN) : fetchbuf0_instr;
+    iqentry_insln[tail] <= fetchbuf0_insln;
+    iqentry_fc   [tail] <= `INV;
+    iqentry_mem  [tail] <= `INV;
+    iqentry_alu  [tail] <= `INV;
+    iqentry_fpu  [tail] <= `INV;
+    iqentry_load [tail] <= `INV;
+    iqentry_pt   [tail] <= predict_taken0;
+// If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt
+// inherit the previous pc. (Disabled.)
+//if (IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15] &&
+//  (IsBrk(iqentry_instr[idm1(tail)]) && !iqentry_instr[idm1(tail1)][15] && iqentry_v[idm1(tail)]))
+//  iqentry_pc [tail] <= iqentry_pc[idm1(tail)];
+//else
+    iqentry_pc   [tail] <= fetchbuf0_pc;
+    iqentry_rtop [tail] <= IsRtop(fetchbuf0_instr);
+    iqentry_tgt  [tail] <= Rt0;
+    iqentry_Ra   [tail] <= Ra0;
+    iqentry_Rb   [tail] <= Rb0;
+    iqentry_Rc   [tail] <= Rc0;
+    iqentry_vl   [tail] <= vl;
+    iqentry_ven  [tail] <= venno;
+    // Operands: value from the register file read ports, valid either
+    // because the instruction does not need the operand (SourceNValid) or
+    // because the register is currently valid; otherwise the source tag
+    // identifies the producer.
+    iqentry_a1   [tail] <= rfoa0;
+    iqentry_a1_v [tail] <= Source1Valid(fetchbuf0_instr) | regIsValid[Ra0s];
+    iqentry_a1_s [tail] <= rf_source[Ra0s];
+    iqentry_a2   [tail] <= rfob0;
+    iqentry_a2_v [tail] <= Source2Valid(fetchbuf0_instr) | regIsValid[Rb0s];
+    iqentry_a2_s [tail] <= rf_source[Rb0s];
+    iqentry_a3   [tail] <= rfoc0;
+    iqentry_a3_v [tail] <= Source3Valid(fetchbuf0_instr) | regIsValid[Rc0s];
+    iqentry_a3_s [tail] <= rf_source[Rc0s];
+`ifdef INLINE_DECODE
+/* This decoding cannot be done here because it'll introduce a 1 cycle delay
+    id1_Rt <= Rt0[4:0];
+    id1_vl <= vl;
+    id1_ven <= venno;
+    id1_id <= tail;
+    id1_pt <= predict_taken0;
+    id1_thrd <= fetchbuf0_thrd;
+*/
+    setinsn1(tail,id1_bus);
+`endif
+end
+endtask
+
+// Enqueue fetchbuf1 onto the instruction queue. Fetchbuf1 might be the
+// second instruction queued in a cycle, so the caller chooses which tail
+// slot it goes to.
+//
+//   tail   - queue slot to fill
+//   seqnum - sequence number to record for the entry
+//   venno  - vector element number to record for the entry
+//
+// The entry is placed in the IQS_QUEUED state with its decode marked invalid
+// (iqentry_iv); the fc / mem / alu / fpu / load class bits are cleared until
+// decode information is supplied. When INLINE_DECODE is defined the decode
+// fields are loaded immediately from id2_bus by setinsn1, which is called
+// last so that its assignments take precedence over the clears made here.
+task enque1;
+input [`QBITS] tail;
+input [`SNBITS] seqnum;
+input [5:0] venno;
+begin
+    // Exception status. The unconditional clear must be executed BEFORE the
+    // debug checks: with non-blocking assignments the last one executed
+    // wins, and a later clear would discard the debug fault.
+    iqentry_exc  [tail] <= `EXC_NONE;
+`ifdef SUPPORT_DBG
+    if (dbg_imatchB)
+        iqentry_exc[tail] <= `FLT_DBG;
+    else if (dbg_ctrl[63])
+        iqentry_exc[tail] <= `FLT_SSM;
+`endif
+    iqentry_state[tail] <= IQS_QUEUED;
+    iqentry_sn   [tail] <= seqnum;
+    iqentry_iv   [tail] <= `INV;
+    iqentry_is   [tail] <= tail;
+    iqentry_thrd [tail] <= fetchbuf1_thrd;
+    iqentry_res  [tail] <= `ZERO;
+    // An instruction matched by IsVLS is replaced by a NOP when the vm
+    // entry selected by fnM2() is zero.
+    iqentry_instr[tail] <= IsVLS(fetchbuf1_instr) ? (vm[fnM2(fetchbuf1_instr)] ? fetchbuf1_instr : `NOP_INSN) : fetchbuf1_instr;
+    iqentry_insln[tail] <= fetchbuf1_insln;
+    iqentry_fc   [tail] <= `INV;
+    iqentry_mem  [tail] <= `INV;
+    iqentry_alu  [tail] <= `INV;
+    iqentry_fpu  [tail] <= `INV;
+    iqentry_load [tail] <= `INV;
+    iqentry_pt   [tail] <= predict_taken1;
+// The pc is taken from fetchbuf1 whichever tail is being filled.
+// The disabled code below made a hardware interrupt that follows another
+// hardware interrupt inherit the previous pc; the previous instruction is
+// fetchbuf0 when queuing on tail1, otherwise the preceding queue entry.
+//  if (tail==tail1) begin
+//      if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] &&
+//          IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15])
+//          iqentry_pc [tail] <= fetchbuf0_pc;
+//  end
+//  else begin
+//      if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] &&
+//          (IsBrk(iqentry_instr[idp7(tail)]) && !iqentry_instr[idm1(tail)][15] && iqentry_v[idm1(tail)]))
+//          iqentry_pc [tail] <= iqentry_pc[idm1(tail)];
+//  end
+    iqentry_pc   [tail] <= fetchbuf1_pc;
+    iqentry_rtop [tail] <= IsRtop(fetchbuf1_instr);
+    iqentry_tgt  [tail] <= Rt1;
+    iqentry_Ra   [tail] <= Ra1;
+    iqentry_Rb   [tail] <= Rb1;
+    iqentry_Rc   [tail] <= Rc1;
+    iqentry_vl   [tail] <= vl;
+    iqentry_ven  [tail] <= venno;
+    // Operands: value from the register file read ports, valid either
+    // because the instruction does not need the operand (SourceNValid) or
+    // because the register is currently valid; otherwise the source tag
+    // identifies the producer.
+    iqentry_a1   [tail] <= rfoa1;
+    iqentry_a1_v [tail] <= Source1Valid(fetchbuf1_instr) | regIsValid[Ra1s];
+    iqentry_a1_s [tail] <= rf_source[Ra1s];
+    iqentry_a2   [tail] <= rfob1;
+    iqentry_a2_v [tail] <= Source2Valid(fetchbuf1_instr) | regIsValid[Rb1s];
+    iqentry_a2_s [tail] <= rf_source[Rb1s];
+    iqentry_a3   [tail] <= rfoc1;
+    iqentry_a3_v [tail] <= Source3Valid(fetchbuf1_instr) | regIsValid[Rc1s];
+    iqentry_a3_s [tail] <= rf_source[Rc1s];
+`ifdef INLINE_DECODE
+/* This decoding cannot be done here because it'll introduce a 1 cycle delay
+    id2_Rt <= Rt1[4:0];
+    id2_vl <= vl;
+    id2_ven <= venno;
+    id2_id <= tail;
+    id2_pt <= predict_taken1;
+    id2_thrd <= fetchbuf1_thrd;
+*/
+    setinsn1(tail,id2_bus);
+`endif
+end
+endtask
+
+// This task takes care of commits for things other than the register file.
+task oddball_commit;
+input v;
+input [`QBITS] head;
+input [1:0] which;
+reg thread;
+begin
+ thread = iqentry_thrd[head];
+ if (v) begin
+ if (|iqentry_exc[head]) begin
+ excmiss <= TRUE;
+`ifdef SUPPORT_SMT
+ excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
+ excthrd <= iqentry_thrd[head];
+ badaddr[{thread,2'd0}] <= iqentry_ma[head];
+ bad_instr[{thread,2'd0}] <= iqentry_instr[head];
+ epc0[thread] <= iqentry_pc[head];
+ epc1[thread] <= epc0[thread];
+ epc2[thread] <= epc1[thread];
+ epc3[thread] <= epc2[thread];
+ epc4[thread] <= epc3[thread];
+ epc5[thread] <= epc4[thread];
+ epc6[thread] <= epc5[thread];
+ epc7[thread] <= epc6[thread];
+ epc8[thread] <= epc7[thread];
+ im_stack[thread] <= {im_stack[thread][27:0],im};
+ ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
+ dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
+ pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
+ rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS};
+ brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS};
+ cause[{thread,2'd0}] <= {8'd0,iqentry_exc[head]};
+ mstatus[thread][5:4] <= 2'd0;
+ mstatus[thread][13:6] <= 8'h00;
+ mstatus[thread][19:14] <= `EXC_RGS;
+`else
+ excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
+ excthrd <= 1'b0;
+ badaddr[{1'b0,2'd0}] <= iqentry_ma[head];
+ bad_instr[3'd0] <= iqentry_instr[head];
+ epc0 <= iqentry_pc[head];
+ epc1 <= epc0;
+ epc2 <= epc1;
+ epc3 <= epc2;
+ epc4 <= epc3;
+ epc5 <= epc4;
+ epc6 <= epc5;
+ epc7 <= epc6;
+ epc8 <= epc7;
+ im_stack <= {im_stack[27:0],im};
+ ol_stack <= {ol_stack[13:0],ol};
+ dl_stack <= {dl_stack[13:0],dl};
+ pl_stack <= {pl_stack[55:0],cpl};
+ rs_stack <= {rs_stack[59:0],`EXC_RGS};
+ brs_stack <= {rs_stack[59:0],`EXC_RGS};
+ cause[3'd0] <= {8'd0,iqentry_exc[head]};
+ mstatus[5:4] <= 2'd0;
+ mstatus[13:6] <= 8'h00;
+ mstatus[19:14] <= `EXC_RGS;
+`endif
+ wb_en <= `TRUE;
+ sema[0] <= 1'b0;
+ ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
+`ifdef SUPPORT_DBG
+ dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
+ dbg_ctrl[63] <= FALSE;
+`endif
+ end
+ else
+ case(iqentry_instr[head][`INSTRUCTION_OP])
+`ifdef SUPPORT_PREDICATION
+ `CMPI: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0];
+`endif
+
+ `BRK:
+ // BRK is treated as a nop unless it's a software interrupt or a
+ // hardware interrupt at a higher priority than the current priority.
+ if ((|iqentry_instr[head][25:21]) || iqentry_instr[head][20:17] > im) begin
+ excmiss <= TRUE;
+`ifdef SUPPORT_SMT
+ excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00};
+ excthrd <= iqentry_thrd[head];
+ epc0[thread] <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
+ epc1[thread] <= epc0[thread];
+ epc2[thread] <= epc1[thread];
+ epc3[thread] <= epc2[thread];
+ epc4[thread] <= epc3[thread];
+ epc5[thread] <= epc4[thread];
+ epc6[thread] <= epc5[thread];
+ epc7[thread] <= epc6[thread];
+ epc8[thread] <= epc7[thread];
+ im_stack[thread] <= {im_stack[thread][27:0],im};
+ ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]};
+ dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]};
+ pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]};
+ rs_stack[thread] <= {rs_stack[thread][59:0],`BRK_RGS};
+ brs_stack[thread] <= {brs_stack[thread][59:0],`BRK_RGS};
+ cause[{thread,2'd0}] <= iqentry_res[head][7:0];
+ mstatus[thread][5:4] <= 2'd0;
+ mstatus[thread][13:6] <= 8'h00;
+ // For hardware interrupts only, set a new mask level. Setting a
+ // new mask level will effectively prevent subsequent brks that
+ // are streaming from an interrupt from being processed.
+ // Select register set according to interrupt level
+ if (iqentry_instr[head][25:21]==5'd0) begin
+ mstatus[thread][ 3: 0] <= iqentry_instr[head][20:17];
+ mstatus[thread][31:28] <= iqentry_instr[head][20:17];
+ mstatus[thread][19:14] <= {2'b0,iqentry_instr[head][20:17]};
+ rs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]};
+ brs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]};
+ end
+ else begin
+ mstatus[thread][19:14] <= `BRK_RGS;
+ rs_stack[thread][5:0] <= `BRK_RGS;
+ brs_stack[thread][5:0] <= `BRK_RGS;
+ end
+`else
+ excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00};
+ excthrd <= 1'b0;
+ epc0 <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0};
+ epc1 <= epc0;
+ epc2 <= epc1;
+ epc3 <= epc2;
+ epc4 <= epc3;
+ epc5 <= epc4;
+ epc6 <= epc5;
+ epc7 <= epc6;
+ epc8 <= epc7;
+ im_stack <= {im_stack[27:0],im};
+ ol_stack <= {ol_stack[13:0],ol};
+ dl_stack <= {dl_stack[13:0],dl};
+ pl_stack <= {pl_stack[55:0],cpl};
+ rs_stack <= {rs_stack[59:0],`BRK_RGS};
+ brs_stack <= {brs_stack[59:0],`BRK_RGS};
+ cause[3'd0] <= iqentry_res[head][7:0];
+ mstatus[5:4] <= 2'd0;
+ mstatus[13:6] <= 8'h00;
+ // For hardware interrupts only, set a new mask level. Setting a
+ // new mask level will effectively prevent subsequent brks that
+ // are streaming from an interrupt from being processed.
+ // Select register set according to interrupt level
+ if (iqentry_instr[head][25:21]==5'd0) begin
+ mstatus[ 3: 0] <= iqentry_instr[head][20:17];
+ mstatus[31:28] <= iqentry_instr[head][20:17];
+ mstatus[19:14] <= {2'b0,iqentry_instr[head][20:17]};
+ rs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]};
+ brs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]};
+ end
+ else begin
+ mstatus[19:14] <= `BRK_RGS;
+ rs_stack[5:0] <= `BRK_RGS;
+ brs_stack[5:0] <= `BRK_RGS;
+ end
+`endif
+ sema[0] <= 1'b0;
+ ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0};
+`ifdef SUPPORT_DBG
+ dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]};
+ dbg_ctrl[63] <= FALSE;
+`endif
+ end
+ `IVECTOR:
+ casez(iqentry_tgt[head])
+ 8'b00100???: vm[iqentry_tgt[head][2:0]] <= iqentry_res[head];
+ 8'b00101111: vl <= iqentry_res[head];
+ default: ;
+ endcase
+ `R2:
+ case(iqentry_instr[head][`INSTRUCTION_S2])
+`ifdef SUPPORT_PREDICATION
+ `CMP: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0];
+`endif
+ `R1: case(iqentry_instr[head][20:16])
+ `CHAIN_OFF: cr0[18] <= 1'b0;
+ `CHAIN_ON: cr0[18] <= 1'b1;
+ //`SETWB: wbrcd[pcr[5:0]] <= 1'b1;
+ default: ;
+ endcase
+ `VMOV: casez(iqentry_tgt[head])
+ 12'b1111111_00???: vm[iqentry_tgt[head][2:0]] <= iqentry_res[head];
+ 12'b1111111_01111: vl <= iqentry_res[head];
+ default: ;
+ endcase
+`ifdef SUPPORT_SMT
+ `SEI: mstatus[thread][3:0] <= iqentry_res[head][3:0]; // S1
+`else
+ `SEI: mstatus[3:0] <= iqentry_res[head][3:0]; // S1
+`endif
+ `RTI: begin
+ excmiss <= TRUE;
+`ifdef SUPPORT_SMT
+ excmisspc <= epc0[thread];
+ excthrd <= thread;
+ mstatus[thread][3:0] <= im_stack[thread][3:0];
+ mstatus[thread][5:4] <= ol_stack[thread][1:0];
+ mstatus[thread][21:20] <= dl_stack[thread][1:0];
+ mstatus[thread][13:6] <= pl_stack[thread][7:0];
+ mstatus[thread][19:14] <= rs_stack[thread][5:0];
+ im_stack[thread] <= {4'd15,im_stack[thread][31:4]};
+ ol_stack[thread] <= {2'd0,ol_stack[thread][15:2]};
+ dl_stack[thread] <= {2'd0,dl_stack[thread][15:2]};
+ pl_stack[thread] <= {8'h00,pl_stack[thread][63:8]};
+ rs_stack[thread] <= {6'h00,rs_stack[thread][59:6]};
+ brs_stack[thread] <= {6'h00,brs_stack[thread][59:6]};
+ epc0[thread] <= epc1[thread];
+ epc1[thread] <= epc2[thread];
+ epc2[thread] <= epc3[thread];
+ epc3[thread] <= epc4[thread];
+ epc4[thread] <= epc5[thread];
+ epc5[thread] <= epc6[thread];
+ epc6[thread] <= epc7[thread];
+ epc7[thread] <= epc8[thread];
+ epc8[thread] <= {tvec[0][AMSB:8], 1'b0, ol[thread], 5'h0};
+`else
+ excmisspc <= epc0;
+ excthrd <= thread;
+ mstatus[3:0] <= im_stack[3:0];
+ mstatus[5:4] <= ol_stack[1:0];
+ mstatus[21:20] <= dl_stack[1:0];
+ mstatus[13:6] <= pl_stack[7:0];
+ mstatus[19:14] <= rs_stack[5:0];
+ im_stack <= {4'd15,im_stack[31:4]};
+ ol_stack <= {2'd0,ol_stack[15:2]};
+ dl_stack <= {2'd0,dl_stack[15:2]};
+ pl_stack <= {8'h00,pl_stack[63:8]};
+ rs_stack <= {6'h00,rs_stack[59:6]};
+ brs_stack <= {6'h00,brs_stack[59:6]};
+ epc0 <= epc1;
+ epc1 <= epc2;
+ epc2 <= epc3;
+ epc3 <= epc4;
+ epc4 <= epc5;
+ epc5 <= epc6;
+ epc6 <= epc7;
+ epc7 <= epc8;
+ epc8 <= {tvec[0][AMSB:8], 1'b0, ol, 5'h0};
+`endif
+ sema[0] <= 1'b0;
+ sema[iqentry_res[head][5:0]] <= 1'b0;
+ vqe0 <= ve_hold[ 5: 0];
+ vqet0 <= ve_hold[21:16];
+ vqe1 <= ve_hold[37:32];
+ vqet1 <= ve_hold[53:48];
+`ifdef SUPPORT_DBG
+ dbg_ctrl[62:55] <= {FALSE,dbg_ctrl[62:56]};
+ dbg_ctrl[63] <= dbg_ctrl[55];
+`endif
+ end
+ default: ;
+ endcase
+ `MEMNDX:
+ case(iqentry_instr[head][`INSTRUCTION_S2])
+ `CACHEX:
+ case(iqentry_instr[head][22:18])
+ 5'h03: invic <= TRUE;
+ 5'h10: cr0[30] <= FALSE;
+ 5'h11: cr0[30] <= TRUE;
+ default: ;
+ endcase
+ default: ;
+ endcase
+ `CSRRW:
+ begin
+ write_csr(iqentry_instr[head][31:18],iqentry_a1[head],thread);
+ end
+ `REX:
+`ifdef SUPPORT_SMT
+ // Can only redirect to a lower level
+ if (ol[thread] < iqentry_instr[head][14:13]) begin
+ mstatus[thread][5:4] <= iqentry_instr[head][14:13];
+ badaddr[{thread,iqentry_instr[head][14:13]}] <= badaddr[{thread,ol[thread]}];
+ bad_instr[{thread,iqentry_instr[head][14:13]}] <= bad_instr[{thread,ol[thread]}];
+ cause[{thread,iqentry_instr[head][14:13]}] <= cause[{thread,ol[thread]}];
+ mstatus[thread][13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0];
+ end
+`else
+ if (ol < iqentry_instr[head][14:13]) begin
+ mstatus[5:4] <= iqentry_instr[head][14:13];
+ badaddr[{1'b0,iqentry_instr[head][14:13]}] <= badaddr[{1'b0,ol}];
+ bad_instr[{1'b0,iqentry_instr[head][14:13]}] <= bad_instr[{1'b0,ol}];
+ cause[{1'b0,iqentry_instr[head][14:13]}] <= cause[{1'b0,ol}];
+ mstatus[13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0];
+ end
+`endif
+ `CACHE:
+ case(iqentry_instr[head][17:13])
+ 5'h03: invic <= TRUE;
+ 5'h10: cr0[30] <= FALSE;
+ 5'h11: cr0[30] <= TRUE;
+ default: ;
+ endcase
+ `FLOAT:
+ case(iqentry_instr[head][`INSTRUCTION_S2])
+ `FRM: begin
+ fp_rm <= iqentry_res[head][2:0];
+ end
+ `FCX:
+ begin
+ fp_sx <= fp_sx & ~iqentry_res[head][5];
+ fp_inex <= fp_inex & ~iqentry_res[head][4];
+ fp_dbzx <= fp_dbzx & ~(iqentry_res[head][3]|iqentry_res[head][0]);
+ fp_underx <= fp_underx & ~iqentry_res[head][2];
+ fp_overx <= fp_overx & ~iqentry_res[head][1];
+ fp_giopx <= fp_giopx & ~iqentry_res[head][0];
+ fp_infdivx <= fp_infdivx & ~iqentry_res[head][0];
+ fp_zerozerox <= fp_zerozerox & ~iqentry_res[head][0];
+ fp_subinfx <= fp_subinfx & ~iqentry_res[head][0];
+ fp_infzerox <= fp_infzerox & ~iqentry_res[head][0];
+ fp_NaNCmpx <= fp_NaNCmpx & ~iqentry_res[head][0];
+ fp_swtx <= 1'b0;
+ end
+ `FDX:
+ begin
+ fp_inexe <= fp_inexe & ~iqentry_res[head][4];
+ fp_dbzxe <= fp_dbzxe & ~iqentry_res[head][3];
+ fp_underxe <= fp_underxe & ~iqentry_res[head][2];
+ fp_overxe <= fp_overxe & ~iqentry_res[head][1];
+ fp_invopxe <= fp_invopxe & ~iqentry_res[head][0];
+ end
+ `FEX:
+ begin
+ fp_inexe <= fp_inexe | iqentry_res[head][4];
+ fp_dbzxe <= fp_dbzxe | iqentry_res[head][3];
+ fp_underxe <= fp_underxe | iqentry_res[head][2];
+ fp_overxe <= fp_overxe | iqentry_res[head][1];
+ fp_invopxe <= fp_invopxe | iqentry_res[head][0];
+ end
+ default:
+ begin
+ // 31 to 29 is rounding mode
+ // 28 to 24 are exception enables
+ // 23 is nsfp
+ // 22 is a fractie
+ fp_fractie <= iqentry_ares[head][22];
+ fp_raz <= iqentry_ares[head][21];
+ // 20 is a 0
+ fp_neg <= iqentry_ares[head][19];
+ fp_pos <= iqentry_ares[head][18];
+ fp_zero <= iqentry_ares[head][17];
+ fp_inf <= iqentry_ares[head][16];
+ // 15 swtx
+ // 14
+ fp_inex <= fp_inex | (fp_inexe & iqentry_ares[head][14]);
+ fp_dbzx <= fp_dbzx | (fp_dbzxe & iqentry_ares[head][13]);
+ fp_underx <= fp_underx | (fp_underxe & iqentry_ares[head][12]);
+ fp_overx <= fp_overx | (fp_overxe & iqentry_ares[head][11]);
+ //fp_giopx <= fp_giopx | (fp_giopxe & iqentry_res2[head][10]);
+ //fp_invopx <= fp_invopx | (fp_invopxe & iqentry_res2[head][24]);
+ //
+ fp_cvtx <= fp_cvtx | (fp_giopxe & iqentry_ares[head][7]);
+ fp_sqrtx <= fp_sqrtx | (fp_giopxe & iqentry_ares[head][6]);
+ fp_NaNCmpx <= fp_NaNCmpx | (fp_giopxe & iqentry_ares[head][5]);
+ fp_infzerox <= fp_infzerox | (fp_giopxe & iqentry_ares[head][4]);
+ fp_zerozerox <= fp_zerozerox | (fp_giopxe & iqentry_ares[head][3]);
+ fp_infdivx <= fp_infdivx | (fp_giopxe & iqentry_ares[head][2]);
+ fp_subinfx <= fp_subinfx | (fp_giopxe & iqentry_ares[head][1]);
+ fp_snanx <= fp_snanx | (fp_giopxe & iqentry_ares[head][0]);
+
+ end
+ endcase
+ default: ;
+ endcase
+ // Once the flow control instruction commits, NOP it out to allow
+ // pending stores to be issued.
+ iqentry_instr[head][5:0] <= `NOP;
+ end
+end
+endtask
+
+// CSR access tasks
+//
+// read_csr
+// Returns in 'dat' the value of the CSR selected by csrno[9:0].
+//   csrno  - csrno[11:10] is compared against the current operating level,
+//            csrno[9:0] selects the register.
+//   dat    - 64-bit result. Zero when the level check fails, and
+//            64'hEEEEEEEEEEEEEEEE for an unrecognized CSR number.
+//   thread - selects the per-thread copy of banked registers.
+//
+// The result is assigned with blocking assignments. A task output is copied
+// back to the caller when the task returns; a value scheduled with a
+// nonblocking assignment has not been applied at that point, so the caller
+// would receive the previous contents of 'dat'.
+// NOTE(review): an always @* block that calls this task may still not be
+// sensitive to the CSR registers read in here - confirm with the target
+// tools before replacing the inline always-block version of this logic.
+task read_csr;
+input [11:0] csrno;
+output [63:0] dat;
+input thread;
+begin
+`ifdef SUPPORT_SMT
+ if (csrno[11:10] >= ol[thread])
+`else
+ if (csrno[11:10] >= ol)
+`endif
+ casez(csrno[9:0])
+ `CSR_CR0: dat = cr0;
+ `CSR_HARTID: dat = hartid;
+ `CSR_TICK: dat = tick;
+ `CSR_PCR: dat = pcr;
+ `CSR_PCR2: dat = pcr2;
+ `CSR_PMR: dat = pmr;
+ `CSR_WBRCD: dat = wbrcd;
+ `CSR_SEMA: dat = sema;
+ `CSR_KEYS: dat = keys;
+ `CSR_TCB: dat = tcb;
+ `CSR_FSTAT: dat = {fp_rgs,fp_status};
+`ifdef SUPPORT_DBG
+ `CSR_DBAD0: dat = dbg_adr0;
+ `CSR_DBAD1: dat = dbg_adr1;
+ `CSR_DBAD2: dat = dbg_adr2;
+ `CSR_DBAD3: dat = dbg_adr3;
+ `CSR_DBCTRL: dat = dbg_ctrl;
+ `CSR_DBSTAT: dat = dbg_stat;
+`endif
+ `CSR_CAS: dat = cas;
+ `CSR_TVEC: dat = tvec[csrno[2:0]];
+ // The fault registers are banked by {thread, csrno[11:10]}.
+ `CSR_BADADR: dat = badaddr[{thread,csrno[11:10]}];
+ `CSR_BADINSTR: dat = bad_instr[{thread,csrno[11:10]}];
+ `CSR_CAUSE: dat = {48'd0,cause[{thread,csrno[11:10]}]};
+`ifdef SUPPORT_SMT
+ `CSR_IM_STACK: dat = im_stack[thread];
+ `CSR_OL_STACK: dat = {dl_stack[thread],ol_stack[thread]};
+ `CSR_PL_STACK: dat = pl_stack[thread];
+ `CSR_RS_STACK: dat = rs_stack[thread];
+ `CSR_STATUS: dat = mstatus[thread][63:0];
+ `CSR_EPC0: dat = epc0[thread];
+ `CSR_EPC1: dat = epc1[thread];
+ `CSR_EPC2: dat = epc2[thread];
+ `CSR_EPC3: dat = epc3[thread];
+ `CSR_EPC4: dat = epc4[thread];
+ `CSR_EPC5: dat = epc5[thread];
+ `CSR_EPC6: dat = epc6[thread];
+ `CSR_EPC7: dat = epc7[thread];
+`else
+ `CSR_IM_STACK: dat = im_stack;
+ `CSR_OL_STACK: dat = {dl_stack,ol_stack};
+ `CSR_PL_STACK: dat = pl_stack;
+ `CSR_RS_STACK: dat = rs_stack;
+ `CSR_STATUS: dat = mstatus[63:0];
+ `CSR_EPC0: dat = epc0;
+ `CSR_EPC1: dat = epc1;
+ `CSR_EPC2: dat = epc2;
+ `CSR_EPC3: dat = epc3;
+ `CSR_EPC4: dat = epc4;
+ `CSR_EPC5: dat = epc5;
+ `CSR_EPC6: dat = epc6;
+ `CSR_EPC7: dat = epc7;
+`endif
+ `CSR_CODEBUF: dat = codebuf[csrno[5:0]];
+`ifdef SUPPORT_BBMS
+ `CSR_TB: dat = tb;
+ `CSR_CBL: dat = cbl;
+ `CSR_CBU: dat = cbu;
+ `CSR_RO: dat = ro;
+ `CSR_DBL: dat = dbl;
+ `CSR_DBU: dat = dbu;
+ `CSR_SBL: dat = sbl;
+ `CSR_SBU: dat = sbu;
+ `CSR_ENU: dat = en;
+`endif
+`ifdef SUPPORT_PREDICATION
+ `CSR_PREGS: read_pregs(dat);
+`endif
+ `CSR_Q_CTR: dat = iq_ctr;
+ `CSR_BM_CTR: dat = bm_ctr;
+ `CSR_ICL_CTR: dat = icl_ctr;
+ `CSR_IRQ_CTR: dat = irq_ctr;
+ `CSR_TIME: dat = wc_times;
+ // Identification block: eight-character strings and constants selected
+ // by csrno[3:0].
+ `CSR_INFO:
+ case(csrno[3:0])
+ 4'd0: dat = "Finitron"; // manufacturer
+ 4'd1: dat = " ";
+ 4'd2: dat = "64 bit "; // CPU class
+ 4'd3: dat = " ";
+ 4'd4: dat = "FT64 "; // Name
+ 4'd5: dat = " ";
+ 4'd6: dat = 64'd1; // model #
+ 4'd7: dat = 64'd1; // serial number
+ 4'd8: dat = {32'd16384,32'd16384}; // cache sizes instruction,data
+ 4'd9: dat = 64'd0;
+ default: dat = 64'd0;
+ endcase
+ default: begin
+ $display("Unsupported CSR:%h",csrno[10:0]);
+ dat = 64'hEEEEEEEEEEEEEEEE;
+ end
+ endcase
+ else
+ // Level check failed: the register reads as zero.
+ dat = 64'h0;
+end
+endtask
+
+// write_csr
+// Updates the CSR selected by csrno[9:0] using the value in 'dat'.
+//   csrno  - csrno[13:12] selects the operation (1 = write, 2 = set bits,
+//            3 = clear bits; any other value does nothing),
+//            csrno[11:10] is compared against the current operating level,
+//            csrno[9:0] selects the register.
+//   dat    - value to write, or mask of bits to set / clear.
+//   thread - selects the per-thread copy of banked registers.
+// Nothing is written when csrno[11:10] is below the current operating level.
+// All updates use nonblocking assignments.
+task write_csr;
+input [13:0] csrno;
+input [63:0] dat;
+input thread;
+begin
+`ifdef SUPPORT_SMT
+ if (csrno[11:10] >= ol[thread])
+`else
+ if (csrno[11:10] >= ol)
+`endif
+ case(csrno[13:12])
+ 2'd1: // CSRRW
+ casez(csrno[9:0])
+ `CSR_CR0: cr0 <= dat;
+ `CSR_PCR: pcr <= dat[31:0];
+ `CSR_PCR2: pcr2 <= dat;
+ // PMR: the low bits are never allowed to become all zero. With
+ // NUM_IDU of 0 or 1 (and in the default case) only bit 0 is touched
+ // and it is forced to one.
+ `CSR_PMR: case(`NUM_IDU)
+ 0,1: pmr[0] <= 1'b1;
+ 2:
+ begin
+ if (dat[1:0]==2'b00)
+ pmr[1:0] <= 2'b01;
+ else
+ pmr[1:0] <= dat[1:0];
+ pmr[63:2] <= dat[63:2];
+ end
+ 3:
+ begin
+ if (dat[2:0]==3'b000)
+ pmr[2:0] <= 3'b001;
+ else
+ pmr[2:0] <= dat[2:0];
+ pmr[63:3] <= dat[63:3];
+ end
+ default: pmr[0] <= 1'b1;
+ endcase
+ `CSR_WBRCD: wbrcd <= dat;
+ `CSR_SEMA: sema <= dat;
+ `CSR_KEYS: keys <= dat;
+ `CSR_TCB: tcb <= dat;
+ // Only bits 37:32 of the floating point CSR are written here.
+ `CSR_FSTAT: fpu_csr[37:32] <= dat[37:32];
+ // The fault registers are banked by {thread, csrno[11:10]}.
+ `CSR_BADADR: badaddr[{thread,csrno[11:10]}] <= dat;
+ `CSR_BADINSTR: bad_instr[{thread,csrno[11:10]}] <= dat;
+ `CSR_CAUSE: cause[{thread,csrno[11:10]}] <= dat[15:0];
+`ifdef SUPPORT_DBG
+ `CSR_DBAD0: dbg_adr0 <= dat[AMSB:0];
+ `CSR_DBAD1: dbg_adr1 <= dat[AMSB:0];
+ `CSR_DBAD2: dbg_adr2 <= dat[AMSB:0];
+ `CSR_DBAD3: dbg_adr3 <= dat[AMSB:0];
+ `CSR_DBCTRL: dbg_ctrl <= dat;
+`endif
+ `CSR_CAS: cas <= dat;
+ `CSR_TVEC: tvec[csrno[2:0]] <= dat[31:0];
+`ifdef SUPPORT_SMT
+ `CSR_IM_STACK: im_stack[thread] <= dat[31:0];
+ // dat[15:0] loads the operating level stack, dat[31:16] the dl stack.
+ `CSR_OL_STACK: begin
+ ol_stack[thread] <= dat[15:0];
+ dl_stack[thread] <= dat[31:16];
+ end
+ `CSR_PL_STACK: pl_stack[thread] <= dat;
+ `CSR_RS_STACK: rs_stack[thread] <= dat;
+ `CSR_STATUS: mstatus[thread][63:0] <= dat;
+ `CSR_EPC0: epc0[thread] <= dat;
+ `CSR_EPC1: epc1[thread] <= dat;
+ `CSR_EPC2: epc2[thread] <= dat;
+ `CSR_EPC3: epc3[thread] <= dat;
+ `CSR_EPC4: epc4[thread] <= dat;
+ `CSR_EPC5: epc5[thread] <= dat;
+ `CSR_EPC6: epc6[thread] <= dat;
+ `CSR_EPC7: epc7[thread] <= dat;
+`else
+ `CSR_IM_STACK: im_stack <= dat[31:0];
+ // dat[15:0] loads the operating level stack, dat[31:16] the dl stack.
+ `CSR_OL_STACK: begin
+ ol_stack <= dat[15:0];
+ dl_stack <= dat[31:16];
+ end
+ `CSR_PL_STACK: pl_stack <= dat;
+ `CSR_RS_STACK: rs_stack <= dat;
+ `CSR_STATUS: mstatus[63:0] <= dat;
+ `CSR_EPC0: epc0 <= dat;
+ `CSR_EPC1: epc1 <= dat;
+ `CSR_EPC2: epc2 <= dat;
+ `CSR_EPC3: epc3 <= dat;
+ `CSR_EPC4: epc4 <= dat;
+ `CSR_EPC5: epc5 <= dat;
+ `CSR_EPC6: epc6 <= dat;
+ `CSR_EPC7: epc7 <= dat;
+`endif
+`ifdef SUPPORT_BBMS
+ // Each of these registers is an array entry selected by brgs.
+ `CSR_TB: prg_base[brgs] <= dat;
+ `CSR_CBL: cl_barrier[brgs] <= dat;
+ `CSR_CBU: cu_barrier[brgs] <= dat;
+ `CSR_RO: ro_barrier[brgs] <= dat;
+ `CSR_DBL: dl_barrier[brgs] <= dat;
+ `CSR_DBU: du_barrier[brgs] <= dat;
+ `CSR_SBL: sl_barrier[brgs] <= dat;
+ `CSR_SBU: su_barrier[brgs] <= dat;
+ `CSR_ENU: en_barrier[brgs] <= dat;
+`endif
+`ifdef SUPPORT_PREDICATION
+ `CSR_PREGS: write_pregs(dat);
+`endif
+ // The time value is not written directly: dat is staged in
+ // wc_time_dat and ld_time is set to 6'h3f.
+ // NOTE(review): presumably ld_time requests a load of the wall clock
+ // time elsewhere in the core - confirm against the ld_time logic.
+ `CSR_TIME: begin
+ ld_time <= 6'h3f;
+ wc_time_dat <= dat;
+ end
+ `CSR_CODEBUF: codebuf[csrno[5:0]] <= dat;
+ default: ;
+ endcase
+ // Bit set: only the registers listed below respond; bits that are one
+ // in dat are ORed into the register.
+ // NOTE(review): this uses case rather than casez, unlike the write
+ // path above - confirm none of the CSR numbers used here rely on
+ // wildcard bits.
+ 2'd2: // CSRRS
+ case(csrno[9:0])
+ `CSR_CR0: cr0 <= cr0 | dat;
+ `CSR_PCR: pcr[31:0] <= pcr[31:0] | dat[31:0];
+ `CSR_PCR2: pcr2 <= pcr2 | dat;
+ `CSR_PMR: pmr <= pmr | dat;
+ `CSR_WBRCD: wbrcd <= wbrcd | dat;
+`ifdef SUPPORT_DBG
+ `CSR_DBCTRL: dbg_ctrl <= dbg_ctrl | dat;
+`endif
+ `CSR_SEMA: sema <= sema | dat;
+`ifdef SUPPORT_SMT
+ `CSR_STATUS: mstatus[thread][63:0] <= mstatus[thread][63:0] | dat;
+`else
+ `CSR_STATUS: mstatus[63:0] <= mstatus[63:0] | dat;
+`endif
+ default: ;
+ endcase
+ // Bit clear: only the registers listed below respond; bits that are
+ // one in dat are cleared in the register.
+ 2'd3: // CSRRC
+ case(csrno[9:0])
+ `CSR_CR0: cr0 <= cr0 & ~dat;
+ `CSR_PCR: pcr <= pcr & ~dat;
+ `CSR_PCR2: pcr2 <= pcr2 & ~dat;
+ // A request to clear both of PMR's low two bits leaves 2'b01 instead.
+ `CSR_PMR: begin
+ if (dat[1:0]==2'b11)
+ pmr[1:0] <= 2'b01;
+ else
+ pmr[1:0] <= pmr[1:0] & ~dat[1:0];
+ pmr[63:2] <= pmr[63:2] & ~dat[63:2];
+ end
+ `CSR_WBRCD: wbrcd <= wbrcd & ~dat;
+`ifdef SUPPORT_DBG
+ `CSR_DBCTRL: dbg_ctrl <= dbg_ctrl & ~dat;
+`endif
+ `CSR_SEMA: sema <= sema & ~dat;
+`ifdef SUPPORT_SMT
+ `CSR_STATUS: mstatus[thread][63:0] <= mstatus[thread][63:0] & ~dat;
+`else
+ `CSR_STATUS: mstatus[63:0] <= mstatus[63:0] & ~dat;
+`endif
+ default: ;
+ endcase
+ default: ;
+ endcase
+end
+endtask
+
+// tDram0Issue
+// Loads memory channel 0 from issue queue entry n and moves that entry to
+// the IQS_MEM state. All targets are distinct and assigned nonblocking, so
+// the statement order below carries no meaning.
+task tDram0Issue;
+input [`QBITSP1] n;
+begin
+ // Mark the channel busy and tag it with the issuing queue slot.
+ // The dramA_v invalidate (dramA_v <= `INV) is disabled in this task.
+ dram0 <= `DRAMSLOT_BUSY;
+ dram0_id <= { 1'b1, n[`QBITS] };
+
+ // What kind of access this is.
+ dram0_load <= iqentry_load[n];
+ dram0_store <= iqentry_store[n];
+ dram0_rmw <= iqentry_rmw[n];
+ dram0_preload <= iqentry_preload[n];
+ dram0_memsize <= iqentry_memsz[n];
+ dram0_instr <= iqentry_instr[n];
+ dram0_tgt <= iqentry_tgt[n];
+
+ // Where it goes and what it carries. Segment selection (dram0_seg) is
+ // not performed here.
+ dram0_addr <= iqentry_ma[n];
+ dram0_data <= iqentry_a2[n];
+
+ // Uncached when the entry is flagged loadv, when dce is clear, or when
+ // address bits 31:20 equal 12'hFFD.
+ dram0_unc <= iqentry_loadv[n] || !dce || (iqentry_ma[n][31:20]==12'hFFD);
+
+ // Accesses through register 30 or 31 use ol, all others use dl.
+`ifdef SUPPORT_SMT
+ dram0_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl[iqentry_thrd[n]] : ol[iqentry_thrd[n]];
+`else
+ dram0_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl : ol;
+`endif
+
+ // The a1 valid flag of the entry is deliberately left untouched
+ // (iqentry_a1_v[n] is not invalidated).
+ iqentry_state[n] <= IQS_MEM;
+end
+endtask
+
+// tDram1Issue
+// Loads memory channel 1 from issue queue entry n and moves that entry to
+// the IQS_MEM state. All targets are distinct and assigned nonblocking, so
+// the statement order below carries no meaning.
+task tDram1Issue;
+input [`QBITSP1] n;
+begin
+ // Invalidate the channel's result flag, mark the channel busy and tag it
+ // with the issuing queue slot.
+ dramB_v <= `INV;
+ dram1 <= `DRAMSLOT_BUSY;
+ dram1_id <= { 1'b1, n[`QBITS] };
+
+ // What kind of access this is.
+ dram1_load <= iqentry_load[n];
+ dram1_store <= iqentry_store[n];
+ dram1_rmw <= iqentry_rmw[n];
+ dram1_preload <= iqentry_preload[n];
+ dram1_memsize <= iqentry_memsz[n];
+ dram1_instr <= iqentry_instr[n];
+ dram1_tgt <= iqentry_tgt[n];
+
+ // Where it goes and what it carries. Segment selection (dram1_seg) is
+ // not performed here.
+ dram1_addr <= iqentry_ma[n];
+ dram1_data <= iqentry_a2[n];
+
+ // Uncached when the entry is flagged loadv, when dce is clear, or when
+ // address bits 31:20 equal 12'hFFD.
+ dram1_unc <= iqentry_loadv[n] || !dce || (iqentry_ma[n][31:20]==12'hFFD);
+
+ // Accesses through register 30 or 31 use ol, all others use dl.
+`ifdef SUPPORT_SMT
+ dram1_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl[iqentry_thrd[n]] : ol[iqentry_thrd[n]];
+`else
+ dram1_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl : ol;
+`endif
+
+ // The a1 valid flag of the entry is deliberately left untouched
+ // (iqentry_a1_v[n] is not invalidated).
+ iqentry_state[n] <= IQS_MEM;
+end
+endtask
+
+// tDram2Issue
+// Loads memory channel 2 from issue queue entry n and moves that entry to
+// the IQS_MEM state. All targets are distinct and assigned nonblocking, so
+// the statement order below carries no meaning.
+task tDram2Issue;
+input [`QBITSP1] n;
+begin
+ // Invalidate the channel's result flag, mark the channel busy and tag it
+ // with the issuing queue slot.
+ dramC_v <= `INV;
+ dram2 <= `DRAMSLOT_BUSY;
+ dram2_id <= { 1'b1, n[`QBITS] };
+
+ // What kind of access this is.
+ dram2_load <= iqentry_load[n];
+ dram2_store <= iqentry_store[n];
+ dram2_rmw <= iqentry_rmw[n];
+ dram2_preload <= iqentry_preload[n];
+ dram2_memsize <= iqentry_memsz[n];
+ dram2_instr <= iqentry_instr[n];
+ dram2_tgt <= iqentry_tgt[n];
+
+ // Where it goes and what it carries. Segment selection (dram2_seg) is
+ // not performed here.
+ dram2_addr <= iqentry_ma[n];
+ dram2_data <= iqentry_a2[n];
+
+ // Uncached when the entry is flagged loadv, when dce is clear, or when
+ // address bits 31:20 equal 12'hFFD.
+ dram2_unc <= iqentry_loadv[n] || !dce || (iqentry_ma[n][31:20]==12'hFFD);
+
+ // Accesses through register 30 or 31 use ol, all others use dl.
+`ifdef SUPPORT_SMT
+ dram2_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl[iqentry_thrd[n]] : ol[iqentry_thrd[n]];
+`else
+ dram2_ol <= (iqentry_Ra[n][4:0]!=5'd30 && iqentry_Ra[n][4:0]!=5'd31) ? dl : ol;
+`endif
+
+ // The a1 valid flag of the entry is deliberately left untouched
+ // (iqentry_a1_v[n] is not invalidated).
+ iqentry_state[n] <= IQS_MEM;
+end
+endtask
+
+// wb_nack
+// Drives every bus request output handled here to its inactive value:
+// cycle, strobe and write enable low, byte selects cleared, and the
+// cti_o / bte_o burst fields zeroed.
+task wb_nack;
+begin
+ cyc <= `LOW;
+ stb_o <= `LOW;
+ we <= `LOW;
+ sel_o <= 8'h00;
+ cti_o <= 3'b000;
+ bte_o <= 2'b00;
+end
+endtask
+
+endmodule
+
+
+// decoder5
+// 5-to-31 one-hot decoder. out[k] is high when num == k for k = 1..31;
+// num == 0 selects nothing, so out is all zeros.
+// Written as a shift and slice, the same form as decoder6, decoder7 and
+// decoder8 in this file, in place of a 32-entry table driven by
+// nonblocking assignments in a combinational always block.
+module decoder5 (num, out);
+input [4:0] num;
+output [31:1] out;
+
+// Full 32-bit one-hot value; bit 0 is the unused "num == 0" position.
+wire [31:0] out1;
+
+assign out1 = 32'd1 << num;
+assign out = out1[31:1];
+
+endmodule
+
+// decoder6
+// 6-to-63 one-hot decoder. out[k] is high when num == k for k = 1..63;
+// num == 0 gives all zeros.
+module decoder6 (
+ input [5:0] num,
+ output [63:1] out
+);
+
+// Build the 64-bit one-hot value, then drop the bit-0 position.
+assign out = (64'd1 << num) >> 1;
+
+endmodule
+
+// decoder7
+// 7-to-127 one-hot decoder. out[k] is high when num == k for k = 1..127;
+// num == 0 gives all zeros.
+module decoder7 (
+ input [6:0] num,
+ output [127:1] out
+);
+
+// Build the 128-bit one-hot value, then drop the bit-0 position.
+assign out = (128'd1 << num) >> 1;
+
+endmodule
+
+// decoder8
+// 8-to-255 one-hot decoder. out[k] is high when num == k for k = 1..255;
+// num == 0 gives all zeros.
+module decoder8 (
+ input [7:0] num,
+ output [255:1] out
+);
+
+// Build the 256-bit one-hot value, then drop the bit-0 position.
+assign out = (256'd1 << num) >> 1;
+
+endmodule
+
Index: FT64v7/rtl/twoway/FT64_BTB.v
===================================================================
--- FT64v7/rtl/twoway/FT64_BTB.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_BTB.v (revision 60)
@@ -0,0 +1,199 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_BTB.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+// FT64_BTB
+// Branch target buffer with three update ports and six lookup ports.
+//
+// Updates (wr0..wr2, in that priority order) are first placed in a 32-entry
+// circular queue. One queued update per wclk cycle is then drained into the
+// 1024-entry table 'mem'. Each table entry holds:
+//   [AMSB:0]               branch target (wdat)
+//   [(AMSB+1)*2:AMSB+1]    tag (the queued pc word)
+//   [(AMSB+1)*2+1]         valid/taken bit
+// A lookup port reports a hit when the stored tag equals the lookup pc and
+// the entry's top bit is set; on a miss the supplied npcX is returned.
+//
+// The queue has no overflow protection: if more than 32 updates are
+// outstanding the oldest ones are overwritten.
+//
+// NOTE(review): entries are written at index pc[9:0], i.e.
+// {wadr[9:1],valid}, but read at index pcX[11:2], and the stored tag is the
+// 32-bit word {wadr[31:1],valid} zero extended while the hit comparison
+// uses the full pcX. This is left exactly as found - confirm it is the
+// intended indexing and tagging.
+module FT64_BTB(rst, wclk,
+ wr0, wadr0, wdat0, valid0,
+ wr1, wadr1, wdat1, valid1,
+ wr2, wadr2, wdat2, valid2,
+ rclk, pcA, btgtA, pcB, btgtB,
+ pcC, btgtC, pcD, btgtD, pcE, btgtE, pcF, btgtF,
+ hitA, hitB, hitC, hitD, hitE, hitF,
+ npcA, npcB, npcC, npcD, npcE, npcF);
+parameter AMSB = 63;
+parameter RSTPC = 64'hFFFFFFFFFFFC0100;
+input rst;
+input wclk;
+input wr0;
+input [AMSB:0] wadr0;
+input [AMSB:0] wdat0;
+input valid0;
+input wr1;
+input [AMSB:0] wadr1;
+input [AMSB:0] wdat1;
+input valid1;
+input wr2;
+input [AMSB:0] wadr2;
+input [AMSB:0] wdat2;
+input valid2;
+input rclk;
+input [AMSB:0] pcA;
+output [AMSB:0] btgtA;
+input [AMSB:0] pcB;
+output [AMSB:0] btgtB;
+input [AMSB:0] pcC;
+output [AMSB:0] btgtC;
+input [AMSB:0] pcD;
+output [AMSB:0] btgtD;
+input [AMSB:0] pcE;
+output [AMSB:0] btgtE;
+input [AMSB:0] pcF;
+output [AMSB:0] btgtF;
+output hitA;
+output hitB;
+output hitC;
+output hitD;
+output hitE;
+output hitF;
+input [AMSB:0] npcA;
+input [AMSB:0] npcB;
+input [AMSB:0] npcC;
+input [AMSB:0] npcD;
+input [AMSB:0] npcE;
+input [AMSB:0] npcF;
+
+integer n;
+reg [AMSB:0] pcs [0:31];	// queued {address[31:1],valid} words
+reg [AMSB:0] wdats [0:31];	// queued branch targets
+reg [AMSB:0] wdat;
+reg [4:0] pcstail,pcshead;
+reg [AMSB:0] pc;
+reg takb;
+reg wrhist;
+
+reg [(AMSB+1)*2+1:0] mem [0:1023];
+reg [9:0] radrA, radrB, radrC, radrD, radrE, radrF;
+initial begin
+ for (n = 0; n < 1024; n = n + 1)
+ mem[n] <= RSTPC;
+end
+
+// Queue slots used by each write port this cycle. A port's slot is the tail
+// advanced by the number of higher priority ports that are also writing.
+// The slots are explicit 5-bit values so they wrap from 31 back to 0 along
+// with pcstail; indexing with "pcstail+1" is evaluated at 32 bits and
+// addresses a non-existent entry 32 or 33 when the tail is near the end of
+// the queue, silently dropping the update.
+wire [4:0] slot0 = pcstail;
+wire [4:0] slot1 = pcstail + {4'd0,wr0};
+wire [4:0] slot2 = slot1 + {4'd0,wr1};
+wire [4:0] tail_next = slot2 + {4'd0,wr2};
+
+// Enqueue up to three updates per cycle.
+always @(posedge wclk)
+if (rst)
+ pcstail <= 5'd0;
+else begin
+ if (wr0) begin
+ pcs[slot0] <= {wadr0[31:1],valid0};
+ wdats[slot0] <= wdat0;
+ end
+ if (wr1) begin
+ pcs[slot1] <= {wadr1[31:1],valid1};
+ wdats[slot1] <= wdat1;
+ end
+ if (wr2) begin
+ pcs[slot2] <= {wadr2[31:1],valid2};
+ wdats[slot2] <= wdat2;
+ end
+ pcstail <= tail_next;
+end
+
+// Dequeue one update per cycle into pc/takb/wdat and flag it with wrhist.
+// wrhist is cleared by reset so that no table write is performed with
+// undefined queue outputs.
+always @(posedge wclk)
+if (rst) begin
+ pcshead <= 5'd0;
+ wrhist <= 1'b0;
+end
+else begin
+ wrhist <= 1'b0;
+ if (pcshead != pcstail) begin
+ pc <= pcs[pcshead];
+ takb <= pcs[pcshead][0];
+ wdat <= wdats[pcshead];
+ wrhist <= 1'b1;
+ pcshead <= pcshead + 5'd1;
+ end
+end
+
+// Commit the dequeued update to the table. The three fields are written
+// under a single test of wrhist with no intervening delays, so simulation
+// uses the same pre-edge pc/takb/wdat values that synthesized logic does.
+always @(posedge wclk)
+if (wrhist) begin
+ mem[pc[9:0]][AMSB:0] <= wdat;
+ mem[pc[9:0]][(AMSB+1)*2:AMSB+1] <= pc;
+ mem[pc[9:0]][(AMSB+1)*2+1] <= takb;
+end
+
+// Registered read addresses, one per lookup port.
+always @(posedge rclk)
+ #1 radrA <= pcA[11:2];
+always @(posedge rclk)
+ #1 radrB <= pcB[11:2];
+always @(posedge rclk)
+ #1 radrC <= pcC[11:2];
+always @(posedge rclk)
+ #1 radrD <= pcD[11:2];
+always @(posedge rclk)
+ #1 radrE <= pcE[11:2];
+always @(posedge rclk)
+ #1 radrF <= pcF[11:2];
+
+// Hit: stored tag equals the lookup pc and the entry's top bit is set.
+assign hitA = mem[radrA][(AMSB+1)*2:AMSB+1]==pcA && mem[radrA][(AMSB+1)*2+1];
+assign hitB = mem[radrB][(AMSB+1)*2:AMSB+1]==pcB && mem[radrB][(AMSB+1)*2+1];
+assign hitC = mem[radrC][(AMSB+1)*2:AMSB+1]==pcC && mem[radrC][(AMSB+1)*2+1];
+assign hitD = mem[radrD][(AMSB+1)*2:AMSB+1]==pcD && mem[radrD][(AMSB+1)*2+1];
+assign hitE = mem[radrE][(AMSB+1)*2:AMSB+1]==pcE && mem[radrE][(AMSB+1)*2+1];
+assign hitF = mem[radrF][(AMSB+1)*2:AMSB+1]==pcF && mem[radrF][(AMSB+1)*2+1];
+
+// Predicted target on a hit, otherwise the caller supplied next pc.
+assign btgtA = hitA ? mem[radrA][AMSB:0] : npcA;
+assign btgtB = hitB ? mem[radrB][AMSB:0] : npcB;
+assign btgtC = hitC ? mem[radrC][AMSB:0] : npcC;
+assign btgtD = hitD ? mem[radrD][AMSB:0] : npcD;
+assign btgtE = hitE ? mem[radrE][AMSB:0] : npcE;
+assign btgtF = hitF ? mem[radrF][AMSB:0] : npcF;
+
+endmodule
Index: FT64v7/rtl/twoway/FT64_BranchPredictor.v
===================================================================
--- FT64v7/rtl/twoway/FT64_BranchPredictor.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_BranchPredictor.v (revision 60)
@@ -0,0 +1,193 @@
+//=============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_BranchPredictor.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+//=============================================================================
+//
+// FT64_BranchPredictor
+// Branch direction predictor with three update ports and six lookup ports.
+//
+// Resolved branches (xisBranch0..2, in that priority order) are placed in a
+// 32-entry circular queue as {xpc[31:1],takb}. One queued branch per clock
+// is then used to update a 512-entry table of two-bit counters and a
+// three-bit global history register. The table is indexed by pc bits 7:1
+// concatenated with the upper two global history bits.
+//
+// Counter encoding, as implemented below: a taken branch increments the
+// counter unless it is already 2'd1, a not-taken branch decrements it
+// unless it is already 2'd2. A lookup predicts taken when the counter is
+// 2'd0 or 2'd1 and 'en' is high.
+//
+// The queue has no overflow protection: if more than 32 branches are
+// outstanding the oldest ones are overwritten.
+module FT64_BranchPredictor(rst, clk, en,
+ xisBranch0, xisBranch1, xisBranch2,
+ pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, xpc2, takb0, takb1, takb2,
+ predict_takenA, predict_takenB, predict_takenC, predict_takenD,
+ predict_takenE, predict_takenF);
+parameter AMSB=63;
+parameter DBW=32;
+input rst;
+input clk;
+input en;
+input xisBranch0;
+input xisBranch1;
+input xisBranch2;
+input [AMSB:0] pcA;
+input [AMSB:0] pcB;
+input [AMSB:0] pcC;
+input [AMSB:0] pcD;
+input [AMSB:0] pcE;
+input [AMSB:0] pcF;
+input [AMSB:0] xpc0;
+input [AMSB:0] xpc1;
+input [AMSB:0] xpc2;
+input takb0;
+input takb1;
+input takb2;
+output predict_takenA;
+output predict_takenB;
+output predict_takenC;
+output predict_takenD;
+output predict_takenE;
+output predict_takenF;
+
+integer n;
+reg [AMSB:0] pcs [0:31];	// queued {xpc[31:1],takb} words
+reg [AMSB:0] pc;
+reg takb;
+reg [4:0] pcshead,pcstail;
+reg wrhist;
+reg [2:0] gbl_branch_hist;
+reg [1:0] branch_history_table [511:0];
+// For simulation only, give the history register and table known contents.
+// Every counter starts at 3, which reads as "predict not taken".
+// In the real world we don't care.
+initial begin
+ gbl_branch_hist = 3'b000;
+ for (n = 0; n < 512; n = n + 1)
+ branch_history_table[n] = 3;
+end
+wire [8:0] bht_wa = {pc[7:1],gbl_branch_hist[2:1]}; // write address
+wire [8:0] bht_raA = {pcA[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [8:0] bht_raB = {pcB[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [8:0] bht_raC = {pcC[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [8:0] bht_raD = {pcD[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [8:0] bht_raE = {pcE[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [8:0] bht_raF = {pcF[7:1],gbl_branch_hist[2:1]}; // read address (IF stage)
+wire [1:0] bht_xbits = branch_history_table[bht_wa];
+wire [1:0] bht_ibitsA = branch_history_table[bht_raA];
+wire [1:0] bht_ibitsB = branch_history_table[bht_raB];
+wire [1:0] bht_ibitsC = branch_history_table[bht_raC];
+wire [1:0] bht_ibitsD = branch_history_table[bht_raD];
+wire [1:0] bht_ibitsE = branch_history_table[bht_raE];
+wire [1:0] bht_ibitsF = branch_history_table[bht_raF];
+assign predict_takenA = (bht_ibitsA==2'd0 || bht_ibitsA==2'd1) && en;
+assign predict_takenB = (bht_ibitsB==2'd0 || bht_ibitsB==2'd1) && en;
+assign predict_takenC = (bht_ibitsC==2'd0 || bht_ibitsC==2'd1) && en;
+assign predict_takenD = (bht_ibitsD==2'd0 || bht_ibitsD==2'd1) && en;
+assign predict_takenE = (bht_ibitsE==2'd0 || bht_ibitsE==2'd1) && en;
+assign predict_takenF = (bht_ibitsF==2'd0 || bht_ibitsF==2'd1) && en;
+
+// Queue slots used by each update port this cycle. A port's slot is the
+// tail advanced by the number of higher priority ports that are also
+// writing. The slots are explicit 5-bit values so they wrap from 31 back to
+// 0 along with pcstail; indexing with "pcstail+1" is evaluated at 32 bits
+// and addresses a non-existent entry 32 or 33 when the tail is near the end
+// of the queue, silently dropping the update.
+wire [4:0] slot0 = pcstail;
+wire [4:0] slot1 = pcstail + {4'd0,xisBranch0};
+wire [4:0] slot2 = slot1 + {4'd0,xisBranch1};
+wire [4:0] tail_next = slot2 + {4'd0,xisBranch2};
+
+// Enqueue up to three resolved branches per cycle.
+always @(posedge clk)
+if (rst)
+ pcstail <= 5'd0;
+else begin
+ if (xisBranch0)
+ pcs[slot0] <= {xpc0[31:1],takb0};
+ if (xisBranch1)
+ pcs[slot1] <= {xpc1[31:1],takb1};
+ if (xisBranch2)
+ pcs[slot2] <= {xpc2[31:1],takb2};
+ pcstail <= tail_next;
+end
+
+// Dequeue one branch per cycle into pc/takb and flag it with wrhist.
+// wrhist is cleared by reset so that no history update is performed with
+// undefined queue outputs.
+always @(posedge clk)
+if (rst) begin
+ pcshead <= 5'd0;
+ wrhist <= 1'b0;
+end
+else begin
+ wrhist <= 1'b0;
+ if (pcshead != pcstail) begin
+ pc <= pcs[pcshead];
+ takb <= pcs[pcshead][0];
+ wrhist <= 1'b1;
+ pcshead <= pcshead + 5'd1;
+ end
+end
+
+// Two bit saturating counter.
+// Taken: count up, holding at 2'd1. Not taken: count down, holding at 2'd2.
+// With no update pending the counter value is passed through unchanged.
+reg [1:0] xbits_new;
+always @*
+begin
+ xbits_new = bht_xbits;
+ if (wrhist) begin
+ if (takb) begin
+ if (bht_xbits != 2'd1)
+ xbits_new = bht_xbits + 2'd1;
+ end
+ else begin
+ if (bht_xbits != 2'd2)
+ xbits_new = bht_xbits - 2'd1;
+ end
+ end
+end
+
+// Apply the dequeued branch to the global history and the counter table.
+// Updates are discarded while 'en' is low.
+always @(posedge clk)
+if (rst)
+ gbl_branch_hist <= 3'b000;
+else begin
+ if (en) begin
+ if (wrhist) begin
+ gbl_branch_hist <= {gbl_branch_hist[1:0],takb};
+ branch_history_table[bht_wa] <= xbits_new;
+ end
+ end
+end
+
+endmodule
+
Index: FT64v7/rtl/twoway/FT64_TLB.v
===================================================================
--- FT64v7/rtl/twoway/FT64_TLB.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_TLB.v (revision 60)
@@ -0,0 +1,622 @@
+`include "FT64_defines.vh"
+`include "FT64_config.vh"
+//=============================================================================
+// __
+// \\__/ o\ (C) 2011-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_TLB.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// TLB
+// The TLB contains 256 entries, that are 16 way set associative.
+// The TLB is shared between the instruction and data streams.
+//
+//=============================================================================
+//
+`define TLBMissPage {DBW-13{1'b1}}
+
+//-----------------------------------------------------------------------------
+// FT64_TLB - translation lookaside buffer with a register-style command port.
+//
+// Command port:
+//   ld        starts a command; the sequencer steps IDLE -> ONE -> TWO.
+//   op        command code (`TLB_* macros).
+//   regno     register select for `TLB_WRREG and for the dato read-back mux.
+//   dati/dato command data in / registered read-back data out.
+//   done      high when the sequencer is IDLE with ld low, or in state TWO.
+//   idle      high when the sequencer is IDLE with ld low.
+//
+// Translation port:
+//   vadr_i    virtual address in.
+//   padr_o    registered address out; equals vadr_i when the TLB is
+//             disabled or ol is 2'b00.
+//   cyc_i, we_i, icl_i   access strobes from the requester.
+//   cyc_o, we_o          registered strobes, gated by TLBMiss / permissions.
+//   wrv_o, rdv_o, exv_o  registered flags: no miss, but the selected entry
+//             lacks the W / R / X bit respectively.
+//   ASID      address space id compared against each entry (unless the
+//             entry is marked global).
+//   ol        operating level.
+//   uncached  cache-control field of the selected entry equals 3'd1.
+//   TLBMiss   combinational: TLB enabled and either a mapped address found
+//             no valid match, or ol != 0 and the address is in the I/O page.
+//   HTLBVirtPageo  holding virtual-page register followed by 13 zero bits.
+//-----------------------------------------------------------------------------
+module FT64_TLB(rst, clk, ld, done, idle, ol,
+ ASID, op, regno, dati, dato,
+ uncached,
+ icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o,
+ wrv_o, rdv_o, exv_o,
+ TLBMiss, HTLBVirtPageo);
+parameter DBW=64;
+parameter ABW=32;
+parameter ENTRIES=256;
+// Command sequencer states (register "state").
+parameter IDLE = 4'd0;
+parameter ONE = 4'd1;
+parameter TWO = 4'd2;
+// Age/count RAM sequencer states (register "ar_state"); IDLE is shared.
+parameter READ = 4'd1;
+parameter INC1 = 4'd2;
+parameter INC2 = 4'd3;
+parameter INC3 = 4'd4;
+parameter AGE1 = 4'd5;
+parameter AGE2 = 4'd6;
+input rst;
+input clk;
+input ld;
+output done;
+output idle;
+input [1:0] ol; // operating level
+input [ABW-1:0] vadr_i;
+output reg [ABW-1:0] padr_o;
+output uncached;
+
+input icl_i;
+input cyc_i;
+input we_i;
+output reg cyc_o;
+output reg we_o;
+output reg exv_o;
+output reg wrv_o;
+output reg rdv_o;
+input [7:0] ASID;
+input [3:0] op;
+input [3:0] regno;
+input [DBW-1:0] dati;
+output reg [DBW-1:0] dato;
+output TLBMiss;
+output [DBW-1:0] HTLBVirtPageo;
+
+integer n;
+
+reg [3:0] state;
+// FIX: the original read "state==(IDLE && !ld)". Because IDLE is zero the
+// parenthesised term is always zero, so done was simply state==IDLE and
+// ignored ld. The grouping below matches the idle output on the next line.
+assign done = (state==IDLE && !ld) || state==TWO;
+assign idle = state==IDLE && !ld;
+
+// Holding registers
+// These allow the TLB to be updated in a single cycle as a unit
+reg [DBW-1:0] HTLBVirtPage;
+assign HTLBVirtPageo = {HTLBVirtPage,13'b0};
+reg [DBW-1:0] HTLBPhysPage;
+reg [7:0] HTLBASID;
+reg HTLBG;
+reg HTLBD;
+reg HTLBR, HTLBW, HTLBX, HTLBA, HTLBU, HTLBS;
+reg [2:0] HTLBC;
+reg [7:0] HTLBPL;
+reg [2:0] HTLBPageSize;
+reg HTLBValid;
+reg [ABW-1:0] miss_addr;
+
+reg TLBenabled;
+reg [7:0] i;            // entry index used by the command port: {way, set}
+reg [DBW-1:0] Index;
+reg [3:0] Random;
+reg [3:0] Wired;
+reg [2:0] PageSize;
+reg [15:0] Match;       // one bit per way for the set selected by vadrs[3:0]
+
+reg [4:0] q;            // lowest matching way; q[4] set means no way matched
+wire doddpage;
+reg [DBW-1:0] TLBVirtPage [ENTRIES-1:0];
+reg [ENTRIES-1:0] TLBG;
+reg [ENTRIES-1:0] TLBD;
+reg [ENTRIES-1:0] TLBU;
+reg [ENTRIES-1:0] TLBS;
+reg [ENTRIES-1:0] TLBA;
+reg [2:0] TLBC [ENTRIES-1:0];
+reg [7:0] TLBASID [ENTRIES-1:0];
+reg [7:0] TLBPL [ENTRIES-1:0];
+reg [2:0] TLBPageSize [255:0];
+reg [ENTRIES-1:0] TLBValid;
+reg [DBW-1:0] imiss_addr;
+reg [DBW-1:0] dmiss_addr;
+reg [DBW-1:0] PageTblAddr;
+reg [DBW-1:0] PageTblCtrl;
+
+reg [23:0] age_lmt;
+reg [23:0] age_ctr;
+wire age_tick = age_ctr < 24'd5;
+reg cyc_en, age_en;
+reg [3:0] ar_state;
+reg ar_wr;
+reg [7:0] age_adr, ar_adr;
+reg [32:0] count;       // bit 32 holds the carry out of the 24-bit increment
+reg [31:0] ar_dati;
+wire [31:0] ar_dato;
+reg [31:0] ar_cdato;
+reg getset_age;
+reg doLoad;
+
+/*
+initial begin
+ for (n = 0; n < ENTRIES; n = n + 1)
+ begin
+ TLBVirtPage[n] = 0;
+ TLBG[n] = 0;
+ TLBASID[n] = 0;
+ TLBD[n] = 0;
+ TLBC[n] = 0;
+ TLBA[n] = 0;
+ TLBR[n] = 0;
+ TLBW[n] = 0;
+ TLBX[n] = 0;
+ TLBS[n] = 0;
+ TLBU[n] = 0;
+ TLBValid[n] = 0;
+ end
+end
+*/
+
+// Assume the instruction doesn't overlap between a mapped and unmapped area.
+wire unmappedArea = vadr_i[ABW-1:ABW-8]==8'hFF || !TLBenabled;
+wire m1UnmappedArea = padr_o[ABW-1:ABW-8]==8'hFF || !TLBenabled;
+wire hitIOPage = vadr_i[ABW-1:ABW-12]==12'hFFD;
+
+// NOTE(review): the page-size table is written at the entry index i but is
+// read here with ASID as the index - confirm this is intended.
+always @(posedge clk)
+  PageSize <= TLBPageSize[ASID];
+
+// Virtual page number, shifted right by twice the page-size code.
+wire [ABW-1:0] vadrs = vadr_i[ABW-1:13] >> {PageSize,1'b0};
+wire [DBW-1:0] TLBPhysPage_rdo;
+wire [ABW-1:0] PFN;
+
+// Toolset didn't like the simpler distributed code where the RAM was inferred.
+// Resulted in combinatorial loop error message. Even though there weren't any
+// combinatorial loops.
+
+// Physical page numbers. Port 0 serves the command port (entry i), port 1
+// serves the lookup ({matching way, set}).
+TLBPhysPageRam #(DBW) upgrm1
+(
+  .clk(clk),
+  .we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
+  .wa(i),
+  .i(HTLBPhysPage),
+  .ra0(i),
+  .ra1({q[3:0],vadrs[3:0]}),
+  .o0(TLBPhysPage_rdo),
+  .o1(PFN)
+);
+
+// Read permission bits.
+wire tlbRo0,tlbRo1;
+TLBRam #(1) uR
+(
+  .clk(clk),
+  .we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
+  .wa(i),
+  .i(HTLBR),
+  .ra0(i),
+  .ra1({q[3:0],vadrs[3:0]}),
+  .o0(tlbRo0),
+  .o1(tlbRo1)
+);
+
+// Write permission bits.
+wire tlbWo0,tlbWo1;
+TLBRam #(1) uW
+(
+  .clk(clk),
+  .we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
+  .wa(i),
+  .i(HTLBW),
+  .ra0(i),
+  .ra1({q[3:0],vadrs[3:0]}),
+  .o0(tlbWo0),
+  .o1(tlbWo1)
+);
+
+// Execute permission bits.
+wire tlbXo0,tlbXo1;
+TLBRam #(1) uX
+(
+  .clk(clk),
+  .we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
+  .wa(i),
+  .i(HTLBX),
+  .ra0(i),
+  .ra1({q[3:0],vadrs[3:0]}),
+  .o0(tlbXo0),
+  .o1(tlbXo1)
+);
+
+// Free-running down counter that reloads from age_lmt when it reaches zero.
+// age_tick (above) is true while the counter is below 5.
+always @(posedge clk)
+if (rst) begin
+  age_ctr <= 24'd0;
+end
+else begin
+  if (age_ctr==24'd0)
+    age_ctr <= age_lmt;
+  else
+    age_ctr <= age_ctr - 4'd1;
+end
+
+// Handle Random register
+// Counts down every clock and wraps back to 4'hF when it equals Wired.
+always @(posedge clk)
+if (rst) begin
+  Random <= 4'hF;
+end
+else begin
+  if (Random==Wired)
+    Random <= 4'hF;
+  else
+    Random <= Random - 4'd1;
+  // Why would we want to update since random changes on the next clock
+  // anyways ?
+  if (state==ONE) begin
+    if (op==`TLB_WRREG && regno==`TLBRandom)
+      Random <= dati[3:0];
+  end
+end
+
+// Command sequencer. Age read/write commands wait in ONE until the age RAM
+// sequencer signals completion through getset_age.
+always @(posedge clk)
+if (rst) begin
+  state <= IDLE;
+end
+else begin
+case(state)
+IDLE:
+  if (ld)
+    state <= ONE;
+ONE:
+  if (op==`TLB_RDAGE || op==`TLB_WRAGE) begin
+    if (getset_age)
+      state <= TWO;
+  end
+  else
+    state <= TWO;
+TWO:
+  state <= IDLE;
+default:
+  state <= IDLE;
+endcase
+end
+
+// Set index to page table
+// The set is taken from the low four bits of the size-adjusted holding
+// virtual page; the way comes from Index (indexed ops) or Random (TLB_WR).
+always @(posedge clk)
+if (rst) begin
+  i <= 8'd0;
+end
+else begin
+  if (state==ONE) begin
+    case(op)
+    `TLB_RD,`TLB_WI:
+      i <= {Index[7:4],(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
+    `TLB_WR:
+      i <= {Random,(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
+    endcase
+  end
+end
+
+// Register writes (state ONE) and entry read/write (state TWO).
+always @(posedge clk)
+if (rst) begin
+  TLBenabled <= 1'b0;
+  Wired <= 4'd0;
+  PageTblAddr <= {DBW{1'b0}};
+  PageTblCtrl <= {DBW{1'b0}};
+  age_lmt <= 24'd20000;
+end
+else begin
+  // Latch the first missing address; it is held until software clears the
+  // register by writing zero to `TLBMissAdr.
+  if (miss_addr == {DBW{1'b0}} && TLBMiss)
+    miss_addr <= vadr_i;
+
+  if (state==ONE) begin
+    case(op)
+    `TLB_WRREG:
+      begin
+        case(regno)
+        // NOTE(review): Wired is 4 bits but only 3 are loaded, and only 6
+        // bits of Index are loaded although Index[7:4] selects the way -
+        // confirm these widths against the 16-way organisation.
+        `TLBWired: Wired <= dati[2:0];
+        `TLBIndex: Index <= dati[5:0];
+        //`TLBPageSize: PageSize <= dati[2:0];
+        `TLBVirtPage: HTLBVirtPage <= dati;
+        `TLBPhysPage: HTLBPhysPage <= dati;
+        `TLBASID: begin
+          HTLBValid <= |dati[2:0]; // valid if any of X/W/R is set
+          HTLBX <= dati[0];
+          HTLBW <= dati[1];
+          HTLBR <= dati[2];
+          HTLBC <= dati[5:3];
+          HTLBA <= dati[6];
+          HTLBS <= dati[7];
+          HTLBU <= dati[8];
+          HTLBD <= dati[9];
+          HTLBG <= dati[10];
+          HTLBPageSize <= dati[13:11];
+          HTLBASID <= dati[23:16];
+          HTLBPL <= dati[31:24];
+        end
+        `TLBMissAdr: miss_addr <= dati;
+        `TLBPageTblAddr: PageTblAddr <= dati;
+        `TLBPageTblCtrl: PageTblCtrl <= dati;
+        `TLBAFC: age_lmt <= dati[23:0];
+        default: ;
+        endcase
+      end
+    `TLB_EN:
+      TLBenabled <= 1'b1;
+    `TLB_DIS:
+      TLBenabled <= 1'b0;
+    `TLB_INVALL:
+      TLBValid <= 256'd0;
+    default: ;
+    endcase
+  end
+  else if (state==TWO) begin
+    case(op)
+    `TLB_P:
+      begin
+        // Probe: top bit of Index is set when no way matched.
+        Index[DBW-1] <= ~|Match;
+      end
+    `TLB_RD:
+      begin
+        // Copy entry i into the holding registers.
+        HTLBVirtPage <= TLBVirtPage[i];
+        HTLBPhysPage <= TLBPhysPage_rdo;
+        HTLBASID <= TLBASID[i];
+        HTLBPL <= TLBPL[i];
+        HTLBPageSize <= TLBPageSize[i];
+        HTLBG <= TLBG[i];
+        HTLBD <= TLBD[i];
+        HTLBC <= TLBC[i];
+        HTLBR <= tlbRo0;
+        HTLBW <= tlbWo0;
+        HTLBX <= tlbXo0;
+        HTLBU <= TLBU[i];
+        HTLBS <= TLBS[i];
+        HTLBA <= TLBA[i];
+        HTLBValid <= TLBValid[i];
+      end
+    `TLB_WR,`TLB_WI:
+      begin
+        // Copy the holding registers into entry i. The physical page and
+        // R/W/X bits are written by the RAM instances in the same cycle.
+        TLBVirtPage[i] <= HTLBVirtPage;
+        TLBASID[i] <= HTLBASID;
+        TLBPL[i] <= HTLBPL;
+        TLBPageSize[i] <= HTLBPageSize;
+        TLBG[i] <= HTLBG;
+        TLBD[i] <= HTLBD;
+        TLBC[i] <= HTLBC;
+        TLBA[i] <= HTLBA;
+        TLBU[i] <= HTLBU;
+        TLBS[i] <= HTLBS;
+        TLBValid[i] <= HTLBValid;
+      end
+    default: ;
+    endcase
+  end
+
+  // Set the dirty bit on a store
+  if (we_i)
+    if (!m1UnmappedArea & !q[4]) begin
+      TLBD[{q[3:0],vadrs[3:0]}] <= 1'b1;
+    end
+end
+
+// Registered read-back mux selected by regno.
+always @(posedge clk)
+  case(regno)
+  `TLBWired: dato <= Wired;
+  `TLBIndex: dato <= Index;
+  `TLBRandom: dato <= Random;
+  `TLBPhysPage: dato <= HTLBPhysPage;
+  `TLBVirtPage: dato <= HTLBVirtPage;
+  `TLBPageSize: dato <= PageSize;
+  `TLBASID: begin
+    // Same field layout as the `TLBASID write above; unused bits read zero.
+    dato <= {DBW{1'b0}};
+    dato[0] <= HTLBX;
+    dato[1] <= HTLBW;
+    dato[2] <= HTLBR;
+    dato[5:3] <= HTLBC;
+    dato[6] <= HTLBA;
+    dato[7] <= HTLBS;
+    dato[8] <= HTLBU;
+    dato[9] <= HTLBD;
+    dato[10] <= HTLBG;
+    dato[13:11] <= HTLBPageSize;
+    dato[23:16] <= HTLBASID;
+    dato[31:24] <= HTLBPL;
+  end
+  `TLBMissAdr: dato <= miss_addr;
+  `TLBPageTblAddr: dato <= PageTblAddr;
+  `TLBPageTblCtrl: dato <= PageTblCtrl;
+  `TLBPageCount: dato <= {16'd0,ar_cdato};
+  default: dato <= {DBW{1'b0}};
+  endcase
+
+TLBAgeRam uar1(clk,ar_wr,ar_adr,ar_dati,ar_dato);
+
+// Age/count RAM sequencer. From IDLE it services, in priority order:
+//   1. an age read/write command from the command port (READ),
+//   2. a matching access, whose upper 24-bit count is incremented with
+//      saturation (INC1..INC3),
+//   3. an age tick, which halves the word at the next age address
+//      (AGE1..AGE2).
+// cyc_en / age_en ensure one update per access / per tick.
+always @(posedge clk)
+if (rst) begin
+  age_adr <= 4'd0;
+  ar_wr <= 1'b0;
+  ar_adr <= 4'd0;
+  ar_state <= IDLE;
+  cyc_en <= 1'b1;
+  age_en <= 1'b1;
+  doLoad <= 1'b0;
+end
+else begin
+ar_wr <= 1'b0;
+getset_age <= 1'b0;
+if (ld)
+  doLoad <= 1'b1;
+case(ar_state)
+IDLE:
+  begin
+    if (~cyc_i)
+      cyc_en <= 1'b1;
+    if (~age_tick)
+      age_en <= 1'b1;
+    if ((ld|doLoad) && (op==`TLB_RDAGE || op==`TLB_WRAGE)) begin
+      doLoad <= 1'b0;
+      ar_wr <= op==`TLB_WRAGE;
+      ar_adr <= i;
+      ar_dati <= dati[31:0];
+      ar_state <= READ;
+    end
+    else if (cyc_i & |Match & cyc_en) begin
+      cyc_en <= 1'b0;
+      ar_adr <= {q[3:0],vadrs[3:0]};
+      ar_state <= INC1;
+    end
+    else if (age_tick & age_en) begin
+      age_en <= 1'b0;
+      ar_adr <= age_adr;
+      age_adr <= age_adr + 4'd1;
+      ar_state <= AGE1;
+    end
+  end
+READ:
+  begin
+    getset_age <= 1'b1;   // releases the command sequencer from state ONE
+    ar_cdato <= ar_dato;
+    ar_state <= IDLE;
+  end
+INC1:
+  begin
+    count <= ar_dato;
+    ar_state <= INC2;
+  end
+INC2:
+  begin
+    // FIX: operands of a concatenation are self-determined, so the original
+    // "count[31:8] + 4'd1" was evaluated at 24 bits and the carry never
+    // reached count[32]. Widening the add to 25 bits lets INC3 saturate.
+    count <= {{1'b0,count[31:8]} + 25'd1,count[7:0]};
+    ar_state <= INC3;
+  end
+INC3:
+  begin
+    ar_wr <= 1'b1;
+    ar_dati <= {count[32] ? 24'hFFFFFF :count[31:8],count[7:0]};
+    ar_state <= IDLE;
+  end
+AGE1:
+  begin
+    count <= ar_dato;
+    ar_state <= AGE2;
+  end
+AGE2:
+  begin
+    ar_wr <= 1'b1;
+    ar_dati <= count >> 1;
+    ar_state <= IDLE;
+  end
+endcase
+end
+
+// Compare the lookup address against every way of the selected set.
+// FIX: the valid bit was indexed with q, which is itself derived from Match;
+// that formed a combinational feedback path and tested the wrong way's valid
+// bit. Each way now checks its own valid bit.
+always @*
+for (n = 0; n < 16; n = n + 1)
+  Match[n[3:0]] = (vadrs[ABW-1:4]==TLBVirtPage[{n[3:0],vadrs[3:0]}]) &&
+    ((TLBASID[{n[3:0],vadrs[3:0]}]==ASID) || TLBG[{n[3:0],vadrs[3:0]}]) &&
+    TLBValid[{n[3:0],vadrs[3:0]}];
+
+// Priority encode: q is the lowest-numbered matching way, or 31 (q[4] set)
+// when nothing matched.
+always @*
+begin
+  q = 5'd31;
+  for (n = 15; n >= 0; n = n - 1)
+    if (Match[n]) q = n;
+end
+
+assign uncached = TLBC[{q[3:0],vadrs[3:0]}]==3'd1;// || unmappedDataArea;
+
+assign TLBMiss = TLBenabled & (!unmappedArea & (q[4] | ~TLBValid[{q[3:0],vadrs[3:0]}]) ||
+          (ol!=2'b00 && hitIOPage));
+
+always @(posedge clk)
+  cyc_o <= cyc_i & (~TLBMiss | ~TLBenabled);
+
+always @(posedge clk)
+  we_o <= we_i & ((~TLBMiss & tlbWo1) | ~TLBenabled);
+
+always @(posedge clk)
+  wrv_o <= we_i & ~TLBMiss & ~tlbWo1 & TLBenabled;
+
+always @(posedge clk)
+  rdv_o <= ~we_i & ~TLBMiss & ~tlbRo1 & TLBenabled;
+
+always @(posedge clk)
+  exv_o <= icl_i & ~TLBMiss & ~tlbXo1 & TLBenabled;
+
+// Output address. With the TLB enabled and ol != 0, the page bits come from
+// the input (unmapped area), the miss page, or the looked-up PFN; larger page
+// sizes pass more low-order page bits straight through from vadr_i.
+always @(posedge clk)
+if (rst)
+  padr_o <= 32'hFFFC0100;
+else begin
+if (TLBenabled && ol != 2'b00) begin
+  case(PageSize)
+  3'd0: padr_o[ABW-1:13] <= unmappedArea ? vadr_i[ABW-1:13] : TLBMiss ? `TLBMissPage: PFN;
+  3'd1: padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:15] : TLBMiss ? `TLBMissPage: PFN,vadr_i[14:13]};
+  3'd2: padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:17] : TLBMiss ? `TLBMissPage: PFN,vadr_i[16:13]};
+  3'd3: padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:19] : TLBMiss ? `TLBMissPage: PFN,vadr_i[18:13]};
+  3'd4: padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:21] : TLBMiss ? `TLBMissPage: PFN,vadr_i[20:13]};
+  3'd5: padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:23] : TLBMiss ? `TLBMissPage: PFN,vadr_i[22:13]};
+  default: padr_o[ABW-1:13] <= vadr_i[ABW-1:13];
+  endcase
+  padr_o[12:0] <= vadr_i[12:0];
+end
+else
+  padr_o <= vadr_i;
+end
+
+endmodule
+
+// 256-word memory with one synchronous write port and two asynchronous
+// read ports. DBW sets the word width.
+module TLBRam(clk,we,wa,i,ra0,ra1,o0,o1);
+parameter DBW=1;
+localparam DEPTH = 256;
+
+// write port
+input clk;
+input we;
+input [7:0] wa;
+input [DBW-1:0] i;
+// read ports
+input [7:0] ra0;
+input [7:0] ra1;
+output [DBW-1:0] o0;
+output [DBW-1:0] o1;
+
+reg [DBW-1:0] mem [0:DEPTH-1];
+
+// Reads are combinational on the current address.
+assign o1 = mem[ra1];
+assign o0 = mem[ra0];
+
+// Writes take effect on the rising clock edge while we is high.
+always @(posedge clk)
+begin
+  if (we) begin
+    mem[wa] <= i;
+  end
+end
+
+endmodule
+
+// 256-word memory holding physical page numbers: one synchronous write
+// port, two asynchronous read ports. DBW sets the word width.
+module TLBPhysPageRam(clk,we,wa,i,ra0,ra1,o0,o1);
+parameter DBW=64;
+localparam DEPTH = 256;
+
+// write port
+input clk;
+input we;
+input [7:0] wa;
+input [DBW-1:0] i;
+// read ports
+input [7:0] ra0;
+input [7:0] ra1;
+output [DBW-1:0] o0;
+output [DBW-1:0] o1;
+
+reg [DBW-1:0] mem [0:DEPTH-1];
+
+// Reads are combinational on the current address.
+assign o1 = mem[ra1];
+assign o0 = mem[ra0];
+
+// Writes take effect on the rising clock edge while we is high.
+always @(posedge clk)
+begin
+  if (we) begin
+    mem[wa] <= i;
+  end
+end
+
+endmodule
+
+// 256-word single-address memory: the same address a is used for the
+// synchronous write and the asynchronous read. DBW sets the word width.
+module TLBAgeRam(clk,we,a,i,o);
+parameter DBW=32;
+localparam DEPTH = 256;
+
+input clk;
+input we;
+input [7:0] a;
+input [DBW-1:0] i;
+output [DBW-1:0] o;
+
+reg [DBW-1:0] mem [0:DEPTH-1];
+
+// Read is combinational on the current address.
+assign o = mem[a];
+
+// Write takes effect on the rising clock edge while we is high.
+always @(posedge clk)
+begin
+  if (we) begin
+    mem[a] <= i;
+  end
+end
+
+endmodule
+
Index: FT64v7/rtl/twoway/FT64_fetchbuf.v
===================================================================
--- FT64v7/rtl/twoway/FT64_fetchbuf.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_fetchbuf.v (revision 60)
@@ -0,0 +1,1200 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_fetchbuf.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include "FT64_config.vh"
+`include "FT64_defines.vh"
+
+// FETCH
+//
+// fetch exactly two instructions from memory into the fetch buffer
+// unless either one of the buffers is still full, in which case we
+// do nothing (kinda like alpha approach)
+// Like to turn this into an independent module at some point.
+//
+module FT64_fetchbuf(rst, clk4x, clk, fcu_clk,
+ cs_i, cyc_i, stb_i, ack_o, we_i, adr_i, dat_i,
+ cmpgrp,
+ freezePC, thread_en,
+ regLR,
+ insn0, insn1, phit,
+ threadx,
+ branchmiss, misspc, branchmiss_thrd, predict_taken0, predict_taken1,
+ predict_takenA, predict_takenB, predict_takenC, predict_takenD,
+ queued1, queued2, queuedNop,
+ pc0, pc1, fetchbuf, fetchbufA_v, fetchbufB_v, fetchbufC_v, fetchbufD_v,
+ fetchbufA_instr, fetchbufA_pc,
+ fetchbufB_instr, fetchbufB_pc,
+ fetchbufC_instr, fetchbufC_pc,
+ fetchbufD_instr, fetchbufD_pc,
+ fetchbuf0_instr, fetchbuf1_instr, fetchbuf0_insln, fetchbuf1_insln,
+ fetchbuf0_thrd, fetchbuf1_thrd,
+ fetchbuf0_pc, fetchbuf1_pc,
+ fetchbuf0_v, fetchbuf1_v,
+ codebuf0, codebuf1,
+ btgtA, btgtB, btgtC, btgtD,
+ nop_fetchbuf,
+ take_branch0, take_branch1,
+ stompedRets,
+ panic
+);
+parameter AMSB = `AMSB;
+parameter RSTPC = 64'hFFFC0100;
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+input rst;
+input clk4x;
+input clk;
+input fcu_clk;
+input cs_i;
+input cyc_i;
+input stb_i;
+output ack_o;
+input we_i;
+input [15:0] adr_i;
+input [47:0] dat_i;
+input [2:0] cmpgrp;
+input freezePC;
+input thread_en;
+input [4:0] regLR;
+input [47:0] insn0;
+input [47:0] insn1;
+input phit;
+output threadx;
+input branchmiss;
+input [AMSB:0] misspc;
+input branchmiss_thrd;
+output predict_taken0;
+output predict_taken1;
+input predict_takenA;
+input predict_takenB;
+input predict_takenC;
+input predict_takenD;
+input queued1;
+input queued2;
+input queuedNop;
+output reg [AMSB:0] pc0;
+output reg [AMSB:0] pc1;
+output reg fetchbuf;
+output reg fetchbufA_v;
+output reg fetchbufB_v;
+output reg fetchbufC_v;
+output reg fetchbufD_v;
+output fetchbuf0_thrd;
+output fetchbuf1_thrd;
+output reg [47:0] fetchbufA_instr;
+output reg [47:0] fetchbufB_instr;
+output reg [47:0] fetchbufC_instr;
+output reg [47:0] fetchbufD_instr;
+output reg [AMSB:0] fetchbufA_pc;
+output reg [AMSB:0] fetchbufB_pc;
+output reg [AMSB:0] fetchbufC_pc;
+output reg [AMSB:0] fetchbufD_pc;
+output [47:0] fetchbuf0_instr;
+output [47:0] fetchbuf1_instr;
+output [AMSB:0] fetchbuf0_pc;
+output [AMSB:0] fetchbuf1_pc;
+output [2:0] fetchbuf0_insln;
+output [2:0] fetchbuf1_insln;
+output fetchbuf0_v;
+output fetchbuf1_v;
+input [47:0] codebuf0;
+input [47:0] codebuf1;
+input [AMSB:0] btgtA;
+input [AMSB:0] btgtB;
+input [AMSB:0] btgtC;
+input [AMSB:0] btgtD;
+input [3:0] nop_fetchbuf;
+output take_branch0;
+output take_branch1;
+input [3:0] stompedRets;
+output reg [3:0] panic;
+integer n;
+
+//`include "FT64_decode.vh"
+
+// TRUE when the opcode field is one of the conditional branch opcodes
+// (Bcc, BBc, BEQI, BCHK); FALSE otherwise.
+function IsBranch;
+input [47:0] isn;
+casex(isn[`INSTRUCTION_OP])
+`Bcc,`BBc,`BEQI,`BCHK:
+  IsBranch = TRUE;
+default:
+  IsBranch = FALSE;
+endcase
+endfunction
+
+// True when the opcode field equals `JAL.
+function IsJAL;
+input [47:0] isn;
+begin
+  IsJAL = (isn[`INSTRUCTION_OP] == `JAL);
+end
+endfunction
+
+// True when the opcode field equals `JMP.
+function IsJmp;
+input [47:0] isn;
+begin
+  IsJmp = (isn[`INSTRUCTION_OP] == `JMP);
+end
+endfunction
+
+// True when the opcode field equals `CALL.
+function IsCall;
+input [47:0] isn;
+begin
+  IsCall = (isn[`INSTRUCTION_OP] == `CALL);
+end
+endfunction
+
+// True when the opcode field equals `RET.
+function IsRet;
+input [47:0] isn;
+begin
+  IsRet = (isn[`INSTRUCTION_OP] == `RET);
+end
+endfunction
+
+// True when the opcode field equals `BRK.
+function IsBrk;
+input [47:0] isn;
+begin
+  IsBrk = (isn[`INSTRUCTION_OP] == `BRK);
+end
+endfunction
+
+// True when the opcode field equals `R2 and the S2 function field equals
+// `RTI.
+function IsRTI;
+input [47:0] isn;
+begin
+  IsRTI = (isn[`INSTRUCTION_OP] == `R2) && (isn[`INSTRUCTION_S2] == `RTI);
+end
+endfunction
+
+
+// Instruction length in bytes, decoded from bits [7:6] of the instruction:
+// 00 -> 4, 01 -> 6, anything else -> 2. With SUPPORT_DCI defined, the
+// `CMPRSSD opcode is always 2 bytes.
+function [2:0] fnInsLength;
+input [47:0] ins;
+`ifdef SUPPORT_DCI
+if (ins[`INSTRUCTION_OP]==`CMPRSSD)
+  fnInsLength = 3'd2;
+else
+`endif
+  case(ins[7:6])
+  2'b00:
+    fnInsLength = 3'd4;
+  2'b01:
+    fnInsLength = 3'd6;
+  default:
+    fnInsLength = 3'd2;
+  endcase
+endfunction
+
+wire [2:0] fetchbufA_inslen;
+wire [2:0] fetchbufB_inslen;
+wire [2:0] fetchbufC_inslen;
+wire [2:0] fetchbufD_inslen;
+FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen);
+FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen);
+FT64_InsLength uilC (fetchbufC_instr, fetchbufC_inslen);
+FT64_InsLength uilD (fetchbufD_instr, fetchbufD_inslen);
+
+wire [47:0] xinsn0;
+wire [47:0] xinsn1;
+
+FT64_iexpander ux1
+(
+ .cinstr(insn0[15:0]),
+ .expand(xinsn0)
+);
+FT64_iexpander ux2
+(
+ .cinstr(insn1[15:0]),
+ .expand(xinsn1)
+);
+
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Table of decompressed instructions.
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+assign ack_o = cs_i & cyc_i & stb_i;
+`ifdef SUPPORT_DCI
+// Decompression table: 2048 entries of 48 bits. Lookups index it with
+// {cmpgrp, insn[15:8]}, which is 3 + 8 = 11 bits.
+// The table is loaded through the cs_i/cyc_i/stb_i/we_i write port.
+// FIX: the write index was adr_i[12:3] (10 bits), so entries 1024..2047
+// could be read but never written. The index is widened to adr_i[13:3]
+// (11 bits); writes with adr_i[13] clear land exactly where they did before.
+reg [47:0] DecompressTable [0:2047];
+always @(posedge clk)
+  if (cs_i & cyc_i & stb_i & we_i)
+    DecompressTable[adr_i[13:3]] <= dat_i[47:0];
+wire [47:0] expand0 = DecompressTable[{cmpgrp,insn0[15:8]}];
+wire [47:0] expand1 = DecompressTable[{cmpgrp,insn1[15:8]}];
+`endif
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+reg thread;
+reg stompedRet;
+reg ret0Counted, ret1Counted;
+wire [AMSB:0] retpc0, retpc1;
+
+reg did_branchback0;
+reg did_branchback1;
+
+assign predict_taken0 = (fetchbuf==1'b0) ? predict_takenA : predict_takenC;
+assign predict_taken1 = (fetchbuf==1'b0) ? predict_takenB : predict_takenD;
+
+// Predicted target address for the instruction held in each fetch buffer.
+// All four blocks are identical apart from the buffer they read:
+//   RET        -> return address supplied as retpc0 (A, C) or retpc1 (B, D)
+//   JMP, CALL  -> instruction bit 6 selects between a 32-bit field taken from
+//                 instr[39:8] and a 24-bit field from instr[31:8] combined
+//                 with pc[31:25]; a zero is appended as bit 0 in both cases
+//   R2 (RTI), BRK, JAL -> externally supplied target btgtX
+//   default (branches) -> pc[31:8] plus a sign-extended displacement whose
+//                 source field depends on instr[7:6]; the low eight target
+//                 bits come directly from instruction fields; bits 63:32 are
+//                 copied from the pc
+// NOTE(review): the [63:32] part-selects assume AMSB is at least 63 - confirm
+// against the `AMSB definition.
+reg [AMSB:0] branch_pcA;
+reg [AMSB:0] branch_pcB;
+reg [AMSB:0] branch_pcC;
+reg [AMSB:0] branch_pcD;
+
+// Target for fetch buffer A.
+always @*
+case(fetchbufA_instr[`INSTRUCTION_OP])
+`RET: branch_pcA = retpc0;
+`JMP,`CALL: branch_pcA = fetchbufA_instr[6] ? {fetchbufA_instr[39:8],1'b0} : {fetchbufA_pc[31:25],fetchbufA_instr[31:8],1'b0};
+`R2: branch_pcA = btgtA; // RTI
+`BRK,`JAL: branch_pcA = btgtA;
+default:
+ begin
+ branch_pcA[31:8] = fetchbufA_pc[31:8] +
+ (fetchbufA_instr[7:6]==2'b01 ? {{4{fetchbufA_instr[47]}},fetchbufA_instr[47:28]} : {{20{fetchbufA_instr[31]}},fetchbufA_instr[31:28]});
+ branch_pcA[7:0] = {fetchbufA_instr[27:23],fetchbufA_instr[17:16],1'b0};
+ branch_pcA[63:32] = fetchbufA_pc[63:32];
+ end
+endcase
+
+// Target for fetch buffer B.
+always @*
+case(fetchbufB_instr[`INSTRUCTION_OP])
+`RET: branch_pcB = retpc1;
+`JMP,`CALL: branch_pcB = fetchbufB_instr[6] ? {fetchbufB_instr[39:8],1'b0} : {fetchbufB_pc[31:25],fetchbufB_instr[31:8],1'b0};
+`R2: branch_pcB = btgtB; // RTI
+`BRK,`JAL: branch_pcB = btgtB;
+default:
+ begin
+ branch_pcB[31:8] = fetchbufB_pc[31:8] +
+ (fetchbufB_instr[7:6]==2'b01 ? {{4{fetchbufB_instr[47]}},fetchbufB_instr[47:28]} : {{20{fetchbufB_instr[31]}},fetchbufB_instr[31:28]});
+ branch_pcB[7:0] = {fetchbufB_instr[27:23],fetchbufB_instr[17:16],1'b0};
+ branch_pcB[63:32] = fetchbufB_pc[63:32];
+ end
+endcase
+
+// Target for fetch buffer C.
+always @*
+case(fetchbufC_instr[`INSTRUCTION_OP])
+`RET: branch_pcC = retpc0;
+`JMP,`CALL: branch_pcC = fetchbufC_instr[6] ? {fetchbufC_instr[39:8],1'b0} : {fetchbufC_pc[31:25],fetchbufC_instr[31:8],1'b0};
+`R2: branch_pcC = btgtC; // RTI
+`BRK,`JAL: branch_pcC = btgtC;
+default:
+ begin
+ branch_pcC[31:8] = fetchbufC_pc[31:8] +
+ (fetchbufC_instr[7:6]==2'b01 ? {{4{fetchbufC_instr[47]}},fetchbufC_instr[47:28]} : {{20{fetchbufC_instr[31]}},fetchbufC_instr[31:28]});
+ branch_pcC[7:0] = {fetchbufC_instr[27:23],fetchbufC_instr[17:16],1'b0};
+ branch_pcC[63:32] = fetchbufC_pc[63:32];
+ end
+endcase
+
+// Target for fetch buffer D.
+always @*
+case(fetchbufD_instr[`INSTRUCTION_OP])
+`RET: branch_pcD = retpc1;
+`JMP,`CALL: branch_pcD = fetchbufD_instr[6] ? {fetchbufD_instr[39:8],1'b0} : {fetchbufD_pc[31:25],fetchbufD_instr[31:8],1'b0};
+`R2: branch_pcD = btgtD; // RTI
+`BRK,`JAL: branch_pcD = btgtD;
+default:
+ begin
+ branch_pcD[31:8] = fetchbufD_pc[31:8] +
+ (fetchbufD_instr[7:6]==2'b01 ? {{4{fetchbufD_instr[47]}},fetchbufD_instr[47:28]} : {{20{fetchbufD_instr[31]}},fetchbufD_instr[31:28]});
+ branch_pcD[7:0] = {fetchbufD_instr[27:23],fetchbufD_instr[17:16],1'b0};
+ branch_pcD[63:32] = fetchbufD_pc[63:32];
+ end
+endcase
+
+// A fetch buffer redirects the fetch stream when it is valid and holds either
+// a conditional branch that is predicted taken, or an unconditional flow
+// change (RET, JMP, CALL, RTI, BRK or JAL).
+// The operands are single-bit, so the mix of | and || below is equivalent.
+wire take_branchA = ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE}) ||
+ ((IsRet(fetchbufA_instr)||IsJmp(fetchbufA_instr)||IsCall(fetchbufA_instr)||
+ IsRTI(fetchbufA_instr)|| IsBrk(fetchbufA_instr) || IsJAL(fetchbufA_instr)) &&
+ fetchbufA_v);
+wire take_branchB = ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE}) ||
+ ((IsRet(fetchbufB_instr)|IsJmp(fetchbufB_instr)|IsCall(fetchbufB_instr) ||
+ IsRTI(fetchbufB_instr)|| IsBrk(fetchbufB_instr) || IsJAL(fetchbufB_instr)) &&
+ fetchbufB_v);
+wire take_branchC = ({fetchbufC_v, IsBranch(fetchbufC_instr), predict_takenC} == {`VAL, `TRUE, `TRUE}) ||
+ ((IsRet(fetchbufC_instr)|IsJmp(fetchbufC_instr)|IsCall(fetchbufC_instr) ||
+ IsRTI(fetchbufC_instr)|| IsBrk(fetchbufC_instr) || IsJAL(fetchbufC_instr)) &&
+ fetchbufC_v);
+wire take_branchD = ({fetchbufD_v, IsBranch(fetchbufD_instr), predict_takenD} == {`VAL, `TRUE, `TRUE}) ||
+ ((IsRet(fetchbufD_instr)|IsJmp(fetchbufD_instr)|IsCall(fetchbufD_instr) ||
+ IsRTI(fetchbufD_instr)|| IsBrk(fetchbufD_instr) || IsJAL(fetchbufD_instr)) &&
+ fetchbufD_v);
+
+// fetchbuf selects which buffer pair (A/B when 0, C/D when 1) feeds the
+// slot-0 / slot-1 outputs.
+assign take_branch0 = fetchbuf==1'b0 ? take_branchA : take_branchC;
+assign take_branch1 = fetchbuf==1'b0 ? take_branchB : take_branchD;
+wire take_branch = take_branch0 || take_branch1;
+/*
+always @*
+begin
+ pc0 <= thread_en ? (fetchbuf ? pc0b : pc0a) : pc0a;
+ pc1 <= thread_en ? (fetchbuf ? pc1b : pc1a) : pc1a;
+end
+*/
+assign threadx = fetchbuf;
+
+`ifdef FCU_ENH
+FT64_RSB #(AMSB) ursb1
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .regLR(regLR),
+ .queued1(queued1),
+ .queued2(queued2),
+ .fetchbuf0_v(fetchbuf0_v),
+ .fetchbuf0_pc(fetchbuf0_pc),
+ .fetchbuf0_instr(fetchbuf0_instr),
+ .fetchbuf1_v(fetchbuf1_v),
+ .fetchbuf1_pc(fetchbuf1_pc),
+ .fetchbuf1_instr(fetchbuf1_instr),
+ .stompedRets(stompedRets),
+ .stompedRet(stompedRet),
+ .pc(retpc0)
+);
+
+FT64_RSB #(AMSB) ursb2
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .regLR(regLR),
+ .queued1(queued1),
+ .queued2(1'b0),
+ .fetchbuf0_v(fetchbuf1_v),
+ .fetchbuf0_pc(fetchbuf1_pc),
+ .fetchbuf0_instr(fetchbuf1_instr),
+ .fetchbuf1_v(1'b0),
+ .fetchbuf1_pc(32'h00000000),
+ .fetchbuf1_instr(`NOP_INSN),
+ .stompedRets(stompedRets[3:1]),
+ .stompedRet(stompedRet),
+ .pc(retpc1)
+);
+`else
+assign retpc0 = RSTPC;
+assign retpc1 = RSTPC;
+`endif
+
+wire peclk, neclk;
+edge_det ued1 (.rst(rst), .clk(clk4x), .ce(1'b1), .i(clk), .pe(peclk), .ne(neclk), .ee());
+
+always @(posedge clk)
+if (rst) begin
+ pc0 <= RSTPC;
+ pc1 <= RSTPC;
+ fetchbufA_v <= 0;
+ fetchbufB_v <= 0;
+ fetchbufC_v <= 0;
+ fetchbufD_v <= 0;
+ fetchbuf <= 0;
+ panic <= `PANIC_NONE;
+end
+else begin
+
+ did_branchback0 <= take_branch0;
+ did_branchback1 <= take_branch1;
+
+ stompedRet = FALSE;
+
+ begin
+
+ // On a branch miss with threading enabled, all fetch buffers are
+ // invalidated even though the data in the fetch buffer would be valid
+ // for the thread that isn't in a branchmiss state. This is done to
+ // keep things simple. For the thread that doesn't miss, the current
+ // data for the fetch buffer needs to be retrieved again, so the pc
+ // for that thread is assigned the current fetchbuf pc.
+ // For the thread that misses, the pc is simply assigned the misspc.
+ if (branchmiss) begin
+ $display("***********");
+ $display("Branch miss");
+ $display("***********");
+ if (branchmiss_thrd) begin
+ pc1 <= misspc;
+ fetchbufB_v <= `INV;
+ fetchbufD_v <= `INV;
+ end
+ else begin
+ pc0 <= misspc;
+ if (thread_en) begin
+ fetchbufA_v <= `INV;
+ fetchbufC_v <= `INV;
+ end
+ else begin
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ fetchbuf <= 1'b0;
+ end
+ end
+ $display("********************");
+ $display("********************");
+ $display("********************");
+ $display("Branch miss");
+ $display("misspc=%h", misspc);
+ $display("********************");
+ $display("********************");
+ $display("********************");
+ end
+ else if (take_branch) begin
+
+ // update the fetchbuf valid bits as well as fetchbuf itself
+ // ... this must be based on which things are backwards branches, how many things
+ // will get enqueued (0, 1, or 2), and how old the instructions are
+ if (fetchbuf == 1'b0) case ({fetchbufA_v, fetchbufB_v, fetchbufC_v, fetchbufD_v})
+
+ 4'b0000: ; // do nothing
+ 4'b0001: if (thread_en) FetchC();
+ 4'b0010: if (thread_en) FetchD();
+ 4'b0011: ;
+ 4'b0100 :
+ begin
+ if (thread_en) begin
+ FetchC();
+ pc1 <= branch_pcB;
+ end
+ else
+ pc0 <= branch_pcB;
+ fetchbufB_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b0101:
+ begin
+ if (thread_en) begin
+ pc1 <= branch_pcB;
+ FetchC();
+ end
+ else
+ pc0 <= branch_pcB;
+ fetchbufD_v <= `INV;
+ fetchbufB_v <= !(queued1|queuedNop);
+ end
+ 4'b0110:
+ begin
+ if (thread_en)
+ pc1 <= branch_pcB;
+ else begin
+ pc0 <= branch_pcB;
+ fetchbufC_v <= `INV;
+ end
+ fetchbufB_v <= !(queued1|queuedNop);
+ end
+ 4'b0111:
+ begin
+ if (thread_en) begin
+ pc1 <= branch_pcB;
+ fetchbufD_v <= `INV;
+ end
+ else begin
+ pc0 <= branch_pcB;
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ end
+ fetchbufB_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1000 :
+ begin
+ if (thread_en) FetchD();
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1001:
+ begin
+ pc0 <= branch_pcA;
+ if (!thread_en)
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1010:
+ begin
+ pc0 <= branch_pcA;
+ fetchbufC_v <= `INV;
+ if (thread_en) FetchD();
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1011:
+ begin
+ pc0 <= branch_pcA;
+ fetchbufC_v <= `INV;
+ if (!thread_en)
+ fetchbufD_v <= `INV;
+ fetchbufA_v <=!(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1100:
+ if (thread_en) begin
+ if (take_branchA && take_branchB) begin
+ pc0 <= branch_pcA;
+ pc1 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchA) begin
+ FetchD();
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ FetchC();
+ pc1 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ pc0 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ 4'b1101:
+ if (thread_en) begin
+ if (take_branchA && take_branchB) begin
+ pc0 <= branch_pcA;
+ pc1 <= branch_pcB;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ FetchC();
+ pc1 <= branch_pcB;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ fetchbufD_v <= `INV;
+ if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ pc0 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ 4'b1110:
+ if (thread_en) begin
+ if (take_branchA && take_branchB) begin
+ pc0 <= branch_pcA;
+ pc1 <= branch_pcB;
+ fetchbufC_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchA) begin
+ FetchD();
+ pc0 <= branch_pcA;
+ fetchbufC_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ pc1 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ fetchbufC_v <= `INV;
+ if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchB) begin
+ pc0 <= branch_pcB;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ 4'b1111:
+ begin
+ if (thread_en) begin
+ if (take_branchA & take_branchB) begin
+ pc0 <= branch_pcA;
+ pc1 <= branch_pcB;
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ else if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufC_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ else if (take_branchB) begin
+ pc1 <= branch_pcB;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ end
+ else begin
+ if (take_branchA) begin
+ pc0 <= branch_pcA;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ else if (take_branchB) begin
+ pc0 <= branch_pcB;
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ end
+ end
+ default: ;
+ endcase
+ else case ({fetchbufC_v, fetchbufD_v, fetchbufA_v, fetchbufB_v})
+
+ 4'b0000: ; // do nothing
+ 4'b0001: if (thread_en) FetchA();
+ 4'b0010: if (thread_en) FetchB();
+ 4'b0011: ;
+ 4'b0100 :
+ begin
+ if (thread_en) begin
+ FetchA();
+ pc1 <= branch_pcD;
+ end
+ else
+ pc0 <= branch_pcD;
+ fetchbufD_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b0101:
+ begin
+ if (thread_en) begin
+ pc1 <= branch_pcD;
+ FetchA();
+ end
+ else
+ pc0 <= branch_pcD;
+ fetchbufB_v <= `INV;
+ fetchbufD_v <= !(queued1|queuedNop);
+ end
+ 4'b0110:
+ begin
+ if (thread_en)
+ pc1 <= branch_pcD;
+ else begin
+ pc0 <= branch_pcD;
+ fetchbufA_v <= `INV;
+ end
+ fetchbufD_v <= !(queued1|queuedNop);
+ end
+ 4'b0111:
+ begin
+ if (thread_en) begin
+ pc1 <= branch_pcD;
+ fetchbufB_v <= `INV;
+ end
+ else begin
+ pc0 <= branch_pcD;
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ end
+ fetchbufD_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1000 :
+ begin
+ if (thread_en) FetchB();
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1001:
+ begin
+ pc0 <= branch_pcC;
+ if (!thread_en)
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1010:
+ begin
+ pc0 <= branch_pcC;
+ fetchbufA_v <= `INV;
+ if (thread_en) FetchB();
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1011:
+ begin
+ pc0 <= branch_pcC;
+ fetchbufA_v <= `INV;
+ if (!thread_en)
+ fetchbufB_v <= `INV;
+ fetchbufC_v <=!(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ 4'b1100:
+ if (thread_en) begin
+ if (take_branchC && take_branchD) begin
+ pc0 <= branch_pcC;
+ pc1 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchC) begin
+ FetchB();
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchD) begin
+ FetchA();
+ pc1 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchD) begin
+ pc0 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ // {C,D,A,B} = 1101: C, D and B are valid; A is invalid.
+ 4'b1101:
+ if (thread_en) begin
+ // Both threads branch: redirect both PCs and drop the stale B entry.
+ if (take_branchC && take_branchD) begin
+ pc0 <= branch_pcC;
+ pc1 <= branch_pcD;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // Only C branches: redirect pc0.
+ else if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // Only D branches: redirect pc1, refill A and drop the stale B entry.
+ // This arm must test take_branchD; take_branchC is already consumed by
+ // the arm above, so testing it again would make this arm unreachable.
+ else if (take_branchD) begin
+ FetchA();
+ pc1 <= branch_pcD;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ // Single-thread mode: B is always discarded.
+ fetchbufB_v <= `INV;
+ if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchD) begin
+ pc0 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ 4'b1110:
+ if (thread_en) begin
+ if (take_branchC && take_branchD) begin
+ pc0 <= branch_pcC;
+ pc1 <= branch_pcD;
+ fetchbufA_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchC) begin
+ FetchB();
+ pc0 <= branch_pcC;
+ fetchbufA_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchD) begin
+ pc1 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ end
+ else begin
+ fetchbufA_v <= `INV;
+ if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= `INV;
+ if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+ end
+ else if (take_branchD) begin
+ pc0 <= branch_pcD;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+ end
+ // else hardware error
+ end
+ // {C,D,A,B} = 1111: all four fetch buffers are valid.
+ 4'b1111:
+ begin
+ if (thread_en) begin
+ // Both threads branch: redirect both PCs and discard A and B.
+ if (take_branchC & take_branchD) begin
+ pc0 <= branch_pcC;
+ pc1 <= branch_pcD;
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ // Only C branches: pc0 takes C's own target (branch_pcC), matching the
+ // A/B side of this case statement where take_branchA loads branch_pcA.
+ else if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufA_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ // Only D branches: redirect pc1 and discard B.
+ else if (take_branchD) begin
+ pc1 <= branch_pcD;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ end
+ else begin
+ // Single-thread mode: a branch in C also discards D, A and B.
+ if (take_branchC) begin
+ pc0 <= branch_pcC;
+ fetchbufD_v <= `INV;
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued1|queuedNop);
+ end
+ // A branch in D discards A and B only.
+ else if (take_branchD) begin
+ pc0 <= branch_pcD;
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will
+ fetchbuf <= fetchbuf + (queued2|queuedNop);
+ end
+ end
+ end
+ default: ;
+ endcase
+ end // if branchback
+
+ else begin // there is no branchback in the system
+ //
+ // update fetchbufX_v and fetchbuf ... relatively simple, as
+ // there are no backwards branches in the mix
+ if (fetchbuf == 1'b0) case ({fetchbufA_v, fetchbufB_v, (queued1|queuedNop), (queued2|queuedNop)})
+ 4'b00_00 : ; // do nothing
+ 4'b00_01: ;
+ 4'b00_10: ;
+ 4'b00_11: ;
+ 4'b01_00: ; // do nothing
+ 4'b01_01: ;
+ 4'b01_10,
+ 4'b01_11:
+ begin // enqueue fbB and flip fetchbuf
+ fetchbufB_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ 4'b10_00: ; // do nothing
+ 4'b10_01: ;
+ 4'b10_10,
+ 4'b10_11:
+ begin // enqueue fbA and flip fetchbuf
+ fetchbufA_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ 4'b11_00: ; // do nothing
+ 4'b11_01: ;
+ 4'b11_10:
+ begin // enqueue fbA but leave fetchbuf
+ fetchbufA_v <= `INV;
+ end
+ 4'b11_11:
+ begin // enqueue both and flip fetchbuf
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ default: panic <= `PANIC_INVALIDIQSTATE;
+ endcase
+ else case ({fetchbufC_v, fetchbufD_v, (queued1|queuedNop), (queued2|queuedNop)})
+ 4'b00_00 : ; // do nothing
+ 4'b00_01: ;
+ 4'b00_10 : ; // do nothing
+ 4'b00_11 : ; // do nothing
+ 4'b01_00 : ; // do nothing
+ 4'b01_01 : ;
+ 4'b01_10,
+ 4'b01_11 :
+ begin // enqueue fbD and flip fetchbuf
+ fetchbufD_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ 4'b10_00 : ; // do nothing
+ 4'b10_01: ;
+ 4'b10_10,
+ 4'b10_11:
+ begin // enqueue fbC and flip fetchbuf
+ fetchbufC_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ 4'b11_00 : ; // do nothing
+ 4'b11_01: ;
+ 4'b11_10:
+ begin // enqueue fbC but leave fetchbuf
+ fetchbufC_v <= `INV;
+ end
+ 4'b11_11:
+ begin // enqueue both and flip fetchbuf
+ fetchbufC_v <= `INV;
+ fetchbufD_v <= `INV;
+ fetchbuf <= ~fetchbuf;
+ end
+ default: panic <= `PANIC_INVALIDIQSTATE;
+ endcase
+ //
+ // get data iff the fetch buffers are empty
+ //
+ if (fetchbufA_v == `INV && fetchbufB_v == `INV) begin
+ FetchAB();
+ // fetchbuf steering logic correction
+ if (fetchbufC_v==`INV && fetchbufD_v==`INV && phit)
+ fetchbuf <= 1'b0;
+ end
+ else if (fetchbufC_v == `INV && fetchbufD_v == `INV)
+ FetchCD();
+ end
+ //
+ // get data iff the fetch buffers are empty
+ //
+ if (fetchbufA_v == `INV && fetchbufB_v == `INV && fetchbufC_v==`INV && fetchbufD_v==`INV) begin
+ FetchAB();
+ fetchbuf <= 1'b0;
+ end
+ end
+
+ // The fetchbuffer is invalidated at the end of a vector instruction
+ // queue.
+ if (nop_fetchbuf[0]) fetchbufA_v <= `INV;
+ if (nop_fetchbuf[1]) fetchbufB_v <= `INV;
+ if (nop_fetchbuf[2]) fetchbufC_v <= `INV;
+ if (nop_fetchbuf[3]) fetchbufD_v <= `INV;
+end
+
+// Output multiplexers: fetchbuf selects which buffer pair is presented.
+// fetchbuf == 0 exposes A (slot 0) and B (slot 1); otherwise C and D.
+assign fetchbuf0_instr = (fetchbuf == 1'b0) ? fetchbufA_instr : fetchbufC_instr;
+assign fetchbuf0_insln = (fetchbuf == 1'b0) ? fetchbufA_inslen: fetchbufC_inslen;
+assign fetchbuf0_v = (fetchbuf == 1'b0) ? fetchbufA_v : fetchbufC_v ;
+assign fetchbuf0_pc = (fetchbuf == 1'b0) ? fetchbufA_pc : fetchbufC_pc ;
+assign fetchbuf1_instr = (fetchbuf == 1'b0) ? fetchbufB_instr : fetchbufD_instr;
+assign fetchbuf1_insln = (fetchbuf == 1'b0) ? fetchbufB_inslen: fetchbufD_inslen;
+assign fetchbuf1_v = (fetchbuf == 1'b0) ? fetchbufB_v : fetchbufD_v ;
+assign fetchbuf1_pc = (fetchbuf == 1'b0) ? fetchbufB_pc : fetchbufD_pc ;
+// Slot 0 is always tagged thread 0; slot 1 is tagged thread 1 only when
+// threading is enabled.
+assign fetchbuf0_thrd = 1'b0;
+assign fetchbuf1_thrd = thread_en;
+
+reg [2:0] insln0, insln1;
+// Length of the instruction on insn0 (combinational).
+// With SUPPORT_DCI, a CMPRSSD opcode is 2. An EXEC opcode with
+// insn0[7:6]==0 takes the length of codebuf0; anything else takes the
+// length of insn0 itself. Blocking assignments are used because this is
+// purely combinational logic.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn0[5:0]==`CMPRSSD)
+ insln0 = 3'd2;
+ else
+`endif
+ if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC)
+ insln0 = fnInsLength(codebuf0);
+ else
+ insln0 = fnInsLength(insn0);
+end
+
+// Length of the instruction on insn1; same rules as insln0, using codebuf1.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn1[5:0]==`CMPRSSD)
+ insln1 = 3'd2;
+ else
+`endif
+ if (insn1[7:6]==2'b00 && insn1[`INSTRUCTION_OP]==`EXEC)
+ insln1 = fnInsLength(codebuf1);
+ else
+ insln1 = fnInsLength(insn1);
+end
+
+reg [47:0] cinsn0, cinsn1;
+
+// Instruction actually loaded into the fetch buffers for slot 0
+// (combinational). In priority order:
+//   - with SUPPORT_DCI, a CMPRSSD opcode selects expand0
+//   - an EXEC opcode with insn0[7:6]==0 selects codebuf0
+//   - insn0[7] set selects xinsn0
+//   - otherwise insn0 is passed through unchanged
+// Blocking assignments are used because this is purely combinational logic.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn0[5:0]==`CMPRSSD)
+ cinsn0 = expand0;
+ else
+`endif
+ if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC)
+ cinsn0 = codebuf0;
+ else if (insn0[7])
+ cinsn0 = xinsn0;
+ else
+ cinsn0 = insn0;
+end
+
+// Slot 1 equivalent of cinsn0, using insn1, expand1, codebuf1 and xinsn1.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn1[5:0]==`CMPRSSD)
+ cinsn1 = expand1;
+ else
+`endif
+ if (insn1[7:6]==2'b00 && insn1[`INSTRUCTION_OP]==`EXEC)
+ cinsn1 = codebuf1;
+ else if (insn1[7])
+ cinsn1 = xinsn1;
+ else
+ cinsn1 = insn1;
+end
+
+// Load fetch buffer A with cinsn0 at address pc0, mark it valid, and advance
+// pc0 when there is a cache hit (phit) and the PC is not frozen.
+// pc0 advances by insln0 when threading is enabled or `WAYS is 1, and by
+// insln0 + insln1 otherwise.
+task FetchA;
+begin
+ fetchbufA_instr <= cinsn0;
+ fetchbufA_v <= `VAL;
+ fetchbufA_pc <= pc0;
+ if (phit && ~freezePC) begin
+ if (thread_en)
+ pc0 <= pc0 + insln0;
+ else if (`WAYS > 1)
+ pc0 <= pc0 + insln0 + insln1;
+ else
+ pc0 <= pc0 + insln0;
+ end
+end
+endtask
+
+// Load fetch buffer B with cinsn1. The buffer is marked valid only when
+// `WAYS > 1. Its address is pc1 in threaded mode, otherwise pc0 + insln0.
+// pc1 advances by insln1 on a cache hit in threaded mode.
+// NOTE(review): unlike FetchA, the pc1 update does not test freezePC -
+// confirm that this is intended.
+task FetchB;
+begin
+ fetchbufB_instr <= cinsn1;
+ fetchbufB_v <= `WAYS > 1;
+ if (thread_en)
+ fetchbufB_pc <= pc1;
+ else
+ fetchbufB_pc <= pc0 + insln0;
+ if (phit & thread_en)
+ pc1 <= pc1 + insln1;
+end
+endtask
+
+
+// Load both fetch buffers A and B in the same clock.
+task FetchAB;
+begin
+ FetchA();
+ FetchB();
+end
+endtask
+
+// Load fetch buffer C with cinsn0 at address pc0, mark it valid, and advance
+// pc0 when there is a cache hit (phit) and the PC is not frozen.
+// The pc0 increment follows the same rules as FetchA.
+task FetchC;
+begin
+ fetchbufC_instr <= cinsn0;
+ fetchbufC_v <= `VAL;
+ fetchbufC_pc <= pc0;
+ if (phit && ~freezePC) begin
+ if (thread_en)
+ pc0 <= pc0 + insln0;
+ else if (`WAYS > 1)
+ pc0 <= pc0 + insln0 + insln1;
+ else
+ pc0 <= pc0 + insln0;
+ end
+end
+endtask
+
+// Load fetch buffer D with cinsn1. The buffer is marked valid only when
+// `WAYS > 1. Its address is pc1 in threaded mode, otherwise pc0 + insln0.
+// pc1 advances by insln1 on a cache hit in threaded mode.
+// NOTE(review): unlike FetchC, the pc1 update does not test freezePC -
+// confirm that this is intended.
+task FetchD;
+begin
+ fetchbufD_instr <= cinsn1;
+ fetchbufD_v <= `WAYS > 1;
+ if (thread_en)
+ fetchbufD_pc <= pc1;
+ else
+ fetchbufD_pc <= pc0 + insln0;
+ if (phit & thread_en)
+ pc1 <= pc1 + insln1;
+end
+endtask
+
+// Load both fetch buffers C and D in the same clock.
+task FetchCD;
+begin
+ FetchC();
+ FetchD();
+end
+endtask
+
+endmodule
+
Index: FT64v7/rtl/twoway/FT64_fetchbuf_x1.v
===================================================================
--- FT64v7/rtl/twoway/FT64_fetchbuf_x1.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_fetchbuf_x1.v (revision 60)
@@ -0,0 +1,527 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_fetchbuf_x1.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+`include "FT64_config.vh"
+`include "FT64_defines.vh"
+
+// FETCH
+//
+// fetch exactly one instruction from memory into the fetch buffer
+// unless either one of the buffers is still full, in which case we
+// do nothing (kinda like alpha approach)
+//
+// One-way fetch stage. Two fetch buffers (A and B) are loaded from insn0 by
+// the FetchA/FetchB tasks; the fetchbuf register selects which of the two is
+// presented on the fetchbuf0_* outputs.
+module FT64_fetchbuf_x1(rst, clk4x, clk, fcu_clk,
+ cs_i, cyc_i, stb_i, ack_o, we_i, adr_i, dat_i,
+ cmpgrp,
+ freezePC, thread_en, pred_on,
+ regLR,
+ insn0, phit,
+ threadx,
+ branchmiss, misspc, branchmiss_thrd, predict_taken0,
+ predict_takenA, predict_takenB,
+ queued1, queuedNop,
+ pc0, fetchbuf, fetchbufA_v, fetchbufB_v,
+ fetchbufA_instr, fetchbufA_pc, fetchbufA_pbyte,
+ fetchbufB_instr, fetchbufB_pc, fetchbufB_pbyte,
+ fetchbuf0_instr, fetchbuf0_insln,
+ fetchbuf0_thrd,
+ fetchbuf0_pc,
+ fetchbuf0_v,
+ fetchbuf0_pbyte,
+ codebuf0,
+ btgtA, btgtB,
+ nop_fetchbuf,
+ take_branch0,
+ stompedRets,
+ panic
+);
+parameter AMSB = `AMSB; // most significant bit index of an address
+parameter RSTPC = 64'hFFFFFFFFFFFC0100; // value loaded into pc0 on reset
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+input rst;
+input clk4x; // clock for the edge detector on clk
+input clk; // clock for the fetch buffers and pc0
+input fcu_clk; // clock for the return stack buffer (FCU_ENH only)
+// Slave bus port used to write the decompression table (SUPPORT_DCI only).
+input cs_i;
+input cyc_i;
+input stb_i;
+output ack_o;
+input we_i;
+input [15:0] adr_i;
+input [55:0] dat_i;
+input [2:0] cmpgrp; // upper index bits for decompression table reads
+input freezePC; // when set, pc0 is not advanced by a fetch
+input thread_en; // not referenced in this module
+input pred_on; // instructions carry a leading byte (kept as *_pbyte)
+input [4:0] regLR; // passed through to the return stack buffer
+input [55:0] insn0; // instruction bytes at pc0
+input phit; // pc0 advances only when this is set
+output threadx; // mirrors fetchbuf
+input branchmiss; // redirect pc0 to misspc and flush both buffers
+input [AMSB:0] misspc;
+input branchmiss_thrd;
+output predict_taken0;
+input predict_takenA; // branch prediction for the instruction in buffer A
+input predict_takenB; // branch prediction for the instruction in buffer B
+input queued1; // the presented instruction was queued
+input queuedNop; // treated the same as queued1 by the buffer logic
+output reg [AMSB:0] pc0;
+output reg fetchbuf; // 0: buffer A is presented, 1: buffer B is presented
+output reg fetchbufA_v;
+output reg fetchbufB_v;
+output fetchbuf0_thrd;
+output reg [47:0] fetchbufA_instr;
+output reg [7:0] fetchbufA_pbyte;
+output reg [47:0] fetchbufB_instr;
+output reg [7:0] fetchbufB_pbyte;
+output reg [AMSB:0] fetchbufA_pc;
+output reg [AMSB:0] fetchbufB_pc;
+output [47:0] fetchbuf0_instr;
+output [AMSB:0] fetchbuf0_pc;
+output [2:0] fetchbuf0_insln;
+output fetchbuf0_v;
+output [7:0] fetchbuf0_pbyte;
+input [55:0] codebuf0; // replacement instruction used for EXEC
+input [AMSB:0] btgtA; // target used for R2/BRK/JAL in buffer A
+input [AMSB:0] btgtB; // target used for R2/BRK/JAL in buffer B
+input [3:0] nop_fetchbuf; // bits 0 and 1 invalidate buffers A and B
+output take_branch0;
+input [3:0] stompedRets;
+output reg [3:0] panic;
+integer n;
+
+// Instruction selected for loading into a fetch buffer (see the always
+// block that drives it further down).
+reg [55:0] cinsn0;
+
+//`include "FT64_decode.vh"
+
+// TRUE when the opcode field of isn is one of Bcc, BBc, BEQI or BCHK.
+function IsBranch;
+input [47:0] isn;
+begin
+ casex(isn[`INSTRUCTION_OP])
+ `Bcc, `BBc, `BEQI, `BCHK:
+  IsBranch = TRUE;
+ default:
+  IsBranch = FALSE;
+ endcase
+end
+endfunction
+
+// TRUE when the opcode field of isn is JAL.
+function IsJAL;
+input [47:0] isn;
+begin
+ IsJAL = (isn[`INSTRUCTION_OP] == `JAL);
+end
+endfunction
+
+// TRUE when the opcode field of isn is JMP.
+function IsJmp;
+input [47:0] isn;
+begin
+ IsJmp = (isn[`INSTRUCTION_OP] == `JMP);
+end
+endfunction
+
+// TRUE when the opcode field of isn is CALL.
+function IsCall;
+input [47:0] isn;
+begin
+ IsCall = (isn[`INSTRUCTION_OP] == `CALL);
+end
+endfunction
+
+// TRUE when the opcode field of isn is RET.
+function IsRet;
+input [47:0] isn;
+begin
+ IsRet = (isn[`INSTRUCTION_OP] == `RET);
+end
+endfunction
+
+// TRUE when the opcode field of isn is BRK.
+function IsBrk;
+input [47:0] isn;
+begin
+ IsBrk = (isn[`INSTRUCTION_OP] == `BRK);
+end
+endfunction
+
+// TRUE when isn is an R2-format instruction whose S2 field is RTI.
+function IsRTI;
+input [47:0] isn;
+begin
+ IsRTI = (isn[`INSTRUCTION_OP] == `R2) && (isn[`INSTRUCTION_S2] == `RTI);
+end
+endfunction
+
+// Instruction length: 4, 6 or 2 selected by ins[7:6] (and 2 for CMPRSSD when
+// SUPPORT_DCI is defined), with bit 0 set when pred_on is asserted.
+function [2:0] fnInsLength;
+input [47:0] ins;
+reg [2:0] base_len;
+begin
+`ifdef SUPPORT_DCI
+ if (ins[`INSTRUCTION_OP]==`CMPRSSD)
+  base_len = 3'd2;
+ else
+`endif
+ case(ins[7:6])
+ 2'd0: base_len = 3'd4;
+ 2'd1: base_len = 3'd6;
+ default: base_len = 3'd2;
+ endcase
+ fnInsLength = base_len | pred_on;
+end
+endfunction
+
+// Lengths of the instructions currently held in fetch buffers A and B.
+wire [2:0] fetchbufA_inslen;
+wire [2:0] fetchbufB_inslen;
+FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen, pred_on);
+FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen, pred_on);
+
+// Expanded form of the 16-bit instruction at the head of insn0. With pred_on
+// the 16 bits are taken one byte further up (insn0[23:8]).
+wire [47:0] xinsn0;
+
+FT64_iexpander ux1
+(
+ .cinstr(pred_on ? insn0[23:8] : insn0[15:0]),
+ .expand(xinsn0)
+);
+
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Table of decompressed instructions.
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Every selected bus cycle is acknowledged combinationally.
+assign ack_o = cs_i & cyc_i & stb_i;
+`ifdef SUPPORT_DCI
+// 2048 entries. Reads use an 11-bit index {cmpgrp, 8-bit code}, so writes
+// must also use 11 address bits (adr_i[13:3]) to reach every entry.
+reg [47:0] DecompressTable [0:2047];
+always @(posedge clk)
+ if (cs_i & cyc_i & stb_i & we_i)
+ DecompressTable[adr_i[13:3]] <= dat_i[47:0];
+// With pred_on the 8-bit code is taken one byte further up in insn0.
+wire [47:0] expand0 = DecompressTable[{cmpgrp,pred_on ? insn0[23:16]:insn0[15:8]}];
+`endif
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+reg thread;
+reg stompedRet; // fed to the return stack buffer; not assigned in this module
+reg ret0Counted;
+wire [AMSB:0] retpc0; // return address predicted by the return stack buffer
+
+// Asserted when the currently selected buffer holds a valid branch
+// instruction that is predicted taken.
+assign predict_taken0 = (fetchbuf==1'b0) ? ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE})
+ : ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE});
+
+reg [AMSB:0] branch_pcA;
+reg [AMSB:0] branch_pcB;
+
+// Target address for the instruction held in fetch buffer A, by opcode:
+//   RET           -> retpc0
+//   JMP / CALL    -> absolute target in bits 39:0, upper bits from the pc
+//   R2, BRK, JAL  -> btgtA
+//   anything else -> branch form: sign-extended displacement added to
+//                    pc[31:8], low byte taken from instruction fields
+always @*
+begin
+case(fetchbufA_instr[`INSTRUCTION_OP])
+`RET:
+ branch_pcA = retpc0;
+`JMP,`CALL:
+ begin
+ branch_pcA[63:40] = fetchbufA_pc[63:40];
+ branch_pcA[39:0] = fetchbufA_instr[6] ? fetchbufA_instr[47:8]
+  : {fetchbufA_pc[39:24],fetchbufA_instr[31:8]};
+ end
+`R2,`BRK,`JAL: // R2 here is RTI
+ branch_pcA = btgtA;
+default:
+ begin
+ branch_pcA[63:32] = fetchbufA_pc[63:32];
+ branch_pcA[31:8] = fetchbufA_pc[31:8] +
+  ((fetchbufA_instr[7:6]==2'b01)
+   ? {{5{fetchbufA_instr[47]}},fetchbufA_instr[47:29]}
+   : {{21{fetchbufA_instr[31]}},fetchbufA_instr[31:29]});
+ branch_pcA[7:0] = {fetchbufA_instr[28:23],fetchbufA_instr[17:16]};
+ end
+endcase
+end
+
+// Target address for the instruction held in fetch buffer B, by opcode:
+//   RET           -> retpc0
+//   JMP / CALL    -> absolute target in bits 39:0, upper bits from the pc
+//   R2, BRK, JAL  -> btgtB
+//   anything else -> branch form: sign-extended displacement added to
+//                    pc[31:8], low byte taken from instruction fields
+always @*
+begin
+case(fetchbufB_instr[`INSTRUCTION_OP])
+`RET:
+ branch_pcB = retpc0;
+`JMP,`CALL:
+ begin
+ branch_pcB[63:40] = fetchbufB_pc[63:40];
+ branch_pcB[39:0] = fetchbufB_instr[6] ? fetchbufB_instr[47:8]
+  : {fetchbufB_pc[39:24],fetchbufB_instr[31:8]};
+ end
+`R2,`BRK,`JAL: // R2 here is RTI
+ branch_pcB = btgtB;
+default:
+ begin
+ branch_pcB[63:32] = fetchbufB_pc[63:32];
+ branch_pcB[31:8] = fetchbufB_pc[31:8] +
+  ((fetchbufB_instr[7:6]==2'b01)
+   ? {{5{fetchbufB_instr[47]}},fetchbufB_instr[47:29]}
+   : {{21{fetchbufB_instr[31]}},fetchbufB_instr[31:29]});
+ branch_pcB[7:0] = {fetchbufB_instr[28:23],fetchbufB_instr[17:16]};
+ end
+endcase
+end
+
+// take_branchX: buffer X holds a valid instruction that redirects fetch.
+// That is either a branch predicted taken, or a JMP/CALL. With FCU_ENH
+// defined, RET, RTI, BRK and JAL also count.
+wire take_branchA = ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE}) || ((
+`ifdef FCU_ENH
+ IsRet(fetchbufA_instr)
+ || IsRTI(fetchbufA_instr)|| IsBrk(fetchbufA_instr) || IsJAL(fetchbufA_instr) ||
+`endif
+ IsJmp(fetchbufA_instr)||IsCall(fetchbufA_instr)) &&
+ fetchbufA_v);
+wire take_branchB = ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE}) || ((
+`ifdef FCU_ENH
+ IsRet(fetchbufB_instr)
+ || IsRTI(fetchbufB_instr)|| IsBrk(fetchbufB_instr) || IsJAL(fetchbufB_instr) ||
+`endif
+ IsJmp(fetchbufB_instr)||IsCall(fetchbufB_instr)) &&
+ fetchbufB_v);
+
+// Only the currently selected buffer can trigger a redirect.
+wire take_branch = (fetchbuf==1'b0) ? take_branchA : take_branchB;
+assign take_branch0 = take_branch;
+
+/*
+always @*
+begin
+ pc0 <= thread_en ? (fetchbuf ? pc0b : pc0a) : pc0a;
+ pc1 <= thread_en ? (fetchbuf ? pc1b : pc1a) : pc1a;
+end
+*/
+// threadx simply mirrors the buffer select bit.
+assign threadx = fetchbuf;
+
+// Return stack buffer producing retpc0. Only the first issue slot is
+// connected; the second slot is tied off (never valid, never queued).
+`ifdef FCU_ENH
+FT64_RSB #(AMSB) ursb1
+(
+ .rst(rst),
+ .clk(fcu_clk),
+ .regLR(regLR),
+ .queued1(queued1),
+ .queued2(1'b0),
+ .fetchbuf0_v(fetchbuf0_v),
+ .fetchbuf0_pc(fetchbuf0_pc),
+ .fetchbuf0_instr(fetchbuf0_instr),
+ .fetchbuf1_v(1'b0),
+ .fetchbuf1_pc(RSTPC),
+ .fetchbuf1_instr(`NOP_INSN),
+ .stompedRets(stompedRets),
+ .stompedRet(stompedRet),
+ .pc(retpc0)
+);
+
+`else
+// Without the enhanced fetch control unit the return address defaults to
+// the reset PC. This module has no retpc1 net, so only retpc0 is driven.
+assign retpc0 = RSTPC;
+`endif
+
+// Edge detector on clk, sampled with clk4x. peclk and neclk are not
+// referenced anywhere else in this module.
+wire peclk, neclk;
+edge_det ued1 (.rst(rst), .clk(clk4x), .ce(1'b1), .i(clk), .pe(peclk), .ne(neclk), .ee());
+
+// Registered copy of take_branch (masked by branchmiss) from the previous clock.
+reg did_branch;
+
+// Fetch buffer state machine. Per clock, in priority order:
+//   1. branchmiss : load pc0 with misspc and invalidate both buffers
+//   2. take_branch: load pc0 with the branch target of the selected buffer
+//   3. otherwise  : retire the selected buffer once queued, refill an empty one
+// A final unconditional refill runs whenever both buffers were empty at the
+// start of the clock.
+// NOTE(review): that final refill is not gated by branchmiss. Because the
+// last nonblocking assignment wins, FetchA()'s write to pc0 and fetchbufA_v
+// overrides "pc0 <= misspc" and "fetchbufA_v <= `INV" when a miss arrives
+// while both buffers are empty. Confirm that branchmiss is held long enough
+// for this not to matter.
+always @(posedge clk)
+if (rst) begin
+ pc0 <= RSTPC;
+ fetchbufA_v <= 1'b0;
+ fetchbufB_v <= 1'b0;
+ fetchbuf <= 1'b0;
+ panic <= `PANIC_NONE;
+ did_branch <= 1'b0;
+end
+else begin
+
+ did_branch <= take_branch & ~branchmiss;
+
+ begin
+
+ // On a branch miss with threading enabled all fetch buffers are
+ // invalidated even though the data in the fetch buffer would be valid
+ // for the thread that isn't in a branchmiss state. This is done to
+ // keep things simple. For the thread that doesn't miss the current
+ // data for the fetch buffer needs to be retrieved again, so the pc
+ // for that thread is assigned the current fetchbuf pc.
+ // For the thread that misses the pc is simply assigned the misspc.
+ // (This one-way module has a single pc, pc0, which takes misspc.)
+ if (branchmiss) begin
+ pc0 <= misspc;
+ fetchbufA_v <= `INV;
+ fetchbufB_v <= `INV;
+ fetchbuf <= 1'b0;
+ $display("********************");
+ $display("********************");
+ $display("********************");
+ $display("Branch miss");
+ $display("misspc=%h", misspc);
+ $display("********************");
+ $display("********************");
+ $display("********************");
+ end
+// else if (cinsn0[`INSTRUCTION_OP]==`CALL || cinsn0[`INSTRUCTION_OP]==`JMP) begin
+// pc0[31:0] = cinsn0[6] ? {cinsn0[47:8]} : {pc0[31:24],cinsn0[31:8]};
+// fetchbufA_v <= `INV;
+// fetchbufB_v <= `INV;
+// fetchbuf <= 1'b0;
+// end
+ else if (take_branch) begin
+ if (fetchbuf == 1'b0) begin
+ // In this case fetchbufA must be valid, or take_branch wouldn't be.
+ case(fetchbufB_v)
+ 1'b0:
+ // B is empty: redirect pc0; A stays valid until it is queued.
+ begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= (queued1|queuedNop);
+ end
+ 1'b1:
+ // B is valid. If the redirect already happened last clock, reload B
+ // from the current pc0; otherwise redirect now and discard B.
+ if (did_branch) begin
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbuf <= (queued1|queuedNop);
+ FetchB();
+ end
+ else
+ begin
+ pc0 <= branch_pcA;
+ fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will
+ fetchbufB_v <= `INV;
+ fetchbuf <= (queued1|queuedNop);
+ end
+ endcase
+ end
+ else begin
+ // Mirror image of the above with the roles of A and B swapped;
+ // fetchbuf returns to 0 once B has been queued.
+ case(fetchbufA_v)
+ 1'b0:
+ begin
+ pc0 <= branch_pcB;
+ fetchbufB_v <= !(queued1|queuedNop);
+ fetchbuf <= !(queued1|queuedNop);
+ end
+ 1'b1:
+ if (did_branch) begin
+ fetchbufB_v <= !(queued1|queuedNop);
+ fetchbuf <= ~(queued1|queuedNop);
+ FetchA();
+ end
+ else
+ begin
+ pc0 <= branch_pcB;
+ fetchbufB_v <= !(queued1|queuedNop);
+ fetchbufA_v <= `INV;
+ fetchbuf <= !(queued1|queuedNop);
+ end
+ endcase
+ end
+ end // if branch
+
+ else begin // there is no branchback in the system
+ // update fetchbufX_v and fetchbuf ... relatively simple, as
+ // there are no backwards branches in the mix
+ // A queue event without a valid selected buffer (2'b01) raises panic.
+ if (fetchbuf == 1'b0) case ({fetchbufA_v, (queued1|queuedNop)})
+ 2'b00: ; // do nothing
+ 2'b10: ;
+ 2'b11: begin fetchbufA_v <= `INV; fetchbuf <= ~fetchbuf; end
+ default: panic <= `PANIC_INVALIDIQSTATE;
+ endcase
+ else case ({fetchbufB_v, (queued1|queuedNop)})
+ 2'b00: ; // do nothing
+ 2'b10: ;
+ 2'b11: begin fetchbufB_v <= `INV; fetchbuf <= ~fetchbuf; end
+ default: panic <= `PANIC_INVALIDIQSTATE;
+ endcase
+ //
+ // get data iff the fetch buffers are empty
+ //
+ if (fetchbufA_v == `INV) begin
+ FetchA();
+ // fetchbuf steering logic correction
+ if (fetchbufB_v==`INV && phit)
+ fetchbuf <= 1'b0;
+ end
+ else if (fetchbufB_v == `INV) begin
+ FetchB();
+ end
+ end
+ //
+ // get data iff the fetch buffers are empty
+ //
+ if (fetchbufA_v == `INV && fetchbufB_v == `INV) begin
+ FetchA();
+ fetchbuf <= 1'b0;
+ end
+// // Steer fetchbuf to the valid buffer.
+// else if (fetchbufB_v == `INV)
+// fetchbuf <= 1'b0;
+// else if (fetchbufA_v == `INV)
+// fetchbuf <= 1'b1;
+// else if (fetchbufA_v == `INV) begin
+// FetchA();
+// end
+// else if (fetchbufB_v == `INV) begin
+// FetchB();
+// end
+end
+
+ // The fetchbuffer is invalidated at the end of a vector instruction
+ // queue.
+ // These come last, so they override any valid-bit update made above.
+ if (nop_fetchbuf[0]) fetchbufA_v <= `INV;
+ if (nop_fetchbuf[1]) fetchbufB_v <= `INV;
+end
+
+// Output multiplexers: present buffer A when fetchbuf is 0, buffer B otherwise.
+assign fetchbuf0_instr = (fetchbuf == 1'b0) ? fetchbufA_instr : fetchbufB_instr;
+assign fetchbuf0_insln = (fetchbuf == 1'b0) ? fetchbufA_inslen: fetchbufB_inslen;
+assign fetchbuf0_v = (fetchbuf == 1'b0) ? fetchbufA_v : fetchbufB_v ;
+assign fetchbuf0_pc = (fetchbuf == 1'b0) ? fetchbufA_pc : fetchbufB_pc ;
+// The thread tag is tied to 0 in this module.
+assign fetchbuf0_thrd = 1'b0;
+assign fetchbuf0_pbyte = (fetchbuf == 1'b0) ? fetchbufA_pbyte : fetchbufB_pbyte;
+
+reg [2:0] insln0;
+// Length of the instruction on insn0, used to advance pc0 (combinational).
+// Blocking assignments are used because this is purely combinational logic.
+// NOTE(review): the opcode and length bits are read from insn0[7:0] even
+// when pred_on is set, while the cinsn0 logic looks one byte further up in
+// that mode - confirm which is intended.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn0[5:0]==`CMPRSSD)
+ insln0 = 3'd2 | pred_on;
+ else
+`endif
+ if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC)
+ insln0 = fnInsLength(codebuf0);
+ else
+ insln0 = fnInsLength(insn0);
+end
+
+
+// Instruction loaded into the fetch buffers (combinational). In priority
+// order:
+//   - with SUPPORT_DCI, a CMPRSSD opcode selects expand0; the opcode is
+//     read from insn0[13:8] when pred_on is set, else from insn0[5:0]
+//   - EXEC selects codebuf0
+//   - a set top size bit (insn0[15] with pred_on, insn0[7] without) selects
+//     xinsn0; with pred_on the leading byte insn0[7:0] is kept below it
+//   - otherwise insn0 is passed through unchanged
+// Blocking assignments are used because this is purely combinational logic.
+// NOTE(review): in the pred_on EXEC test the opcode is still read through
+// `INSTRUCTION_OP rather than one byte further up - confirm.
+always @*
+begin
+`ifdef SUPPORT_DCI
+ if (insn0[13:8]==`CMPRSSD && pred_on)
+ cinsn0 = expand0;
+ else if (insn0[5:0]==`CMPRSSD && !pred_on)
+ cinsn0 = expand0;
+ else
+`endif
+ if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && !pred_on)
+ cinsn0 = codebuf0;
+ else if (insn0[15:14]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && pred_on)
+ cinsn0 = codebuf0;
+ else if (insn0[15] & pred_on)
+ cinsn0 = {xinsn0,insn0[7:0]};
+ else if (insn0[7] & ~pred_on)
+ cinsn0 = xinsn0;
+ else
+ cinsn0 = insn0;
+end
+
+// Load fetch buffer A from cinsn0 at address pc0 and mark it valid.
+// With pred_on the instruction is taken from cinsn0[55:8]; cinsn0[7:0] is
+// always stored as the pbyte. pc0 advances by insln0 on a cache hit when
+// the PC is not frozen; otherwise it is explicitly held.
+task FetchA;
+begin
+ fetchbufA_instr <= pred_on ? cinsn0[55:8] : cinsn0[47:0];
+ // Nonblocking, like every other register written from the clocked block,
+ // so other logic on the same clock edge sees the previous value.
+ fetchbufA_pbyte <= cinsn0[7:0];
+ fetchbufA_v <= `VAL;
+ fetchbufA_pc <= pc0;
+ if (phit && ~freezePC)
+ pc0 <= pc0 + insln0;
+ else
+ pc0 <= pc0;
+end
+endtask
+
+// Load fetch buffer B from cinsn0 at address pc0 and mark it valid.
+// With pred_on the instruction is taken from cinsn0[55:8]; cinsn0[7:0] is
+// always stored as the pbyte. pc0 advances by insln0 on a cache hit when
+// the PC is not frozen; otherwise it is explicitly held.
+task FetchB;
+begin
+ fetchbufB_instr <= pred_on ? cinsn0[55:8] : cinsn0[47:0];
+ // Nonblocking, like every other register written from the clocked block,
+ // so other logic on the same clock edge sees the previous value.
+ fetchbufB_pbyte <= cinsn0[7:0];
+ fetchbufB_v <= `VAL;
+ fetchbufB_pc <= pc0;
+ if (phit && ~freezePC)
+ pc0 <= pc0 + insln0;
+ else
+ pc0 <= pc0;
+end
+endtask
+
+endmodule
Index: FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v
===================================================================
--- FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v (revision 60)
@@ -0,0 +1,282 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Register file with one write port and four read ports.
+// ============================================================================
+//
+`include "FT64_config.vh"
+
+// Behavioural model of the register-file RAM.
+//   Port A: synchronous write on clka, gated by ena, with one write enable
+//           per byte lane (wea[0] controls bits 7:0 ... wea[7] bits 63:56).
+//   Port B: read-only; the read address is registered on clkb and the data
+//           output follows the registered address combinationally.
+// Parameters:
+//   WID  - data width; the eight byte lanes assume WID = 64.
+//   RBIT - index of the most significant address bit (depth = 2**(RBIT+1)).
+// The storage array is sized from WID and RBIT and is cleared to zero at
+// time zero. enb is part of the port list but is not used by this model.
+module FT64_regfileRam_sim_oc(clka, ena, wea, addra, dina, clkb, enb, addrb, doutb);
+parameter WID=64;
+parameter RBIT = 11;
+localparam DEPTH = 1 << (RBIT+1);
+input clka;
+input ena;
+input [7:0] wea;
+input [RBIT:0] addra;
+input [WID-1:0] dina;
+input clkb;
+input enb;
+input [RBIT:0] addrb;
+output [WID-1:0] doutb;
+
+integer n;
+integer lane;
+(* RAM_STYLE="BLOCK" *)
+reg [WID-1:0] mem [0:DEPTH-1];
+reg [RBIT:0] raddrb;
+
+// Start with every location cleared.
+initial begin
+    for (n = 0; n < DEPTH; n = n + 1)
+        mem[n] = 0;
+end
+
+// Byte-lane write: each enabled lane of the addressed word is replaced.
+always @(posedge clka)
+    if (ena)
+        for (lane = 0; lane < 8; lane = lane + 1)
+            if (wea[lane])
+                mem[addra][lane*8 +: 8] <= dina[lane*8 +: 8];
+
+// Registered read address, unregistered data output.
+always @(posedge clkb)
+    raddrb <= addrb;
+assign doutb = mem[raddrb];
+
+endmodule
+
+// Register file with one write port and four read ports.
+//
+// Four RAM instances are written in parallel with the same data, one per
+// read port, so each read port has a private copy of the register file.
+//   clk, wr0, we0, wa0, i0 - write clock, write strobe, per-byte write
+//                            enables, write address and write data
+//   rclk, ra0..ra3         - read clock and the four read addresses
+//   o0..o3                 - read data
+// Read-data rules, applied per byte lane:
+//   1. an address whose low five bits are zero always reads as zero;
+//   2. a write in progress to the same address with that lane enabled is
+//      forwarded from i0 (write-through bypass);
+//   3. otherwise the lane comes from the RAM copy of that read port.
+// When SIM is defined the behavioural RAM model is used; otherwise
+// FT64_regfileRam (defined outside this file) is instantiated.
+module FT64_regfile1w4r_oc(clk, wr0, we0, wa0, i0,
+    rclk, ra0, ra1, ra2, ra3, o0, o1, o2, o3);
+parameter WID=64;
+parameter RBIT = 11;
+input clk;
+input wr0;
+input [7:0] we0;
+input [RBIT:0] wa0;
+input [WID-1:0] i0;
+input rclk;
+input [RBIT:0] ra0;
+input [RBIT:0] ra1;
+input [RBIT:0] ra2;
+input [RBIT:0] ra3;
+output [WID-1:0] o0;
+output [WID-1:0] o1;
+output [WID-1:0] o2;
+output [WID-1:0] o3;
+
+// Write-port signals as presented to the RAMs.
+reg wr;
+reg [RBIT:0] wa;
+reg [WID-1:0] i;
+reg [7:0] we;
+// Raw RAM read data, before zero-register and bypass handling.
+wire [WID-1:0] o00, o01, o02, o03;
+
+`ifdef SIM
+FT64_regfileRam_sim_oc urf10 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra0),
+    .doutb(o00)
+);
+
+FT64_regfileRam_sim_oc urf11 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra1),
+    .doutb(o01)
+);
+
+FT64_regfileRam_sim_oc urf12 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra2),
+    .doutb(o02)
+);
+
+FT64_regfileRam_sim_oc urf13 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra3),
+    .doutb(o03)
+);
+
+`else
+FT64_regfileRam urf10 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra0),
+    .dinb(64'h00),
+    .doutb(o00)
+);
+
+FT64_regfileRam urf11 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra1),
+    .dinb(64'h00),
+    .doutb(o01)
+);
+
+FT64_regfileRam urf12 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra2),
+    .dinb(64'h00),
+    .doutb(o02)
+);
+
+FT64_regfileRam urf13 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra3),
+    .dinb(64'h00),
+    .doutb(o03)
+);
+
+`endif
+
+// The write port is passed straight through to the RAMs. This is
+// combinational logic, so blocking assignments are used.
+always @*
+begin
+    wr = wr0;
+    we = we0;
+    wa = wa0;
+    i = i0;
+end
+
+// Per-byte-lane output selection for the four read ports: zero register,
+// then write-through bypass, then RAM data.
+genvar g;
+generate
+for (g = 0; g < 8; g = g + 1) begin : bypass
+    assign o0[g*8 +: 8] = ra0[4:0]==5'd0 ? {8{1'b0}} :
+        (wr0 && we0[g] && (ra0==wa0)) ? i0[g*8 +: 8] : o00[g*8 +: 8];
+    assign o1[g*8 +: 8] = ra1[4:0]==5'd0 ? {8{1'b0}} :
+        (wr0 && we0[g] && (ra1==wa0)) ? i0[g*8 +: 8] : o01[g*8 +: 8];
+    assign o2[g*8 +: 8] = ra2[4:0]==5'd0 ? {8{1'b0}} :
+        (wr0 && we0[g] && (ra2==wa0)) ? i0[g*8 +: 8] : o02[g*8 +: 8];
+    assign o3[g*8 +: 8] = ra3[4:0]==5'd0 ? {8{1'b0}} :
+        (wr0 && we0[g] && (ra3==wa0)) ? i0[g*8 +: 8] : o03[g*8 +: 8];
+end
+endgenerate
+
+endmodule
+
Index: FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v
===================================================================
--- FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v (nonexistent)
+++ FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v (revision 60)
@@ -0,0 +1,516 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Register file with two write ports and six read ports.
+// ============================================================================
+//
+`include "FT64_config.vh"
+
+// Behavioural model of the register-file RAM.
+//   Port A: synchronous write on clka, gated by ena, with one write enable
+//           per byte lane (wea[0] controls bits 7:0 ... wea[7] bits 63:56).
+//   Port B: read-only; the read address is registered on clkb and the data
+//           output follows the registered address combinationally.
+// Parameters:
+//   WID  - data width; the eight byte lanes assume WID = 64.
+//   RBIT - index of the most significant address bit (depth = 2**(RBIT+1)).
+// The storage array is sized from WID and RBIT and is cleared to zero at
+// time zero. enb is part of the port list but is not used by this model.
+module FT64_regfileRam_sim(clka, ena, wea, addra, dina, clkb, enb, addrb, doutb);
+parameter WID=64;
+parameter RBIT = 11;
+localparam DEPTH = 1 << (RBIT+1);
+input clka;
+input ena;
+input [7:0] wea;
+input [RBIT:0] addra;
+input [WID-1:0] dina;
+input clkb;
+input enb;
+input [RBIT:0] addrb;
+output [WID-1:0] doutb;
+
+integer n;
+integer lane;
+(* RAM_STYLE="BLOCK" *)
+reg [WID-1:0] mem [0:DEPTH-1];
+reg [RBIT:0] raddrb;
+
+// Start with every location cleared.
+initial begin
+    for (n = 0; n < DEPTH; n = n + 1)
+        mem[n] = 0;
+end
+
+// Byte-lane write: each enabled lane of the addressed word is replaced.
+always @(posedge clka)
+    if (ena)
+        for (lane = 0; lane < 8; lane = lane + 1)
+            if (wea[lane])
+                mem[addra][lane*8 +: 8] <= dina[lane*8 +: 8];
+
+// Registered read address, unregistered data output.
+always @(posedge clkb)
+    raddrb <= addrb;
+assign doutb = mem[raddrb];
+
+endmodule
+
+// Register file with two write ports and six read ports.
+//
+// Six RAM instances (one per read port) share a single physical write port
+// clocked by clk4x. The two logical write ports are time-multiplexed onto
+// that port: port 0 is written first, then the held copy of port 1.
+//   clk4x                    - RAM write clock (named as a 4x clock)
+//   clk                      - clock on which write requests are captured
+//   wr0/wr1, wa0/wa1, i0/i1  - write strobes, addresses and data
+//   we0/we1                  - byte write enables; NOTE: the RAM is always
+//                              written with all lanes enabled (we = 8'hFF),
+//                              so these inputs do not affect the result
+//   rclk, ra0..ra5           - read clock and read addresses
+//   o0..o5                   - read data after zero-register and forwarding
+// When SIM is defined the behavioural RAM model is used; otherwise
+// FT64_regfileRam (defined outside this file) is instantiated.
+module FT64_regfile2w6r_oc(clk4x, clk, wr0, wr1, we0, we1, wa0, wa1, i0, i1,
+    rclk, ra0, ra1, ra2, ra3, ra4, ra5,
+    o0, o1, o2, o3, o4, o5);
+parameter WID=64;
+parameter RBIT = 11;
+input clk4x;
+input clk;
+input wr0;
+input wr1;
+input [7:0] we0;
+input [7:0] we1;
+input [RBIT:0] wa0;
+input [RBIT:0] wa1;
+input [WID-1:0] i0;
+input [WID-1:0] i1;
+input rclk;
+input [RBIT:0] ra0;
+input [RBIT:0] ra1;
+input [RBIT:0] ra2;
+input [RBIT:0] ra3;
+input [RBIT:0] ra4;
+input [RBIT:0] ra5;
+output [WID-1:0] o0;
+output [WID-1:0] o1;
+output [WID-1:0] o2;
+output [WID-1:0] o3;
+output [WID-1:0] o4;
+output [WID-1:0] o5;
+
+// Multiplexed write port as presented to the RAMs.
+reg wr;
+reg [RBIT:0] wa;
+reg [WID-1:0] i;
+reg [7:0] we;
+// Raw RAM read data, before zero-register and forwarding handling.
+wire [WID-1:0] o00, o01, o02, o03, o04, o05;
+// Held copy of write port 1 (we1x is captured but not currently used).
+reg wr1x;
+reg [RBIT:0] wa1x;
+reg [WID-1:0] i1x;
+reg [7:0] we1x;
+// Previous-cycle copies of both write ports, used for read forwarding.
+reg holdwr0,holdwr1;
+reg [WID-1:0] holdi0, holdi1;
+reg [RBIT:0] holdwa0,holdwa1;
+
+`ifdef SIM
+FT64_regfileRam_sim urf10 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra0),
+    .doutb(o00)
+);
+
+FT64_regfileRam_sim urf11 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra1),
+    .doutb(o01)
+);
+
+FT64_regfileRam_sim urf12 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra2),
+    .doutb(o02)
+);
+
+FT64_regfileRam_sim urf13 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra3),
+    .doutb(o03)
+);
+
+FT64_regfileRam_sim urf14 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra4),
+    .doutb(o04)
+);
+
+FT64_regfileRam_sim urf15 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra5),
+    .doutb(o05)
+);
+`else
+// Port B is read-only: its write enables and write data are tied to zero,
+// using the same widths as the FT64_regfile1w4r_oc instantiation.
+FT64_regfileRam urf10 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra0),
+    .dinb(64'h00),
+    .doutb(o00)
+);
+
+FT64_regfileRam urf11 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra1),
+    .dinb(64'h00),
+    .doutb(o01)
+);
+
+FT64_regfileRam urf12 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra2),
+    .dinb(64'h00),
+    .doutb(o02)
+);
+
+FT64_regfileRam urf13 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra3),
+    .dinb(64'h00),
+    .doutb(o03)
+);
+
+FT64_regfileRam urf14 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra4),
+    .dinb(64'h00),
+    .doutb(o04)
+);
+
+FT64_regfileRam urf15 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra5),
+    .dinb(64'h00),
+    .doutb(o05)
+);
+`endif
+
+// Record what was written in the previous clock cycle so that read
+// forwarding logic may use it.
+always @(posedge clk)
+    holdwr0 <= wr0;
+always @(posedge clk)
+    holdwr1 <= wr1;
+always @(posedge clk)
+    holdwa0 <= wa0;
+always @(posedge clk)
+    holdwa1 <= wa1;
+always @(posedge clk)
+    holdi0 <= i0;
+always @(posedge clk)
+    holdi1 <= i1;
+
+// The same clock edge that would normally update the register file is the
+// clock edge that causes the data to disappear for the next cycle. The
+// data needs to be held onto so that it can update the register file on
+// the next 4x clock.
+always @(posedge clk)
+begin
+    wr1x <= wr1;
+    we1x <= we1;
+    wa1x <= wa1;
+    i1x <= i1;
+end
+
+// Write sequencing on clk4x. wclk2 is clk as sampled on the previous clk4x
+// edge, so:
+//   clk high, wclk2 low  - first clk4x edge that sees clk high: present
+//                          write port 0 (live inputs) to the RAMs
+//   clk high, wclk2 high - clk still high: present the held copy of
+//                          write port 1
+//   clk low              - no write
+// All byte lanes are always enabled; we0/we1 are not consulted.
+reg wclk2;
+always @(posedge clk4x)
+begin
+    wclk2 <= clk;
+    if (clk & ~wclk2) begin
+        wr <= wr0;
+        we <= 8'hFF;
+        wa <= wa0;
+        i <= i0;
+    end
+    else if (clk & wclk2) begin
+        wr <= wr1x;
+        we <= 8'hFF;
+        wa <= wa1x;
+        i <= i1x;
+    end
+    else begin
+        wr <= 1'b0;
+        we <= 8'hFF;
+        wa <= 'd0;
+        i <= 'd0;
+    end
+end
+
+
+// Read forwarding multiplexer. Selects the value seen on a read port, in
+// priority order:
+//   1. zero, when the low five bits of the read address are zero
+//   2. data being written now on port 1, then on port 0
+//   3. data written in the previous clk cycle on port 1, then on port 0
+//   4. the RAM output
+// Whole words are forwarded; byte enables are not considered.
+// Every signal the result depends on is passed as an argument so that the
+// continuous assignments below are sensitive to all of them.
+function [WID-1:0] fwdmux;
+input [RBIT:0] ra;
+input wr0;
+input wr1;
+input hwr0;
+input hwr1;
+input [RBIT:0] wa0;
+input [RBIT:0] wa1;
+input [RBIT:0] hwa0;
+input [RBIT:0] hwa1;
+input [WID-1:0] i0;
+input [WID-1:0] i1;
+input [WID-1:0] hi0;
+input [WID-1:0] hi1;
+input [WID-1:0] oo;
+begin
+    if (ra[4:0]==5'd0)
+        fwdmux = {WID{1'b0}};
+    else if (wr1 && ra==wa1)
+        fwdmux = i1;
+    else if (wr0 && ra==wa0)
+        fwdmux = i0;
+    else if (hwr1 && ra==hwa1)
+        fwdmux = hi1;
+    else if (hwr0 && ra==hwa0)
+        fwdmux = hi0;
+    else
+        fwdmux = oo;
+end
+endfunction
+
+assign o0 = fwdmux(ra0,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o00);
+assign o1 = fwdmux(ra1,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o01);
+assign o2 = fwdmux(ra2,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o02);
+assign o3 = fwdmux(ra3,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o03);
+assign o4 = fwdmux(ra4,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o04);
+assign o5 = fwdmux(ra5,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o05);
+
+endmodule
+
Index: FT64v7/software/AS64/bin/AS64.exe
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: FT64v7/software/AS64/bin/AS64.exe
===================================================================
--- FT64v7/software/AS64/bin/AS64.exe (nonexistent)
+++ FT64v7/software/AS64/bin/AS64.exe (revision 60)
FT64v7/software/AS64/bin/AS64.exe
Property changes :
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: FT64v7/software/CC64/bin/CC64.exe
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: FT64v7/software/CC64/bin/CC64.exe
===================================================================
--- FT64v7/software/CC64/bin/CC64.exe (nonexistent)
+++ FT64v7/software/CC64/bin/CC64.exe (revision 60)
FT64v7/software/CC64/bin/CC64.exe
Property changes :
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property