OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /
    from Rev 59 to Rev 60
    Reverse comparison

Rev 59 → Rev 60

/thor/trunk/FT64v7/doc/FT64v7.docx Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
thor/trunk/FT64v7/doc/FT64v7.docx Property changes : Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: thor/trunk/FT64v7/rtl/common/FT64_AMO_alu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_AMO_alu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_AMO_alu.v (revision 60) @@ -0,0 +1,244 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_AMO_alu.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// +// ALU for atomic memory operations (AMO) +// AMO ops have their own limited ALU since they can't wait on the usual +// ALU. 
+// ============================================================================ +//
// ----------------------------------------------------------------------------
// FT64_AMO_alu - combinational ALU for atomic memory operations (AMO).
//
// Ports:
//   instr - instruction word. instr[5:0] is the major opcode, instr[30:26]
//           the AMO function and instr[23:21] the operand-size code.
//   a, b  - 64-bit operands.
//   res   - 64-bit result.
//
// Operand-size code (instr[23] does not affect the lane width; codes 0/4,
// 1/5, 2/6 and 3/7 are decoded identically):
//   0,4 - eight 8-bit lanes        1,5 - four 16-bit lanes
//   2,6 - two 32-bit lanes         3,7 - one 64-bit lane
//
// ADD, SHL, SHR, MIN, MAX, MINU and MAXU operate independently on each lane.
// Every lane of a shift uses the same count, taken from the low bits of b
// (3, 4, 5 or 6 bits for 8-, 16-, 32- and 64-bit lanes). SWAP, AND, OR and
// XOR ignore the size code. Anything that is not decoded below yields
// 64'hDEADDEADDEADDEAD.
//
// Blocking assignments are used throughout because this is purely
// combinational logic.
// ----------------------------------------------------------------------------
`include "FT64_defines.vh"

module FT64_AMO_alu(instr, a, b, res);
input [31:0] instr;
input [63:0] a;
input [63:0] b;
output reg [63:0] res;

wire [4:0] op = instr[30:26];       // AMO function field
wire [1:0] lanesz = instr[22:21];   // lane width select (instr[23] is ignored)

integer k;                          // base bit index of the lane being computed

always @*
begin
    // Marker value for every opcode/function that is not decoded below.
    res = 64'hDEADDEADDEADDEAD;
    case(instr[5:0])
    `AMO:
        case(op)
        `AMO_SWAP:  res = b;
        `AMO_ADD:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  + b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] + b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] + b[k +: 32];
            default: res = a + b;
            endcase
        `AMO_AND:   res = a & b;
        `AMO_OR:    res = a | b;
        `AMO_XOR:   res = a ^ b;
        `AMO_SHL:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  << b[2:0];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] << b[3:0];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] << b[4:0];
            default: res = a << b[5:0];
            endcase
        `AMO_SHR:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  >> b[2:0];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] >> b[3:0];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] >> b[4:0];
            default: res = a >> b[5:0];
            endcase
        `AMO_MIN:   // signed minimum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = $signed(a[k +: 8])  < $signed(b[k +: 8])  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = $signed(a[k +: 16]) < $signed(b[k +: 16]) ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = $signed(a[k +: 32]) < $signed(b[k +: 32]) ? a[k +: 32] : b[k +: 32];
            default: res = $signed(a) < $signed(b) ? a : b;
            endcase
        `AMO_MAX:   // signed maximum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = $signed(a[k +: 8])  > $signed(b[k +: 8])  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = $signed(a[k +: 16]) > $signed(b[k +: 16]) ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = $signed(a[k +: 32]) > $signed(b[k +: 32]) ? a[k +: 32] : b[k +: 32];
            default: res = $signed(a) > $signed(b) ? a : b;
            endcase
        `AMO_MINU:  // unsigned minimum (part-selects are unsigned)
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = a[k +: 8]  < b[k +: 8]  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = a[k +: 16] < b[k +: 16] ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = a[k +: 32] < b[k +: 32] ? a[k +: 32] : b[k +: 32];
            default: res = a < b ? a : b;
            endcase
        `AMO_MAXU:  // unsigned maximum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = a[k +: 8]  > b[k +: 8]  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = a[k +: 16] > b[k +: 16] ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = a[k +: 32] > b[k +: 32] ? a[k +: 32] : b[k +: 32];
            default: res = a > b ? a : b;
            endcase
        default:    ;   // keep the marker value
        endcase
    `INC:   res = a + b;
    default:    ;       // keep the marker value
    endcase
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_BMM.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_BMM.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_BMM.v (revision 60) @@ -0,0 +1,85 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_BMM.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/.
+// +// ============================================================================ +//
// ----------------------------------------------------------------------------
// FT64_BMM - bit-matrix multiply.
//
// a and b are each read as an (N+1) x (N+1) matrix of bits. Element [i][j]
// lives at vector bit (N-i)*(N+1) + (N-j), so row 0 / column 0 is the most
// significant bit. Each output element is the product of row i of a with
// column j of b, where the bit products are combined with
//   op = 0 : OR   (MOR)
//   op = 1 : XOR  (MXOR)
//
// Parameters:
//   DBW - width of a, b and o. The matrix occupies (N+1)*(N+1) bits, so DBW
//         must be at least that large; any output bits above the matrix are
//         driven to zero.
//   N   - highest row/column index (matrix dimension minus one).
//
// The whole computation is done in one combinational block that reads a and b
// directly. This avoids sharing loop variables between several always blocks
// and avoids relying on @* being sensitive to 2-D reg arrays.
// ----------------------------------------------------------------------------
module FT64_BMM(op,a,b,o);
parameter DBW=64;
parameter N=7;
input op;   // 0 = MOR, 1 = MXOR
input [DBW-1:0] a;
input [DBW-1:0] b;
output reg [DBW-1:0] o;

integer i, j, k;    // row, column and inner-product index
reg term;           // a[i][k] & b[k][j]
reg acc_or;         // running OR of the terms
reg acc_xor;        // running XOR of the terms

always @*
begin
    o = {DBW{1'b0}};
    for (i = 0; i <= N; i = i + 1) begin
        for (j = 0; j <= N; j = j + 1) begin
            acc_or = 1'b0;
            acc_xor = 1'b0;
            for (k = 0; k <= N; k = k + 1) begin
                term = a[(N-i)*(N+1)+(N-k)] & b[(N-k)*(N+1)+(N-j)];
                acc_or = acc_or | term;
                acc_xor = acc_xor ^ term;
            end
            o[(N-i)*(N+1)+(N-j)] = op ? acc_xor : acc_or;
        end
    end
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_EvalBranch.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_EvalBranch.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_EvalBranch.v (revision 60) @@ -0,0 +1,79 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. 
+// \/_// robfinch@finitron.ca +// || +// +// FT64_EvalBranch.v +// - FT64 branch evaluation +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// ============================================================================ +//
// Major opcodes (instr[5:0]) decoded by this module.
`define TRUE    1'b1
`define BBc     6'h26
`define Bcc     6'h30
`define BEQI    6'h32
`define BCHK    6'h33
`define CHK     6'h34

// Bcc condition codes (instr[15:13]).
`define BEQ     3'h0
`define BNE     3'h1
`define BLT     3'h2
`define BGE     3'h3
`define BLTU    3'h6
`define BGEU    3'h7

// BBc sub-operations (instr[14:13]); 0 and 1 are bit-set / bit-clear tests.
`define IBNE    2'd2
`define DBNZ    2'd3

// ----------------------------------------------------------------------------
// FT64_EvalBranch - combinational branch-condition evaluation.
//
// Ports:
//   instr   - instruction word; instr[5:0] selects the branch class.
//   a, b, c - operand values.
//   takb    - 1 when the branch condition holds.
//
// Decoding:
//   Bcc      - compares a with b using the condition in instr[15:13]
//              (BLT/BGE signed, BLTU/BGEU unsigned); undefined condition
//              codes report taken.
//   BEQI     - a equals the sign-extended 8-bit immediate
//              {instr[22:18],instr[15:13]}.
//   BBc      - instr[14:13] = 0: bit {instr[22:18],instr[15]} of a is set
//                           1: that bit is clear
//                           IBNE: (a + 1) != b
//                           DBNZ: (a - 1) != b
//   CHK/BCHK - unsigned range check, b <= a < c.
//   Any other opcode reports taken.
//
// The BEQI immediate is built as a 64-bit value, so that comparison is only
// meaningful when WID is 64.
// Blocking assignments are used because this is purely combinational logic.
// ----------------------------------------------------------------------------
module FT64_EvalBranch(instr, a, b, c, takb);
parameter WID=64;
input [47:0] instr;
input [WID-1:0] a;
input [WID-1:0] b;
input [WID-1:0] c;
output reg takb;

wire [5:0] opcode = instr[5:0];

// Evaluate the branch condition.
always @*
begin
    case(opcode)
    `Bcc:
        case(instr[15:13])
        `BEQ:    takb = a==b;
        `BNE:    takb = a!=b;
        `BLT:    takb = $signed(a) < $signed(b);
        `BGE:    takb = $signed(a) >= $signed(b);
        `BLTU:   takb = a < b;
        `BGEU:   takb = a >= b;
        default: takb = `TRUE;
        endcase
    `BEQI:  takb = a=={{56{instr[22]}},instr[22:18],instr[15:13]};
    `BBc:
        case(instr[14:13])
        2'd0:    takb = a[{instr[22:18],instr[15]}];    // BBS
        2'd1:    takb = ~a[{instr[22:18],instr[15]}];   // BBC
        `IBNE:   takb = (a + 64'd1) != b;
        `DBNZ:   takb = (a - 64'd1) != b;
        default: takb = `TRUE;
        endcase
    `CHK,`BCHK: takb = a >= b && a < c;
    default:    takb = `TRUE;
    endcase
end

endmodule
Index:
thor/trunk/FT64v7/rtl/common/FT64_FCU_Calc.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_FCU_Calc.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_FCU_Calc.v (revision 60) @@ -0,0 +1,66 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_FCU_Calc.v +// - FT64 flow control calcs +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// ============================================================================ +//
// Plain include name, matching the other modules in this directory (the
// previous ".\" prefix only resolves on Windows hosts).
`include "FT64_defines.vh"

// ----------------------------------------------------------------------------
// FT64_FCU_Calc - combinational result-bus value for flow-control
// instructions.
//
// Ports:
//   ol      - two-bit level field, compared with `OL_USER for REX.
//   instr   - instruction word.
//   tvec    - value placed on the bus when the REX test below passes.
//   a       - operand value.
//   pc      - not used by the logic in this module.
//   nextpc  - value placed on the bus for JAL and CALL.
//   im      - four-bit value compared against ~{ol,2'b00} for REX.
//   waitctr - WAIT produces 1 when this equals 1, otherwise 0.
//   bus     - result.
//
// Opcodes that are not decoded produce 64'hCCCCCCCCCCCCCCCC.
//
// Every assignment to bus is blocking: this is combinational logic, and one
// variable must not be driven with a mix of blocking and non-blocking
// assignments.
//
// NOTE(review): the IBNE/DBNZ selector is read from instr[20:19] here, while
// FT64_EvalBranch reads it from instr[14:13] - confirm which field is right.
// ----------------------------------------------------------------------------
module FT64_FCU_Calc(ol, instr, tvec, a, pc, nextpc, im, waitctr, bus);
parameter WID = 64;
parameter AMSB = 31;
input [1:0] ol;
input [47:0] instr;
input [WID-1:0] tvec;
input [WID-1:0] a;
input [AMSB:0] pc;
input [AMSB:0] nextpc;
input [3:0] im;
input [WID-1:0] waitctr;
output reg [WID-1:0] bus;

always @*
begin
    casez(instr[`INSTRUCTION_OP])
    // instr[16] selects between the low byte of a and the byte in instr[15:8].
    `BRK:   bus = instr[16] ? {56'd0,a[7:0]} : {56'b0,instr[15:8]};
    `BBc:
        case(instr[20:19])
        `IBNE:   bus = a + 64'd1;
        `DBNZ:   bus = a - 64'd1;
        default: bus = 64'hCCCCCCCCCCCCCCCC;
        endcase
    `JAL:   bus = nextpc;
    `CALL:  bus = nextpc;
    // a plus an immediate scaled by 8; the immediate is wider when
    // instr[7:6] == 2'b01.
    `RET:   bus = a + (instr[7:6]==2'b01 ? {instr[47:23],3'b0} : {instr[31:23],3'b0});
    `REX:
        case(ol)
        `OL_USER:   bus = 64'hCCCCCCCCCCCCCCCC;
        // ToDo: fix im test
        default:    bus = (im < ~{ol,2'b00}) ? tvec : nextpc;
        endcase
    `WAIT:  bus = waitctr==64'd1;
    default:    bus = 64'hCCCCCCCCCCCCCCCC;
    endcase
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_InsLength.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_InsLength.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_InsLength.v (revision 60) @@ -0,0 +1,48 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_InsLength.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// Computes the length of an instruction. +// There are also other places in code where the length is determined +// without the use of this module.
+// ============================================================================ +//
`include "FT64_config.vh"
`include "FT64_defines.vh"

// ----------------------------------------------------------------------------
// FT64_InsLength - combinational instruction-length decode.
//
// Ports:
//   ins     - instruction word; ins[7:6] selects the length.
//   pred_on - OR-ed into bit 0 of the result.
//   len     - computed length:
//               ins[7:6] == 0 : 4 | pred_on
//               ins[7:6] == 1 : 6 | pred_on
//               otherwise     : 2 | pred_on
//
// When SUPPORT_DCI is defined, an instruction whose opcode field equals
// `CMPRSSD always reports 2 | pred_on, whatever ins[7:6] holds.
//
// Blocking assignments are used because this is purely combinational logic.
// The override is written as a separate statement after the case so that no
// else clause has to straddle the `ifdef.
// ----------------------------------------------------------------------------
module FT64_InsLength(ins, len, pred_on);
input [47:0] ins;
output reg [2:0] len;
input pred_on;

always @*
begin
    case(ins[7:6])
    2'd0:    len = 3'd4 | pred_on;
    2'd1:    len = 3'd6 | pred_on;
    default: len = 3'd2 | pred_on;
    endcase
`ifdef SUPPORT_DCI
    if (ins[`INSTRUCTION_OP]==`CMPRSSD)
        len = 3'd2 | pred_on;
`endif
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_RMW_alu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_RMW_alu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_RMW_alu.v (revision 60) @@ -0,0 +1,282 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_RMW_alu.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// +// ALU for atomic memory operations (AMO) +// AMO ops have their own limited ALU since they can't wait on the usual +// ALU. 
+// ============================================================================ +//
// ----------------------------------------------------------------------------
// FT64_RMW_alu - combinational ALU for read-modify-write memory operations.
//
// Ports:
//   instr - instruction word. instr[5:0] is the major opcode, instr[31:26]
//           the R2 function, instr[30:26] the AMO function and instr[23:21]
//           the operand-size code.
//   a, b  - 64-bit operands.
//   c     - not used by the logic in this module.
//   res   - 64-bit result.
//
// Operand-size code (instr[23] does not affect the lane width; codes 0/4,
// 1/5, 2/6 and 3/7 are decoded identically):
//   0,4 - eight 8-bit lanes        1,5 - four 16-bit lanes
//   2,6 - two 32-bit lanes         3,7 - one 64-bit lane
//
// ADD, SHL, SHR, MIN, MAX, MINU and MAXU operate independently on each lane.
// Every lane of a shift uses the same count, taken from the low bits of b
// (3, 4, 5 or 6 bits for 8-, 16-, 32- and 64-bit lanes). SWAP, AND, OR and
// XOR ignore the size code. Anything that is not decoded below yields
// 64'hDEADDEADDEADDEAD.
//
// The 64-bit shift cases previously also assigned res[64] from a[64]; both
// vectors are [63:0], so that out-of-range access has been removed.
// Blocking assignments are used because this is purely combinational logic.
// ----------------------------------------------------------------------------
`include "FT64_defines.vh"

module FT64_RMW_alu(instr, a, b, c, res);
input [47:0] instr;
input [63:0] a;
input [63:0] b;
input [63:0] c;
output reg [63:0] res;

wire [4:0] op = instr[30:26];       // AMO function field
wire [1:0] lanesz = instr[22:21];   // lane width select (instr[23] is ignored)

integer k;                          // base bit index of the lane being computed

always @*
begin
    // Marker value for every opcode/function that is not decoded below.
    res = 64'hDEADDEADDEADDEAD;
    case(instr[5:0])
    `R2:
        case(instr[31:26])
        `INC:    res = a + b;
        default: ;      // keep the marker value
        endcase
    `AMO:
        case(op)
        `AMO_SWAP:  res = b;
        `AMO_ADD:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  + b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] + b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] + b[k +: 32];
            default: res = a + b;
            endcase
        `AMO_AND:   res = a & b;
        `AMO_OR:    res = a | b;
        `AMO_XOR:   res = a ^ b;
        `AMO_SHL:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  << b[2:0];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] << b[3:0];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] << b[4:0];
            default: res = a << b[5:0];
            endcase
        `AMO_SHR:
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)  res[k +: 8]  = a[k +: 8]  >> b[2:0];
            2'd1:    for (k = 0; k < 64; k = k + 16) res[k +: 16] = a[k +: 16] >> b[3:0];
            2'd2:    for (k = 0; k < 64; k = k + 32) res[k +: 32] = a[k +: 32] >> b[4:0];
            default: res = a >> b[5:0];
            endcase
        `AMO_MIN:   // signed minimum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = $signed(a[k +: 8])  < $signed(b[k +: 8])  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = $signed(a[k +: 16]) < $signed(b[k +: 16]) ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = $signed(a[k +: 32]) < $signed(b[k +: 32]) ? a[k +: 32] : b[k +: 32];
            default: res = $signed(a) < $signed(b) ? a : b;
            endcase
        `AMO_MAX:   // signed maximum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = $signed(a[k +: 8])  > $signed(b[k +: 8])  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = $signed(a[k +: 16]) > $signed(b[k +: 16]) ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = $signed(a[k +: 32]) > $signed(b[k +: 32]) ? a[k +: 32] : b[k +: 32];
            default: res = $signed(a) > $signed(b) ? a : b;
            endcase
        `AMO_MINU:  // unsigned minimum (part-selects are unsigned)
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = a[k +: 8]  < b[k +: 8]  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = a[k +: 16] < b[k +: 16] ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = a[k +: 32] < b[k +: 32] ? a[k +: 32] : b[k +: 32];
            default: res = a < b ? a : b;
            endcase
        `AMO_MAXU:  // unsigned maximum
            case(lanesz)
            2'd0:    for (k = 0; k < 64; k = k + 8)
                         res[k +: 8]  = a[k +: 8]  > b[k +: 8]  ? a[k +: 8]  : b[k +: 8];
            2'd1:    for (k = 0; k < 64; k = k + 16)
                         res[k +: 16] = a[k +: 16] > b[k +: 16] ? a[k +: 16] : b[k +: 16];
            2'd2:    for (k = 0; k < 64; k = k + 32)
                         res[k +: 32] = a[k +: 32] > b[k +: 32] ? a[k +: 32] : b[k +: 32];
            default: res = a > b ? a : b;
            endcase
        default:    ;   // keep the marker value
        endcase
    `INC:   res = a + b;
    default:    ;       // keep the marker value
    endcase
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_RSB.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_RSB.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_RSB.v (revision 60) @@ -0,0 +1,166 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_RSB.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/. +// +// ============================================================================ +// +`include "FT64_defines.vh" + +// Return address stack predictor is updated during the fetch stage on the +// assumption that previous flow controls (branches) predicted correctly. +// Otherwise many small routines wouldn't predict the return address +// correctly because they hit the RET before the CALL reaches the +// commit stage. 
// ----------------------------------------------------------------------------
// FT64_RSB - return address stack.
//
// ras is a DEPTH-entry array of addresses indexed by rasp; pc continuously
// outputs ras[rasp]. On reset every entry is loaded with RSTPC and rasp is
// cleared.
//
// Each clock at most one fetch buffer is examined, in this priority order:
//   1. both buffers valid and queued1 or queued2 set -> fetchbuf0
//   2. fetchbuf1 valid and queued1 set               -> fetchbuf1
//   3. fetchbuf0 valid and queued1 set               -> fetchbuf0
// The selected instruction is acted on only when it differs from the one
// recorded on the previous qualifying clock (lasti0/lasti1), so an
// instruction that sits in a buffer for several clocks is counted once.
//
// Previously the second and third arms had no begin/end, so their stack
// updates ran on every clock regardless of the valid/queued qualifiers.
// The three arms are now properly bracketed and share one task.
//
// stompedRets and stompedRet are currently unused (see the disabled code at
// the end of the always block).
// rasp is four bits wide, so the logic as written assumes DEPTH is 16.
// ----------------------------------------------------------------------------
module FT64_RSB(rst, clk, regLR, queued1, queued2,
    fetchbuf0_v, fetchbuf0_pc, fetchbuf0_instr,
    fetchbuf1_v, fetchbuf1_pc, fetchbuf1_instr,
    stompedRets, stompedRet,
    pc
);
parameter AMSB = 31;
parameter DEPTH = 16;
input rst;
input clk;
input [4:0] regLR;
input queued1;
input queued2;
input fetchbuf0_v;
input [47:0] fetchbuf0_instr;
input [AMSB:0] fetchbuf0_pc;
input fetchbuf1_v;
input [47:0] fetchbuf1_instr;
input [AMSB:0] fetchbuf1_pc;
input [3:0] stompedRets;
input stompedRet;
output [AMSB:0] pc;

parameter RSTPC = 32'hFFFC0100;
integer n;
reg [AMSB:0] ras [0:DEPTH-1];
reg [3:0] rasp;
assign pc = ras[rasp];
reg [47:0] lasti0, lasti1;

// Apply one instruction to the stack.
//   JAL with Rb == regLR, or CALL : store ins_pc + 6 (ins[6] set) or
//                                   ins_pc + 4 at rasp-1, then decrement rasp
//   JAL with Rb == 0 and Ra == regLR, or RET : increment rasp
// Anything else leaves the stack untouched.
task tUpdateRas;
input [47:0] ins;
input [AMSB:0] ins_pc;
begin
    case(ins[`INSTRUCTION_OP])
    `JAL:
        begin
            // JAL LR,xxxx assume call
            if (ins[`INSTRUCTION_RB]==regLR) begin
                ras[((rasp-6'd1)&(DEPTH-1))] <= ins_pc + (ins[6] ? 32'd6 : 32'd4);
                rasp <= rasp - 4'd1;
            end
            // JAL r0,[r29] assume a ret
            else if (ins[`INSTRUCTION_RB]==5'd00 &&
                     ins[`INSTRUCTION_RA]==regLR) begin
                rasp <= rasp + 4'd1;
            end
        end
    `CALL:
        begin
            ras[((rasp-6'd1)&(DEPTH-1))] <= ins_pc + (ins[6] ? 32'd6 : 32'd4);
            rasp <= rasp - 4'd1;
        end
    `RET:
        begin
            $display("RSP: Added 1");
            rasp <= rasp + 4'd1;
        end
    default:    ;
    endcase
end
endtask

always @(posedge clk)
if (rst) begin
    lasti0 <= `NOP_INSN;
    lasti1 <= `NOP_INSN;
    for (n = 0; n < DEPTH; n = n + 1)
        ras[n] <= RSTPC;
    rasp <= 4'd0;
end
else begin
    if (fetchbuf0_v && fetchbuf1_v && (queued1 || queued2)) begin
        // Make sure the instruction changed between clock cycles.
        lasti0 <= fetchbuf0_instr;
        lasti1 <= fetchbuf1_instr;
        if (fetchbuf0_instr != lasti0 || fetchbuf1_instr != lasti1)
            tUpdateRas(fetchbuf0_instr, fetchbuf0_pc);
    end
    else if (fetchbuf1_v && queued1) begin
        lasti1 <= fetchbuf1_instr;
        if (fetchbuf1_instr != lasti1)
            tUpdateRas(fetchbuf1_instr, fetchbuf1_pc);
    end
    else if (fetchbuf0_v && queued1) begin
        lasti0 <= fetchbuf0_instr;
        if (lasti0 != fetchbuf0_instr)
            tUpdateRas(fetchbuf0_instr, fetchbuf0_pc);
    end
/*
    if (stompedRets > 4'd0) begin
        $display("Stomped Rets: %d", stompedRets);
        rasp <= rasp - stompedRets;
    end
    else if (stompedRet) begin
        $display("Stomped Ret");
        rasp <= rasp - 5'd1;
    end
*/
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_alu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_alu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_alu.v (revision 60) @@ -0,0 +1,1875 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_alu.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see http://www.gnu.org/licenses/.
+// +// ============================================================================ +// +`include "FT64_defines.vh" +`include "FT64_config.vh" + +module FT64_alu(rst, clk, ld, abort, instr, sz, tlb, store, a, b, c, pc, Ra, tgt, tgt2, ven, vm, + csr, o, ob, done, idle, excen, exc, thrd, ptrmask, state, mem, shift, + ol, dl, ASID, icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o, uncached, tlb_miss, + exv_o, rdv_o, wrv_o +`ifdef SUPPORT_SEGMENTATION + , zs_base, ds_base, es_base, fs_base, gs_base, hs_base, ss_base, cs_base, + zsub, dsub, esub, fsub, gsub, hsub, ssub, csub, + zslb, dslb, eslb, fslb, gslb, hslb, sslb, cslb +`endif +`ifdef SUPPORT_BBMS + , pb, cbl, cbu, ro, dbl, dbu, sbl, sbu, en +`endif + ); +parameter DBW = 64; +parameter ABW = 64; +parameter BIG = 1'b1; +parameter SUP_VECTOR = 1; +parameter TRUE = 1'b1; +parameter FALSE = 1'b0; +parameter PTR = 20'hFFF01; +input rst; +input clk; +input ld; +input abort; +input [47:0] instr; +input [2:0] sz; +input tlb; +input store; +input [63:0] a; +input [63:0] b; +input [63:0] c; +input [31:0] pc; +input [11:0] Ra; +input [11:0] tgt; +input [7:0] tgt2; +input [5:0] ven; +input [15:0] vm; +input [63:0] csr; +output reg [63:0] o; +output reg [63:0] ob; +output reg done; +output reg idle; +input [4:0] excen; +output reg [8:0] exc; +input thrd; +input [63:0] ptrmask; +input [1:0] state; +input mem; +input shift; +input [1:0] ol; +input [1:0] dl; +input [7:0] ASID; +input icl_i; +input cyc_i; +input we_i; +input [ABW-1:0] vadr_i; +output cyc_o; +output we_o; +output [ABW-1:0] padr_o; +output uncached; +output tlb_miss; +output wrv_o; +output rdv_o; +output exv_o; +`ifdef SUPPORT_SEGMENTATION +input [63:0] zs_base; +input [63:0] ds_base; +input [63:0] es_base; +input [63:0] fs_base; +input [63:0] gs_base; +input [63:0] hs_base; +input [63:0] ss_base; +input [63:0] cs_base; +input [63:0] zslb; +input [63:0] dslb; +input [63:0] eslb; +input [63:0] fslb; +input [63:0] gslb; +input [63:0] hslb; +input [63:0] sslb; +input 
[63:0] cslb; +input [63:0] zsub; +input [63:0] dsub; +input [63:0] esub; +input [63:0] fsub; +input [63:0] gsub; +input [63:0] hsub; +input [63:0] ssub; +input [63:0] csub; +`endif +`ifdef SUPPORT_BBMS +input [63:0] pb; +input [63:0] cbl; +input [63:0] cbu; +input [63:0] ro; +input [63:0] dbl; +input [63:0] dbu; +input [63:0] sbl; +input [63:0] sbu; +input [63:0] en; +`endif + +parameter byt = 3'd0; +parameter char = 3'd1; +parameter half = 3'd2; +parameter word = 3'd3; +parameter byt_para = 3'd4; +parameter char_para = 3'd5; +parameter half_para = 3'd6; +parameter word_para = 3'd7; + +integer n; + +reg adrDone, adrIdle; +reg [63:0] usa; // unsegmented address +`ifdef SUPPORT_SEGMENTATION +reg [63:0] pb; +reg [63:0] ub; +reg [63:0] lb; +always @* +case(usa[63:61]) +3'd0: pb <= zs_base; +3'd1: pb <= ds_base; +3'd2: pb <= es_base; +3'd3: pb <= fs_base; +3'd4: pb <= gs_base; +3'd5: pb <= hs_base; +3'd6: pb <= ss_base; +3'd7: pb <= cs_base; +endcase +always @* +case(usa[63:61]) +3'd0: ub <= zsub; +3'd1: ub <= dsub; +3'd2: ub <= esub; +3'd3: ub <= fsub; +3'd4: ub <= gsub; +3'd5: ub <= hsub; +3'd6: ub <= ssub; +3'd7: ub <= csub; +endcase +always @* +case(usa[63:61]) +3'd0: lb <= zslb; +3'd1: lb <= dslb; +3'd2: lb <= eslb; +3'd3: lb <= fslb; +3'd4: lb <= gslb; +3'd5: lb <= hslb; +3'd6: lb <= sslb; +3'd7: lb <= cslb; +endcase +`else +`ifndef SUPPORT_BBMS +reg [63:0] pb = 64'h0; +`endif +`endif +reg [63:0] addro; +reg [63:0] adr; // load / store address +reg [63:0] shift8; + +wire [7:0] a8 = a[7:0]; +wire [15:0] a16 = a[15:0]; +wire [31:0] a32 = a[31:0]; +wire [7:0] b8 = b[7:0]; +wire [15:0] b16 = b[15:0]; +wire [31:0] b32 = b[31:0]; +wire [63:0] orb = instr[6] ? {34'd0,b[29:0]} : {50'd0,b[13:0]}; +wire [63:0] andb = b;//((instr[6]==1'b1) ? 
{34'h3FFFFFFFF,b[29:0]} : {50'h3FFFFFFFFFFFF,b[13:0]}); + +wire [21:0] qimm = instr[39:18]; +wire [63:0] imm = {{45{instr[39]}},instr[39:21]}; +wire [DBW-1:0] divq, rem; +wire divByZero; +wire [15:0] prod80, prod81, prod82, prod83, prod84, prod85, prod86, prod87; +wire [31:0] prod160, prod161, prod162, prod163; +wire [63:0] prod320, prod321; +wire [DBW*2-1:0] prod; +wire mult_done8, mult_idle8, div_done8, div_idle8; +wire mult_done80, mult_idle80, div_done80, div_idle80; +wire mult_done81, mult_idle81, div_done81, div_idle81; +wire mult_done82, mult_idle82, div_done82, div_idle82; +wire mult_done83, mult_idle83, div_done83, div_idle83; +wire mult_done84, mult_idle84, div_done84, div_idle84; +wire mult_done85, mult_idle85, div_done85, div_idle85; +wire mult_done86, mult_idle86, div_done86, div_idle86; +wire mult_done87, mult_idle87, div_done87, div_idle87; +wire mult_done16, mult_idle16, div_done16, div_idle16; +wire mult_done160, mult_idle160, div_done160, div_idle160; +wire mult_done161, mult_idle161, div_done161, div_idle161; +wire mult_done162, mult_idle162, div_done162, div_idle162; +wire mult_done163, mult_idle163, div_done163, div_idle163; +wire mult_done320, mult_idle320, div_done320, div_idle320; +wire mult_done321, mult_idle321, div_done321, div_idle321; +wire mult_done, mult_idle, div_done, div_idle; +wire aslo; +wire [6:0] clzo,cloo,cpopo; +wire [63:0] shftho; +reg [63:0] shift9; + +function IsLoad; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b00) + IsLoad = !isn[31]; + else + IsLoad = FALSE; +`LB: IsLoad = TRUE; +`LBU: IsLoad = TRUE; +`Lx: IsLoad = TRUE; +`LxU: IsLoad = TRUE; +`LWR: IsLoad = TRUE; +`LV: IsLoad = TRUE; +`LVx: IsLoad = TRUE; +`LVxU: IsLoad = TRUE; +default: IsLoad = FALSE; +endcase +endfunction + +function IsMul; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMUL,`VMULS: IsMul = TRUE; + default: IsMul = FALSE; + endcase +`R2: + 
case(isn[`INSTRUCTION_S2]) + `MULU,`MULSU,`MUL: IsMul = TRUE; + `MULUH,`MULSUH,`MULH: IsMul = TRUE; + `FXMUL: IsMul = TRUE; + default: IsMul = FALSE; + endcase +`MULUI,`MULI: IsMul = TRUE; +default: IsMul = FALSE; +endcase +endfunction + +function IsDivmod; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VDIV,`VDIVS: IsDivmod = TRUE; + default: IsDivmod = FALSE; + endcase +`R2: + case(isn[`INSTRUCTION_S2]) + `DIVU,`DIVSU,`DIV: IsDivmod = TRUE; + `MODU,`MODSU,`MOD: IsDivmod = TRUE; + default: IsDivmod = FALSE; + endcase +`DIVUI,`DIVI,`MODI: IsDivmod = TRUE; +default: IsDivmod = FALSE; +endcase +endfunction + +function IsSgn; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMUL,`VMULS,`VDIV,`VDIVS: IsSgn = TRUE; + default: IsSgn = FALSE; + endcase +`R2: + case(isn[`INSTRUCTION_S2]) + `MUL,`DIV,`MOD,`MULH: IsSgn = TRUE; + `FXMUL: IsSgn = TRUE; + default: IsSgn = FALSE; + endcase +`MULI,`DIVI,`MODI: IsSgn = TRUE; +default: IsSgn = FALSE; +endcase +endfunction + +function IsSgnus; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + case(isn[`INSTRUCTION_S2]) + `MULSU,`MULSUH,`DIVSU,`MODSU: IsSgnus = TRUE; + default: IsSgnus = FALSE; + endcase +default: IsSgnus = FALSE; +endcase +endfunction + +function IsShiftAndOp; +input [47:0] isn; +IsShiftAndOp = FALSE; +endfunction + +wire [63:0] bfout,shfto; +wire [63:0] shftob; +wire [63:0] shftco; + +always @(posedge clk) + shift9 <= shift8; + +wire tlb_done, tlb_idle; +wire [DBW-1:0] tlbo; + +`ifdef SUPPORT_TLB +FT64_TLB utlb1 ( + .rst(rst), + .clk(clk), + .ld(ld & tlb), + .done(tlb_done), + .idle(tlb_idle), + .ol(ol), + .ASID(ASID), + .op(instr[25:22]), + .regno(instr[21:18]), + .dati(a), + .dato(tlbo), + .uncached(uncached), + .icl_i(icl_i), + .cyc_i(cyc_i), + .we_i(we_i), + .vadr_i(vadr_i), + .cyc_o(cyc_o), + .we_o(we_o), + .padr_o(padr_o), + .TLBMiss(tlb_miss), + .wrv_o(wrv_o), + .rdv_o(rdv_o), + .exv_o(exv_o), + .HTLBVirtPageo() +); 
+`else +assign tlbo = 64'hDEADDEADDEADDEAD; +assign uncached = 1'b0; +assign padr_o = vadr_i; +assign cyc_o = cyc_i; +assign we_o = we_i; +assign tlb_miss = 1'b0; +assign wrv_o = 1'b0; +assign rdv_o = 1'b0; +assign exv_o = 1'b0; +`endif + +FT64_bitfield #(DBW) ubf1 +( + .inst(instr), + .a(a), + .b(b), + .c(c), + .o(bfout), + .masko() +); + +FT64_multiplier #(DBW) umult1 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr)&& (sz==word || sz==word_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a), + .b(b), + .o(prod), + .done(mult_done), + .idle(mult_idle) +); + +FT64_multiplier #(32) umulth0 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==half || sz==half_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[31:0]), + .b(b[31:0]), + .o(prod320), + .done(mult_done320), + .idle(mult_idle320) +); + +FT64_multiplier #(16) umultc0 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==char || sz==char_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[15:0]), + .b(b[15:0]), + .o(prod160), + .done(mult_done160), + .idle(mult_idle160) +); + +FT64_multiplier #(8) umultb0 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt || sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[7:0]), + .b(b[7:0]), + .o(prod80), + .done(mult_done80), + .idle(mult_idle80) +); + +`ifdef SIMD +FT64_multiplier #(32) umulth1 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==half || sz==half_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[63:32]), + .b(b[63:32]), + .o(prod321), + .done(mult_done321), + .idle(mult_idle321) +); + +FT64_multiplier #(16) umultc1 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==char_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[31:16]), + .b(b[31:16]), + .o(prod161), + .done(mult_done161), + .idle(mult_idle161) +); + 
+FT64_multiplier #(16) umultc2 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==char_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[47:32]), + .b(b[47:32]), + .o(prod162), + .done(mult_done162), + .idle(mult_idle162) +); + +FT64_multiplier #(16) umultc3 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==char_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[63:48]), + .b(b[63:48]), + .o(prod163), + .done(mult_done163), + .idle(mult_idle163) +); + +FT64_multiplier #(8) umultb1 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[15:8]), + .b(b[15:8]), + .o(prod81), + .done(mult_done81), + .idle(mult_idle81) +); + +FT64_multiplier #(8) umultb2 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[23:16]), + .b(b[23:16]), + .o(prod82), + .done(mult_done82), + .idle(mult_idle82) +); + +FT64_multiplier #(8) umultb3 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[31:24]), + .b(b[31:24]), + .o(prod83), + .done(mult_done83), + .idle(mult_idle83) +); + +FT64_multiplier #(8) umultb4 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[39:32]), + .b(b[39:32]), + .o(prod84), + .done(mult_done84), + .idle(mult_idle84) +); + +FT64_multiplier #(8) umultb5 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[47:40]), + .b(b[47:40]), + .o(prod85), + .done(mult_done85), + .idle(mult_idle85) +); + +FT64_multiplier #(8) umultb6 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), 
+ .sgnus(IsSgnus(instr)), + .a(a[55:48]), + .b(b[55:48]), + .o(prod86), + .done(mult_done86), + .idle(mult_idle86) +); + +FT64_multiplier #(8) umultb7 +( + .rst(rst), + .clk(clk), + .ld(ld && IsMul(instr) && (sz==byt_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a[63:56]), + .b(b[63:56]), + .o(prod87), + .done(mult_done87), + .idle(mult_idle87) +); +`endif + +FT64_divider #(DBW) udiv1 +( + .rst(rst), + .clk(clk), + .ld(ld && IsDivmod(instr) && (sz==word || sz==word_para)), + .abort(abort), + .sgn(IsSgn(instr)), + .sgnus(IsSgnus(instr)), + .a(a), + .b(b), + .qo(divq), + .ro(rem), + .dvByZr(divByZero), + .done(div_done), + .idle(div_idle) +); + +wire [5:0] bshift = instr[31:26]==`SHIFTR ? b[5:0] : {instr[30],instr[22:18]}; + +FT64_shift ushft1 +( + .instr(instr), + .a(a), + .b(bshift), + .res(shfto), + .ov(aslo) +); + +FT64_shifth ushfthL +( + .instr(instr), + .a(a[31:0]), + .b(bshift), + .res(shftho[31:0]), + .ov() +); + +FT64_shifth ushfthH +( + .instr(instr), + .a(a[63:32]), + .b(b[63:32]), + .res(shftho[63:32]), + .ov() +); + +FT64_shiftc ushftc0 +( + .instr(instr), + .a(a[15:0]), + .b(bshift), + .res(shftco[15:0]), + .ov() +); + +FT64_shiftc ushftc1 +( + .instr(instr), + .a(a[31:16]), + .b(b[31:16]), + .res(shftco[31:16]), + .ov() +); + +FT64_shiftc ushftc2 +( + .instr(instr), + .a(a[47:32]), + .b(b[47:32]), + .res(shftco[47:32]), + .ov() +); + +FT64_shiftc ushftc3 +( + .instr(instr), + .a(a[63:48]), + .b(b[63:48]), + .res(shftco[63:48]), + .ov() +); + +FT64_shiftb ushftb0 +( + .instr(instr), + .a(a[7:0]), + .b(bshift), + .res(shftob[7:0]), + .ov() +); + +FT64_shiftb ushftb1 +( + .instr(instr), + .a(a[15:8]), + .b(b[15:8]), + .res(shftob[15:8]), + .ov() +); + +FT64_shiftb ushftb2 +( + .instr(instr), + .a(a[23:16]), + .b(b[23:16]), + .res(shftob[23:16]), + .ov() +); + +FT64_shiftb ushftb3 +( + .instr(instr), + .a(a[31:24]), + .b(b[31:24]), + .res(shftob[31:24]), + .ov() +); + +FT64_shiftb ushftb4 +( + .instr(instr), + .a(a[39:32]), 
+    .b(b[39:32]),
+    .res(shftob[39:32]),
+    .ov()
+);
+
+// Byte-lane shifters for bits [47:40], [55:48] and [63:56]. Each lane uses
+// its own slice of b as the shift amount and drives its own slice of shftob.
+FT64_shiftb ushftb5
+(
+    .instr(instr),
+    .a(a[47:40]),
+    .b(b[47:40]),
+    .res(shftob[47:40]),
+    .ov()
+);
+
+FT64_shiftb ushftb6
+(
+    .instr(instr),
+    .a(a[55:48]),
+    .b(b[55:48]),
+    .res(shftob[55:48]),
+    .ov()
+);
+
+FT64_shiftb ushftb7
+(
+    .instr(instr),
+    .a(a[63:56]),
+    .b(b[63:56]),
+    .res(shftob[63:56]),
+    .ov()
+);
+
+// Leading-zero count. For sz 0..2 the operand is the low 8/16/32 bits of a
+// with every bit above it forced to one.
+// NOTE(review): with ones above the operand a leading-zero count looks like
+// it would always be 0 for those sizes - confirm against cntlz64.
+cntlz64 uclz1
+(
+    .i(sz==2'd0 ? {56'hFFFFFFFFFFFFFF,a[7:0]} :
+       sz==2'd1 ? {48'hFFFFFFFFFFFF,a[15:0]} :
+       sz==2'd2 ? {32'hFFFFFFFF,a[31:0]} : a),
+    .o(clzo)
+);
+
+// Leading-one count; narrow operands are passed as a plain part-select of a.
+cntlo64 uclo1
+(
+    .i(sz==2'd0 ? a[7:0] : sz==2'd1 ? a[15:0] : sz==2'd2 ? a[31:0] : a),
+    .o(cloo)
+);
+
+// Population count; same operand selection as the leading-one counter.
+cntpop64 ucpop1
+(
+    .i(sz==2'd0 ? a[7:0] : sz==2'd1 ? a[15:0] : sz==2'd2 ? a[31:0] : a),
+    .o(cpopo)
+);
+
+// BCD add / subtract / multiply helpers (results used by the `BCD case arms).
+wire [7:0] bcdaddo,bcdsubo;
+wire [15:0] bcdmulo;
+BCDAdd ubcd1 (1'b0,a,b,bcdaddo);
+BCDSub ubcd2 (1'b0,a,b,bcdsubo);
+BCDMul2 ubcd3 (a,b,bcdmulo);
+
+// Narrow sums and differences of the low 8/16/32 bits of a and b.
+wire [7:0] s8 = a[7:0] + b[7:0];
+wire [15:0] s16 = a[15:0] + b[15:0];
+wire [31:0] s32 = a[31:0] + b[31:0];
+wire [7:0] d8 = a[7:0] - b[7:0];
+wire [15:0] d16 = a[15:0] - b[15:0];
+wire [31:0] d32 = a[31:0] - b[31:0];
+// Full-width bitwise results shared by several case arms.
+wire [63:0] and64 = a & b;
+wire [63:0] or64 = a | b;
+wire [63:0] xor64 = a ^ b;
+// Per-lane reduction-OR of a: bit 0 of every lane holds the OR of that
+// lane's bits, all other lane bits are zero. Each flag must be preceded by
+// exactly (lane width - 1) zero bits so the concatenation is 64 bits wide
+// and every flag lands on its lane boundary.
+wire [63:0] redor64 = {63'd0,|a};
+wire [63:0] redor32 = {31'd0,|a[63:32],31'd0,|a[31:0]};
+wire [63:0] redor16 = {15'd0,|a[63:48],15'd0,|a[47:32],15'd0,|a[31:16],15'd0,|a[15:0]};
+// Fixed: the [55:48] lane was padded with 6'b0, which made the vector 63
+// bits wide and moved the [63:56] flag from bit 56 down to bit 55.
+wire [63:0] redor8 = {7'b0,|a[63:56],7'b0,|a[55:48],7'd0,|a[47:40],7'd0,|a[39:32],7'd0,
+                      |a[31:24],7'd0,|a[23:16],7'd0,|a[15:8],7'd0,|a[7:0]};
+// Zero- and sign-extended immediates.
+wire [63:0] zxb10 = {54'd0,b[9:0]};
+wire [63:0] sxb10 = {{54{b[9]}},b[9:0]};
+wire [63:0] zxb26 = {38'd0,instr[47:32],instr[27:18]};
+wire [63:0] sxb26 = {{38{instr[47]}},instr[47:32],instr[27:18]};
+reg [15:0] mask;
+wire [4:0] cpopom;
+// Signed views of the three operands.
+wire signed [63:0] as = a;
+wire signed [63:0] bs = b;
+wire signed [63:0] cs = c;
+
+// mask[n] is set for n = 0..ven inclusive and clear above that.
+always @*
+for (n = 0; n < 16; n = n + 1)
+    if (n <= ven)
+        mask[n] = 1'b1;
+    else
+        mask[n] = 1'b0;
+
+// Population count of the vector mask bits selected by mask.
+cntpop16 ucpop2
+(
+    .i(vm 
& mask), + .o(cpopom) +); + +wire [5:0] lsto, fsto; +ffz24 uffo1 +( + .i(~{8'h00,a[15:0]}), + .o(lsto) +); + +flz24 uflo1 +( + .i(~{8'h00,a[15:0]}), + .o(fsto) +); + +wire [DBW-1:0] bmmo; +FT64_BMM ubmm1 +( + .op(1'b0), + .a(a), + .b(b), + .o(bmmo) +); + +always @* +begin +case(instr[`INSTRUCTION_OP]) +`IVECTOR: + if (SUP_VECTOR) + case(instr[`INSTRUCTION_S2]) + `VABS: o[63:0] = a[63] ? -a : a; + `VSIGN: o[63:0] = a[63] ? 64'hFFFFFFFFFFFFFFFF : a==64'd0 ? 64'd0 : 64'd1; + `VMxx: + case(instr[25:23]) + `VMAND: o[63:0] = and64; + `VMOR: o[63:0] = or64; + `VMXOR: o[63:0] = xor64; + `VMXNOR: o[63:0] = ~(xor64); + `VMPOP: o[63:0] = {57'd0,cpopo}; + `VMFILL: for (n = 0; n < 64; n = n + 1) + o[n] = (n < a); + // Change the following when VL > 16 + `VMFIRST: o[63:0] = fsto==5'd31 ? 64'd64 : fsto; + `VMLAST: o[63:0] = lsto==5'd31 ? 64'd64 : lsto; + endcase + `VADD,`VADDS: o[63:0] = vm[ven] ? a + b : c; + `VSUB,`VSUBS: o[63:0] = vm[ven] ? a - b : c; + `VMUL,`VMULS: o[63:0] = vm[ven] ? prod[DBW-1:0] : c; + `VDIV,`VDIVS: o[63:0] = BIG ? (vm[ven] ? divq : c) : 64'hCCCCCCCCCCCCCCCC; + `VAND,`VANDS: o[63:0] = vm[ven] ? a & b : c; + `VOR,`VORS: o[63:0] = vm[ven] ? a | b : c; + `VXOR,`VXORS: o[63:0] = vm[ven] ? a ^ b : c; + `VCNTPOP: o[63:0] = {57'd0,cpopo}; + `VSHLV: o[63:0] = a; // no masking here + `VSHRV: o[63:0] = a; + `VCMPRSS: o[63:0] = a; + `VCIDX: o[63:0] = a * ven; + `VSCAN: o[63:0] = a * (cpopom==0 ? 0 : cpopom-1); + `VSxx,`VSxxS, + `VSxxb,`VSxxSb: + case({instr[26],instr[20:19]}) + `VSEQ: begin + o[63:0] = c; + o[ven] = vm[ven] ? a==b : c[ven]; + end + `VSNE: begin + o[63:0] = c; + o[ven] = vm[ven] ? a!=b : c[ven]; + end + `VSLT: begin + o[63:0] = c; + o[ven] = vm[ven] ? $signed(a) < $signed(b) : c[ven]; + end + `VSGE: begin + o[63:0] = c; + o[ven] = vm[ven] ? $signed(a) >= $signed(b) : c[ven]; + end + `VSLE: begin + o[63:0] = c; + o[ven] = vm[ven] ? $signed(a) <= $signed(b) : c[ven]; + end + `VSGT: begin + o[63:0] = c; + o[ven] = vm[ven] ? 
$signed(a) > $signed(b) : c[ven];
+                end
+        default:    o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+        endcase
+    // Unsigned vector set-on-compare: start from c, then replace bit ven
+    // with the compare result when the mask bit vm[ven] is set.
+    `VSxxU,`VSxxSU,
+    `VSxxUb,`VSxxSUb:
+        case({instr[26],instr[20:19]})
+        `VSEQ:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a==b : c[ven];
+                end
+        `VSNE:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a!=b : c[ven];
+                end
+        `VSLT:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a < b : c[ven];
+                end
+        `VSGE:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a >= b : c[ven];
+                end
+        `VSLE:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a <= b : c[ven];
+                end
+        `VSGT:  begin
+                    o[63:0] = c;
+                    o[ven] = vm[ven] ? a > b : c[ven];
+                end
+        default:    o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+        endcase
+    `VBITS2V:   o[63:0] = vm[ven] ? a[ven] : c;
+    `V2BITS:    begin
+                    o[63:0] = b;
+                    o[ven] = vm[ven] ? a[0] : b[ven];
+                end
+    `VSHL,`VSHR,`VASR:  o[63:0] = BIG ? shfto : 64'hCCCCCCCCCCCCCCCC;
+    `VXCHG:     o[63:0] = vm[ven] ? b : a;
+    default:    o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+    endcase
+    else
+        // Fixed: was a non-blocking "<=". Every other assignment to o in this
+        // combinational block is blocking; mixing the two on one variable is
+        // rejected or warned about by common tools.
+        o[63:0] = 64'hCCCCCCCCCCCCCCCC;
+`R2:
+    if (instr[6])
+        case(instr[47:42])
+        `SHIFTR:
+            begin
+                // Pick the shifter result that matches the size field and
+                // extend it to 64 bits. Sizes 4..6 take the full per-lane
+                // result vector unchanged.
+                case(instr[35:33])
+                `ASL,`ASR,`ROL,`ROR:
+                    case(instr[32:30])  // size
+                    3'd0:   shift8 = {{56{shftob[7]}},shftob[7:0]};
+                    // Fixed: the sign bit was taken from shftob[15] (a
+                    // byte-lane shifter output) although the data comes
+                    // from the 16-bit shifter result shftco[15:0].
+                    3'd1:   shift8 = {{48{shftco[15]}},shftco[15:0]};
+                    3'd2:   shift8 = {{32{shftho[31]}},shftho[31:0]};
+                    3'd3,3'd7:  shift8 = shfto;
+                    3'd4:   shift8 = shftob;
+                    3'd5:   shift8 = shftco;
+                    3'd6:   shift8 = shftho;
+                    endcase
+                `SHL,`SHR:
+                    case(instr[32:30])  // size
+                    3'd0:   shift8 = {56'd0,shftob[7:0]};
+                    3'd1:   shift8 = {48'd0,shftco[15:0]};
+                    3'd2:   shift8 = {32'd0,shftho[31:0]};
+                    3'd3,3'd7:  shift8 = shfto;
+                    3'd4:   shift8 = shftob;
+                    3'd5:   shift8 = shftco;
+                    3'd6:   shift8 = shftho;
+                    endcase
+                // shift8 is not assigned on this path, so it keeps its
+                // previous value; o is overwritten by the case below.
+                default:    o[63:0] = 64'hDCDCDCDCDCDCDCDC;
+                endcase
+                // The value driven out is shift9, not shift8.
+                case(instr[35:33])
+                `ASL,`ASR,`SHL,`SHR,`ROL,`ROR:
+                    o[63:0] = shift9;
+                default:    o[63:0] = 64'hDCDCDCDCDCDCDCDC;
+                endcase
+            end
+        // Three-operand signed minimum of a, b and c (only size code 3).
+        `MIN:
+            case(instr[30:28])
+            3'd3:
+                if (as < bs && as < cs)
+                    o[63:0] = as;
+                else if (bs < cs)
+                    o[63:0] = bs;
+                else
+                    o[63:0] = cs;
+            default:    o = 
64'hDEADDEADDEADDEAD; + endcase + `CMOVEZ: begin + o = (a==64'd0) ? b : c; + end + `CMOVNZ: if (instr[41:39]==3'd4) + o = (a!=64'd0) ? b : {{48{instr[38]}},instr[38:23]}; + else + o = (a!=64'd0) ? b : c; + default: o = 64'hDEADDEADDEADDEAD; + endcase + else + casez(instr[`INSTRUCTION_S2]) + `BCD: + case(instr[`INSTRUCTION_S1]) + `BCDADD: o[63:0] = BIG ? bcdaddo : 64'hCCCCCCCCCCCCCCCC; + `BCDSUB: o[63:0] = BIG ? bcdsubo : 64'hCCCCCCCCCCCCCCCC; + `BCDMUL: o[63:0] = BIG ? bcdmulo : 64'hCCCCCCCCCCCCCCCC; + default: o[63:0] = 64'hDEADDEADDEADDEAD; + endcase + `MOV: begin + o[63:0] = a; + end + `VMOV: o[63:0] = a; + `R1: + case(instr[`INSTRUCTION_S1]) + `CNTLZ: o[63:0] = BIG ? {57'd0,clzo} : 64'hCCCCCCCCCCCCCCCC; + `CNTLO: o[63:0] = BIG ? {57'd0,cloo} : 64'hCCCCCCCCCCCCCCCC; + `CNTPOP: o[63:0] = BIG ? {57'd0,cpopo} : 64'hCCCCCCCCCCCCCCCC; + `ABS: case(sz[1:0]) + 2'd0: o[63:0] = BIG ? (a[7] ? -a[7:0] : a[7:0]) : 64'hCCCCCCCCCCCCCCCC; + 2'd1: o[63:0] = BIG ? (a[15] ? -a[15:0] : a[15:0]) : 64'hCCCCCCCCCCCCCCCC; + 2'd2: o[63:0] = BIG ? (a[31] ? -a[31:0] : a[31:0]) : 64'hCCCCCCCCCCCCCCCC; + 2'd3: o[63:0] = BIG ? (a[63] ? 
-a : a) : 64'hCCCCCCCCCCCCCCCC; + endcase + `NOT: case(sz[1:0]) + 2'd0: o = {~|a[63:56],~|a[55:48],~|a[47:40],~|a[39:32],~|a[31:24],~|a[23:16],~|a[15:8],~|a[7:0]}; + 2'd1: o = {~|a[63:48],~|a[47:32],~|a[31:16],~|a[15:0]}; + 2'd2: o = {~|a[63:32],~|a[31:0]}; + 2'd3: o = ~|a[63:0]; + endcase + `NEG: + case(sz[1:0]) + 2'd0: o = {-a[63:56],-a[55:48],-a[47:40],-a[39:32],-a[31:24],-a[23:16],-a[15:8],-a[7:0]}; + 2'd1: o = {-a[63:48],-a[47:32],-a[31:16],-a[15:0]}; + 2'd2: o = {-a[63:32],-a[31:0]}; + 2'd3: o = -a; + endcase + `REDOR: case(sz[1:0]) + 2'd0: o = redor8; + 2'd1: o = redor16; + 2'd2: o = redor32; + 2'd3: o = redor64; + endcase + `ZXH: o[63:0] = {32'd0,a[31:0]}; + `ZXC: o[63:0] = {48'd0,a[15:0]}; + `ZXB: o[63:0] = {56'd0,a[7:0]}; + `SXH: o[63:0] = {{32{a[31]}},a[31:0]}; + `SXC: o[63:0] = {{48{a[15]}},a[15:0]}; + `SXB: o[63:0] = {{56{a[7]}},a[7:0]}; +// 5'h1C: o[63:0] = tmem[a[9:0]]; + default: o = 64'hDEADDEADDEADDEAD; + endcase + `BMM: o[63:0] = BIG ? bmmo : 64'hCCCCCCCCCCCCCCCC; + `SHIFT31, + `SHIFT63, + `SHIFTR: + begin + if (instr[25:23]==`SHL || instr[25:23]==`ASL) + o[63:0] = shfto; + else + o[63:0] = BIG ? 
shfto : 64'hCCCCCCCCCCCCCCCC; + $display("BIG=%d",BIG); + if(!BIG) + $stop; + end + `ADD: +`ifdef SIMD + case(sz) + 3'd0,3'd4: + begin + o[7:0] = a[7:0] + b[7:0]; + o[15:8] = a[15:8] + b[15:8]; + o[23:16] = a[23:16] + b[23:16]; + o[31:24] = a[31:24] + b[31:24]; + o[39:32] = a[39:32] + b[39:32]; + o[47:40] = a[47:40] + b[47:40]; + o[55:48] = a[55:48] + b[55:48]; + o[63:56] = a[63:56] + b[63:56]; + end + 3'd1,3'd5: + begin + o[15:0] = a[15:0] + b[15:0]; + o[31:16] = a[31:16] + b[31:16]; + o[47:32] = a[47:32] + b[47:32]; + o[63:48] = a[63:48] + b[63:48]; + end + 3'd2,3'd6: + begin + o[31:0] = a[31:0] + b[31:0]; + o[63:32] = a[63:32] + b[63:32]; + end + default: + begin + o[63:0] = a + b; + end + endcase +`else + o = a + b; +`endif + `SUB: +`ifdef SIMD + case(sz) + 3'd0,3'd4: + begin + o[7:0] = a[7:0] - b[7:0]; + o[15:8] = a[15:8] - b[15:8]; + o[23:16] = a[23:16] - b[23:16]; + o[31:24] = a[31:24] - b[31:24]; + o[39:32] = a[39:32] - b[39:32]; + o[47:40] = a[47:40] - b[47:40]; + o[55:48] = a[55:48] - b[55:48]; + o[63:56] = a[63:56] - b[63:56]; + end + 3'd1,3'd5: + begin + o[15:0] = a[15:0] - b[15:0]; + o[31:16] = a[31:16] - b[31:16]; + o[47:32] = a[47:32] - b[47:32]; + o[63:48] = a[63:48] - b[63:48]; + end + 3'd2,3'd6: + begin + o[31:0] = a[31:0] - b[31:0]; + o[63:32] = a[63:32] - b[63:32]; + end + default: + begin + o[63:0] = a - b; + end + endcase +`else + o = a - b; +`endif + `SLT: tskSlt(instr,instr[25:23],a,b,o); + `SLTU: tskSltu(instr,instr[25:23],a,b,o); + `SLE: tskSle(instr,instr[25:23],a,b,o); + `SLEU: tskSleu(instr,instr[25:23],a,b,o); + `AND: o = and64; + `OR: o = or64; + `XOR: o = xor64; + `NAND: o = ~and64; + `NOR: o = ~or64; + `XNOR: o = ~xor64; + `SEI: o = a | instr[21:16]; + `RTI: o = a | instr[21:16]; + `MUX: for (n = 0; n < 64; n = n + 1) + o[n] <= a[n] ? 
b[n] : c[n];
+    // Multiply: the parallel sizes pack the low half of each lane product
+    // back into its lane; every other size returns the low DBW bits of prod.
+    // NOTE(review): scalar byt/char/half also fall through to prod - confirm
+    // the multiplier driving prod is loaded for those sizes.
+    `MULU,`MULSU,`MUL:
+        case(sz)
+        byt_para:   o[63:0] = {prod87[7:0],prod86[7:0],prod85[7:0],prod84[7:0],prod83[7:0],prod82[7:0],prod81[7:0],prod80[7:0]};
+        char_para:  o[63:0] = {prod163[15:0],prod162[15:0],prod161[15:0],prod160[15:0]};
+        half_para:  o[63:0] = {prod321[31:0],prod320[31:0]};
+        default:    o[63:0] = prod[DBW-1:0];
+        endcase
+    // Fixed-point multiply: keep the middle of the product and round by
+    // adding the bit just below the kept field.
+    `FXMUL:
+        case(sz)
+        half_para:  o = {prod321[47:16] + prod321[15],prod320[47:16] + prod320[15]};
+        default:    o = prod[95:32] + prod[31];
+        endcase
+    `MULF:  o = a[23:0] * b[15:0];
+    // Divide / modulo results come from the divider when BIG is set.
+    `DIVU:  o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+    `DIVSU: o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+    `DIV:   o[63:0] = BIG ? divq : 64'hCCCCCCCCCCCCCCCC;
+    `MODU:  o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+    `MODSU: o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+    `MOD:   o[63:0] = BIG ? rem : 64'hCCCCCCCCCCCCCCCC;
+    // Indexed effective address: a plus b scaled by instr[22:21].
+    `LEAX:
+        begin
+            o[63:0] = BIG ? a + (b << instr[22:21]) : 64'hCCCCCCCCEEEEEEEE;
+            //o[63:44] = PTR;
+        end
+    // Signed minimum. With SIMD defined, sz selects the lane width and each
+    // lane is compared independently; otherwise a single 64-bit compare.
+    `MIN:
+`ifdef SIMD
+        case(sz)
+        3'd0,3'd4:
+            begin
+            o[7:0] = BIG ? ($signed(a[7:0]) < $signed(b[7:0]) ? a[7:0] : b[7:0]) : 8'hCC;
+            o[15:8] = BIG ? ($signed(a[15:8]) < $signed(b[15:8]) ? a[15:8] : b[15:8]) : 64'hCCCCCCCCCCCCCCCC;
+            o[23:16] = BIG ? ($signed(a[23:16]) < $signed(b[23:16]) ? a[23:16] : b[23:16]) : 64'hCCCCCCCCCCCCCCCC;
+            o[31:24] = BIG ? ($signed(a[31:24]) < $signed(b[31:24]) ? a[31:24] : b[31:24]) : 64'hCCCCCCCCCCCCCCCC;
+            o[39:32] = BIG ? ($signed(a[39:32]) < $signed(b[39:32]) ? a[39:32] : b[39:32]) : 64'hCCCCCCCCCCCCCCCC;
+            o[47:40] = BIG ? ($signed(a[47:40]) < $signed(b[47:40]) ? a[47:40] : b[47:40]) : 64'hCCCCCCCCCCCCCCCC;
+            o[55:48] = BIG ? ($signed(a[55:48]) < $signed(b[55:48]) ? a[55:48] : b[55:48]) : 64'hCCCCCCCCCCCCCCCC;
+            o[63:56] = BIG ? ($signed(a[63:56]) < $signed(b[63:56]) ? a[63:56] : b[63:56]) : 64'hCCCCCCCCCCCCCCCC;
+            end
+        3'd1,3'd5:
+            begin
+            o[15:0] = BIG ? ($signed(a[15:0]) < $signed(b[15:0]) ? a[15:0] : b[15:0]) : 64'hCCCCCCCCCCCCCCCC;
+            // Fixed: this lane was written as [32:16] (17 bits), so bit 32
+            // was used as the sign bit of the compare instead of bit 31.
+            o[31:16] = BIG ? ($signed(a[31:16]) < $signed(b[31:16]) ? a[31:16] : b[31:16]) : 64'hCCCCCCCCCCCCCCCC;
+            o[47:32] = BIG ? ($signed(a[47:32]) < $signed(b[47:32]) ? a[47:32] : b[47:32]) : 64'hCCCCCCCCCCCCCCCC;
+            o[63:48] = BIG ? ($signed(a[63:48]) < $signed(b[63:48]) ? a[63:48] : b[63:48]) : 64'hCCCCCCCCCCCCCCCC;
+            end
+        3'd2,3'd6:
+            begin
+            o[31:0] = BIG ? ($signed(a[31:0]) < $signed(b[31:0]) ? a[31:0] : b[31:0]) : 64'hCCCCCCCCCCCCCCCC;
+            o[63:32] = BIG ? ($signed(a[63:32]) < $signed(b[63:32]) ? a[63:32] : b[63:32]) : 64'hCCCCCCCCCCCCCCCC;
+            end
+        3'd3,3'd7:
+            begin
+            o[63:0] = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+            end
+        endcase
+`else
+        o[63:0] = BIG ? ($signed(a) < $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC;
+`endif
+    // Signed maximum; same lane layout as the minimum above.
+    `MAX:
+`ifdef SIMD
+        case(sz)
+        3'd0,3'd4:
+            begin
+            o[7:0] = BIG ? ($signed(a[7:0]) > $signed(b[7:0]) ? a[7:0] : b[7:0]) : 64'hCCCCCCCCCCCCCCCC;
+            o[15:8] = BIG ? ($signed(a[15:8]) > $signed(b[15:8]) ? a[15:8] : b[15:8]) : 64'hCCCCCCCCCCCCCCCC;
+            o[23:16] = BIG ? ($signed(a[23:16]) > $signed(b[23:16]) ? a[23:16] : b[23:16]) : 64'hCCCCCCCCCCCCCCCC;
+            o[31:24] = BIG ? ($signed(a[31:24]) > $signed(b[31:24]) ? a[31:24] : b[31:24]) : 64'hCCCCCCCCCCCCCCCC;
+            o[39:32] = BIG ? ($signed(a[39:32]) > $signed(b[39:32]) ? a[39:32] : b[39:32]) : 64'hCCCCCCCCCCCCCCCC;
+            o[47:40] = BIG ? ($signed(a[47:40]) > $signed(b[47:40]) ? a[47:40] : b[47:40]) : 64'hCCCCCCCCCCCCCCCC;
+            o[55:48] = BIG ? ($signed(a[55:48]) > $signed(b[55:48]) ? a[55:48] : b[55:48]) : 64'hCCCCCCCCCCCCCCCC;
+            o[63:56] = BIG ? ($signed(a[63:56]) > $signed(b[63:56]) ? a[63:56] : b[63:56]) : 64'hCCCCCCCCCCCCCCCC;
+            end
+        3'd1,3'd5:
+            begin
+            o[15:0] = BIG ? ($signed(a[15:0]) > $signed(b[15:0]) ? a[15:0] : b[15:0]) : 64'hCCCCCCCCCCCCCCCC;
+            // Fixed: lane was [32:16]; see the matching lane in the minimum.
+            o[31:16] = BIG ? ($signed(a[31:16]) > $signed(b[31:16]) ? a[31:16] : b[31:16]) : 64'hCCCCCCCCCCCCCCCC;
+            o[47:32] = BIG ? ($signed(a[47:32]) > $signed(b[47:32]) ? a[47:32] : b[47:32]) : 64'hCCCCCCCCCCCCCCCC;
+            o[63:48] = BIG ? 
($signed(a[63:48]) > $signed(b[63:48]) ? a[63:48] : b[63:48]) : 64'hCCCCCCCCCCCCCCCC; + end + 3'd2,3'd6: + begin + o[31:0] = BIG ? ($signed(a[31:0]) > $signed(b[31:0]) ? a[31:0] : b[31:0]) : 64'hCCCCCCCCCCCCCCCC; + o[63:32] = BIG ? ($signed(a[63:32]) > $signed(b[63:32]) ? a[63:32] : b[63:32]) : 64'hCCCCCCCCCCCCCCCC; + end + 3'd3,3'd7: + begin + o[63:0] = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC; + end + endcase +`else + o[63:0] = BIG ? ($signed(a) > $signed(b) ? a : b) : 64'hCCCCCCCCCCCCCCCC; +`endif + `MAJ: o = (a & b) | (a & c) | (b & c); + `CHK: o[63:0] = (a >= b && a < c); + /* + `RTOP: case(c[5:0]) + `RTADD: o = a + b; + `RTSUB: o = a - b; + `RTAND: o = and64; + `RTOR: o = or64; + `RTXOR: o = xor64; + `RTNAND: o = ~and64; + `RTNOR: o = ~or64; + `RTXNOR: o = ~xor64; + `RTSLT: o = as < bs; + `RTSGE: o = as >= bs; + `RTSLE: o = as <= bs; + `RTSGT: o = as > bs; + `RTSEQ: o = as==bs; + `RTSNE: o = as!=bs; + endcase + */ + `TLB: o = BIG ? tlbo : 64'hDEADDEADDEADDEAD; + default: o[63:0] = 64'hDEADDEADDEADDEAD; + endcase +`MEMNDX: + if (instr[7:6]==2'b10) begin + if (instr[31]) + case({instr[31:28],instr[17:16]}) + `PUSH: + begin + usa = a - 4'd8; + o = {pb[50:0],13'd0} + usa; + end + default: o = 64'hDEADDEADDEADDEAD; + endcase + else + o = 64'hDEADDEADDEADDEAD; + end + else if (instr[7:6]==2'b00) begin + if (!instr[31]) + case({instr[31:28],instr[22:21]}) + `CACHEX,`LVX, + `LBX,`LBUX,`LCX,`LCUX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + `LHX,`LHUX,`LWX,`LWRX: + if (BIG) begin + usa = a + (c << instr[19:18]); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCEEEEEEEE; + `LVX,`SVX: + if (BIG) begin + usa = a + (c << 2'd3); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCCCCCCCCC; + `LVWS,`SVWS: + if (BIG) begin + usa = a + ({c * ven,3'b000}); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCCCCCCCCC; + default: o = 64'hDEADDEADDEADDEAD; + endcase + else + case({instr[31:28],instr[17:16]}) + `PUSH: + 
begin + usa = a - 4'd8; + o = {pb[50:0],13'd0} + usa; + end + `SBX,`SCX,`SHX,`SWX,`SWCX: + if (BIG) begin + usa = a + (c << instr[14:13]); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCEEEEEEEE; + `SVX: if (BIG) begin + usa = a + (c << 2'd3); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCCCCCCCCC; + `SVWS: + if (BIG) begin + usa = a + ({c * ven,3'b000}); + o = {pb[50:0],13'd0} + usa; + end + else + o = 64'hCCCCCCCCCCCCCCCC; + default: o = 64'hDEADDEADDEADDEAD; + endcase + end + else + o[63:0] = 64'hDEADDEADDEADDEAD; +`AUIPC: + begin + if (instr[7:6]==2'b01) + o[63:0] = pc + {instr[47:18],instr[12:8],30'd0}; + else + o[63:0] = pc + {{15{instr[31]}},instr[31:18],instr[12:8],30'd0}; + o[29:0] = 30'd0; +// o[63:44] = PTR; + end +`LUI: + begin + if (instr[7:6]==2'b01) + o = {instr[47:18],instr[12:8],30'd0}; + else + o = {{15{instr[31]}},instr[31:18],instr[12:8],30'd0}; + end +`ADDI: o = a + b; +`SLTI: o = $signed(a) < $signed(b); +`SLTUI: o = a < b; +`SGTI: o = $signed(a) > $signed(b); +`SGTUI: o = a > b; +`ANDI: o = a & andb; +`ORI: o = a | orb; +`XORI: o = a ^ orb; +`XNORI: o = ~(a ^ orb); +`MULUI: o = prod[DBW-1:0]; +`MULI: o = prod[DBW-1:0]; +`MULFI: o = a[23:0] * b[15:0]; +`DIVUI: o = BIG ? divq : 64'hCCCCCCCCCCCCCCCC; +`DIVI: o = BIG ? divq : 64'hCCCCCCCCCCCCCCCC; +`MODI: o = BIG ? 
rem : 64'hCCCCCCCCCCCCCCCC; +`LB,`LBU,`SB: + begin + usa = a + b; + o = {pb[50:0],13'd0} + usa; + end +`Lx,`LxU,`Sx,`LVx,`LVxU: + begin + casez(b[2:0]) + 3'b100: + begin + usa = a + {b[63:3],3'b0}; // LW / SW + o = {pb[50:0],13'd0} + usa; + end + 3'b?10: + begin + usa = a + {b[63:2],2'b0}; // LH / LHU / SH + o = {pb[50:0],13'd0} + usa; + end + default: + begin + usa = a + {b[63:1],1'b0}; // LC / LCU / SC + o = {pb[50:0],13'd0} + usa; + end + endcase + end +`LWR,`SWC,`CAS,`CACHE: + begin + usa = a + b; + o = {pb[50:0],13'd0} + usa; + end +`LV,`SV: + begin + usa = a + b + {ven,3'b0}; + o = {pb[50:0],13'd0} + usa; + end +`CSRRW: + case(instr[27:18]) + 10'h044: o = BIG ? (csr | {39'd0,thrd,24'h0}) : 64'hDDDDDDDDDDDDDDDD; + default: o = BIG ? csr : 64'hDDDDDDDDDDDDDDDD; + endcase +`BITFIELD: o = BIG ? bfout : 64'hCCCCCCCCCCCCCCCC; +default: o = 64'hDEADDEADDEADDEAD; +endcase +end +

// adrDone: set by reset, cleared when a new operation is loaded (ld),
// and set again once a memory or shift operation is flagged (mem|shift).
always @(posedge clk)
if (rst)
    adrDone <= TRUE;
else begin
    if (ld)
        adrDone <= FALSE;
    else if (mem|shift)
        adrDone <= TRUE;
end

// adrIdle: follows exactly the same set/clear conditions as adrDone.
always @(posedge clk)
if (rst)
    adrIdle <= TRUE;
else begin
    if (ld)
        adrIdle <= FALSE;
    else if (mem|shift)
        adrIdle <= TRUE;
end

// Registered result of the combined shift-and-operate forms.
// Only R2 instructions with the L2 field equal to 2'b01 are decoded:
// instr[47:42] must be one of the listed ops, then instr[41:36] selects how
// shift8 is combined with operand c. Anything else loads a recognisable
// filler pattern (64'hDCDC... / 64'hCCCC...CE).
// Non-blocking assignments are used because this is a clocked block; the
// previous blocking assignments could race with other posedge-clk readers of
// addro in simulation.
always @(posedge clk)
case(instr[`INSTRUCTION_OP])
`R2:
    if (instr[`INSTRUCTION_L2]==2'b01)
        case(instr[47:42])
        `ADD,`SUB,
        `AND,`OR,`XOR,`NAND,`NOR,`XNOR,
        `SHIFTR:
            case(instr[41:36])
            `R1:
                case(instr[22:18])
                `COM:   addro[63:0] <= ~shift8;
                `NOT:   addro[63:0] <= ~|shift8;
                `NEG:   addro[63:0] <= -shift8;
                default:    addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
                endcase
            `ADD:   addro[63:0] <= shift8 + c;
            `SUB:   addro[63:0] <= shift8 - c;
            `AND:   addro[63:0] <= shift8 & c;
            `OR:    addro[63:0] <= shift8 | c;
            `XOR:   addro[63:0] <= shift8 ^ c;
            default:    addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
            endcase
        default:    addro[63:0] <= 64'hDCDCDCDCDCDCDCDC;
        endcase
    else
        addro <= 64'hCCCCCCCCCCCCCCCE;
default:    addro <= 64'hCCCCCCCCCCCCCCCE;
endcase

// Done/idle tracking for shift-and-operate instructions (BIG builds only):
// both flags drop on the cycle the instruction is loaded and are raised
// again on any later cycle where the instruction is still present without ld.
reg sao_done, sao_idle;
always @(posedge clk)
if (rst) begin
    sao_done <= 1'b1;
    sao_idle <= 1'b1;
end
else begin
    if (ld & IsShiftAndOp(instr) & BIG) begin
        sao_done <= 1'b0;
        sao_idle <= 1'b0;
    end
    else begin
        if (IsShiftAndOp(instr) & BIG) begin
            sao_done <= 1'b1;
            sao_idle <= 1'b1;
        end
    end
end

// Generate done signal
// Combinational priority mux: multiply (by operand size), divide/modulus,
// shift-and-operate, shift, TLB; everything else completes immediately.
// Blocking assignments are used because this block is combinational.
always @*
if (rst)
    done = TRUE;
else begin
    if (IsMul(instr)) begin
        case(sz)
        byt,byt_para:   done = mult_done80;
        char,char_para: done = mult_done160;
        half,half_para: done = mult_done320;
        default:        done = mult_done;
        endcase
    end
    else if (IsDivmod(instr) & BIG)
        done = div_done;
    else if (IsShiftAndOp(instr) & BIG)
        done = sao_done;
    else if (shift)
        done = adrDone;
    else if (tlb & BIG)
        done = tlb_done;
    else
        done = TRUE;
end

// Generate idle signal
// Same priority structure as the done signal, selecting the matching
// *_idle source for each instruction class.
always @*
if (rst)
    idle = TRUE;
else begin
    if (IsMul(instr)) begin
        case(sz)
        byt,byt_para:   idle = mult_idle80;
        char,char_para: idle = mult_idle160;
        half,half_para: idle = mult_idle320;
        default:        idle = mult_idle;
        endcase
    end
    else if (IsDivmod(instr) & BIG)
        idle = div_idle;
    else if (IsShiftAndOp(instr) & BIG)
        idle = sao_idle;
    else if (shift)
        idle = adrIdle;
    else if (tlb & BIG)
        idle = tlb_idle;
    else
        idle = TRUE;
end

// Signed overflow detect from the sign bits of the operands and the result.
//   op : 0 = add, 1 = subtract
//   a,b: sign bits of the two operands
//   s  : sign bit of the result
// Add overflows when a and b have the same sign and s differs from it;
// subtract overflows when a and b differ in sign and s equals b.
function fnOverflow;
input op;   // 0 = add, 1=sub
input a;
input b;
input s;
fnOverflow = (op ^ s ^ b) & (~op ^ a ^ b);
endfunction

+always @* +begin +//if ((tgt[4:0]==5'd31 || tgt[4:0]==5'd30) && (o[ABW-1:0] < {sbl[50:13],13'd0} || o[ABW-1:0] > {pl[50:0],13'h1FFF})) +// exc <= `FLT_STK; +//else +case(instr[`INSTRUCTION_OP]) +`R2: + case(instr[`INSTRUCTION_S2]) + `ADD: exc <= (fnOverflow(0,a[63],b[63],o[63]) & excen[0] & instr[24]) ? `FLT_OFL : `FLT_NONE; + `SUB: exc <= (fnOverflow(1,a[63],b[63],o[63]) & excen[1] & instr[24]) ? `FLT_OFL : `FLT_NONE; +// `ASL,`ASLI: exc <= (BIG & aslo & excen[2]) ? `FLT_OFL : `FLT_NONE; + `MUL,`MULSU: exc <= prod[63] ? (prod[127:64] != 64'hFFFFFFFFFFFFFFFF && excen[3] ? 
`FLT_OFL : `FLT_NONE ): + (prod[127:64] != 64'd0 && excen[3] ? `FLT_OFL : `FLT_NONE); + `FXMUL: exc <= prod[95] ? (prod[127:96] != 32'hFFFFFFFF && excen[3] ? `FLT_OFL : `FLT_NONE ): + (prod[127:96] != 32'd0 && excen[3] ? `FLT_OFL : `FLT_NONE); + `MULU: exc <= prod[127:64] != 64'd0 && excen[3] ? `FLT_OFL : `FLT_NONE; + `DIV,`DIVSU,`DIVU: exc <= BIG && excen[4] & divByZero ? `FLT_DBZ : `FLT_NONE; + `MOD,`MODSU,`MODU: exc <= BIG && excen[4] & divByZero ? `FLT_DBZ : `FLT_NONE; + default: exc <= `FLT_NONE; + endcase +`MULI: exc <= prod[63] ? (prod[127:64] != 64'hFFFFFFFFFFFFFFFF & excen[3] ? `FLT_OFL : `FLT_NONE): + (prod[127:64] != 64'd0 & excen[3] ? `FLT_OFL : `FLT_NONE); +`DIVI: exc <= BIG & excen[4] & divByZero & instr[27] ? `FLT_DBZ : `FLT_NONE; +`MODI: exc <= BIG & excen[4] & divByZero & instr[27] ? `FLT_DBZ : `FLT_NONE; +`CSRRW: exc <= (instr[27:21]==7'b0011011) ? `FLT_SEG : `FLT_NONE; +`MEMNDX: + begin +`ifdef SUPPORT_SEGMENTATION + if (usa < {lb[50:0],13'h0000} && usa > {ub[50:0],13'h1fff} && dl!=2'b00) + exc <= (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB; + else +`endif +`ifdef SUPPORT_BBMS + if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00) + exc <= `FLT_STK; + else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00) + exc <= `FLT_WRV; + else +`endif + begin + if (instr[7:6]==2'b10) begin + if (instr[31]) + case({instr[31:28],instr[17:16]}) + `PUSH: exc <= |o[2:0] ? 
`FLT_ALN : `FLT_NONE; + default: exc <= `FLT_UNIMP; + endcase + else + exc <= `FLT_UNIMP; + end + else if (instr[7:6]==2'b00) begin + if (!instr[31]) begin + if (BIG) begin + case({instr[31:28],instr[22:21]}) + `LBX,`LBUX,`LVBX,`LVBUX: exc <= `FLT_NONE; + `LCX,`LCUX,`LVCX,`LVCUX: exc <= |o[ 0] ? `FLT_ALN : `FLT_NONE; + `LVHX,`LVHUX,`LHX,`LHUX: exc <= |o[1:0] ? `FLT_ALN : `FLT_NONE; + `LWX,`LVWX,`LWRX, + `CACHEX,`LVX: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + `LVX,`SVX,`LVWS,`SVWS: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + default: exc <= `FLT_UNIMP; + endcase + end + else + exc <= `FLT_UNIMP; + end + else begin + if (BIG) begin + case({instr[31:28],instr[17:16]}) + `PUSH: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + `SBX: exc <= `FLT_NONE; + `SCX: exc <= |o[ 0] ? `FLT_ALN : `FLT_NONE; + `SHX: exc <= |o[1:0] ? `FLT_ALN : `FLT_NONE; + `SWX,`SWCX: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + `SVX: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + `SVWS: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + default: exc <= `FLT_UNIMP; + endcase + end + else + exc <= `FLT_UNIMP; + end + end + else + exc <= `FLT_UNIMP; + end + end +`ifdef SUPPORT_SEGMENTATION +`LB,`LBU,`SB: + if (usa < {lb[50:0],13'h0000} && usa > {ub[50:0],13'h1fff} && dl!=2'b00) + exc <= (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? 
`FLT_STK : `FLT_SGB; +`endif +`ifdef SUPPORT_BBMS +`LB,`LBU,`SB: + if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00) + exc <= `FLT_STK; + else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00) + exc <= `FLT_WRV; +`endif +`Lx,`Sx,`LxU,`LVx,`LVxU: + begin +`ifdef SUPPORT_SEGMENTATION + if (usa < {lb[50:0],13'h0000} && usa > {ub[50:0],13'h1fff} && dl!=2'b00) + exc <= (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? `FLT_STK : `FLT_SGB; + else +`endif +`ifdef SUPPORT_BBMS + if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00) + exc <= `FLT_STK; + else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00) + exc <= `FLT_WRV; + else +`endif + casez(b[2:0]) + 3'b100: exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; // LW / SW + 3'b?10: exc <= |o[1:0] ? `FLT_ALN : `FLT_NONE; // LH / LHU / SH + default: exc <= |o[ 0] ? `FLT_ALN : `FLT_NONE; // LC / LCU / SC + endcase + end +`LWR,`SWC,`CAS,`CACHE: + begin +`ifdef SUPPORT_SEGMENTATION + if (usa < {lb[50:0],13'h0000} && usa > {ub[50:0],13'h1fff} && dl!=2'b00) + exc <= (Ra[4:0]==5'd30 || Ra[4:0]==5'd31) ? 
`FLT_STK : `FLT_SGB; + else +`endif +`ifdef SUPPORT_BBMS + if ((Ra[4:0]==5'd30 || Ra[4:0]==5'd31) && (usa < {sbl[50:0],13'd0} || usa > {sbu[50:0],13'h1FF8}) && dl!=2'b00) + exc <= `FLT_STK; + else if (usa > {sbu[50:0],13'h1FFF} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {sbl[50:0],13'h0000} && usa > {dbu[50:0],13'h1fff} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa > {en[50:0],13'h1fff} && usa < {dbl[50:0],13'd0} && dl!=2'b00) + exc <= `FLT_SGB; + else if (usa < {ro[50:0],13'd0} && store && dl!=2'b00) + exc <= `FLT_WRV; + else +`endif + exc <= |o[2:0] ? `FLT_ALN : `FLT_NONE; + end +default: exc <= `FLT_NONE; +endcase +end +

// One-cycle registered copies of the shifter output and operand c.
reg [63:0] aa, bb;

always @(posedge clk)
begin
    aa <= shfto;
    bb <= c;
end

// ----------------------------------------------------------------------------
// Set-on-compare helper tasks.
//
// Each task writes a 0/1 flag (zero extended) to o. Without SIMD defined the
// compare is always on the full 64-bit operands. With SIMD defined, sz picks
// the compare width; the lane-parallel sizes write one flag into the least
// significant bit of every lane and zero the rest of the lane.
//
// The lane-parallel concatenations list the most significant lane first, so
// the flag for a[7:0] ends up in o[0], the flag for a[15:8] in o[8], and so
// on. (They were previously listed low lane first, which placed lane 0's flag
// in the top lane.)
//
// The instr argument is not referenced by any of the tasks.
// ----------------------------------------------------------------------------

// Signed "less than".
task tskSlt;
input [47:0] instr;
input [2:0] sz;
input [63:0] a;
input [63:0] b;
output [63:0] o;
begin
`ifdef SIMD
    case(sz[2:0])
    3'd0:   o[63:0] = $signed(a[7:0]) < $signed(b[7:0]);
    3'd1:   o[63:0] = $signed(a[15:0]) < $signed(b[15:0]);
    3'd2:   o[63:0] = $signed(a[31:0]) < $signed(b[31:0]);
    3'd3:   o[63:0] = $signed(a) < $signed(b);
    3'd4:   o[63:0] = {
                7'h0,$signed(a[63:56]) < $signed(b[63:56]),
                7'h0,$signed(a[55:48]) < $signed(b[55:48]),
                7'h0,$signed(a[47:40]) < $signed(b[47:40]),
                7'h0,$signed(a[39:32]) < $signed(b[39:32]),
                7'h0,$signed(a[31:24]) < $signed(b[31:24]),
                7'h0,$signed(a[23:16]) < $signed(b[23:16]),
                7'h0,$signed(a[15:8]) < $signed(b[15:8]),
                7'h0,$signed(a[7:0]) < $signed(b[7:0])
            };
    3'd5:   o[63:0] = {
                15'h0,$signed(a[63:48]) < $signed(b[63:48]),
                15'h0,$signed(a[47:32]) < $signed(b[47:32]),
                15'h0,$signed(a[31:16]) < $signed(b[31:16]),
                15'h0,$signed(a[15:0]) < $signed(b[15:0])
            };
    3'd6:   o[63:0] = {
                31'h0,$signed(a[63:32]) < $signed(b[63:32]),
                31'h0,$signed(a[31:0]) < $signed(b[31:0])
            };
    3'd7:   o[63:0] = $signed(a[63:0]) < $signed(b[63:0]);
    endcase
`else
    o[63:0] = $signed(a[63:0]) < $signed(b[63:0]);
`endif
end
endtask

// Signed "less than or equal".
task tskSle;
input [47:0] instr;
input [2:0] sz;
input [63:0] a;
input [63:0] b;
output [63:0] o;
begin
`ifdef SIMD
    case(sz[2:0])
    3'd0:   o[63:0] = $signed(a[7:0]) <= $signed(b[7:0]);
    3'd1:   o[63:0] = $signed(a[15:0]) <= $signed(b[15:0]);
    3'd2:   o[63:0] = $signed(a[31:0]) <= $signed(b[31:0]);
    3'd3:   o[63:0] = $signed(a) <= $signed(b);
    3'd4:   o[63:0] = {
                7'h0,$signed(a[63:56]) <= $signed(b[63:56]),
                7'h0,$signed(a[55:48]) <= $signed(b[55:48]),
                7'h0,$signed(a[47:40]) <= $signed(b[47:40]),
                7'h0,$signed(a[39:32]) <= $signed(b[39:32]),
                7'h0,$signed(a[31:24]) <= $signed(b[31:24]),
                7'h0,$signed(a[23:16]) <= $signed(b[23:16]),
                7'h0,$signed(a[15:8]) <= $signed(b[15:8]),
                7'h0,$signed(a[7:0]) <= $signed(b[7:0])
            };
    3'd5:   o[63:0] = {
                15'h0,$signed(a[63:48]) <= $signed(b[63:48]),
                15'h0,$signed(a[47:32]) <= $signed(b[47:32]),
                15'h0,$signed(a[31:16]) <= $signed(b[31:16]),
                15'h0,$signed(a[15:0]) <= $signed(b[15:0])
            };
    3'd6:   o[63:0] = {
                31'h0,$signed(a[63:32]) <= $signed(b[63:32]),
                31'h0,$signed(a[31:0]) <= $signed(b[31:0])
            };
    3'd7:   o[63:0] = $signed(a[63:0]) <= $signed(b[63:0]);
    endcase
`else
    o[63:0] = $signed(a[63:0]) <= $signed(b[63:0]);
`endif
end
endtask

// Unsigned "less than".
// NOTE(review): unlike tskSlt/tskSle/tskSleu, sizes 0-2 here perform the
// lane-parallel compare rather than a single compare of the low lane.
// That grouping is kept as found; confirm which behaviour is intended.
task tskSltu;
input [47:0] instr;
input [2:0] sz;
input [63:0] a;
input [63:0] b;
output [63:0] o;
begin
`ifdef SIMD
    case(sz[2:0])
    3'd4,3'd0:  o = {
                    7'h0,(a[63:56]) < (b[63:56]),
                    7'h0,(a[55:48]) < (b[55:48]),
                    7'h0,(a[47:40]) < (b[47:40]),
                    7'h0,(a[39:32]) < (b[39:32]),
                    7'h0,(a[31:24]) < (b[31:24]),
                    7'h0,(a[23:16]) < (b[23:16]),
                    7'h0,(a[15:8]) < (b[15:8]),
                    7'h0,(a[7:0]) < (b[7:0])
                };
    3'd5,3'd1:  o = {
                    15'h0,(a[63:48]) < (b[63:48]),
                    15'h0,(a[47:32]) < (b[47:32]),
                    15'h0,(a[31:16]) < (b[31:16]),
                    15'h0,(a[15:0]) < (b[15:0])
                };
    3'd6,3'd2:  o = {
                    31'h0,(a[63:32]) < (b[63:32]),
                    31'h0,(a[31:0]) < (b[31:0])
                };
    3'd7,3'd3:  o = (a[63:0]) < (b[63:0]);
    endcase
`else
    o = (a) < (b);
`endif
end
endtask

// Unsigned "less than or equal".
task tskSleu;
input [47:0] instr;
input [2:0] sz;
input [63:0] a;
input [63:0] b;
output [63:0] o;
begin
`ifdef SIMD
    case(sz[2:0])
    3'd0:   o[63:0] = (a[7:0]) <= (b[7:0]);
    3'd1:   o[63:0] = (a[15:0]) <= (b[15:0]);
    3'd2:   o[63:0] = (a[31:0]) <= (b[31:0]);
    3'd3:   o[63:0] = (a) <= (b);
    3'd4:   o[63:0] = {
                7'h0,(a[63:56]) <= (b[63:56]),
                7'h0,(a[55:48]) <= (b[55:48]),
                7'h0,(a[47:40]) <= (b[47:40]),
                7'h0,(a[39:32]) <= (b[39:32]),
                7'h0,(a[31:24]) <= (b[31:24]),
                7'h0,(a[23:16]) <= (b[23:16]),
                7'h0,(a[15:8]) <= (b[15:8]),
                7'h0,(a[7:0]) <= (b[7:0])
            };
    3'd5:   o[63:0] = {
                15'h0,(a[63:48]) <= (b[63:48]),
                15'h0,(a[47:32]) <= (b[47:32]),
                15'h0,(a[31:16]) <= (b[31:16]),
                15'h0,(a[15:0]) <= (b[15:0])
            };
    3'd6:   o[63:0] = {
                31'h0,(a[63:32]) <= (b[63:32]),
                31'h0,(a[31:0]) <= (b[31:0])
            };
    3'd7:   o[63:0] = (a[63:0]) <= (b[63:0]);
    endcase
`else
    o[63:0] = (a[63:0]) <= (b[63:0]);
`endif
end
endtask

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_bitfield.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_bitfield.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_bitfield.v (revision 60) @@ -0,0 +1,110 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_bitfield.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
// ============================================================================
//
`ifndef BFSET
`define BFSET   4'd0
`define BFCLR   4'd1
`define BFCHG   4'd2
`define BFINS   4'd3
`define BFINSI  4'd4
`define BFEXT   4'd5
`define BFEXTU  4'd6
`define BFFFO   4'd8
`endif

// ----------------------------------------------------------------------------
// FT64_bitfield - combinational bitfield unit.
//
//   inst   instruction word; inst[31:28] selects the operation
//   a,b,c  operand values
//   o      operation result
//   masko  copy of the generated field mask
//
// A mask covering bits mb..me is built first (me = mb + mw, truncated to six
// bits; when me < mb the mask wraps around and covers bits >= mb together
// with bits <= me). Each operation then sets, clears, toggles, inserts into
// or extracts the masked bits.
// ----------------------------------------------------------------------------
module FT64_bitfield(inst, a, b, c, o, masko);
parameter DWIDTH=64;
input [47:0] inst;
input [DWIDTH-1:0] a;
input [DWIDTH-1:0] b;
input [DWIDTH-1:0] c;
output [DWIDTH-1:0] o;
reg [DWIDTH-1:0] o;
output [DWIDTH-1:0] masko;

reg [DWIDTH-1:0] o1;    // masked source field (extract / find-first-one)
reg [DWIDTH-1:0] o2;    // shifted intermediate (insert / signed extract)
wire [6:0] ffoo;        // find-first-one result from ffo96

// generate mask
reg [DWIDTH-1:0] mask;
assign masko = mask;
wire [3:0] op = inst[31:28];
wire [5:0] mb = inst[30] ? a[5:0] : {inst[28],inst[12:8]};      // mask begin
wire [5:0] mw = inst[31] ? b[5:0] : {inst[29],inst[17:13]};     // mask width
wire [63:0] da = inst[32] ? c : {inst[43:33],inst[22:18]};      // data operand
wire [5:0] me = mb + mw;                                        // mask end
wire [5:0] ml = mw;     // mask length-1
wire [63:0] imm = {59'd0,inst[10:6]};

integer nn,n;
// Combinational, so blocking assignment and an inferred sensitivity list.
always @*
    for (nn = 0; nn < DWIDTH; nn = nn + 1)
        mask[nn] = (nn >= mb) ^ (nn <= me) ^ (me >= mb);

ffo96 u1 ({32'h0,o1},ffoo);

// The sensitivity list is inferred so that the block also re-evaluates when
// ffoo changes; with the earlier explicit list BFFFO could sample a stale
// ffoo in simulation. o1 and o2 are given defaults so that operations which
// do not use them no longer infer latches.
always @*
begin
    o1 = {DWIDTH{1'b0}};
    o2 = {DWIDTH{1'b0}};
    case (op)
    // ToDo: Fix bitfield inserts
    `BFINS:
        begin
            o2 = a << mb;
            for (n = 0; n < DWIDTH; n = n + 1) o[n] = (mask[n] ? o2[n] : b[n]);
        end
    `BFINSI:
        begin
            o2 = imm << mb;
            for (n = 0; n < DWIDTH; n = n + 1) o[n] = (mask[n] ? o2[n] : b[n]);
        end
    `BFSET: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? 1'b1 : da[n]; end
    `BFCLR: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? 1'b0 : da[n]; end
    `BFCHG: begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? ~da[n] : da[n]; end
    `BFEXTU:
        begin
            for (n = 0; n < DWIDTH; n = n + 1)
                o1[n] = mask[n] ? da[n] : 1'b0;
            o = o1 >> mb;
        end
    `BFEXT:
        begin
            for (n = 0; n < DWIDTH; n = n + 1)
                o1[n] = mask[n] ? da[n] : 1'b0;
            o2 = o1 >> mb;
            // replicate the field's top bit (bit ml) into every higher bit
            for (n = 0; n < DWIDTH; n = n + 1)
                o[n] = n > ml ? o2[ml] : o2[n];
        end
    `BFFFO:
        begin
            for (n = 0; n < DWIDTH; n = n + 1)
                o1[n] = mask[n] ? da[n] : 1'b0;
            o = (ffoo==7'd127) ? -64'd1 : ffoo;     // ffoo returns 127 if no one was found
        end
`ifdef I_SEXT
    `SEXT:  begin for (n = 0; n < DWIDTH; n = n + 1) o[n] = mask[n] ? da[mb] : da[n]; end
`endif
    default:    o = {DWIDTH{1'b0}};
    endcase
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_config.vh =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_config.vh (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_config.vh (revision 60) @@ -0,0 +1,119 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_config.vh +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ============================================================================ +// +// The following line is to enable simulation versions of some modules. +// Comment out for synthesis. 
+//`define SIM 1'b1 + +//`define SUPPORT_SMT 1'b1 +//`define SUPPORT_VECTOR 1'b1 +//`define SUPPORT_DCI 1'b1 // dynamically compressed instructions +//`define SUPPORT_BBMS 1'b1 +//`define SUPPORT_SEGMENTATION 1'b1 +//`define SUPPORT_PREDICATION 1'b1 +//`define DEBUG_LOGIC 1'b1 +`define L1_ICACHE_SIZE 2 // 2 or 4 for 2 or 4 kB + +// One way to tweak the size of the core a little bit is to limit the number +// of address bits processed. The test system for instance has only 512MB of +// memory, so the address size is limited to 32 bits. +// ** The ASID is stored in the upper 8 bits of the address +`define AMSB 63 +`define ABITS `AMSB:0 + + +// bitfield representing a queue entry index. The field must be large +// enough to accommodate a queue entry number, determined by the number +// of queue entries below. +`define QBIT 4 +`define QBITS 3:0 +`define QBITSP1 4:0 + +// The following bitfield spec is for the instruction sequence number. It +// must have at least one more bit in it than the QBITS above as the counter +// can overflow a little bit. +`define SNBITS 4:0 + +// If set greater than 10, then memory instructions won't +// issue until they are within 10 of the head of the queue. +`define QENTRIES 4 + +// Bitfield for representing exception codes +`define XBITS 7:0 + +//`define SUPPORT_DBG 1'b1 + +// Issue logic is not really required for every possible distance from +// the head of the queue. Later queue entries tend to depend on prior +// ones and hence may not be ready to be issued. Also note that +// instruction decode takes a cycle making the last entry or two in the +// queue not ready to be issued. Commenting out this line will limit +// much of the issue logic to the first six queue slots relative to the +// head of the queue. +`define FULL_ISSUE_LOGIC 1'b1 + +// The WAYS config define affects things like the number of ports on the +// register file, the number of ports on the instruction cache, and how +// many entries are contained in the fetch buffers. 
It also indirectly +// affects how many instructions are queued. +`define WAYS 1 // number of ways parallel (1-3 3 not working yet) +`define NUM_IDU 1 // number of instruction decode units (1-3) +`define NUM_ALU 1 // number of ALU's (1-2) +`define NUM_MEM 1 // number of memory queues (1-3) +`define NUM_FPU 0 // number of floating-point units (0-2) +// Note that even with just a single commit bus, multiple instructions may +// commit if they do not target any registers. Up to three instructions may +// commit even with just a single bus. +`define NUM_CMT 1 // number of commit busses (1-3) +// Comment out the following to remove FCU enhancements (branch predictor, BTB, RSB) +//`define FCU_ENH 1 +// Comment out the following to remove bypassing logic on the functional units +//`define FU_BYPASS 1 + +//`define SUPPORT_TLB 1 + +// These are unit availability settings at reset. +`define ID1_AVAIL 1'b1 +`define ID2_AVAIL 1'b1 +`define ID3_AVAIL 1'b0 +`define ALU0_AVAIL 1'b1 +`define ALU1_AVAIL 1'b1 +`define FPU1_AVAIL 1'b1 +`define FPU2_AVAIL 1'b0 +`define MEM1_AVAIL 1'b1 +`define MEM2_AVAIL 1'b1 +`define MEM3_AVAIL 1'b0 +`define FCU_AVAIL 1'b1 + +// Comment out to remove the write buffer from the core. +`define HAS_WB 1'b1 +`define WB_DEPTH 5 // must be one more than desired depth + +// Uncomment to allow SIMD operations +//`define SIMD 1'b1 + +// Comment the following to disable registering the output of instruction decoders. +// Inline decoding should not be registered. +//`define REGISTER_DECODE 1'b1 +`define INLINE_DECODE 1'b1 Index: thor/trunk/FT64v7/rtl/common/FT64_dcache.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_dcache.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_dcache.v (revision 60) @@ -0,0 +1,202 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. 
// \/_// robfinch@finitron.ca
// ||
//
// FT64_dcache.v
// - a simple direct mapped cache
// - three cycle latency
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
// ============================================================================
//
// FT64_dcache - top level: data/valid storage (dcache_mem) plus tag compare
// (FT64_dcache_tag).
//
// Write side (wclk): a 64-bit word i is written under byte enables sel at
//   wadr. The tag is only written when the last word of a line is written
//   (wadr[4:3]==2'b11).
// Read side (rclk): lo returns the registered 256-bit line, o the 64-bit
//   word selected by radr[4:3]. rhit is the tag match qualified by the valid
//   bits of the bytes covered by rdsize.
// The li input is not referenced in this module.
module FT64_dcache(rst, dce, wclk, wr, sel, wadr, whit, i, li, rclk, rdsize, radr, o, lo, rhit);
input rst;
input dce;              // data cache enable
input wclk;
input wr;
input [7:0] sel;
input [37:0] wadr;
output whit;
input [63:0] i;
input [255:0] li;       // line input
input rclk;
input [2:0] rdsize;
input [37:0] radr;
output reg [63:0] o;
output reg [255:0] lo;  // line out
output reg rhit;
parameter byt = 3'd0;
parameter wyde = 3'd1;
parameter tetra = 3'd2;
parameter octa = 3'd3;

wire [255:0] dc;        // line read from the data array
wire [31:0] v;          // per-byte valid bits of that line
wire rhita;             // raw tag match from the tag unit

dcache_mem u1 (
    .rst(rst),
    .clka(wclk),
    .ena(dce & wr),
    .wea(sel),
    .addra(wadr[13:0]),
    .dina(i),
    .clkb(rclk),
    .enb(dce),
    .addrb(radr[13:0]),
    .doutb(dc),
    .ov(v)
);

FT64_dcache_tag u3
(
    .wclk(wclk),
    .dce(dce),
    .wr(wr && wadr[4:3]==2'b11),
    .wadr(wadr),
    .rclk(rclk),
    .radr(radr),
    .whit(whit),
    .rhit(rhita)
);

// Valid bits starting at the addressed byte of the line.
wire [7:0] va = v >> radr[4:0];
always @(posedge rclk)
begin
case(rdsize)
byt:    rhit <= rhita &  va[  0];
wyde:   rhit <= rhita & &va[1:0];
tetra:  rhit <= rhita & &va[3:0];
default:rhit <= rhita & &va[7:0];
endcase
end

// hit is also delayed by a clock already
always @(posedge rclk)
    lo <= dc;
always @(posedge rclk)
    o <= dc >> {radr[4:3],6'b0};

endmodule

// -----------------------------------------------------------------------------
// dcache_mem - 512 lines of 256 data bits with one valid bit per byte.
//
// Port A (clka) writes a 64-bit word under byte enables wea into the word of
// the line selected by addra[4:3], and sets the valid bit of each byte
// written. Port B (clkb) reads the whole line and its valid bits through two
// register stages. Valid bits are cleared by the initial block only; the rst
// input is not referenced.
// -----------------------------------------------------------------------------

module dcache_mem(rst, clka, ena, wea, addra, dina, clkb, enb, addrb, doutb, ov);
input rst;
input clka;
input ena;
input [7:0] wea;
input [13:0] addra;
input [63:0] dina;
input clkb;
input enb;
input [13:0] addrb;
output reg [255:0] doutb;
output reg [31:0] ov;

reg [255:0] mem [0:511];
reg [31:0] valid [0:511];
reg [255:0] doutb1;
reg [31:0] ov1;

integer n;

initial begin
    for (n = 0; n < 512; n = n + 1)
        valid[n] = 32'h00;
end

// One write process per (word, byte) lane. The loops use named generate
// blocks and no bare begin/end around the generate region, so the construct
// is accepted under both IEEE 1364-2001 and 1364-2005 rules.
genvar g, k;
generate
for (g = 0; g < 4; g = g + 1) begin : gWord
    for (k = 0; k < 8; k = k + 1) begin : gByte
        always @(posedge clka)
            if (ena & wea[k] & addra[4:3]==g) begin
                mem[addra[13:5]][g*64+k*8+7:g*64+k*8] <= dina[k*8+7:k*8];
                valid[addra[13:5]][g*8+k] <= 1'b1;
            end
    end
end
endgenerate

// Two-stage registered read of the data line.
always @(posedge clkb)
    if (enb)
        doutb1 <= mem[addrb[13:5]];
always @(posedge clkb)
    if (enb)
        doutb <= doutb1;
// Two-stage registered read of the valid bits, aligned with the data.
always @(posedge clkb)
    if (enb)
        ov1 <= valid[addrb[13:5]];
always @(posedge clkb)
    if (enb)
        ov <= ov1;
endmodule

// -----------------------------------------------------------------------------
// FT64_dcache_tag - tag store and compare.
//
// The tag for line wadr[13:5] is wadr[37:14]. whit / rhit are registered
// comparisons of the stored tag against the upper bits of the write / read
// address.
// -----------------------------------------------------------------------------

module FT64_dcache_tag(wclk, dce, wr, wadr, rclk, radr, whit, rhit);
input wclk;
input dce;              // data cache enable
input wr;
input [37:0] wadr;
input rclk;
input [37:0] radr;
output reg whit;        // write hit
output reg rhit;        // read hit

wire [31:0] rtago;
wire [31:0] wtago;

FT64_dcache_tag2 u1 (
    .clka(wclk),
    .ena(dce),
    .wea(wr),
    .addra(wadr[13:5]),
    .dina(wadr[37:14]),
    .douta(wtago),
    .clkb(rclk),
    .web(1'b0),
    .dinb(32'd0),
    .enb(dce),
    .addrb(radr[13:5]),
    .doutb(rtago)
);

always @(posedge rclk)
    rhit <= rtago[23:0]==radr[37:14];
always @(posedge wclk)
    whit <= wtago[23:0]==wadr[37:14];

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_defines.vh =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_defines.vh (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_defines.vh (revision 60) @@ -0,0 +1,555 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_defines.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ============================================================================ +// +`define HIGH 1'b1 +`define LOW 1'b0 +`define TRUE 1'b1 +`define FALSE 1'b0 +//`define Q2VECTORS 1'b1 + +`define ZERO 64'd0 + +`define BRK 6'h00 +`define FVECTOR 6'h01 +`define VCMPRSS 6'h00 +`define VCIDX 6'h01 +`define VSCAN 6'h02 +`define VABS 6'h03 +`define VADD 6'h04 +`define VSUB 6'h05 +`define VSxx 6'h06 +`define VSEQ 3'd0 +`define VSNE 3'd1 +`define VSLT 3'd2 +`define VSGE 3'd3 +`define VSLE 3'd4 +`define VSGT 3'd5 +`define VSUN 3'd7 +`define VSxxS 6'h07 +`define VAND 6'h08 +`define VOR 6'h09 +`define VXOR 6'h0A +`define VXCHG 6'h0B +`define VSHL 6'h0C +`define VSHR 6'h0D +`define VASR 6'h0E +`define VSxxSb 6'h0F +`define VSHLV 6'h10 +`define VSHRV 6'h11 +`define VROLV 6'h12 +`define VRORV 6'h13 +`define VADDS 6'h14 +`define VSUBS 6'h15 +`define VSxxSU 6'h17 +`define VANDS 6'h18 +`define VORS 6'h19 +`define VXORS 6'h1A +`define VSxxSUb 6'h1F +`define VBITS2V 6'h20 +`define V2BITS 6'h21 +`define VEINS 6'h22 +`define VEX 6'h23 +`define VFLT2INT 6'h24 +`define VINT2FLT 6'h25 +`define VSIGN 6'h26 +`define VSxxU 6'h27 +`define VCNTPOP 6'h28 +`define VMULS 6'h2A +`define VDIVS 6'h2E +`define VSxxUb 6'h2F +`define VMxx 6'h30 +`define VMAND 3'h0 +`define VMOR 3'h1 +`define VMXOR 3'h2 +`define VMXNOR 3'h3 +`define VMPOP 3'h4 +`define VMFILL 3'h5 +`define VMFIRST 3'h6 +`define VMLAST 3'h7 +`define VMUL 6'h3A +`define VDIV 6'h3E +`define VSxxb 6'h3F +`define R2 6'h02 +`define RR 6'h02 +`define BCD 6'h00 +`define BCDADD 5'h00 +`define BCDSUB 5'h01 +`define BCDMUL 5'h02 +`define PCRELX 6'h02 +`define AUIPC 6'h03 
+`define SHL 4'h0 +`define SHR 4'h1 +`define ASL 4'h2 +`define ASR 4'h3 +`define ROL 4'h4 +`define ROR 4'h5 +`define SHLI 4'h8 +`define SHRI 4'h9 +`define ASLI 4'hA +`define ASRI 4'hB +`define ROLI 4'hC +`define RORI 4'hD +// Register / Miscellaneous (01) Ops +`define R1 6'h01 +`define CNTLZ 5'h00 +`define CNTLO 5'h01 +`define CNTPOP 5'h02 +`define COM 5'h03 +`define ABS 5'h04 +`define NOT 5'h05 +`define REDOR 5'h06 +`define NEG 5'h07 +`define ZXH 5'h08 +`define ZXC 5'h09 +`define ZXB 5'h0A +`define MEMDB 5'h10 +`define MEMSB 5'h11 +`define SYNC 5'h12 +`define CHAIN_OFF 5'h14 +`define CHAIN_ON 5'h15 +`define SETWB 5'h16 +`define SXH 5'h18 +`define SXC 5'h19 +`define SXB 5'h1A +// Register-Register (02) Ops +`define RTOP 6'h00 +`define BMM 6'h03 +`define ADD 6'h04 +`define SUB 6'h05 +`define SLT 6'h06 +`define SLTU 6'h07 +`define AND 6'h08 +`define OR 6'h09 +`define XOR 6'h0A +`define NAND 6'h0C +`define NOR 6'h0D +`define XNOR 6'h0E +`define SHIFT31 6'h0F +`define CMP 6'h12 +`define MODU 6'h14 +`define MODSU 6'h15 +`define MOD 6'h16 +`define LEAX 6'h18 +`define MUX 6'h1B +`define SHIFT63 6'h1F +`define MOV 6'b01001? 
+`define MULUH 6'h24 +`define MULSUH 6'h25 +`define MULH 6'h26 +`define SLE 6'h28 +`define SLEU 6'h29 +`define MULF 6'h2A +// The following two instructions are 48 bit ops +`define CMOVEZ 6'h28 +`define CMOVNZ 6'h29 +`define MIN 6'h2C +`define MAX 6'h2D +`define MAJ 6'h2E +`define SHIFTR 6'h2F +`define SEI 6'h30 +`define WAIT 6'h31 +`define RTI 6'h32 +`define RTE 6'h32 +`define VMOV 6'h33 +`define MULU 6'h38 +`define MULSU 6'h39 +`define MUL 6'h3A +`define FXMUL 6'h3B +`define DIVU 6'h3C +`define DIVSU 6'h3D +`define DIV 6'h3E +`define SHIFTH 6'h3F +// Root Level Ops +`define ADDI 6'h04 +`define CSRRW 6'h05 +`define SLTI 6'h06 +`define SLTUI 6'h07 +`define ANDI 6'h08 +`define ORI 6'h09 +`define XORI 6'h0A +`define EXEC 6'h0B +`define REX 6'h0D +`define XNORI 6'h0E +`define FLOAT 6'h0F +`define LDCS 6'h10 +`define LVxU 6'h11 +`define CMPI 6'h12 +`define LB 6'h13 +`define SB 6'h15 +`define MEMNDX 6'h16 +`define LVBX 6'h00 +`define LVBUX 6'h01 +`define LVCX 6'h02 +`define LVCUX 6'h03 +`define LVHX 6'h04 +`define LVHUX 6'h05 +`define LVWX 6'h06 +`define LCX 6'h08 +`define LCUX 6'h09 +`define LBUX 6'h0A +`define LHX 6'h10 +`define LHUX 6'h11 +`define LWX 6'h12 +`define LBX 6'h13 +`define LWRX 6'h14 +`define LVWS 6'h18 +`define LVX 6'h19 +`define CACHEX 6'h1E +`define SHX 6'h21 +`define SBX 6'h20 +`define SWX 6'h22 +`define SWCX 6'h23 +`define SCX 6'h24 +`define CASX 6'h25 +`define SVWS 6'h27 +`define INCX 6'h2A +`define PUSH 6'h33 +`define SVX 6'h37 +`define SWC 6'h17 +`define JAL 6'h18 +`define CALL 6'h19 +`define INC 6'h1A +`define LFx 6'h1B +`define SGTUI 6'h1C +`define LWR 6'h1D +`define CACHE 6'h1E +`define Lx 6'h20 +`define LxU 6'h21 +`define BITFIELD 6'h22 +`define BFINSI 4'h4 +`define LBU 6'h23 +`define Sx 6'h24 +`define CAS 6'h25 +`define BBc 6'h26 +`define IBNE 2'd2 +`define DBNZ 2'd3 +`define LUI 6'h27 +`define JMP 6'h28 +`define RET 6'h29 +`define MULFI 6'h2A +`define SFx 6'h2B +`define SGTI 6'h2C +`define CMPRSSD 6'h2D +`define MODI 6'h2E +`define AMO 6'h2F 
+`define AMO_SWAP 6'h00 +`define AMO_ADD 6'h04 +`define AMO_AND 6'h08 +`define AMO_OR 6'h09 +`define AMO_XOR 6'h0A +`define AMO_SHL 6'h0C +`define AMO_SHR 6'h0D +`define AMO_MIN 6'h1C +`define AMO_MAX 6'h1D +`define AMO_MINU 6'h1E +`define AMO_MAXU 6'h1F +`define Bcc 6'h30 +`define BEQ 3'd0 +`define BNE 3'd1 +`define BLT 3'd2 +`define BGE 3'd3 +`define BLTU 3'd6 +`define BGEU 3'd7 +`define IVECTOR 6'h31 +`define BEQI 6'h32 +`define BCHK 6'h33 +`define CHK 6'h34 +`define LV 6'h36 +`define SV 6'h37 +`define MULUI 6'h38 +`define MULSUI 6'h39 +`define MULI 6'h3A +`define LVx 6'h3B +`define DIVUI 6'h3C +`define NOP 6'h3D +`define DIVI 6'h3E + +`define FMOV 6'h10 +`define FTOI 6'h12 +`define ITOF 6'h13 +`define FNEG 6'h14 +`define FABS 6'h15 +`define FSIGN 6'h16 +`define FMAN 6'h17 +`define FNABS 6'h18 +`define FCVTSD 6'h19 +`define FCVTSQ 6'h1B +`define FSTAT 6'h1C +`define FTX 6'h20 +`define FCX 6'h21 +`define FEX 6'h22 +`define FDX 6'h23 +`define FRM 6'h24 +`define FCVTDS 6'h29 +`define FSYNC 6'h36 + +`define FADD 6'h04 +`define FSUB 6'h05 +`define FCMP 6'h06 +`define FMUL 6'h08 +`define FDIV 6'h09 + +`define EXR 8'h7F + +`define NOP_INSN {42'd0,`NOP} +`define INSN_FLT_EXF 16'h1180 +`define INSN_FLT_IBE 16'h10A0 +`define INSN_FLT_TLB 16'h1280 + +`define CSR_CR0 10'h000 +`define CSR_HARTID 10'h001 +`define CSR_TICK 10'h002 +`define CSR_PCR 10'h003 +`define CSR_PMR 10'h005 +`define CSR_CAUSE 10'h006 +`define CSR_BADADR 10'h007 +`define CSR_PCR2 10'h008 +`define CSR_SCRATCH 10'h009 +`define CSR_WBRCD 10'h00A +`define CSR_BADINSTR 10'h00B +`define CSR_SEMA 10'h00C +`define CSR_KEYS 10'h00E +`define CSR_TCB 10'h010 +`define CSR_FSTAT 10'h014 +`define CSR_DBAD0 10'h018 +`define CSR_DBAD1 10'h019 +`define CSR_DBAD2 10'h01A +`define CSR_DBAD3 10'h01B +`define CSR_DBCTRL 10'h01C +`define CSR_DBSTAT 10'h01D +`define CSR_CAS 10'h02C +`define CSR_TVEC 10'b00000110??? 
+`define CSR_IM_STACK 10'h040 +`define CSR_OL_STACK 10'h041 +`define CSR_PL_STACK 10'h042 +`define CSR_RS_STACK 10'h043 +`define CSR_STATUS 10'h044 +`define CSR_BRS_STACK 10'h046 +`define CSR_EPC0 10'h048 +`define CSR_EPC1 10'h049 +`define CSR_EPC2 10'h04A +`define CSR_EPC3 10'h04B +`define CSR_EPC4 10'h04C +`define CSR_EPC5 10'h04D +`define CSR_EPC6 10'h04E +`define CSR_EPC7 10'h04F +`define CSR_GOLEX0 10'h050 +`define CSR_GOLEX1 10'h051 +`define CSR_GOLEX2 10'h052 +`define CSR_GOLEX3 10'h053 +`define CSR_GOLEXVP 10'h054 +`define CSR_CODEBUF 10'b00010?????? +`define CSR_TB 10'h0C0 +`define CSR_CBL 10'h0C1 +`define CSR_CBU 10'h0C2 +`define CSR_RO 10'h0C3 +`define CSR_DBL 10'h0C4 +`define CSR_DBU 10'h0C5 +`define CSR_SBL 10'h0C6 +`define CSR_SBU 10'h0C7 +`define CSR_ENU 10'h0C8 +`define CSR_PREGS 10'h0F0 +`define CSR_Q_CTR 10'h3C0 +`define CSR_BM_CTR 10'h3C1 +`define CSR_ICL_CTR 10'h3C2 +`define CSR_IRQ_CTR 10'h3C3 +`define CSR_TIME 10'h3E0 +`define CSR_INFO 10'b11_1111_???? + +`define OL_USER 2'd3 +`define OL_SUPERVISOR 2'd2 +`define OL_HYPERVISOR 2'd1 +`define OL_MACHINE 2'd0 + +// JALR and EXTENDED are synonyms +`define EXTEND 3'd7 + +// system-call subclasses: +`define SYS_NONE 3'd0 +`define SYS_CALL 3'd1 +`define SYS_MFSR 3'd2 +`define SYS_MTSR 3'd3 +`define SYS_RFU1 3'd4 +`define SYS_RFU2 3'd5 +`define SYS_RFU3 3'd6 +`define SYS_EXC 3'd7 // doesn't need to be last, but what the heck + +// exception types: +`define EXC_NONE 9'd000 +`define EXC_HALT 9'd1 +`define EXC_TLBMISS 9'd2 +`define EXC_SIGSEGV 9'd3 +`define EXC_INVALID 9'd4 + +`define FLT_NONE 8'd00 +`define FLT_IBE 8'd01 +`define FLT_EXF 8'd02 +`define FLT_TLB 8'd04 +`define FLT_SSM 8'd32 +`define FLT_DBG 8'd33 +`define FLT_TGT 8'd34 +`define FLT_IADR 8'd36 +`define FLT_UNIMP 8'd37 +`define FLT_FLT 8'd38 +`define FLT_CHK 8'd39 +`define FLT_DBZ 8'd40 +`define FLT_OFL 8'd41 +`define FLT_SEG 8'd47 +`define FLT_ALN 8'd48 +`define FLT_DWF 8'd50 +`define FLT_DRF 8'd51 +`define FLT_SGB 8'd52 +`define FLT_PRIV 
8'd53 +`define FLT_CMT 8'd54 +`define FLT_BD 8'd55 +`define FLT_STK 8'd56 +`define FLT_DBE 8'd60 +`define FLT_RET 8'd230 +`define FLT_CS 8'd231 +`define FLT_ZS_LD 8'd232 +`define FLT_DS_LD 8'd233 +`define FLT_ES_LD 8'd234 +`define FLT_FS_LD 8'd235 +`define FLT_GS_LD 8'd236 +`define FLT_HS_LD 8'd237 +`define FLT_SS_LD 8'd238 +`define FLT_CS_LD 8'd239 + +`define INSTRUCTION_OP 5:0 +`define INSTRUCTION_L2 7:6 +`define INSTRUCTION_RA 12:8 +`define INSTRUCTION_RT 17:13 +`define INSTRUCTION_RB 22:18 +`define INSTRUCTION_RC 27:23 +`define INSTRUCTION_IM 31:18 +`define INSTRUCTION_IML 47:18 +`define INSTRUCTION_SB 31 +`define INSTRUCTION_S1 22:18 +`define INSTRUCTION_S2 31:26 +`define INSTRUCTION_S2L 47:42 +`define INSTRUCTION_COND 21:18 + +`define FORW_BRANCH 1'b0 +`define BACK_BRANCH 1'b1 + +`define DRAMSLOT_AVAIL 3'b000 +`define DRAMSLOT_BUSY 3'b001 +`define DRAMSLOT_REQBUS 3'b101 +`define DRAMSLOT_HASBUS 3'b110 +`define DRAMREQ_READY 3'b111 + +`define INV 1'b0 +`define VAL 1'b1 + +// +// define PANIC types +// +`define PANIC_NONE 4'd0 +`define PANIC_FETCHBUFBEQ 4'd1 +`define PANIC_INVALIDISLOT 4'd2 +`define PANIC_MEMORYRACE 4'd3 +`define PANIC_IDENTICALDRAMS 4'd4 +`define PANIC_OVERRUN 4'd5 +`define PANIC_HALTINSTRUCTION 4'd6 +`define PANIC_INVALIDMEMOP 4'd7 +`define PANIC_INVALIDFBSTATE 4'd9 +`define PANIC_INVALIDIQSTATE 4'd10 +`define PANIC_BRANCHBACK 4'd11 +`define PANIC_BADTARGETID 4'd12 +`define PANIC_ALU0ONLY 4'd13 + +`define IB_CONST 143:80 +`define IB_LN 78:76 +`define IB_RT 75:71 +`define IB_RC 70:66 +`define IB_RB 65:61 +`define IB_RA 60:56 +`define IB_PRFW 52 +`define IB_CMP 51 +`define IB_PUSH 47 +`define IB_TLB 46 +`define IB_SZ 45:43 +`define IB_IRQ 42 +`define IB_RTI 41 +`define IB_BRK 40 +`define IB_RET 39 +`define IB_JAL 38 +`define IB_ODDBALL 37 +`define IB_STORE 36 +`define IB_MEMSZ 35:33 +`define IB_LOADV 32 +`define IB_IMM 31 +`define IB_MEM 30 +`define IB_BT 28 +`define IB_ALU 27 +`define IB_ALU0 26 +`define IB_FPU 25 +`define IB_FC 24 +`define 
IB_CANEX 23 +`define IB_LOAD 22 +`define IB_PRELOAD 21 +`define IB_MEMNDX 20 +`define IB_RMW 19 +`define IB_MEMDB 18 +`define IB_MEMSB 17 +`define IB_SHFT 16 +`define IB_SEI 15 +`define IB_AQ 14 +`define IB_RL 13 +`define IB_JMP 12 +`define IB_BR 11 +`define IB_SYNC 10 +`define IB_FSYNC 9 +`define IB_RFW 8 +`define IB_WE 7:0 + +`define TLB 6'h3F +`define TLB_NOP 4'd0 +`define TLB_P 4'd1 +`define TLB_RD 4'd2 +`define TLB_WR 4'd3 +`define TLB_WI 4'd4 +`define TLB_EN 4'd5 +`define TLB_DIS 4'd6 +`define TLB_RDREG 4'd7 +`define TLB_WRREG 4'd8 +`define TLB_INVALL 4'd9 +`define TLB_RDAGE 4'd10 +`define TLB_WRAGE 4'd11 + +`define TLBWired 4'h0 +`define TLBIndex 4'h1 +`define TLBRandom 4'h2 +`define TLBPageSize 4'h3 +`define TLBVirtPage 4'h4 +`define TLBPhysPage 4'h5 +`define TLBASID 4'h7 +`define TLBMissAdr 4'd8 +`define TLBPageTblAddr 4'd10 +`define TLBPageTblCtrl 4'd11 +`define TLBAFC 4'd12 +`define TLBPageCount 4'd13 + +`define EXC_RGS 6'h00 +`define BRK_RGS 6'h10 Index: thor/trunk/FT64v7/rtl/common/FT64_divider.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_divider.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_divider.v (revision 60) @@ -0,0 +1,206 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// FT64 Superscaler
+// FT64_divider.v
+// - 64 bit divider
+//
+// Radix-2 restoring divider. One quotient bit is produced per clock.
+//
+// Protocol:
+//   ld     - pulse high for ONE clock while idle to latch a, b and start.
+//            (cnt is reloaded whenever ld is high, so holding ld high while
+//            the divider is running corrupts the result.)
+//   abort  - forces the cycle counter to zero, ending the divide early.
+//   sgn    - signed a, signed b
+//   sgnus  - signed a, unsigned b
+//   qo, ro - quotient and remainder, written on the final DIV cycle.
+//   dvByZr - set when the divisor latched with ld was zero.
+//   done   - high in the DONE state, and while idle with ld low.
+//   idle   - high in the IDLE state.
+//
+// Signed results use truncated division: the quotient is negated when the
+// operand signs differ and the remainder takes the sign of the dividend, so
+// that a == qo*b + ro always holds.
+//
+// ============================================================================
+//
+module FT64_divider(rst, clk, ld, abort, sgn, sgnus, a, b, qo, ro, dvByZr, done, idle);
+parameter WID=64;
+parameter DIV=3'd3;
+parameter IDLE=3'd4;
+parameter DONE=3'd5;
+input clk;
+input rst;
+input ld;
+input abort;
+input sgn;
+input sgnus;
+input [WID-1:0] a;
+input [WID-1:0] b;
+output [WID-1:0] qo;
+reg [WID-1:0] qo;
+output [WID-1:0] ro;
+reg [WID-1:0] ro;
+output done;
+output idle;
+output dvByZr;
+reg dvByZr;
+
+reg [WID-1:0] bb;       // divisor magnitude latched at ld
+reg so;                 // 1 = quotient must be negated
+reg sr;                 // 1 = remainder must be negated (dividend was negative)
+reg [2:0] state;
+reg [7:0] cnt;          // remaining shift/subtract cycles
+wire cnt_done = cnt==8'd0;
+assign done = state==DONE||(state==IDLE && !ld);
+assign idle = state==IDLE;
+reg [WID-1:0] q;        // dividend magnitude shifting out, quotient shifting in
+reg [WID:0] r;          // partial remainder; final remainder is r[WID:1]
+wire b0 = bb <= r;      // next quotient bit: divisor fits in the partial remainder
+wire [WID-1:0] r1 = b0 ? r - bb : r;
+
+initial begin
+    q = {WID{1'b0}};
+    r = {(WID+1){1'b0}};
+    qo = {WID{1'b0}};
+    ro = {WID{1'b0}};
+    so = 1'b0;
+    sr = 1'b0;
+end
+
+// Control state machine: IDLE -> DIV -> DONE -> IDLE
+always @(posedge clk)
+if (rst)
+    state <= IDLE;
+else
+    case(state)
+    IDLE:
+        if (ld)
+            state <= DIV;
+    DIV:
+        if (dvByZr)
+            state <= DONE;
+        else if (cnt_done)
+            state <= DONE;
+    DONE:
+        state <= IDLE;
+    default: state <= IDLE;
+    endcase
+
+// Cycle counter: WID+1 iterations are needed because the first iteration only
+// moves the dividend's top bit into r.
+always @(posedge clk)
+if (rst)
+    cnt <= 8'h00;
+else begin
+    if (abort)
+        cnt <= 8'd00;
+    else if (ld)
+        cnt <= WID+1;
+    else if (!cnt_done)
+        cnt <= cnt - 8'd1;
+end
+
+always @(posedge clk)
+if (rst)
+    dvByZr <= 1'b0;
+else begin
+    if (ld)
+        dvByZr <= b=={WID{1'b0}};
+end
+
+// Datapath
+always @(posedge clk)
+if (rst) begin
+    bb <= {WID{1'b0}};
+    q <= {WID{1'b0}};
+    r <= {WID{1'b0}};
+    qo <= {WID{1'b0}};
+    ro <= {WID{1'b0}};
+    so <= 1'b0;
+    sr <= 1'b0;
+end
+else
+begin
+
+case(state)
+IDLE:
+    if (ld) begin
+        // Work on magnitudes; remember which results need their sign restored.
+        if (sgn) begin
+            q <= a[WID-1] ? -a : a;
+            bb <= b[WID-1] ? -b : b;
+            so <= a[WID-1] ^ b[WID-1];
+            sr <= a[WID-1];
+        end
+        else if (sgnus) begin
+            q <= a[WID-1] ? -a : a;
+            bb <= b;
+            so <= a[WID-1];
+            sr <= a[WID-1];
+        end
+        else begin
+            q <= a;
+            bb <= b;
+            so <= 1'b0;
+            sr <= 1'b0;
+            $display("bb=%d", b);
+        end
+        r <= {WID{1'b0}};
+    end
+DIV:
+    begin
+        $display("cnt:%d r1=%h q[63:0]=%h", cnt,r1,q);
+        if (!cnt_done && !dvByZr) begin
+            // One restoring-division step.
+            q <= {q[WID-2:0],b0};
+            r <= {r1,q[WID-1]};
+        end
+        else if (dvByZr) begin
+            // Divide by zero: saturated results (values unchanged from the
+            // previous implementation).
+            if ((sgn|sgnus) && so) begin
+                qo <= {1'b1,{WID-1{1'b0}}};
+                ro <= {1'b1,{WID-1{1'b0}}};
+            end
+            else begin
+                qo <= {WID-1{1'b1}};
+                ro <= {WID-1{1'b1}};
+            end
+        end
+        else begin
+            // Final cycle: restore signs. The remainder follows the dividend's
+            // sign (sr), NOT the quotient's sign (so); using so gave a wrong
+            // remainder whenever the divisor was negative.
+            qo <= ((sgn|sgnus) && so) ? -q : q;
+            ro <= ((sgn|sgnus) && sr) ? -r[WID:1] : r[WID:1];
+        end
+    end
+default: ;
+endcase
+end
+
+endmodule
+
+// Simple smoke test: 10005 / 27 signed. Expected qo = 370, ro = 15.
+module FT64_divider_tb();
+parameter WID=64;
+reg rst;
+reg clk;
+reg ld;
+wire done;
+wire [WID-1:0] qo,ro;
+
+initial begin
+    clk = 1;
+    rst = 0;
+    ld = 0;             // keep ld defined while in reset
+    #100 rst = 1;
+    #100 rst = 0;
+    #105 ld = 1;        // change ld away from the clock edge
+    #20 ld = 0;         // exactly one rising edge samples ld
+end
+
+always #10 clk = ~clk; // 50 MHz
+
+
+FT64_divider #(WID) u1
+(
+    .rst(rst),
+    .clk(clk),
+    .ld(ld),
+    .abort(1'b0),
+    .sgn(1'b1),
+    .sgnus(1'b0),
+    .a(64'd10005),
+    .b(64'd27),
+    .qo(qo),
+    .ro(ro),
+    .dvByZr(),
+    .done(done),
+    .idle()
+);
+
+endmodule
+
Index: thor/trunk/FT64v7/rtl/common/FT64_icache.v
===================================================================
--- thor/trunk/FT64v7/rtl/common/FT64_icache.v (nonexistent)
+++ thor/trunk/FT64v7/rtl/common/FT64_icache.v (revision 60)
@@ -0,0 +1,729 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_cache.v
+//
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// +// ============================================================================ +// +`define TRUE 1'b1 +`define FALSE 1'b0 + +// ----------------------------------------------------------------------------- +// Small, 64 line cache memory (2kiB) made from distributed RAM. Access is +// within a single clock cycle. +// ----------------------------------------------------------------------------- + +module FT64_L1_icache_mem(rst, clk, wr, en, lineno, i, o, ov, invall, invline); +parameter pLines = 64; +parameter pLineWidth = 298; +localparam pLNMSB = pLines==128 ? 6 : 5; +input rst; +input clk; +input wr; +input [8:0] en; +input [pLNMSB:0] lineno; +input [pLineWidth-1:0] i; +output [pLineWidth-1:0] o; +output [8:0] ov; +input invall; +input invline; + +integer n; + +(* ram_style="distributed" *) +reg [pLineWidth-1:0] mem [0:pLines-1]; +reg [pLines-1:0] valid0; +reg [pLines-1:0] valid1; +reg [pLines-1:0] valid2; +reg [pLines-1:0] valid3; +reg [pLines-1:0] valid4; +reg [pLines-1:0] valid5; +reg [pLines-1:0] valid6; +reg [pLines-1:0] valid7; +reg [pLines-1:0] valid8; + +initial begin + for (n = 0; n < pLines; n = n + 1) + mem[n] <= 2'b00; +end + +always @(posedge clk) + if (wr & en[0]) mem[lineno][31:0] <= i[31:0]; +always @(posedge clk) + if (wr & en[1]) mem[lineno][63:32] <= i[63:32]; +always @(posedge clk) + if (wr & en[2]) mem[lineno][95:64] <= i[95:64]; +always @(posedge clk) + if (wr & en[3]) mem[lineno][127:96] <= i[127:96]; +always @(posedge clk) + if (wr & en[4]) mem[lineno][159:128] <= i[159:128]; +always @(posedge clk) + if (wr & en[5]) mem[lineno][191:160] <= i[191:160]; +always 
@(posedge clk) + if (wr & en[6]) mem[lineno][223:192] <= i[223:192]; +always @(posedge clk) + if (wr & en[7]) mem[lineno][255:224] <= i[255:224]; +always @(posedge clk) + if (wr & en[8]) mem[lineno][297:256] <= i[297:256]; +always @(posedge clk) +if (rst) begin + valid0 <= 64'd0; + valid1 <= 64'd0; + valid2 <= 64'd0; + valid3 <= 64'd0; + valid4 <= 64'd0; + valid5 <= 64'd0; + valid6 <= 64'd0; + valid7 <= 64'd0; + valid8 <= 64'd0; +end +else begin + if (invall) begin + valid0 <= 64'd0; + valid1 <= 64'd0; + valid2 <= 64'd0; + valid3 <= 64'd0; + valid4 <= 64'd0; + valid5 <= 64'd0; + valid6 <= 64'd0; + valid7 <= 64'd0; + valid8 <= 64'd0; + end + else if (invline) begin + valid0[lineno] <= 1'b0; + valid1[lineno] <= 1'b0; + valid2[lineno] <= 1'b0; + valid3[lineno] <= 1'b0; + valid4[lineno] <= 1'b0; + valid5[lineno] <= 1'b0; + valid6[lineno] <= 1'b0; + valid7[lineno] <= 1'b0; + valid8[lineno] <= 1'b0; + end + else if (wr) begin + if (en[0]) valid0[lineno] <= 1'b1; + if (en[1]) valid1[lineno] <= 1'b1; + if (en[2]) valid2[lineno] <= 1'b1; + if (en[3]) valid3[lineno] <= 1'b1; + if (en[4]) valid4[lineno] <= 1'b1; + if (en[5]) valid5[lineno] <= 1'b1; + if (en[6]) valid6[lineno] <= 1'b1; + if (en[7]) valid7[lineno] <= 1'b1; + if (en[8]) valid8[lineno] <= 1'b1; + end +end + +assign o = mem[lineno]; +assign ov[0] = valid0[lineno]; +assign ov[1] = valid1[lineno]; +assign ov[2] = valid2[lineno]; +assign ov[3] = valid3[lineno]; +assign ov[4] = valid4[lineno]; +assign ov[5] = valid5[lineno]; +assign ov[6] = valid6[lineno]; +assign ov[7] = valid7[lineno]; +assign ov[8] = valid8[lineno]; + +endmodule + +// ----------------------------------------------------------------------------- +// Fully associative (64 way) tag memory for L1 icache. 
+// +// ----------------------------------------------------------------------------- + +module FT64_L1_icache_camtag(rst, clk, nxt, wlineno, wr, wadr, adr, hit, lineno); +input rst; +input clk; +input nxt; +output [5:0] wlineno; +input wr; +input [37:0] adr; +input [37:0] wadr; +output hit; +output reg [5:0] lineno; + +wire [35:0] wtagi = {9'b0,wadr[37:5]}; +wire [35:0] tagi = {9'b0,adr[37:5]}; +wire [63:0] match_addr; + +reg [5:0] cntr; +always @(posedge clk) +if (rst) + cntr <= 6'd0; +else begin + if (nxt) cntr <= cntr + 6'd1; +end +assign wlineno = cntr; + +//wire [21:0] lfsro; +//lfsr #(22,22'h0ACE1) u1 (rst, clk, !(wr3|wr2|wr), 1'b0, lfsro); + +cam36x64 u01 (rst, clk, wr, cntr[5:0], wtagi, tagi, match_addr); +assign hit = |match_addr; + +integer n; +always @* +begin +lineno = 0; +for (n = 0; n < 64; n = n + 1) + if (match_addr[n]) lineno = n; +end + +endmodule + + +// ----------------------------------------------------------------------------- +// Four way set associative tag memory for L1 cache. +// ----------------------------------------------------------------------------- + +module FT64_L1_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit); +parameter pLines = 64; +parameter AMSB = 63; +localparam pLNMSB = pLines==128 ? 6 : 5; +localparam pMSB = pLines==128 ? 
9 : 8; +input rst; +input clk; +input nxt; +input wr; +input [AMSB+8:0] adr; +output reg [pLNMSB:0] lineno; +output hit; + +(* ram_style="distributed" *) +reg [AMSB+8-5:0] mem0 [0:pLines/4-1]; +reg [AMSB+8-5:0] mem1 [0:pLines/4-1]; +reg [AMSB+8-5:0] mem2 [0:pLines/4-1]; +reg [AMSB+8-5:0] mem3 [0:pLines/4-1]; +reg [AMSB+8:0] rradr; +integer n; +initial begin + for (n = 0; n < pLines/4; n = n + 1) + begin + mem0[n] = 0; + mem1[n] = 0; + mem2[n] = 0; + mem3[n] = 0; + end +end + +wire [21:0] lfsro; +lfsr #(22,22'h0ACE3) u1 (rst, clk, nxt, 1'b0, lfsro); +reg [pLNMSB:0] wlineno; +always @(posedge clk) +if (rst) + wlineno <= 6'h00; +else begin + if (wr) begin + case(lfsro[1:0]) + 2'b00: begin mem0[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b00,adr[pMSB:5]}; end + 2'b01: begin mem1[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b01,adr[pMSB:5]}; end + 2'b10: begin mem2[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b10,adr[pMSB:5]}; end + 2'b11: begin mem3[adr[pMSB:5]] <= adr[AMSB+8:5]; wlineno <= {2'b11,adr[pMSB:5]}; end + endcase + end +end + +wire hit0 = mem0[adr[pMSB:5]]==adr[AMSB+8:5]; +wire hit1 = mem1[adr[pMSB:5]]==adr[AMSB+8:5]; +wire hit2 = mem2[adr[pMSB:5]]==adr[AMSB+8:5]; +wire hit3 = mem3[adr[pMSB:5]]==adr[AMSB+8:5]; +always @* + //if (wr2) lineno = wlineno; + if (hit0) lineno = {2'b00,adr[pMSB:5]}; + else if (hit1) lineno = {2'b01,adr[pMSB:5]}; + else if (hit2) lineno = {2'b10,adr[pMSB:5]}; + else lineno = {2'b11,adr[pMSB:5]}; +assign hit = hit0|hit1|hit2|hit3; +endmodule + + +// ----------------------------------------------------------------------------- +// 32 way, 16 set associative tag memory for L2 cache +// ----------------------------------------------------------------------------- + +module FT64_L2_icache_camtag(rst, clk, wr, adr, hit, lineno); +parameter AMSB=63; +input rst; +input clk; +input wr; +input [AMSB+8:0] adr; +output hit; +output [8:0] lineno; + +wire [3:0] set = adr[13:10]; +wire [AMSB+8-5:0] tagi = {7'd0,adr[AMSB+8:14],adr[9:5]}; +reg 
[4:0] encadr; +assign lineno[4:0] = encadr; +assign lineno[8:5] = adr[13:10]; +reg [15:0] we; +wire [31:0] ma [0:15]; +always @* +begin + we <= 16'h0000; + we[set] <= wr; +end + +reg wr2; +wire [21:0] lfsro; +lfsr #(22,22'h0ACE2) u1 (rst, clk, !(wr2|wr), 1'b0, lfsro); + +always @(posedge clk) + wr2 <= wr; + +genvar g; +generate +begin +for (g = 0; g < 16; g = g + 1) + cam36x32 u01 (clk, we[g], lfsro[4:0], tagi, tagi, ma[g]); +end +endgenerate +wire [31:0] match_addr = ma[set]; +assign hit = |match_addr; + +integer n; +always @* +begin +encadr = 0; +for (n = 0; n < 32; n = n + 1) + if (match_addr[n]) encadr = n; +end + +endmodule + +// ----------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- + +module FT64_L1_icache(rst, clk, nxt, wr, wr_ack, en, wadr, adr, i, o, fault, hit, invall, invline); +parameter pSize = 2; +parameter CAMTAGS = 1'b0; // 32 way +parameter FOURWAY = 1'b1; +parameter AMSB = 63; +localparam pLines = pSize==4 ? 128 : 64; +localparam pLNMSB = pSize==4 ? 6 : 5; +input rst; +input clk; +input nxt; +input wr; +output wr_ack; +input [8:0] en; +input [AMSB+8:0] adr; +input [AMSB+8:0] wadr; +input [297:0] i; +output reg [55:0] o; +output reg [1:0] fault; +output hit; +input invall; +input invline; + +wire [297:0] ic; +reg [297:0] i1, i2; +wire [8:0] lv; // line valid +wire [pLNMSB:0] lineno; +wire [pLNMSB:0] wlineno; +wire taghit; +reg wr1,wr2; +reg [8:0] en1, en2; +reg invline1, invline2; + +// Must update the cache memory on the cycle after a write to the tag memmory. +// Otherwise lineno won't be valid. Tag memory takes two clock cycles to update. 
+always @(posedge clk) + wr1 <= wr; +always @(posedge clk) + wr2 <= wr1; +always @(posedge clk) + i1 <= i[297:0]; +always @(posedge clk) + i2 <= i1; +always @(posedge clk) + en1 <= en; +always @(posedge clk) + en2 <= en1; +always @(posedge clk) + invline1 <= invline; +always @(posedge clk) + invline2 <= invline1; + +generate begin : tags +if (FOURWAY) begin + +FT64_L1_icache_mem #(.pLines(pLines)) u1 +( + .rst(rst), + .clk(clk), + .wr(wr1), + .en(en1), + .i(i1), + .lineno(lineno), + .o(ic), + .ov(lv), + .invall(invall), + .invline(invline1) +); + +FT64_L1_icache_cmptag4way #(.pLines(pLines)) u3 +( + .rst(rst), + .clk(clk), + .nxt(nxt), + .wr(wr), + .adr(adr), + .lineno(lineno), + .hit(taghit) +); +end +else if (CAMTAGS) begin + +FT64_L1_icache_mem u1 +( + .rst(rst), + .clk(clk), + .wr(wr2), + .en(en2), + .i(i2), + .lineno(lineno), + .o(ic), + .ov(lv), + .invall(invall), + .invline(invline2) +); + +FT64_L1_icache_camtag u2 +( + .rst(rst), + .clk(clk), + .nxt(nxt), + .wlineno(wlineno), + .wadr(wadr), + .wr(wr), + .adr(adr), + .lineno(lineno), + .hit(taghit) +); +end +end +endgenerate + +// Valid if a 64-bit area encompassing a potential 48-bit instruction is valid. 
+assign hit = taghit & lv[adr[4:2]] & lv[adr[4:2]+4'd1]; + +//always @(radr or ic0 or ic1) +always @(adr or ic) + o <= ic >> {adr[4:0],3'h0}; +always @* + fault <= ic[297:296]; + +assign wr_ack = wr2; + +endmodule + +// ----------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- + +module FT64_L2_icache_mem(clk, wr, lineno, sel, i, fault, o, ov, invall, invline); +input clk; +input wr; +input [8:0] lineno; +input [2:0] sel; +input [63:0] i; +input [1:0] fault; +output [297:0] o; +output reg ov; +input invall; +input invline; + +(* ram_style="block" *) +reg [63:0] mem0 [0:511]; +reg [63:0] mem1 [0:511]; +reg [63:0] mem2 [0:511]; +reg [63:0] mem3 [0:511]; +reg [39:0] mem4 [0:511]; +reg [1:0] memf [0:511]; +reg [511:0] valid; +reg [8:0] rrcl; + +// instruction parcels per cache line +wire [8:0] cache_line; +integer n; +initial begin + for (n = 0; n < 512; n = n + 1) begin + valid[n] <= 0; + memf[n] <= 2'b00; + end +end + +always @(posedge clk) + if (invall) valid <= 512'd0; + else if (invline) valid[lineno] <= 1'b0; + else if (wr) valid[lineno] <= 1'b1; + +always @(posedge clk) +begin + if (wr) begin + case(sel[2:0]) + 3'd0: begin mem0[lineno] <= i; memf[lineno] <= fault; end + 3'd1: begin mem1[lineno] <= i; memf[lineno] <= memf[lineno] | fault; end + 3'd2: begin mem2[lineno] <= i; memf[lineno] <= memf[lineno] | fault; end + 3'd3: begin mem3[lineno] <= i; memf[lineno] <= memf[lineno] | fault; end + 3'd4: begin mem4[lineno] <= i[39:0]; memf[lineno] <= memf[lineno] | fault; end + endcase + end +end + +always @(posedge clk) + rrcl <= lineno; + +always @(posedge clk) + ov <= valid[lineno]; + +assign o = {memf[rrcl],mem4[rrcl],mem3[rrcl],mem2[rrcl],mem1[rrcl],mem0[rrcl]}; + +endmodule + +// ----------------------------------------------------------------------------- +// Because the line to update is driven by the output of the cam tag memory, +// the tag write should occur 
only during the first half of the line load. +// Otherwise the line number would change in the middle of the line. The +// first half of the line load is signified by an even hexibyte address ( +// address bit 4). +// ----------------------------------------------------------------------------- + +module FT64_L2_icache(rst, clk, nxt, wr, wr_ack, rd_ack, xsel, adr, cnt, exv_i, i, err_i, o, hit, invall, invline); +parameter CAMTAGS = 1'b0; // 32 way +parameter FOURWAY = 1'b1; +parameter AMSB = 63; +input rst; +input clk; +input nxt; +input wr; +output wr_ack; +output rd_ack; +input xsel; +input [AMSB+8:0] adr; +input [2:0] cnt; +input exv_i; +input [63:0] i; +input err_i; +output [297:0] o; +output hit; +input invall; +input invline; + +wire lv; // line valid +wire [8:0] lineno; +wire taghit; +reg wr1,wr2; +reg [2:0] sel1,sel2; +reg [63:0] i1,i2; +reg [1:0] f1, f2; +reg [AMSB+8:0] last_adr; + +// Must update the cache memory on the cycle after a write to the tag memmory. +// Otherwise lineno won't be valid. camTag memory takes two clock cycles to update. 
+always @(posedge clk) + wr1 <= wr; +always @(posedge clk) + wr2 <= wr1; +always @(posedge clk) + sel1 <= {xsel,adr[4:3]}; +always @(posedge clk) + sel2 <= sel1; +always @(posedge clk) + last_adr <= adr; +always @(posedge clk) + f1 <= {err_i,exv_i}; +always @(posedge clk) + f2 <= f1; + +reg [3:0] rdackx; +always @(posedge clk) +if (rst) + rdackx <= 4'b0; +else begin + if (last_adr != adr || wr || wr1 || wr2) + rdackx <= 4'b0; + else + rdackx <= {rdackx,~(wr|wr1|wr2)}; +end + +assign rd_ack = rdackx[3] & ~(last_adr!=adr || wr || wr1 || wr2); + +always @(posedge clk) + i1 <= i; +always @(posedge clk) + i2 <= i1; + +wire pe_wr; +edge_det u3 (.rst(rst), .clk(clk), .ce(1'b1), .i(wr && cnt==3'd0), .pe(pe_wr), .ne(), .ee() ); + +FT64_L2_icache_mem u1 +( + .clk(clk), + .wr(wr2), + .lineno(lineno), + .sel(sel2), + .i(i2), + .fault(f2), + .o(o), + .ov(lv), + .invall(invall), + .invline(invline) +); + +generate +begin : tags +if (FOURWAY) +FT64_L2_icache_cmptag4way u2 +( + .rst(rst), + .clk(clk), + .nxt(nxt), + .wr(pe_wr), + .adr(adr), + .lineno(lineno), + .hit(taghit) +); +else if (CAMTAGS) +FT64_L2_icache_camtag u2 +( + .rst(rst), + .clk(clk), + .wr(pe_wr), + .adr(adr), + .lineno(lineno), + .hit(taghit) +); +else +FT64_L2_icache_cmptag u2 +( + .rst(rst), + .clk(clk), + .wr(pe_wr), + .adr(adr), + .lineno(lineno), + .hit(taghit) +); +end +endgenerate + +assign hit = taghit & lv; +assign wr_ack = wr2; + +endmodule + +// Four way set associative tag memory +module FT64_L2_icache_cmptag4way(rst, clk, nxt, wr, adr, lineno, hit); +parameter AMSB = 63; +input rst; +input clk; +input nxt; +input wr; +input [AMSB+8:0] adr; +output reg [8:0] lineno; +output hit; + +(* ram_style="block" *) +reg [AMSB+8-5:0] mem0 [0:127]; +reg [AMSB+8-5:0] mem1 [0:127]; +reg [AMSB+8-5:0] mem2 [0:127]; +reg [AMSB+8-5:0] mem3 [0:127]; +reg [AMSB+8:0] rradr; +integer n; +initial begin + for (n = 0; n < 128; n = n + 1) + begin + mem0[n] = 0; + mem1[n] = 0; + mem2[n] = 0; + mem3[n] = 0; + end +end + +reg 
wr2; +wire [21:0] lfsro; +lfsr #(22,22'h0ACE3) u1 (rst, clk, nxt, 1'b0, lfsro); +reg [8:0] wlineno; +always @(posedge clk) +if (rst) + wlineno <= 9'h000; +else begin + wr2 <= wr; + if (wr) begin + case(lfsro[1:0]) + 2'b00: begin mem0[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b00,adr[11:5]}; end + 2'b01: begin mem1[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b01,adr[11:5]}; end + 2'b10: begin mem2[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b10,adr[11:5]}; end + 2'b11: begin mem3[adr[11:5]] <= adr[AMSB+8:5]; wlineno <= {2'b11,adr[11:5]}; end + endcase + end + rradr <= adr; +end + +wire hit0 = mem0[rradr[11:5]]==rradr[AMSB+8:5]; +wire hit1 = mem1[rradr[11:5]]==rradr[AMSB+8:5]; +wire hit2 = mem2[rradr[11:5]]==rradr[AMSB+8:5]; +wire hit3 = mem3[rradr[11:5]]==rradr[AMSB+8:5]; +always @* + if (wr2) lineno = wlineno; + else if (hit0) lineno = {2'b00,rradr[11:5]}; + else if (hit1) lineno = {2'b01,rradr[11:5]}; + else if (hit2) lineno = {2'b10,rradr[11:5]}; + else lineno = {2'b11,rradr[11:5]}; +assign hit = hit0|hit1|hit2|hit3; +endmodule + +// Simple tag array, 1-way direct mapped +module FT64_L2_icache_cmptag(rst, clk, wr, adr, lineno, hit); +parameter AMSB = 63; +input rst; +input clk; +input wr; +input [AMSB+8:0] adr; +output reg [8:0] lineno; +output hit; + +reg [AMSB+8-14:0] mem [0:511]; +reg [AMSB+8:0] rradr; +integer n; +initial begin + for (n = 0; n < 512; n = n + 1) + begin + mem[n] = 0; + end +end + +reg wr2; +always @(posedge clk) + wr2 <= wr; +reg [8:0] wlineno; +always @(posedge clk) +begin + if (wr) begin mem[adr[13:5]] <= adr[AMSB+8:14]; wlineno <= adr[13:5]; end +end +always @(posedge clk) + rradr <= adr; +wire hit = mem[rradr[13:5]]==rradr[AMSB+8:14]; +always @* + if (wr2) lineno = wlineno; + else lineno = rradr[13:5]; +endmodule + Index: thor/trunk/FT64v7/rtl/common/FT64_idecoder.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_idecoder.v (nonexistent) +++ 
thor/trunk/FT64v7/rtl/common/FT64_idecoder.v (revision 60)
@@ -0,0 +1,1208 @@
+// ============================================================================
+// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// FT64_idecoder.v
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ============================================================================
+//
+// Plain file names (as in FT64_AMO_alu.v) so the includes also resolve on
+// tools/hosts that do not accept a backslash path separator.
+`include "FT64_config.vh"
+`include "FT64_defines.vh"
+
+module FT64_idecoder(clk,idv_i,id_i,instr,vl,ven,thrd,predict_taken,Rt,bus,id_o,idv_o,debug_on,pred_on);
+input clk;
+input idv_i;
+input [4:0] id_i;
+input [47:0] instr;
+input [7:0] vl;
+input [5:0] ven;
+input thrd;
+input predict_taken;
+input [4:0] Rt;
+output reg [143:0] bus;
+output reg [4:0] id_o;
+output reg idv_o;
+input debug_on;
+input pred_on;
+
+parameter TRUE = 1'b1;
+parameter FALSE = 1'b0;
+// Memory access sizes
+parameter byt = 3'd0;
+parameter wyde = 3'd1;
+parameter tetra = 3'd2;
+parameter octa = 3'd3;
+
+// Really IsPredictableBranch
+// Does not include BccR's
+//function IsBranch;
+//input [47:0] isn;
+//casez(isn[`INSTRUCTION_OP])
+//`Bcc: IsBranch = TRUE;
+//`BBc: IsBranch = TRUE;
+//`BEQI: IsBranch = TRUE;
+//`CHK: IsBranch = TRUE;
+//default: IsBranch = FALSE;
+//endcase
+//endfunction
+
+// Branch displacement field of the instruction currently being decoded.
+wire [10:0] brdisp = instr[31:21];
+
+wire iAlu;
+mIsALU uialu1
+(
+    .instr(instr),
+    .IsALU(iAlu)
+);
+
+// TRUE for the R2-format TLB instruction.
+function IsTLB;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`R2:
+    case(isn[`INSTRUCTION_S2])
+    `TLB: IsTLB = TRUE;
+    default: IsTLB = FALSE;
+    endcase
+default: IsTLB = FALSE;
+endcase
+endfunction
+
+// IsALU: FALSE for flow-control ops, floating-point ops and RTI; TRUE for
+// everything else, including the vector shifts of both vector classes.
+reg IsALU;
+always @*
+case(instr[`INSTRUCTION_OP])
+`R2:
+    if (instr[`INSTRUCTION_L2]==2'b00)
+        case(instr[`INSTRUCTION_S2])
+        `VMOV: IsALU = TRUE;
+        `RTI: IsALU = FALSE;
+        default: IsALU = TRUE;
+        endcase
+    else
+        IsALU = TRUE;
+`BRK: IsALU = FALSE;
+`Bcc: IsALU = FALSE;
+`BBc: IsALU = FALSE;
+`BEQI: IsALU = FALSE;
+`CHK: IsALU = FALSE;
+`JAL: IsALU = FALSE;
+`JMP: IsALU = FALSE;
+`CALL: IsALU = FALSE;
+`RET: IsALU = FALSE;
+`FVECTOR:
+    case(instr[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: IsALU = TRUE;
+    default: IsALU = FALSE; // Float vector op: not an ALU op
+    endcase
+`IVECTOR:
+    case(instr[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: IsALU = TRUE;
+    default: IsALU = TRUE; // Integer
+    endcase
+`FLOAT: IsALU = FALSE;
+default: IsALU = TRUE;
+endcase
+
+// TRUE when the instruction is flagged as executable only on ALU #0.
+// For the register shifts the answer depends on the shift sub-op in
+// isn[25:23]: SHL/ASL are not restricted. The sub-op is read from the
+// function argument (it previously read the module-level instr, which made
+// the function give the wrong answer for any other argument).
+function IsAlu0Only;
+input [47:0] isn;
+begin
+case(isn[`INSTRUCTION_OP])
+`R2:
+    if (isn[`INSTRUCTION_L2]==2'b00)
+        case(isn[`INSTRUCTION_S2])
+        `TLB: IsAlu0Only = TRUE;
+        `R1: IsAlu0Only = TRUE;
+        `SHIFTR,`SHIFT31,`SHIFT63:
+            IsAlu0Only = !(isn[25:23]==`SHL || isn[25:23]==`ASL);
+        `MULU,`MULSU,`MUL,
+        `MULUH,`MULSUH,`MULH,
+        `MODU,`MODSU,`MOD: IsAlu0Only = TRUE;
+        `DIVU,`DIVSU,`DIV: IsAlu0Only = TRUE;
+        `MIN,`MAX: IsAlu0Only = TRUE;
+        default: IsAlu0Only = FALSE;
+        endcase
+    else
+        IsAlu0Only = FALSE;
+`MEMNDX: IsAlu0Only = TRUE;
+`IVECTOR,`FVECTOR:
+    case(isn[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: IsAlu0Only = TRUE;
+    default: IsAlu0Only = FALSE;
+    endcase
+`BITFIELD: IsAlu0Only = TRUE;
+`MULUI,`MULI,
+`DIVUI,`DIVI,
+`MODI: IsAlu0Only = TRUE;
+`CSRRW: IsAlu0Only = TRUE;
+default: IsAlu0Only = FALSE;
+endcase
+end
+endfunction
+
+// TRUE for FLOAT ops and for float-vector ops other than the vector shifts.
+function IsFPU;
+input [47:0] isn;
+begin
+case(isn[`INSTRUCTION_OP])
+`FLOAT: IsFPU = TRUE;
+`FVECTOR:
+    case(isn[`INSTRUCTION_S2])
+    `VSHL,`VSHR,`VASR: IsFPU = FALSE;
+    default: IsFPU = TRUE;
+    endcase
+default: IsFPU = FALSE;
+endcase
+end
+endfunction
+
+// IsFlowCtrl: branches, jumps, calls, returns, BRK, CHK and RTI.
+// Combinational logic, so blocking assignments are used (as for IsALU).
+reg IsFlowCtrl;
+always @*
+case(instr[`INSTRUCTION_OP])
+`BRK: IsFlowCtrl = TRUE;
+`R2: case(instr[`INSTRUCTION_S2])
+    `RTI: IsFlowCtrl = TRUE;
+    default: IsFlowCtrl = FALSE;
+    endcase
+`Bcc: IsFlowCtrl = TRUE;
+`BBc: IsFlowCtrl = TRUE;
+`BEQI: IsFlowCtrl = TRUE;
+`CHK: IsFlowCtrl = TRUE;
+`JAL: IsFlowCtrl = TRUE;
+`JMP: IsFlowCtrl = TRUE;
+`CALL: IsFlowCtrl = TRUE;
+`RET: IsFlowCtrl = TRUE;
+default: IsFlowCtrl = FALSE;
+endcase
+
+//function IsFlowCtrl;
+//input [47:0] isn;
+//begin
+//case(isn[`INSTRUCTION_OP])
+//`BRK: IsFlowCtrl = TRUE;
+//`RR: case(isn[`INSTRUCTION_S2])
+// `RTI: IsFlowCtrl = TRUE;
+// default: IsFlowCtrl = FALSE;
+// endcase
+//`Bcc: IsFlowCtrl = TRUE;
+//`BBc: IsFlowCtrl = TRUE;
+//`BEQI: IsFlowCtrl = TRUE;
+//`CHK: IsFlowCtrl = TRUE;
+//`JAL: IsFlowCtrl = TRUE;
+//`JMP: IsFlowCtrl = TRUE;
+//`CALL: IsFlowCtrl = TRUE;
+//`RET: IsFlowCtrl = TRUE;
+//default: IsFlowCtrl = FALSE;
+//endcase
+//end
+//endfunction
+
+// fnCanException
+//
+// Used by memory issue logic (stores).
+// Returns TRUE if the instruction can cause an exception.
+// In debug mode any instruction could potentially cause a breakpoint exception.
+// Rather than check all the addresses for potential debug exceptions it's
+// simpler to just have it so that all instructions could exception. This will
+// slow processing down somewhat as stores will only be done at the head of the
+// instruction queue, but it's debug mode so we probably don't care.
//
// NOTE(review): debug_on and brdisp are not function inputs; they are read
// from the enclosing module scope (see the ToDo below).
function fnCanException;
input [47:0] isn;
begin
// ToDo add debug_on as input
`ifdef SUPPORT_DBG
if (debug_on)
	fnCanException = TRUE;
else
`endif
case(isn[`INSTRUCTION_OP])
`FLOAT:
	case(isn[`INSTRUCTION_S2])
	`FDIV,`FMUL,`FADD,`FSUB,`FTX:
		fnCanException = TRUE;
	default:	fnCanException = FALSE;
	endcase
`DIVI,`MODI,`MULI:
	fnCanException = TRUE;
`R2:
	case(isn[`INSTRUCTION_S2])
	`MUL,
	`DIV,`MULSU,`DIVSU,
	`MOD,`MODSU:
		fnCanException = TRUE;
	`RTI:		fnCanException = TRUE;
	default:	fnCanException = FALSE;
	endcase
// Had branches that could exception if looping to self. But in a tight loop
// it affects store performance.
// -> A branch may only exception if it loops back to itself.
`Bcc,`BBc,`BEQI:	fnCanException = isn[7] ? brdisp == 11'h7FF : brdisp == 11'h7FE;
`CHK:	fnCanException = TRUE;
default:
// Stores can still exception if there is a write buffer, but we allow following
// stores to be issued by ignoring the fact they can exception because the stores
// can be undone by invalidating the write buffer.
`ifdef HAS_WB
	fnCanException = IsMem(isn) && !IsStore(isn);
`else
	fnCanException = IsMem(isn);
`endif
endcase
end
endfunction

// TRUE for the load opcodes; for MEMNDX the load/store split is bit 31 (0 = load).
function IsLoad;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:	IsLoad = !isn[31];
`LB,`LBU,
`Lx,`LxU,
`LWR,
`LV,`LVx,`LVxU:	IsLoad = TRUE;
default:	IsLoad = FALSE;
endcase
endfunction

// TRUE for LWR/LVx/LVxU and their indexed (MEMNDX, 32-bit form) counterparts.
function IsVolatileLoad;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[22:21]})
		`LWRX,
		`LVBX,`LVBUX,
		`LVCX,`LVCUX,
		`LVHX,`LVHUX,
		`LVWX:		IsVolatileLoad = TRUE;
		default:	IsVolatileLoad = FALSE;
		endcase
	else
		IsVolatileLoad = FALSE;
`LWR:	IsVolatileLoad = TRUE;
`LVx:	IsVolatileLoad = TRUE;
`LVxU:	IsVolatileLoad = TRUE;
default:	IsVolatileLoad = FALSE;
endcase
endfunction

// TRUE for instructions that write memory (stores, PUSH, CAS, INC, AMO).
// The indexed increment is matched with `INCX, the same code IsInc tests against
// this selector; the original used the major opcode `INC here.
function IsStore;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b10) begin
		if (isn[31])
			case({isn[31:28],isn[17:16]})
			`PUSH:		IsStore = TRUE;
			default:	IsStore = FALSE;
			endcase
		else
			IsStore = FALSE;
	end
	else if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`PUSH:	IsStore = TRUE;
		`SBX:	IsStore = TRUE;
		`SCX:	IsStore = TRUE;
		`SHX:	IsStore = TRUE;
		`SWX:	IsStore = TRUE;
		`SWCX:	IsStore = TRUE;
		`SVX:	IsStore = TRUE;
		`CASX:	IsStore = TRUE;
		`INCX:	IsStore = TRUE;
		default:	IsStore = FALSE;
		endcase
	else
		IsStore = FALSE;
`SB:	IsStore = TRUE;
`Sx:	IsStore = TRUE;
`SWC:	IsStore = TRUE;
`INC:	IsStore = TRUE;
`SV:	IsStore = TRUE;
`CAS:	IsStore = TRUE;
`AMO:	IsStore = TRUE;
default:	IsStore = FALSE;
endcase
endfunction

// TRUE for the MEMNDX PUSH function (L2 == 2'b10 with bit 31 set, or L2 == 2'b00).
function IsPush;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b10) begin
		if (isn[31])
			case({isn[31:28],isn[17:16]})
			`PUSH:		IsPush = TRUE;
			default:	IsPush = FALSE;
			endcase
		else
			IsPush = FALSE;
	end
	else if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`PUSH:		IsPush = TRUE;
		default:	IsPush = FALSE;
		endcase
	else
		IsPush = FALSE;
default:	IsPush = FALSE;
endcase
endfunction

// TRUE for every opcode that accesses memory.
function [0:0] IsMem;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:	IsMem = TRUE;
`AMO:	IsMem = TRUE;
`LB,`LBU,
`Lx,`LxU,
`LWR:	IsMem = TRUE;
`LV,`SV:	IsMem = TRUE;
`INC:	IsMem = TRUE;
`SB,`Sx,
`SWC,`CAS:	IsMem = TRUE;
`LVx,`LVxU:	IsMem = TRUE;
default:	IsMem = FALSE;
endcase
endfunction

// TRUE for the indexed memory opcode (MEMNDX).
function IsMemNdx;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:	IsMemNdx = TRUE;
default:	IsMemNdx = FALSE;
endcase
endfunction

// Returns the access-size code (byt / wyde / tetra / octa, defined in the
// enclosing module) for a memory instruction; octa is the fallback.
function [2:0] MemSize;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00) begin
		if (IsLoad(isn))
			case({isn[31:28],isn[22:21]})
			`LVBX,`LVBUX:	MemSize = byt;
			`LBX,`LBUX:		MemSize = byt;
			`LVCX,`LVCUX:	MemSize = wyde;
			`LCX,`LCUX:		MemSize = wyde;
			`LVHX,`LVHUX:	MemSize = tetra;
			`LHX:	MemSize = tetra;
			`LHUX:	MemSize = tetra;
			`LVWX:	MemSize = octa;
			`LWX:	MemSize = octa;
			`LWRX:	MemSize = octa;
			`LVX:	MemSize = octa;
			`LVx:
				case(isn[20:18])
				3'd0,3'd1:	MemSize = byt;
				3'd2,3'd3:	MemSize = wyde;
				3'd4,3'd5:	MemSize = tetra;
				default:	MemSize = octa;
				endcase
			default:	MemSize = octa;
			endcase
		else
			case({isn[31:28],isn[17:16]})
			`SBX:	MemSize = byt;
			`SCX:	MemSize = wyde;
			`SHX:	MemSize = tetra;
			`SWX:	MemSize = octa;
			`SWCX:	MemSize = octa;
			`SVX:	MemSize = octa;
			default:	MemSize = octa;
			endcase
	end
	else
		MemSize = octa;
`LB,`LBU:	MemSize = byt;
`Lx,`LxU,`LVx,`LVxU:
	// size is encoded in the low-order bits of the displacement field
	casez(isn[20:18])
	3'b100:	MemSize = octa;
	3'b?10:	MemSize = tetra;
	3'b??1:	MemSize = wyde;
	default:	MemSize = octa;
	endcase
`LWR:	MemSize = octa;
`LV:	MemSize = octa;
`AMO:
	case(isn[23:21])
	3'd0:	MemSize = byt;
	3'd1:	MemSize = wyde;
	3'd2:	MemSize = tetra;
	3'd3:	MemSize = octa;
	default:	MemSize = octa;
	endcase
`SB:	MemSize = byt;
`Sx:
	casez(isn[15:13])
	3'b100:	MemSize = octa;
	3'b?10:	MemSize = tetra;
	3'b??1:	MemSize = wyde;
	default:	MemSize = octa;
	endcase
`SWC:	MemSize = octa;
`SV:	MemSize = octa;
default:	MemSize = octa;
endcase
endfunction

// TRUE for CAS and the indexed CASX.
function IsCAS;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`CASX:		IsCAS = TRUE;
		default:	IsCAS = FALSE;
		endcase
	else
		IsCAS = FALSE;
`CAS:	IsCAS = TRUE;
default:	IsCAS = FALSE;
endcase
endfunction

// TRUE for the AMO opcode.
function IsAMO;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`AMO:	IsAMO = TRUE;
default:	IsAMO = FALSE;
endcase
endfunction

// TRUE for INC and the indexed INCX.
function IsInc;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`INCX:		IsInc = TRUE;
		default:	IsInc = FALSE;
		endcase
	else
		IsInc = FALSE;
`INC:	IsInc = TRUE;
default:	IsInc = FALSE;
endcase
endfunction

function IsFSync;
input [47:0] isn;
IsFSync = (isn[`INSTRUCTION_OP]==`FLOAT && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`FSYNC);
endfunction

function IsMemdb;
input [47:0] isn;
IsMemdb = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMDB);
endfunction

// Uses `R2 like IsMemdb / IsSync (the original spelled the opcode `RR).
function IsMemsb;
input [47:0] isn;
IsMemsb = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMSB);
endfunction

function IsSEI;
input [47:0] isn;
IsSEI = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`SEI);
endfunction

// TRUE for the 48-bit form (L2 == 2'b01) of SHIFTR.
function IsShift48;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`R2:
	if (isn[`INSTRUCTION_L2]==2'b01)
		case(isn[47:42])
		`SHIFTR:	IsShift48 = TRUE;
		default:	IsShift48 = FALSE;
		endcase
	else
		IsShift48 = FALSE;
default:	IsShift48 = FALSE;
endcase
endfunction

// TRUE for the 32-bit form (L2 == 2'b00) shift functions.
function IsShift;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`R2:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case(isn[31:26])
		`SHIFTR:	IsShift = TRUE;
		`SHIFT31:	IsShift = TRUE;
		`SHIFT63:	IsShift = TRUE;
		default:	IsShift = FALSE;
		endcase
	else
		IsShift = FALSE;
default:	IsShift = FALSE;
endcase
endfunction

// TRUE for CMPI and the register form R2/CMP.
function IsCmp;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`R2:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case(isn[31:26])
		`CMP:		IsCmp = TRUE;
		default:	IsCmp = FALSE;
		endcase
	else
		IsCmp = FALSE;
`CMPI:	IsCmp = TRUE;
default:	IsCmp = FALSE;
endcase
endfunction

function IsLWRX;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[22:21]})
		`LWRX:		IsLWRX = TRUE;
		default:	IsLWRX = FALSE;
		endcase
	else
		IsLWRX = FALSE;
default:	IsLWRX = FALSE;
endcase
endfunction

// Acquire / release bits are only available on indexed SWC / LWR
function IsSWCX;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`SWCX:		IsSWCX = TRUE;
		default:	IsSWCX = FALSE;
		endcase
	else
		IsSWCX = FALSE;
default:	IsSWCX = FALSE;
endcase
endfunction

function IsJmp;
input [47:0] isn;
IsJmp = isn[`INSTRUCTION_OP]==`JMP;
endfunction

// Really IsPredictableBranch
// Does not include BccR's
function IsBranch;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`Bcc:	IsBranch = TRUE;
`BBc:	IsBranch = TRUE;
`BEQI:	IsBranch = TRUE;
`CHK:	IsBranch = TRUE;
default:	IsBranch = FALSE;
endcase
endfunction

function IsJAL;
input [47:0] isn;
IsJAL = isn[`INSTRUCTION_OP]==`JAL;
endfunction

function IsRet;
input [47:0] isn;
IsRet = isn[`INSTRUCTION_OP]==`RET;
endfunction

// A BRK whose bits 25:21 are zero.
function IsIrq;
input [47:0] isn;
IsIrq = isn[`INSTRUCTION_OP]==`BRK && isn[25:21]==5'h0;
endfunction

function IsBrk;
input [47:0] isn;
IsBrk = isn[`INSTRUCTION_OP]==`BRK;
endfunction

// Uses `R2, matching the IsFlowCtrl decode of RTI (the original spelled it `RR).
function IsRti;
input [47:0] isn;
IsRti = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_S2]==`RTI;
endfunction

function IsSync;
input [47:0] isn;
IsSync = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`SYNC) || IsRti(isn);
endfunction

// Has an extendable 14-bit constant
function HasConst;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`ADDI,
`SLTI,`SLTUI,`SGTI,`SGTUI,
`ANDI,`ORI,`XORI,`XNORI,
`MULUI,`MULI,`MULFI,
`DIVUI,`DIVI,`MODI:	HasConst = TRUE;
`LB,`LBU,
`Lx,`LxU,
`LWR,`LV:			HasConst = TRUE;
`SB,`Sx,`SWC,
`INC,`SV,`CAS:		HasConst = TRUE;
`JAL,`CALL,`RET:	HasConst = TRUE;
`LVx,`LVxU:			HasConst = TRUE;
default:	HasConst = FALSE;
endcase
endfunction

// TRUE for the instructions this decoder flags as "oddball": BRK, vector
// VSxx, R2 VMOV/SEI/RTI, indexed CACHEX, CSRRW, REX, CACHE and FLOAT.
// The R2 arm uses `R2 for consistency with the rest of the decoder
// (the original spelled it `RR).
function IsOddball;
input [47:0] instr;
//if (|iqentry_exc[head])
//	IsOddball = TRUE;
//else
case(instr[`INSTRUCTION_OP])
`BRK:	IsOddball = TRUE;
`IVECTOR:
	case(instr[`INSTRUCTION_S2])
	`VSxx:		IsOddball = TRUE;
	default:	IsOddball = FALSE;
	endcase
`R2:
	case(instr[`INSTRUCTION_S2])
	`VMOV:		IsOddball = TRUE;
	`SEI,`RTI:	IsOddball = TRUE;
	default:	IsOddball = FALSE;
	endcase
`MEMNDX:
	case({instr[31:28],instr[17:16]})
	`CACHEX:	IsOddball = TRUE;
	default:	IsOddball = FALSE;
	endcase
`CSRRW,`REX,`CACHE,`FLOAT:	IsOddball = TRUE;
default:	IsOddball = FALSE;
endcase
endfunction

// TRUE when the instruction writes the register file.
function IsRFW;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`IVECTOR:	IsRFW = TRUE;
`FVECTOR:	IsRFW = TRUE;
`R2:
	if (isn[`INSTRUCTION_L2]==2'b00)
		casez(isn[`INSTRUCTION_S2])
		`TLB:	IsRFW = TRUE;
		`R1:
			case(isn[22:18])
			`MEMDB,`MEMSB,`SYNC,`SETWB,5'h14,5'h15:	IsRFW = FALSE;
			default:	IsRFW = TRUE;
			endcase
		`ADD:	IsRFW = TRUE;
		`SUB:	IsRFW = TRUE;
		`SLT:	IsRFW = TRUE;
		`SLTU:	IsRFW = TRUE;
		`SLE:	IsRFW = TRUE;
		`SLEU:	IsRFW = TRUE;
		`AND:	IsRFW = TRUE;
		`OR:	IsRFW = TRUE;
		`XOR:	IsRFW = TRUE;
		`NAND:	IsRFW = TRUE;
		`NOR:	IsRFW = TRUE;
		`XNOR:	IsRFW = TRUE;
		`MULU:	IsRFW = TRUE;
		`MULSU:	IsRFW = TRUE;
		`MUL:	IsRFW = TRUE;
		`MULUH:	IsRFW = TRUE;
		`MULSUH:	IsRFW = TRUE;
		`MULH:	IsRFW = TRUE;
		`MULF:	IsRFW = TRUE;
		`FXMUL:	IsRFW = TRUE;
		`DIVU:	IsRFW = TRUE;
		`DIVSU:	IsRFW = TRUE;
		`DIV:	IsRFW = TRUE;
		`MODU:	IsRFW = TRUE;
		`MODSU:	IsRFW = TRUE;
		`MOD:	IsRFW = TRUE;
		`MOV:	IsRFW = TRUE;
		`VMOV:	IsRFW = TRUE;
		`SHIFTR,`SHIFT31,`SHIFT63:
				IsRFW = TRUE;
		`MIN,`MAX:	IsRFW = TRUE;
		`SEI:	IsRFW = TRUE;
		default:	IsRFW = FALSE;
		endcase
	else if (isn[`INSTRUCTION_L2]==2'b01)
		case(isn[47:42])
		`CMOVEZ:	IsRFW = TRUE;
		`CMOVNZ:	IsRFW = TRUE;
		default:	IsRFW = FALSE;
		endcase
	else if (isn[7]==1'b1)
		casez(isn[`INSTRUCTION_S2])
		`ADD:	IsRFW = TRUE;
		`SUB:	IsRFW = TRUE;
		`AND:	IsRFW = TRUE;
		`OR:	IsRFW = TRUE;
		`XOR:	IsRFW = TRUE;
		`MOV:	IsRFW = TRUE;
		`SHIFTR,`SHIFT31,`SHIFT63:
				IsRFW = TRUE;
		default:	IsRFW = FALSE;
		endcase
	else
		IsRFW = FALSE;
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b10) begin
		if (IsLoad(isn))
			IsRFW = TRUE;
		else
			case({isn[31:28],isn[17:16]})
			`PUSH:	IsRFW = TRUE;
			`CASX:	IsRFW = TRUE;
			default:	IsRFW = FALSE;
			endcase
	end
	else if (isn[`INSTRUCTION_L2]==2'b00) begin
		if (IsLoad(isn))
			case({isn[31:28],isn[22:21]})
			`LBX:	IsRFW = TRUE;
			`LBUX:	IsRFW = TRUE;
			`LCX:	IsRFW = TRUE;
			`LCUX:	IsRFW = TRUE;
			`LHX:	IsRFW = TRUE;
			`LHUX:	IsRFW = TRUE;
			`LWX:	IsRFW = TRUE;
			`LVBX:	IsRFW = TRUE;
			`LVBUX:	IsRFW = TRUE;
			`LVCX:	IsRFW = TRUE;
			`LVCUX:	IsRFW = TRUE;
			`LVHX:	IsRFW = TRUE;
			`LVHUX:	IsRFW = TRUE;
			`LVWX:	IsRFW = TRUE;
			`LWRX:	IsRFW = TRUE;
			`LVX:	IsRFW = TRUE;
			default:	IsRFW = FALSE;
			endcase
		else
			case({isn[31:28],isn[17:16]})
			`PUSH:	IsRFW = TRUE;
			`CASX:	IsRFW = TRUE;
			default:	IsRFW = FALSE;
			endcase
	end
	else
		IsRFW = FALSE;
`BBc:	IsRFW = FALSE;
`BITFIELD:	IsRFW = TRUE;
`ADDI:	IsRFW = TRUE;
`SLTI:	IsRFW = TRUE;
`SLTUI:	IsRFW = TRUE;
`SGTI:	IsRFW = TRUE;
`SGTUI:	IsRFW = TRUE;
`ANDI:	IsRFW = TRUE;
`ORI:	IsRFW = TRUE;
`XORI:	IsRFW = TRUE;
`XNORI:	IsRFW = TRUE;
`MULUI:	IsRFW = TRUE;
`MULI:	IsRFW = TRUE;
`MULFI:	IsRFW = TRUE;
`DIVUI:	IsRFW = TRUE;
`DIVI:	IsRFW = TRUE;
`MODI:	IsRFW = TRUE;
`JAL:	IsRFW = TRUE;
`CALL:	IsRFW = TRUE;
`RET:	IsRFW = TRUE;
`LB:	IsRFW = TRUE;
`LBU:	IsRFW = TRUE;
`Lx:	IsRFW = TRUE;
`LxU:	IsRFW = TRUE;
`LWR:	IsRFW = TRUE;
`LV:	IsRFW = TRUE;
`LVx:	IsRFW = TRUE;
`LVxU:	IsRFW = TRUE;
`CAS:	IsRFW = TRUE;
`AMO:	IsRFW = TRUE;
`CSRRW:	IsRFW = TRUE;
`AUIPC:	IsRFW = TRUE;
`LUI:	IsRFW = TRUE;
default:	IsRFW = FALSE;
endcase
endfunction

// Determines which lanes of the target register get updated.
function [7:0] fnWe;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`R2:
	case(isn[`INSTRUCTION_S2])
	`CMP:	fnWe = 8'h00;	// CMP sets predicate registers so doesn't update general register file.
	default:	fnWe = 8'hFF;
	endcase
`CMPI:	fnWe = 8'h00;
default:	fnWe = 8'hFF;
endcase
/*
casez(isn[`INSTRUCTION_OP])
`R2:
	case(isn[`INSTRUCTION_S2])
	`R1:
		case(isn[22:18])
		`ABS,`CNTLZ,`CNTLO,`CNTPOP:
			case(isn[25:23])
			3'b000: fnWe = 8'h01;
			3'b001: fnWe = 8'h03;
			3'b010: fnWe = 8'h0F;
			3'b011: fnWe = 8'hFF;
			default: fnWe = 8'hFF;
			endcase
		default: fnWe = 8'hFF;
		endcase
	`SHIFT31: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
	`SHIFT63: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF;
	`SLT,`SLTU,`SLE,`SLEU,
	`ADD,`SUB,
	`AND,`OR,`XOR,
	`NAND,`NOR,`XNOR,
	`DIV,`DIVU,`DIVSU,
	`MOD,`MODU,`MODSU,
	`MUL,`MULU,`MULSU,
	`MULH,`MULUH,`MULSUH,
	`FXMUL:
		case(isn[25:23])
		3'b000: fnWe = 8'h01;
		3'b001: fnWe = 8'h03;
		3'b010: fnWe = 8'h0F;
		3'b011: fnWe = 8'hFF;
		default: fnWe = 8'hFF;
		endcase
	default: fnWe = 8'hFF;
	endcase
default: fnWe = 8'hFF;
endcase
*/
endfunction

// Detect if a source is automatically valid
// (TRUE when the operand is unused, or when it names register 0).
function Source1Valid;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`BRK:	Source1Valid = TRUE;
`Bcc:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BBc:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BEQI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CHK:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`R2:
	case(isn[`INSTRUCTION_S2])
	`SHIFT31:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
	`SHIFT63:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
	`SHIFTR:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
	default:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
	endcase
`MEMNDX:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`ADDI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SLTI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SLTUI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SGTI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SGTUI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`ANDI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`ORI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`XORI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`XNORI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`MULUI:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`AMO:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LB:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LBU:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`Lx:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LxU:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LWR:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LV:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`LVx:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SB:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`Sx:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SWC:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`SV:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`INC:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CAS:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`JAL:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`RET:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`CSRRW:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
`BITFIELD:
	case(isn[31:28])
	`BFINSI:	Source1Valid = TRUE;
	default:	Source1Valid = isn[`INSTRUCTION_RA]==5'd0;
	endcase
`IVECTOR:
	Source1Valid = FALSE;
default:	Source1Valid = TRUE;
endcase
endfunction

function Source2Valid;
input [47:0] isn;
casez(isn[`INSTRUCTION_OP])
`BRK:	Source2Valid = TRUE;
`Bcc:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`BBc:	Source2Valid = TRUE;
`BEQI:	Source2Valid = TRUE;
`CHK:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`R2:
	case(isn[`INSTRUCTION_S2])
	`R1:		Source2Valid = TRUE;
	// bit 25 set selects the immediate shift forms, which have no Rb operand
	`SHIFTR:	Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
	`SHIFT31:	Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
	`SHIFT63:	Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0;
	default:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
	endcase
`MEMNDX:
	if (IsLoad(isn))
		case({isn[31:28],isn[22:21]})
		`LVX:		Source2Valid = FALSE;
		default:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
		endcase
	else
		case({isn[31:28],isn[17:16]})
		`SVX:		Source2Valid = FALSE;
		default:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
		endcase
`ADDI:	Source2Valid = TRUE;
`SLTI:	Source2Valid = TRUE;
`SLTUI:	Source2Valid = TRUE;
`SGTI:	Source2Valid = TRUE;
`SGTUI:	Source2Valid = TRUE;
`ANDI:	Source2Valid = TRUE;
`ORI:	Source2Valid = TRUE;
`XORI:	Source2Valid = TRUE;
`XNORI:	Source2Valid = TRUE;
`MULUI:	Source2Valid = TRUE;
`LB:	Source2Valid = TRUE;
`LBU:	Source2Valid = TRUE;
`Lx:	Source2Valid = TRUE;
`LxU:	Source2Valid = TRUE;
`LWR:	Source2Valid = TRUE;
`LVx:	Source2Valid = TRUE;
`INC:	Source2Valid = TRUE;
`SB:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`Sx:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`SWC:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`CAS:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`JAL:	Source2Valid = TRUE;
`RET:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VABS:	Source2Valid = TRUE;
	`VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
			Source2Valid = FALSE;
	`VADDS,`VSUBS,`VANDS,`VORS,`VXORS:
			Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
	`VBITS2V:	Source2Valid = TRUE;
	`V2BITS:	Source2Valid = isn[`INSTRUCTION_RB]==5'd0;
	`VSHL,`VSHR,`VASR:	Source2Valid = isn[22:21]==2'd2;
	default:	Source2Valid = FALSE;
	endcase
`LV:	Source2Valid = TRUE;
`SV:	Source2Valid = FALSE;
`AMO:	Source2Valid = isn[31] || isn[`INSTRUCTION_RB]==5'd0;
default:	Source2Valid = TRUE;
endcase
endfunction

function Source3Valid;
input [47:0] isn;
case(isn[`INSTRUCTION_OP])
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VEX:		Source3Valid = TRUE;
	default:	Source3Valid = TRUE;
	endcase
`CHK:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
`R2:
	if (isn[`INSTRUCTION_L2]==2'b01)
		case(isn[47:42])
		`CMOVEZ,`CMOVNZ:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		default:	Source3Valid = TRUE;
		endcase
	else
		case(isn[`INSTRUCTION_S2])
		`MAJ:		Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		default:	Source3Valid = TRUE;
		endcase
`MEMNDX:
	if (isn[`INSTRUCTION_L2]==2'b00)
		case({isn[31:28],isn[17:16]})
		`SBX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		`SCX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		`SHX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		`SWX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		`SWCX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		`CASX:	Source3Valid = isn[`INSTRUCTION_RC]==5'd0;
		default:	Source3Valid = TRUE;
		endcase
	else
		Source3Valid = TRUE;
default:	Source3Valid = TRUE;
endcase
endfunction

wire isRet = IsRet(instr);
wire isJal = IsJAL(instr);
wire isBrk = IsBrk(instr);
wire isRti = IsRti(instr);

// Packs every decoded attribute of instr into the decode bus.
// Registered on clk when REGISTER_DECODE is defined, combinational otherwise.
`ifdef REGISTER_DECODE
always @(posedge clk)
`else
always @*
`endif
begin
	bus <= 144'h0;
	bus[`IB_CMP] <= IsCmp(instr);
	// Stores split the constant around the Rb field; bit 6 selects the 48-bit form.
	if (IsStore(instr))
		bus[`IB_CONST] <= instr[6]==1'b1 ? {{34{instr[47]}},instr[47:23],instr[17:13]} :
										{{50{instr[31]}},instr[31:23],instr[17:13]};
	else
		bus[`IB_CONST] <= instr[6]==1'b1 ? {{34{instr[47]}},instr[47:18]} :
										{{50{instr[31]}},instr[31:18]};
`ifdef SUPPORT_DCI
	if (instr[`INSTRUCTION_OP]==`CMPRSSD)
		bus[`IB_LN] <= 3'd2 | pred_on;
	else
`endif
	// instruction length in bytes, from the two size bits
	case(instr[7:6])
	2'b00:	bus[`IB_LN] <= 3'd4 | pred_on;
	2'b01:	bus[`IB_LN] <= 3'd6 | pred_on;
	default:	bus[`IB_LN] <= 3'd2 | pred_on;
	endcase
//	bus[`IB_RT] <= fnRt(instr,ven,vl,thrd) | {thrd,7'b0};
//	bus[`IB_RC] <= fnRc(instr,ven,thrd) | {thrd,7'b0};
//	bus[`IB_RA] <= fnRa(instr,ven,vl,thrd) | {thrd,7'b0};
	bus[`IB_IMM] <= HasConst(instr);
//	bus[`IB_A3V] <= Source3Valid(instr);
//	bus[`IB_A2V] <= Source2Valid(instr);
//	bus[`IB_A1V] <= Source1Valid(instr);
	bus[`IB_TLB] <= IsTLB(instr);
	bus[`IB_SZ] <= instr[`INSTRUCTION_OP]==`R2 ? instr[25:23] : 3'd3;	// 3'd3=word size
	bus[`IB_IRQ] <= IsIrq(instr);
	bus[`IB_BRK] <= isBrk;
	bus[`IB_RTI] <= isRti;
	bus[`IB_RET] <= isRet;
	bus[`IB_JAL] <= isJal;
	// IB_BT is now used to indicate when to update the branch target buffer.
	// This occurs when one of the instructions with an unknown or calculated
	// target is present.
	bus[`IB_BT] <= isJal | isRet | isBrk | isRti;
	bus[`IB_ALU] <= IsALU;
	bus[`IB_ALU0] <= IsAlu0Only(instr);
	bus[`IB_FPU] <= IsFPU(instr);
	bus[`IB_FC] <= IsFlowCtrl;
	bus[`IB_CANEX] <= fnCanException(instr);
	bus[`IB_LOADV] <= IsVolatileLoad(instr);
	bus[`IB_LOAD] <= IsLoad(instr);
	bus[`IB_PRELOAD] <= IsLoad(instr) && Rt==5'd0;
	bus[`IB_STORE] <= IsStore(instr);
	bus[`IB_PUSH] <= IsPush(instr);
	bus[`IB_ODDBALL] <= IsOddball(instr);
	bus[`IB_MEMSZ] <= MemSize(instr);
	bus[`IB_MEM] <= IsMem(instr);
	bus[`IB_MEMNDX] <= IsMemNdx(instr);
	bus[`IB_RMW] <= IsCAS(instr) || IsAMO(instr) || IsInc(instr);
	bus[`IB_MEMDB] <= IsMemdb(instr);
	bus[`IB_MEMSB] <= IsMemsb(instr);
	bus[`IB_SHFT] <= IsShift48(instr);//|IsShift(instr);
	bus[`IB_SEI] <= IsSEI(instr);
	bus[`IB_AQ] <= (IsAMO(instr)|IsLWRX(instr)|IsSWCX(instr)) & instr[25];
	bus[`IB_RL] <= (IsAMO(instr)|IsLWRX(instr)|IsSWCX(instr)) & instr[24];
	bus[`IB_JMP] <= IsJmp(instr);
	bus[`IB_BR] <= IsBranch(instr);
	bus[`IB_SYNC] <= IsSync(instr)||IsBrk(instr)||IsRti(instr);
	bus[`IB_FSYNC] <= IsFSync(instr);
	bus[`IB_RFW] <= (Rt==5'd0) ? 1'b0 : IsRFW(instr);// && !IsCmp(instr);
	bus[`IB_PRFW] <= IsCmp(instr);
	bus[`IB_WE] <= fnWe(instr);
	id_o <= id_i;
	idv_o <= idv_i;
end

endmodule

// Stand-alone copy of the ALU-class decode: IsALU is FALSE for BRK, the
// branch/jump/call/return opcodes, CHK, FLOAT, R2/RTI and the non-shift
// FVECTOR functions, TRUE otherwise.
module mIsALU(instr, IsALU);
input [47:0] instr;
output reg IsALU;
parameter TRUE = 1'b1;
parameter FALSE = 1'b0;

always @*
casez(instr[`INSTRUCTION_OP])
`R2:
	if (instr[`INSTRUCTION_L2]==2'b00)
		case(instr[`INSTRUCTION_S2])
		`VMOV:		IsALU = TRUE;
		`RTI:		IsALU = FALSE;
		default:	IsALU = TRUE;
		endcase
	else
		IsALU = TRUE;
`BRK:	IsALU = FALSE;
`Bcc:	IsALU = FALSE;
`BBc:	IsALU = FALSE;
`BEQI:	IsALU = FALSE;
`CHK:	IsALU = FALSE;
`JAL:	IsALU = FALSE;
`JMP:	IsALU = FALSE;
`CALL:	IsALU = FALSE;
`RET:	IsALU = FALSE;
`FVECTOR:
	case(instr[`INSTRUCTION_S2])
	`VSHL,`VSHR,`VASR:	IsALU = TRUE;
	default:			IsALU = FALSE;	// Integer
	endcase
`IVECTOR:
	case(instr[`INSTRUCTION_S2])
	`VSHL,`VSHR,`VASR:	IsALU = TRUE;
	default:			IsALU = TRUE;	// Integer
	endcase
`FLOAT:		IsALU = FALSE;
default:	IsALU = TRUE;
endcase

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_iexpander.v
===================================================================
--- thor/trunk/FT64v7/rtl/common/FT64_iexpander.v (nonexistent)
+++ thor/trunk/FT64v7/rtl/common/FT64_iexpander.v (revision 60)
@@ -0,0 +1,406 @@
// ============================================================================
// __
// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch@finitron.ca
// ||
//
// FT64_iexpander.v
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
`include "FT64_defines.vh"

// FT64_iexpander
//
// Purely combinational expander: maps a 16-bit compressed instruction
// (cinstr) onto the equivalent full instruction (expand). Every expansion is
// tagged with expand[7:6] = 2'b10, and the upper 16 bits are always zero.
// The compressed opcode is {cinstr[15:12],cinstr[6]}; encodings that are not
// recognised expand to a NOP.
module FT64_iexpander(cinstr,expand);
input [15:0] cinstr;
output reg [47:0] expand;

// Maps a subset of registers for compressed instructions.
function [4:0] fnRp;
input [2:0] rg;
case(rg)
3'd0:	fnRp = 5'd1;	// return value 0
3'd1:	fnRp = 5'd3;	// temp
3'd2:	fnRp = 5'd4;	// temp
3'd3:	fnRp = 5'd11;	// regvar
3'd4:	fnRp = 5'd12;	// regvar
3'd5:	fnRp = 5'd18;	// arg1
3'd6:	fnRp = 5'd19;	// arg2
3'd7:	fnRp = 5'd20;	// arg3
endcase
endfunction

always @*
begin
	// Default expansion is a NOP. Driving every bit up front guarantees that
	// no branch below can leave part of expand unassigned (which would infer
	// a latch in this combinational block).
	expand[47:8] = 40'd0;
	expand[7:6] = 2'b10;
	expand[5:0] = `NOP;

	casez({cinstr[15:12],cinstr[6]})
	5'b00000:	// NOP / ADDI
		case(cinstr[4:0])
		5'd31:	begin
				// register 31: the immediate is scaled by 8
				expand[47:32] = 16'h0000;
				expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'b0};
				expand[17:13] = cinstr[4:0];
				expand[12:8] = cinstr[4:0];
				expand[7:6] = 2'b10;
				expand[5:0] = `ADDI;
				end
		default:
				begin
				expand[47:32] = 16'h0000;
				expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
				expand[17:13] = cinstr[4:0];
				expand[12:8] = cinstr[4:0];
				expand[7:6] = 2'b10;
				expand[5:0] = `ADDI;
				end
		endcase
	5'b00010:	// SYS
		if (cinstr[4:0]==5'd0) begin
			expand[47:32] = 16'h0000;
			expand[5:0] = `BRK;
			expand[7:6] = 2'b10;
			expand[15:8] = {3'd1,cinstr[11:8],cinstr[5]};
			expand[16] = 1'b0;
			expand[20:17] = 4'd0;
			expand[23:21] = 3'd1;
			expand[31:24] = 8'd0;
		end
		// LDI
		else begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
			expand[17:13] = cinstr[4:0];
			expand[12:8] = 5'd0;
			expand[7:6] = 2'b10;
			expand[5:0] = `ADDI;		// ADDI to sign extend
		end
	5'b00100:	// RET / ANDI
		if (cinstr[4:0]==5'd0) begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {4'd0,cinstr[11:8],cinstr[5]};
			expand[22:18] = 5'd29;
			expand[17:13] = 5'd31;
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `RET;
		end
		else begin
			expand[47:32] = 16'h0000;
			expand[5:0] = `ANDI;
			expand[7:6] = 2'b10;
			expand[12:8] = cinstr[4:0];
			expand[17:13] = cinstr[4:0];
			// 9 sign bits + 5 immediate bits = the 14-bit field (the original
			// built 16 bits and relied on truncation of the extra sign bits)
			expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[5]};
		end
	5'b00110:	// SHLI
		begin
			expand[47:32] = 16'h0000;
			expand[31:26] = 6'h0F;			// immediate mode 0-31
			expand[25:23] = 3'd0;			// SHL
			expand[22:18] = {cinstr[11:8],cinstr[5]};	// amount
			expand[17:13] = cinstr[4:0];
			expand[12:8] = cinstr[4:0];
			expand[7:6] = 2'b10;
			expand[5:0] = 6'h02;			// R2 instruction
		end
	5'b01000:
		case(cinstr[5:4])
		2'd0:		// SHRI
			begin
				expand[47:32] = 16'h0000;
				expand[31:26] = 6'h0F;			// shift immediate 0-31
				expand[25:23] = 3'd1;			// SHR
				expand[22:18] = {cinstr[11:8],cinstr[3]};	// amount
				expand[17:13] = fnRp(cinstr[2:0]);
				expand[12:8] = fnRp(cinstr[2:0]);
				expand[7:6] = 2'b10;
				expand[5:0] = 6'h02;			// R2 instruction
			end
		2'd1:		// ASRI
			begin
				expand[47:32] = 16'h0000;
				expand[31:26] = 6'h0F;			// shift immediate 0-31
				expand[25:23] = 3'd3;			// ASR
				expand[22:18] = {cinstr[11:8],cinstr[3]};	// amount
				expand[17:13] = fnRp(cinstr[2:0]);
				expand[12:8] = fnRp(cinstr[2:0]);
				expand[7:6] = 2'b10;
				expand[5:0] = 6'h02;			// R2 instruction
			end
		2'd2:		// ORI
			begin
				expand[47:32] = 16'h0000;
				expand[31:18] = {{9{cinstr[11]}},cinstr[11:8],cinstr[3]};
				expand[17:13] = fnRp(cinstr[2:0]);
				expand[12:8] = fnRp(cinstr[2:0]);
				expand[7:6] = 2'b10;
				expand[5:0] = `ORI;
			end
		2'd3:		// register-register SUB / AND / OR / XOR
			begin
				// the four variants differ only in the function code
				case(cinstr[11:10])
				2'd0:	expand[31:26] = `SUB;
				2'd1:	expand[31:26] = `AND;
				2'd2:	expand[31:26] = `OR;
				2'd3:	expand[31:26] = `XOR;
				endcase
				expand[47:32] = 16'h0000;
				expand[25:23] = 3'b011;		// word size
				expand[22:18] = fnRp({cinstr[9:8],cinstr[3]});
				expand[17:13] = fnRp(cinstr[2:0]);
				expand[12:8] = fnRp(cinstr[2:0]);
				expand[7:6] = 2'b10;
				expand[5:0] = 6'h02;		// R2 instruction
			end
		endcase
	5'b01110:	// unconditional branch, encoded as BEQ r0,r0
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{1{cinstr[11]}},{cinstr[11:8],cinstr[5:2]}};
			expand[22:18] = 5'd0;		// Rb = 0
			expand[17:16] = cinstr[1:0];
			expand[15:13] = 3'd0;		// BEQ
			expand[12:8] = 5'd0;		// r0==r0
			expand[7:6] = 2'b10;
			expand[5:0] = `Bcc;			// 0x38
		end
	5'b10??0:	// BEQ Ra,r0
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{4{cinstr[13]}},cinstr[13:9]};
			expand[22:18] = 5'd0;		// r0
			expand[17:16] = {cinstr[8],cinstr[5]};
			expand[15:13] = 3'd0;		// BEQ
			expand[12:8] = cinstr[4:0];	// Ra
			expand[7:6] = 2'b10;
			expand[5:0] = `Bcc;
		end
	5'b11??0:	// BNE Ra,r0
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{4{cinstr[13]}},cinstr[13:9]};
			expand[22:18] = 5'd0;		// r0
			expand[17:16] = {cinstr[8],cinstr[5]};
			expand[15:13] = 3'd1;		// BNE
			expand[12:8] = cinstr[4:0];	// Ra
			expand[7:6] = 2'b10;
			expand[5:0] = `Bcc;
		end
	5'b00001:	// MOV
		begin
			expand[47:32] = 16'h0000;
			expand[31:26] = `MOV;		// `MOV is 6'b01001?
			expand[26] = 1'b0;			// replace the don't-care bit of `MOV with 0
			expand[25:23] = 3'd7;		// move current to current
			expand[22:18] = 5'd0;		// register set (ignored)
			expand[17:13] = {cinstr[11:8],cinstr[5]};
			expand[12:8] = cinstr[4:0];
			expand[7:6] = 2'b10;
			expand[5:0] = 6'h02;
		end
	5'b00011:	// ADD
		begin
			expand[47:32] = 16'h0000;
			expand[31:26] = `ADD;
			// The size field is bits 25:23, as in SUB/AND/OR/XOR above. The
			// original wrote [27:23] here, which also overwrote the low two
			// bits of the function code with zero.
			expand[25:23] = 3'b011;		// word size
			expand[22:18] = {cinstr[11:8],cinstr[5]};
			expand[17:13] = cinstr[4:0];
			expand[12:8] = cinstr[4:0];
			expand[7:6] = 2'b10;
			expand[5:0] = 6'h02;		// R2 instruction
		end
	5'b00101:	// JALR
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = 14'd0;
			expand[17:13] = {cinstr[11:8],cinstr[5]};
			expand[12:8] = cinstr[4:0];
			expand[7:6] = 2'b10;
			expand[5:0] = `JAL;
		end
	5'b00111:
		if (cinstr[11:8]==4'h1) begin
			expand[47:32] = 16'h0000;
			expand[31:26] = 6'h36;		// SEG instructions
			// NOTE(review): the original never drove bits 25:23 in this branch
			// (an inferred latch); zero is assumed here - confirm against the ISA.
			expand[25:23] = 3'd0;
			expand[22:18] = {2'b0,cinstr[2:0]};
			expand[17:13] = 5'd0;		// no target
			expand[12:8] = 5'd0;
			expand[7:6] = 2'b10;
			expand[5:0] = 6'h02;
		end
		else if ({cinstr[11:8],cinstr[5]}==5'b0) begin	// PUSH
			expand[47:32] = 16'h0000;
			expand[31:28] = 4'hC;
			expand[27:23] = 5'd0;
			expand[22:18] = cinstr[4:0];
			expand[17:13] = 5'd31;
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `MEMNDX;
		end
		else begin
			expand[47:8] = 40'd0;
			expand[7:6] = 2'b10;
			expand[5:0] = `NOP;
		end
	5'b01001:	// LH Rt,d[SP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{7{cinstr[11]}},cinstr[11:8],cinstr[5],2'd2};
			expand[17:13] = {cinstr[4:0]};
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b01011:	// LW Rt,d[SP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'd4};
			expand[17:13] = cinstr[4:0];
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b01101:	// LH Rt,d[FP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{7{cinstr[11]}},cinstr[11:8],cinstr[5],2'd2};
			expand[17:13] = cinstr[4:0];
			expand[12:8] = 5'd30;
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b01111:	// LW Rt,d[FP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{6{cinstr[11]}},cinstr[11:8],cinstr[5],3'd4};
			expand[17:13] = cinstr[4:0];
			expand[12:8] = 5'd30;
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b10001:	// SH Rt,d[SP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
			expand[22:18] = cinstr[4:0];
			expand[17:13] = {cinstr[9:8],cinstr[5],2'd2};
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	5'b10011:	// SW Rt,d[SP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{6{cinstr[11]}},cinstr[11:9]};
			expand[22:18] = cinstr[4:0];
			expand[17:13] = {cinstr[8],cinstr[5],3'd4};
			expand[12:8] = 5'd31;
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	5'b10101:	// SH Rt,d[FP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
			expand[22:18] = cinstr[4:0];
			expand[17:13] = {cinstr[9:8],cinstr[5],2'd2};
			expand[12:8] = 5'd30;
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	5'b10111:	// SW Rt,d[FP]
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{6{cinstr[11]}},cinstr[11:9]};
			expand[22:18] = cinstr[4:0];
			expand[17:13] = {cinstr[8],cinstr[5],3'd4};
			expand[12:8] = 5'd30;
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	5'b11001:	// LH
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{8{cinstr[11]}},cinstr[11:10],cinstr[4:3],2'd2};
			expand[17:13] = fnRp({cinstr[9:8],cinstr[5]});
			expand[12:8] = fnRp(cinstr[2:0]);
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b11011:	// LW
		begin
			expand[47:32] = 16'h0000;
			expand[31:18] = {{7{cinstr[11]}},cinstr[11:10],cinstr[4:3],3'd4};
			expand[17:13] = fnRp({cinstr[9:8],cinstr[5]});
			expand[12:8] = fnRp(cinstr[2:0]);
			expand[7:6] = 2'b10;
			expand[5:0] = `Lx;
		end
	5'b11101:	// SH
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{8{cinstr[11]}},cinstr[11]};
			expand[22:18] = fnRp({cinstr[9:8],cinstr[5]});
			expand[17:13] = {cinstr[10],cinstr[4:3],2'd2};
			expand[12:8] = fnRp(cinstr[2:0]);
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	5'b11111:	// SW
		begin
			expand[47:32] = 16'h0000;
			expand[31:23] = {{7{cinstr[11]}},cinstr[11:10]};
			expand[22:18] = fnRp({cinstr[9:8],cinstr[5]});
			expand[17:13] = {cinstr[4:3],3'd4};
			expand[12:8] = fnRp(cinstr[2:0]);
			expand[7:6] = 2'b10;
			expand[5:0] = `Sx;
		end
	default:
		begin
			expand[47:8] = 40'd0;
			expand[7:6] = 2'b10;
			expand[5:0] = `NOP;
		end
	endcase
end

endmodule
Index: thor/trunk/FT64v7/rtl/common/FT64_ipt.v
===================================================================
--- thor/trunk/FT64v7/rtl/common/FT64_ipt.v (nonexistent)
+++ thor/trunk/FT64v7/rtl/common/FT64_ipt.v (revision 60)
@@ -0,0 +1,412 @@
// ============================================================================
// __
// \\__/ o\ (C) 2018 Robert Finch, Waterloo
// \ __ / All rights reserved.
// \/_// robfinch@finitron.ca
// ||
//
// FT64_ipt.v
// - 64 bit CPU inverted page table memory management unit
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
`ifndef TRUE
`define TRUE	1'b1
`define FALSE	1'b0
`endif

// ----------------------------------------------------------------------------
// FT64_ipt - inverted page table (IPT) memory management unit.
//
// Bus master side (cyc_i/stb_i/we_i/vadr_i) comes from the CPU, the translated
// cycle is issued on cyc_o/we_o/padr_o.
//
// Register interface (cs_i asserted, register selected by vadr_i[5:3]):
//   0  command   write: dat_i[0] = update table from PTE regs,
//                       dat_i[1] = probe table into PTE regs
//   1  pt_ad     read : current page table address
//   2  PTE lo    key [41:32], asid [31:24], last [23], d [7], rwx [2:0]
//   3  PTE vadr  virtual page number [18:0]
//
// Page table entry layout in the table RAM (42 bits):
//   [41:32] key, [31:24] asid, [23] last, [22:4] virtual page, [3:0] d,r,w,x
//
// Translation (cs_i not asserted):
//   - ol_i == 0 passes vadr_i through untranslated.
//   - vadr_i[31:24] == 8'hFF or 8'h00 is passed through untranslated.
//   - otherwise the table is searched at Hash1, then Hash2, then by stepping
//     pt_ad by 65 until an empty or matching entry is found.
//
// NOTE(review): the rehash step in S_CMP6 has no iteration bound; a full table
// with no matching entry would search forever - confirm this is acceptable.
// NOTE(review): during update/probe the match compare still uses vadr_i (the
// register address) rather than pte_asid/pte_vadr, although the hash uses the
// PTE registers - looks unintended, verify against the design document.
// ----------------------------------------------------------------------------
module FT64_ipt(rst, clk, pkeys_i, ol_i, cti_i, cs_i, icl_i, cyc_i, stb_i, ack_o, we_i, sel_i, vadr_i, dat_i, dat_o,
	cyc_o, ack_i, we_o, padr_o, exv_o, rdv_o, wrv_o, prv_o, page_fault);
input rst;
input clk;
input [63:0] pkeys_i;		// six 10-bit protection keys, packed
input [1:0] ol_i;		// operating level, 0 bypasses translation
input [2:0] cti_i;		// cycle type, used to detect end of burst
input cs_i;			// register interface select
input icl_i;			// instruction cache load (checked against x permission)
input cyc_i;
input stb_i;
output reg ack_o;
input we_i;
input [7:0] sel_i;
input [63:0] vadr_i;
input [63:0] dat_i;
output reg [63:0] dat_o;
output reg cyc_o;
input ack_i;
output reg we_o;
output reg [31:0] padr_o;
output reg exv_o;		// execute violation
output reg rdv_o;		// read violation
output reg wrv_o;		// write violation
output reg prv_o;		// privilege (key) violation
output reg page_fault;

parameter S_IDLE = 4'd0;
parameter S_CMP1 = 4'd1;
parameter S_CMP2 = 4'd2;
parameter S_CMP3 = 4'd3;
parameter S_CMP4 = 4'd4;
parameter S_CMP5 = 4'd5;
parameter S_CMP6 = 4'd6;
parameter S_WAIT1 = 4'd7;
parameter S_ACK = 4'd8;

integer n;
wire [9:0] pkey [0:5];
assign pkey[0] = pkeys_i[9:0];
assign pkey[1] = pkeys_i[19:10];
assign pkey[2] = pkeys_i[29:20];
assign pkey[3] = pkeys_i[39:30];
assign pkey[4] = pkeys_i[49:40];
assign pkey[5] = pkeys_i[59:50];
reg [3:0] state;
reg [15:0] pt_ad;
reg upd;
reg upd_done;
reg probe, probe_done;
reg pte_last;
reg [7:0] pte_asid;
reg [3:0] pte_drwx;
reg [18:0] pte_vadr;
reg [9:0] pte_key;
reg pt_wr;
reg [41:0] pt_dati;
wire [41:0] pt_dat;

FT64_iptram uram1 (
  .clka(clk),
  .ena(1'b1),
  .wea(pt_wr),
  .addra(pt_ad),
  .dina(pt_dati),
  .douta(pt_dat)
);

// Fields of the entry currently read from the table RAM.
wire pt_last = pt_dat[23];
wire [18:0] pt_vadr = pt_dat[22:4];
wire [7:0] pt_asid = pt_dat[31:24];
wire [3:0] pt_drwx = pt_dat[3:0];
wire [9:0] pt_key = pt_dat[41:32];

// An entry is accessible when its key is zero or equals one of the six keys
// supplied on pkeys_i.
reg keymatch;
always @*
begin
	keymatch = 1'b0;
	for (n = 0; n < 6; n = n + 1)
		if (pt_key==pkey[n] || pt_key==10'h0)
			keymatch = 1'b1;
end

function [15:0] Hash1;
input [39:0] vadr;
begin
	Hash1 = {1'b0,vadr[37:32],vadr[21:13]};
end
endfunction

function [15:0] Hash2;
input [39:0] vadr;
begin
	Hash2 = {1'b1,vadr[37:32],vadr[21:13]};
end
endfunction

// Register read-back. Register 2 is assembled as one 64-bit value so that the
// bits which carry no field read as zero rather than holding a stale value.
always @(posedge clk)
	case(vadr_i[5:3])
	3'd1:
		dat_o <= pt_ad;
	3'd2:
		dat_o <= {22'd0,pte_key,pte_asid,pte_last,15'd0,pte_drwx[3],4'd0,pte_drwx[2:0]};
	3'd3:
		dat_o <= pte_vadr;
	default:	dat_o <= 1'b0;
	endcase

always @(posedge clk)
if (rst) begin
	cyc_o <= 1'b0;
	we_o <= 1'b0;
	padr_o <= 32'hFFFC0100;
	ack_o <= 1'b0;
	exv_o <= 1'b0;
	rdv_o <= 1'b0;
	wrv_o <= 1'b0;
	prv_o <= 1'b0;
	page_fault <= 1'b0;
	pt_wr <= 1'b0;
	upd <= 1'b0;
	probe <= 1'b0;
	upd_done <= 1'b0;
	probe_done <= 1'b0;
	goto(S_IDLE);
end
else begin
	// Single-cycle strobes, overridden below when needed.
	pt_wr <= 1'b0;
	page_fault <= 1'b0;
	ack_o <= 1'b0;
case(state)
S_IDLE:
	if (cyc_i) begin
		if (cs_i) begin
			ack_o <= 1'b1;
			case(vadr_i[5:3])
			3'd0:
				begin
					// upd_done/probe_done keep the command from re-triggering
					// while the same bus cycle is still being held.
					if (dat_i[0] & !upd_done) begin
						pt_ad <= Hash1({pte_asid,pte_vadr});
						upd <= 1'b1;
						goto(S_CMP1);
					end
					else if (dat_i[1] & !probe_done) begin
						pt_ad <= Hash1({pte_asid,pte_vadr});
						probe <= 1'b1;
						goto(S_CMP1);
					end
				end
			3'd2:
				begin
					pte_key <= dat_i[41:32];
					pte_asid <= dat_i[31:24];
					pte_last <= dat_i[23];		// same bit position as the read-back and the table entry
					pte_drwx <= {dat_i[7],dat_i[2:0]};
				end
			3'd3:
				begin
					pte_vadr <= dat_i[18:0];
				end
			endcase
		end
		else begin
			upd_done <= 1'b0;
			probe_done <= 1'b0;
			upd <= 1'b0;
			probe <= 1'b0;
			if (ol_i==2'b0) begin
				cyc_o <= 1'b1;
				we_o <= we_i;
				padr_o <= vadr_i;
				goto(S_ACK);
			end
			else begin
				// Video frame buffer ($00xxxxxx) and ROM / IO ($FFxxxxxx) regions are
				// not mapped.
				if (vadr_i[31:24]==8'hFF || vadr_i[31:24]==8'h00) begin
					cyc_o <= 1'b1;
					we_o <= we_i;
					padr_o <= vadr_i;
					goto(S_ACK);
				end
				else begin
					pt_ad <= Hash1({vadr_i[63:56],vadr_i});
					goto(S_CMP1);
				end
			end
		end
	end
	else begin
		exv_o <= 1'b0;
		rdv_o <= 1'b0;
		wrv_o <= 1'b0;
		prv_o <= 1'b0;
	end

// Two clocks are allowed for the table RAM read before the compare.
S_CMP1:
	goto(S_CMP2);
S_CMP2:
	goto(S_CMP3);
S_CMP3:
	if (pt_drwx[2:0]==3'b0)
		slot_empty;
	else if (pt_asid==vadr_i[63:56] && pt_vadr==vadr_i[31:13])
		slot_match;
	else begin
		// First hash missed: try the second hash.
		if (upd|probe)
			pt_ad <= Hash2({pte_asid,pte_vadr});
		else
			pt_ad <= Hash2({vadr_i[63:56],vadr_i});
		goto(S_CMP4);
	end

S_CMP4:
	goto(S_CMP5);
S_CMP5:
	goto(S_CMP6);
S_CMP6:
	if (pt_drwx[2:0]==3'b0)
		slot_empty;
	else if (pt_asid==vadr_i[63:56] && pt_vadr==vadr_i[31:13])
		slot_match;
	else begin
		// Still no match: step through the table.
		pt_ad <= {pt_ad+8'd65};
		goto(S_CMP4);
	end

// Wait a clock cycle for a page fault to register.
S_WAIT1:
	goto(S_IDLE);

S_ACK:
	if (ack_i) begin
		// Classic cycle (000) or end of burst (111) terminates the access.
		if (cti_i==3'b000 || cti_i==3'b111) begin
			cyc_o <= 1'b0;
			we_o <= 1'b0;
			goto(S_WAIT1);
		end
	end

endcase
end

// The entry at pt_ad is empty (no r/w/x permission set).
//   update : store the PTE registers into the slot
//   probe  : report "not found" by clearing the PTE registers
//   access : raise a page fault
task slot_empty;
begin
	if (upd) begin
		pte_key <= 10'h0;
		pte_last <= 1'b0;
		pte_drwx <= 4'd0;
		pt_wr <= 1'b1;
		// Nonblocking: the values written are the PTE registers before clearing.
		pt_dati <= {pte_key,pte_asid,pte_last,pte_vadr[18:0],pte_drwx};
		upd_done <= 1'b1;
		goto(S_IDLE);
	end
	else if (probe) begin
		pte_key <= 10'h0;
		pte_drwx <= 4'b0;
		pte_vadr <= 19'b0;
		pte_asid <= 8'b0;
		pte_last <= 1'b0;
		probe_done <= 1'b1;
		goto(S_IDLE);
	end
	else begin
		page_fault <= 1'b1;
		goto(S_WAIT1);
	end
end
endtask

// The entry at pt_ad matches the address being looked up.
//   update : rewrite key and permissions of the entry (key must match)
//   probe  : copy key/last/permissions into the PTE registers (key must match)
//   access : start the translated bus cycle, flagging permission violations
task slot_match;
begin
	if (upd) begin
		if (keymatch) begin
			pte_key <= pt_key;
			pte_last <= pt_last;
			pte_drwx <= pt_drwx;
			pt_wr <= 1'b1;
			pt_dati <= {pte_key,pt_dat[31:4],pte_drwx};
		end
		else
			prv_o <= 1'b1;
		upd_done <= 1'b1;
		goto(S_IDLE);
	end
	else if (probe) begin
		if (keymatch) begin
			pte_key <= pt_key;
			pte_last <= pt_last;
			pte_drwx <= pt_drwx;
		end
		else
			prv_o <= 1'b1;
		// Always mark the probe finished, otherwise a key mismatch would
		// restart the probe as soon as the state machine is idle again.
		probe_done <= 1'b1;
		goto(S_IDLE);
	end
	else if (~ack_i) begin
		if (keymatch) begin
			cyc_o <= 1'b1;
			we_o <= we_i & pt_drwx[1];		// writes are suppressed without w permission
			if (!pt_drwx[1] & we_i) wrv_o <= 1'b1;
			if (!pt_drwx[2] & ~we_i) rdv_o <= 1'b1;
			if (!pt_drwx[0] & icl_i) exv_o <= 1'b1;
			// The table index is the physical page number.
			padr_o <= {pt_ad,vadr_i[12:0]};
		end
		else begin
			// Key mismatch: run a harmless read cycle and flag the violation.
			cyc_o <= 1'b1;
			we_o <= 1'b0;
			padr_o <= 32'hFFFFFFF8;
			prv_o <= 1'b1;
		end
		goto(S_ACK);
	end
end
endtask

task goto;
input [3:0] nst;
begin
	state <= nst;
end
endtask

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_mmu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_mmu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_mmu.v (revision 60) @@ -0,0 +1,142 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_MMU.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
//
//
// ============================================================================
//
`define LOW		1'b0
`define HIGH	1'b1

// ----------------------------------------------------------------------------
// FT64_mmu - map-table based address translation.
//
// Port A of the map RAM is the programming interface, selected when the bus
// address falls in $FFDC4xxx; it is indexed by the access key (pcr_i[13:8])
// and s_adr_i[11:2].
// Port B performs the lookup; it is indexed by the operate key (pcr_i[5:0])
// and either s_adr_i[28:19] or s_adr_i[22:13], chosen by pcr2_i[operate key].
// The looked-up entry supplies physical address bits 28:13 (or 28:19 only)
// and three permission bits used to raise exv_o / rdv_o / wrv_o.
// ----------------------------------------------------------------------------
module FT64_mmu(rst_i, clk_i, ol_i, pcr_i, pcr2_i, mapen_i, s_ex_i, s_cyc_i, s_stb_i, s_ack_o, s_wr_i, s_adr_i, s_dat_i, s_dat_o,
    pea_o, cyc_o, stb_o,
    exv_o, rdv_o, wrv_o);
input rst_i;
input clk_i;
input [2:0] ol_i;
input [31:0] pcr_i;			// paging enabled
input [63:0] pcr2_i;
input mapen_i;
input s_ex_i;				// executable address
input s_cyc_i;
input s_stb_i;
input s_wr_i;				// write strobe
output s_ack_o;
input [31:0] s_adr_i;		// virtual address
input [31:0] s_dat_i;
output [31:0] s_dat_o;
output reg [31:0] pea_o;
output reg cyc_o;
output reg stb_o;
output reg exv_o;			// execute violation
output reg rdv_o;			// read violation
output reg wrv_o;			// write violation

// Map RAM programming window select.
wire reg_sel = s_cyc_i && s_stb_i && (s_adr_i[31:12]==20'hFFDC4);
wire [5:0] okey = pcr_i[5:0];
wire [5:0] akey = pcr_i[13:8];
wire machine_level = ol_i==3'b000;		// machine operating level

reg ack1, ack2, ack3;				// ack3 is declared but not used
reg cyc1,cyc2,stb1,stb2;
wire [20:0] douta,doutb;
wire [20:0] doutca;					// declared but not used
wire [2:0] perm = doutb[18:16];		// permission bits of the looked-up entry

// Delay registers: keep all the address bits in sync with the output of the
// map table, so there are no intermediate invalid addresses.
reg mapen1, mapen2;
reg [31:0] s_adr1, s_adr2;
reg _4MB1, _4MB2;

wire [15:0] addra = {akey,s_adr_i[11:2]};
wire [15:0] addrb = pcr2_i[okey] ? {okey,s_adr_i[28:19]} :
                                   {okey,s_adr_i[22:13]};

FT64_MMURam1 u1 (
  .clka(clk_i),
  .ena(reg_sel),
  .wea(reg_sel & s_wr_i),
  .addra(addra),
  .dina(s_dat_i[20:0]),
  .douta(douta),
  .clkb(clk_i),
  .enb(mapen_i),
  .web(1'b0),
  .addrb(addrb),
  .dinb(21'h0),
  .doutb(doutb)
);

assign s_dat_o = {11'd0,douta};

// The programming window is acknowledged two clocks after it is selected and
// only while it stays selected.
assign s_ack_o = reg_sel ? ack2 : 1'b0;

// All un-reset pipeline flops share a single clocked block. Every assignment
// is nonblocking, so the order of the statements carries no meaning.
always @(posedge clk_i)
begin
	// register-interface acknowledge
	ack1 <= reg_sel;
	ack2 <= ack1 & (reg_sel);

	// address / mode delay chain
	s_adr1 <= s_adr_i;
	s_adr2 <= s_adr1;
	_4MB1 <= pcr2_i[okey];
	_4MB2 <= _4MB1 | !mapen1;
	mapen1 <= !machine_level && mapen_i && (s_adr_i[31:29]==3'h0);
	mapen2 <= mapen1;

	// bus strobes: dropped immediately if the master drops them
	cyc1 <= s_cyc_i;
	cyc2 <= cyc1 & s_cyc_i;
	stb1 <= s_stb_i;
	stb2 <= stb1 & s_stb_i;

	// access violations
	exv_o <= s_ex_i & ~perm[0] & cyc2 & stb2 & mapen_i;
	rdv_o <= ~(s_wr_i | s_ex_i) & ~perm[1] & cyc2 & stb2 & mapen_i;
	wrv_o <= s_wr_i & ~perm[2] & cyc2 & stb2 & mapen_i;
end

// Physical address and bus strobe outputs.
always @(posedge clk_i)
if (rst_i) begin
	cyc_o <= 1'b0;
	stb_o <= 1'b0;
	pea_o <= 32'hFFFC0100;
end
else begin
	// Page offset and the top three bits are never translated.
	pea_o[12:0] <= s_adr2[12:0];
	pea_o[31:29] <= s_adr2[31:29];
	if (mapen2) begin
		pea_o[28:19] <= doutb[15:6];
		// The larger page size keeps bits 18:13 from the virtual address.
		pea_o[18:13] <= _4MB2 ? s_adr2[18:13] : doutb[5:0];
	end
	else begin
		pea_o[28:19] <= s_adr2[28:19];
		pea_o[18:13] <= s_adr2[18:13];
	end
	cyc_o <= cyc2 & s_cyc_i;
	stb_o <= stb2 & s_stb_i;
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_mpu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_mpu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_mpu.v (revision 60) @@ -0,0 +1,309 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_MPU.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
//
//
// ============================================================================
//
//`define CARD_MEMORY	1'b1

// ----------------------------------------------------------------------------
// FT64_mpu - processing unit wrapper.
//
// Bundles the FT64 CPU core with its interrupt controller (FT64_pic),
// interval timer (FT64_pit), inverted page table MMU (FT64_ipt) and, when
// CARD_MEMORY is defined, the card memory. The three timer outputs are routed
// to PIC inputs 29..31; PIC inputs 1..28 are brought out as i1..i28.
//
// The CPU's cycle, write and address outputs reach the external bus through
// the IPT (cyc_o / we_o / adr_o); the remaining bus outputs are registered
// copies of the CPU signals.
//
// NOTE(review): cti / bte are declared one bit wider than cti_o / bte_o and
// than the cti_i port of FT64_ipt, so the top bit is dropped - confirm the
// width of the CPU's cti_o / bte_o ports.
// ----------------------------------------------------------------------------
module FT64_mpu(hartid_i,rst_i, clk4x_i, clk_i, tm_clk_i,
	pit_clk2, pit_gate2, pit_out2,
	irq_o,
	i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15,i16,i17,i18,i19,
	i20,i21,i22,i23,i24,i25,i26,i27,i28,
	cti_o,bte_o,bok_i,cyc_o,stb_o,ack_i,err_i,we_o,sel_o,adr_o,dat_o,dat_i,
	sr_o, cr_o, rb_i);
input [63:0] hartid_i;
input rst_i;
input clk4x_i;
input clk_i;
input tm_clk_i;
input pit_clk2;			// external clock for timer channel 2
input pit_gate2;		// gate for timer channel 2
output pit_out2;
output [3:0] irq_o;
input i1;
input i2;
input i3;
input i4;
input i5;
input i6;
input i7;
input i8;
input i9;
input i10;
input i11;
input i12;
input i13;
input i14;
input i15;
input i16;
input i17;
input i18;
input i19;
input i20;
input i21;
input i22;
input i23;
input i24;
input i25;
input i26;
input i27;
input i28;
output reg [2:0] cti_o;
output reg [1:0] bte_o;
input bok_i;
output cyc_o;
output reg stb_o;
input ack_i;
input err_i;
output we_o;
output reg [7:0] sel_o;
output [31:0] adr_o;
output reg [63:0] dat_o;
input [63:0] dat_i;
output sr_o;
output cr_o;
input rb_i;

wire [3:0] cti;
wire [2:0] bte;
wire cyc,stb,we;
wire [7:0] sel;
wire [63:0] adr;
reg [63:0] dati;
wire [63:0] dato;
wire [3:0] irq;
wire [7:0] cause;
wire pic_ack;
wire [31:0] pic_dato;
wire pit_ack;
wire [31:0] pit_dato;
wire pit_out0, pit_out1;
wire crd_ack;
wire [63:0] crd_dato;
reg ack;
wire [63:0] ipt_dato;
wire ipt_ack;
wire [1:0] ol;
wire [31:0] pcr;
wire [63:0] pcr2;
wire icl;      // instruction cache load
wire exv,rdv,wrv;
wire pulse60;
wire sptr_o;
wire [63:0] pkeys;

// Registered copies of the CPU bus outputs. cyc_o, we_o and adr_o are not
// registered here; they are driven by the IPT instance below.
always @(posedge clk_i)
	cti_o <= cti;
always @(posedge clk_i)
	bte_o <= bte;
always @(posedge clk_i)
	stb_o <= stb;
always @(posedge clk_i)
	sel_o <= sel;
always @(posedge clk_i)
	dat_o <= dato;

wire cs_pit = adr[31:8]==24'hFFDC11;
wire cs_ipt = adr[31:8]==24'hFFDCD0;
`ifdef CARD_MEMORY
wire cs_crd = adr[31:11]==21'd0;	// $00000000 in virtual address space
`else
wire cs_crd = 1'b0;
`endif

// Need to recreate the a2 address bit for 32 bit peripherals.
wire [31:0] adr32 = {adr[31:3],|sel[7:4],2'b00};
wire [31:0] dat32 = |sel[7:4] ? dato[63:32] : dato[31:0];

FT64_pit upit1
(
	.rst_i(rst_i),
	.clk_i(clk_i),
	.cs_i(cs_pit),
	.cyc_i(cyc_o),
	.stb_i(stb_o),
	.ack_o(pit_ack),
	.sel_i(sel_o[7:4]|sel_o[3:0]),
	.we_i(we_o),
	.adr_i(adr32[5:0]),
	.dat_i(dat32),
	.dat_o(pit_dato),
	.clk0(1'b0),
	.gate0(1'b0),
	.out0(pit_out0),
	.clk1(1'b0),
	.gate1(1'b0),
	.out1(pit_out1),
	// Channel 2 is the externally visible timer: use the module's
	// pit_clk2 / pit_gate2 inputs instead of tying them off.
	.clk2(pit_clk2),
	.gate2(pit_gate2),
	.out2(pit_out2)
);

FT64_pic upic1
(
	.rst_i(rst_i),		// reset
	.clk_i(clk_i),		// system clock
	.cyc_i(cyc_o),
	.stb_i(stb_o),
	.ack_o(pic_ack),	// controller is ready
	.wr_i(we_o),		// write
	.adr_i(adr32),		// address
	.dat_i(dat32),
	.dat_o(pic_dato),
	.vol_o(),			// volatile register selected
	.i1(i1),
	.i2(i2),
	.i3(i3),
	.i4(i4),
	.i5(i5),
	.i6(i6),
	.i7(i7),
	.i8(i8),
	.i9(i9),
	.i10(i10),
	.i11(i11),
	.i12(i12),
	.i13(i13),
	.i14(i14),
	.i15(i15),
	.i16(i16),
	.i17(i17),
	.i18(i18),
	.i19(i19),
	.i20(i20),
	.i21(i21),
	.i22(i22),
	.i23(i23),
	.i24(i24),
	.i25(i25),
	.i26(i26),
	.i27(i27),
	.i28(i28),
	.i29(pit_out2),	// garbage collector stop interrupt
	.i30(pit_out1),	// garbage collector interrupt
	.i31(pit_out0),	// time slice interrupt
	.irqo(irq),
	.nmii(1'b0),
	.nmio(),
	.causeo(cause)
);

assign irq_o = irq;

`ifdef CARD_MEMORY
CardMemory ucrd1
(
	.clk_i(clk_i),
	.cs_i(cs_crd & cyc_o & stb_o),
	.ack_o(crd_ack),
	.wr_i(we_o),
	.adr_i(adr),
	.dat_i(dato),
	.dat_o(crd_dato),
	.stp(1'b0),
	.mapno(pcr[5:0])
);
`else
assign crd_dato = 64'd0;
assign crd_ack = 1'b0;
`endif

FT64_ipt uipt1
(
	.rst(rst_i),
	.clk(clk_i),
	.pkeys_i(pkeys),
	.ol_i(ol),
	.cti_i(cti),
	.cs_i(cs_ipt),
	.icl_i(icl),
	.cyc_i(cyc),
	.stb_i(stb),
	.ack_o(ipt_ack),
	.we_i(we),
	.sel_i(sel),
	.vadr_i(adr),
	.dat_i(dato),
	.dat_o(ipt_dato),
	.cyc_o(cyc_o),
	.ack_i(ack),
	.we_o(we_o),
	.padr_o(adr_o),
	.exv_o(exv),
	.rdv_o(rdv),
	.wrv_o(wrv),
	.prv_o(),			// not used at this level
	.page_fault()		// not used at this level
);

// Read data mux: the first responding on-chip peripheral wins, otherwise the
// external bus data is used.
always @(posedge clk_i)
casez({pic_ack,pit_ack,crd_ack,cs_ipt})
4'b1???:	dati <= {2{pic_dato}};
4'b01??:	dati <= {2{pit_dato}};
4'b001?:	dati <= crd_dato;
4'b0001:	dati <= ipt_dato;
default:	dati <= dat_i;
endcase

always @(posedge clk_i)
	ack <= ack_i|pic_ack|pit_ack|crd_ack|ipt_ack;

FT64 ucpu1
(
    .hartid(hartid_i),
    .rst(rst_i),
    .clk_i(clk_i),
    .clk4x(clk4x_i),
    .tm_clk_i(tm_clk_i),
    .irq_i(irq),
    .vec_i(cause),
    .cti_o(cti),
    .bte_o(bte),
    .bok_i(bok_i),
    .cyc_o(cyc),
    .stb_o(stb),
    .ack_i(ack),
    .err_i(err_i),
    .we_o(we),
    .sel_o(sel),
    .adr_o(adr),
    .dat_o(dato),
    .dat_i(dati),
    .ol_o(ol),
    .pcr_o(pcr),
    .pcr2_o(pcr2),
    .pkeys_o(pkeys),
    .icl_o(icl),
    .sr_o(sr_o),
    .cr_o(cr_o),
    .rbi_i(rb_i)
);

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_multiplier.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_multiplier.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_multiplier.v (revision 60) @@ -0,0 +1,185 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see .
//
//
//	FT64 Superscaler
//	FT64_multiplier.v
//  - 64 bit multiplier
//
// ============================================================================
//
// Sequencer around a clocked multiplier core.
//   ld     starts a multiply of a and b
//   sgn    both operands are signed
//   sgnus  a is signed, b is unsigned
//   abort  clears the wait counter so the operation finishes early
//   o      product, WID*2 bits
//   done   high in the DONE state, and while idle with no load pending
//   idle   high in the IDLE state
// Signed operands are converted to magnitudes before the core and the sign is
// put back on the product. After a load the sequencer waits 20 clocks before
// capturing the core output.
module FT64_multiplier(rst, clk, ld, abort, sgn, sgnus, a, b, o, done, idle);
parameter WID=64;
parameter SGNADJO=3'd2;
parameter MULT=3'd3;
parameter IDLE=3'd4;
parameter DONE=3'd5;
input clk;
input rst;
input ld;
input abort;
input sgn;
input sgnus;
input [WID-1:0] a;
input [WID-1:0] b;
output [WID*2-1:0] o;
reg [WID*2-1:0] o;
output done;
output idle;

reg [WID-1:0] aa,bb;
reg so;					// sign of the result
reg [2:0] state;
reg [7:0] cnt;
wire cnt_done = cnt==8'd0;
assign done = state==DONE || (state==IDLE && !ld); // State == DONE
assign idle = state==IDLE;

// Product from the multiplier core, sized to match the selected core.
wire [WID*2-1:0] pp;

generate begin : gMults
if (WID > 32) begin
FT64_mult umul1
(
  .CLK(clk),
  .A(aa),
  .B(bb),
  .P(pp)
);
end
else if (WID > 16) begin
FT64_mult32 umul1
(
  .CLK(clk),
  .A(aa),
  .B(bb),
  .P(pp)
);
end
else if (WID > 8) begin
FT64_mult16 umul1
(
  .CLK(clk),
  .A(aa),
  .B(bb),
  .P(pp)
);
end
else begin
FT64_mult8 umul1
(
  .CLK(clk),
  .A(aa),
  .B(bb),
  .P(pp)
);
end
end
endgenerate

always @(posedge clk)
if (rst) begin
	aa <= {WID{1'b0}};
	bb <= {WID{1'b0}};
	o <= {WID*2{1'b0}};
	so <= 1'b0;
	cnt <= 8'd0;
	state <= IDLE;
end
else
begin
// Wait counter. The load in the IDLE state below is assigned later in the
// block and therefore takes priority over the decrement.
if (abort)
	cnt <= 8'd00;
else if (!cnt_done)
	cnt <= cnt - 8'd1;

case(state)
IDLE:
	if (ld) begin
		if (sgnus) begin
			aa <= a[WID-1] ? -a : a;
			bb <= b;
			so <= a[WID-1];		// nonblocking, like every other flop in this block
		end
		else if (sgn) begin
			aa <= a[WID-1] ? -a : a;
			bb <= b[WID-1] ? -b : b;
			so <= a[WID-1] ^ b[WID-1];
		end
		else begin
			aa <= a;
			bb <= b;
			so <= 1'b0;
		end
		cnt <= 8'd20;
		state <= MULT;
	end
MULT:
	if (cnt_done) begin
		if (sgn|sgnus) begin
			if (so)
				o <= -pp;
			else
				o <= pp;
		end
		else
			o <= pp;
		state <= DONE;
	end
DONE:
	state <= IDLE;
default:
	state <= IDLE;
endcase
end

endmodule

// Minimal stimulus for FT64_multiplier: pulse reset, then pulse ld once for a
// signed multiply of 0 by 48.
module FT64_multiplier_tb();

reg rst;
reg clk;
reg ld;
wire [127:0] o;
wire done;
wire idle;

initial begin
	clk = 1;
	rst = 0;
	ld = 0;				// defined level before the load pulse
	#100 rst = 1;
	#100 rst = 0;
	#100 ld = 1;
	#150 ld = 0;
end

always #10 clk = ~clk;	//  50 MHz


FT64_multiplier u1
(
	.rst(rst),
	.clk(clk),
	.ld(ld),
	.abort(1'b0),
	.sgn(1'b1),
	.sgnus(1'b0),
	.a(64'd0),
	.b(64'd48),
	.o(o),
	.done(done),
	.idle(idle)
);

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_pic.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_pic.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_pic.v (revision 60) @@ -0,0 +1,189 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see .
//
//
//		Encodes discrete interrupt request signals into five
//	bit code using a priority encoder.
//
//	reg
//	0x00	- encoded request number (read / write)
//			This register contains the number identifying
//			the current requester in bits 0 to 4
//			If there is no
//			active request, then this number will be
//			zero.
//			bits 8 to 15 set the base number for the vector
//
//	0x04	- request enable (read / write)
//			this register contains request enable bits
//			for each request line. 1 = request
//			enabled, 0 = request disabled. On reset this
//			register is set to zero (disable all ints).
//			bit zero is specially reserved for nmi
//
//	0x08   - write only
//			this register disables the interrupt indicated
//			by the low order five bits of the input data
//
//	0x0C	- write only
//			this register enables the interrupt indicated
//			by the low order five bits of the input data
//
//	0x10	- write only
//			this register indicates which interrupt inputs are
//			edge sensitive
//
//  0x14	- write only
//			This register resets the edge sense circuitry
//			indicated by the low order five bits of the input data.
//
//  0x80    - irq control for irq #0
//  0x84    - irq control for irq #1
//            bits 0 to 7 = cause code to issue
//            bits 8 to 11 = irq level to issue
//            bit 16 = irq enable
//            bit 17 = edge sensitivity
//=============================================================================
//
// Implementation notes:
//  - Among the active, enabled requests the lowest numbered input wins.
//  - Writes are acknowledged in the same cycle, reads one clock after select.
//  - causeo carries the full eight bit cause code programmed through the irq
//    control registers.
//  - Writes to register 0x00 are accepted but have no effect in this module.

module FT64_pic
(
	input rst_i,		// reset
	input clk_i,		// system clock
	input cyc_i,
	input stb_i,
	output ack_o,       // controller is ready
	input wr_i,			// write
	input [31:0] adr_i,	// address
	input [31:0] dat_i,
	output reg [31:0] dat_o,
	output vol_o,		// volatile register selected
	input i1, i2, i3, i4, i5, i6, i7,
		i8, i9, i10, i11, i12, i13, i14, i15,
		i16, i17, i18, i19, i20, i21, i22, i23,
		i24, i25, i26, i27, i28, i29, i30, i31,
	output [3:0] irqo,	// normally connected to the processor irq
	input nmii,		// nmi input connected to nmi requester
	output nmio,	// normally connected to the nmi of cpu
	output [7:0] causeo	// cause code of the winning request, 0 when idle
);
parameter pIOAddress = 32'hFFDC_0F00;

reg [31:0] ie;		// interrupt enable register
reg rdy1;
reg [4:0] irqenc;	// number of the winning request, 0 = none
wire [31:0] i = {   i31,i30,i29,i28,i27,i26,i25,i24,i23,i22,i21,i20,i19,i18,i17,i16,
					i15,i14,i13,i12,i11,i10,i9,i8,i7,i6,i5,i4,i3,i2,i1,nmii};
reg [31:0] ib;		// previous sample of the inputs, for edge detection
reg [31:0] iedge;	// latched rising edges
reg [31:0] rste;	// one-clock edge latch reset strobes
reg [31:0] es;		// 1 = input is edge sensitive
reg [3:0] irq [0:31];
reg [7:0] cause [0:31];
integer n;

initial begin
	ie <= 32'h0;
	es <= 32'hFFFFFFFF;
	rste <= 32'h0;
	ib <= 32'h0;
	iedge <= 32'h0;
	for (n = 0; n < 32; n = n + 1) begin
		cause[n] <= 8'h00;
		irq[n] <= 4'h8;
	end
end

wire cs = cyc_i && stb_i && adr_i[31:8]==pIOAddress[31:8];
assign vol_o = cs;

always @(posedge clk_i)
	rdy1 <= cs;
assign ack_o = cs ? (wr_i ? 1'b1 : rdy1) : 1'b0;

// write registers
always @(posedge clk_i)
	if (rst_i) begin
		ie <= 32'h0;
		rste <= 32'h0;
	end
	else begin
		rste <= 32'h0;
		if (cs & wr_i) begin
			casez (adr_i[7:2])
			6'd0: ;
			6'd1:
				begin
					ie[31:0] <= dat_i[31:0];
				end
			// 0x08 disables, 0x0C enables: address bit 2 is the new enable value.
			6'd2,6'd3:
				ie[dat_i[4:0]] <= adr_i[2];
			6'd4:	es <= dat_i[31:0];
			6'd5:	rste[dat_i[4:0]] <= 1'b1;
			6'b1?????:
			     begin
			         cause[adr_i[6:2]] <= dat_i[7:0];
			         irq[adr_i[6:2]] <= dat_i[11:8];
			         ie[adr_i[6:2]] <= dat_i[16];
			         es[adr_i[6:2]] <= dat_i[17];
			     end
			endcase
		end
	end

// read registers
always @(posedge clk_i)
begin
	if (irqenc!=5'd0)
		$display("PIC: %d",irqenc);
	if (cs)
		casez (adr_i[7:2])
		6'd0:	dat_o <= cause[irqenc];
		6'b1?????: dat_o <= {es[adr_i[6:2]],ie[adr_i[6:2]],4'b0,irq[adr_i[6:2]],cause[adr_i[6:2]]};
		default:	dat_o <= ie;
		endcase
	else
		dat_o <= 32'h0000;
end

assign irqo = (irqenc == 5'h0) ? 4'd0 : irq[irqenc];
assign causeo = (irqenc == 5'h0) ? 8'd0 : cause[irqenc];
assign nmio = nmii & ie[0];

// Edge detect circuit
// While in reset the input history follows the inputs and the edge latches
// are cleared, so an input that is already high when reset is released is
// not reported as a rising edge.
always @(posedge clk_i)
if (rst_i) begin
	ib <= i;
	iedge <= 32'h0;
end
else begin
	for (n = 1; n < 32; n = n + 1)
	begin
		ib[n] <= i[n];
		if (i[n] & !ib[n]) iedge[n] <= 1'b1;
		if (rste[n]) iedge[n] <= 1'b0;		// reset wins over a simultaneous edge
	end
end

// irq requests are latched on every rising clock edge to prevent
// misreads
// nmi is not encoded
always @(posedge clk_i)
begin
	irqenc <= 5'd0;
	// Counting down makes the lowest numbered active request the final
	// (winning) assignment.
	for (n = 31; n > 0; n = n - 1)
		if (ie[n] & (es[n] ? iedge[n] : i[n])) irqenc <= n;
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_pit.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_pit.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_pit.v (revision 60) @@ -0,0 +1,184 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca +// || +// +// FT64_pit.v +// - programmable interval timer +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// +// Reg Description +// 00 current count (read only) +// 04 max count (read-write) +// 08 on time (read-write) +// 0C control +// byte 0 for counter 0, byte 1 for counter 1, byte 2 for counter 2 +// bit in byte +// 0 = 1 = load, automatically clears +// 1 = 1 = enable counting, 0 = disable counting +// 2 = 1 = auto-reload on terminal count, 0 = no reload +// 3 = 1 = use external clock, 0 = internal clk_i +// 4 = 1 = use gate to enable count, 0 = ignore gate +// 10 current count 1 +// 14 max count 1 +// 18 on time 1 +// 20 current count 2 +// 24 max count 2 +// 28 on time 2 +// +// - all three counter controls can be written at the same time with a +// single instruction allowing synchronization of the counters. 
+// ============================================================================ +// +module FT64_pit(rst_i, clk_i, cs_i, cyc_i, stb_i, ack_o, sel_i, we_i, adr_i, dat_i, dat_o, + clk0, gate0, out0, clk1, gate1, out1, clk2, gate2, out2 + ); +input rst_i; +input clk_i; +input cs_i; +input cyc_i; +input stb_i; +output ack_o; +input [3:0] sel_i; +input we_i; +input [5:0] adr_i; +input [31:0] dat_i; +output reg [31:0] dat_o; +input clk0; +input gate0; +output out0; +input clk1; +input gate1; +output out1; +input clk2; +input gate2; +output out2; + +integer n; +reg [31:0] maxcount [0:2]; +reg [31:0] count [0:2]; +reg [31:0] ont [0:2]; +wire [2:0] gate; +wire [2:0] pulse; +reg ld [0:2]; +reg ce [0:2]; +reg ar [0:2]; +reg ge [0:2]; +reg xc [0:2]; +reg out [0:2]; + +wire cs = cyc_i & stb_i & cs_i; +reg rdy; +always @(posedge clk_i) + rdy <= cs; +assign ack_o = cs ? (we_i ? 1'b1 : rdy) : 1'b0; + +assign out0 = out[0]; +assign out1 = out[1]; +assign out2 = out[2]; +assign gate[0] = gate0; +assign gate[1] = gate1; +assign gate[2] = gate2; + +edge_det ued0 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk0), .pe(pulse[0]), .ne()); +edge_det ued1 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk1), .pe(pulse[1]), .ne()); +edge_det ued2 (.rst(rst_i), .clk(clk_i), .ce(1'b1), .i(clk2), .pe(pulse[2]), .ne()); + +initial begin + for (n = 0; n < 3; n = n + 1) begin + maxcount[n] <= 32'd0; + count[n] <= 32'd0; + ont[n] <= 32'd0; + ld[n] <= 1'b0; + ce[n] <= 1'b0; + ar[n] <= 1'b0; + ge[n] <= 1'b0; + xc[n] <= 1'b0; + out[n] <= 1'b0; + end +end + +always @(posedge clk_i) +if (rst_i) begin + for (n = 0; n < 3; n = n + 1) begin + ld[n] <= 1'b0; + ce[n] <= 1'b0; + ar[n] <= 1'b1; + ge[n] <= 1'b0; + out[n] <= 1'b0; + end +end +else begin + for (n = 0; n < 3; n = n + 1) begin + ld[n] <= 1'b0; + if (cs && we_i && adr_i[5:4]==n) + case(adr_i[3:2]) + 2'd1: maxcount[n] <= dat_i; + 2'd2: ont[n] <= dat_i; + 2'd3: begin + if (sel_i[0]) begin + ld[0] <= dat_i[0]; + ce[0] <= dat_i[1]; + ar[0] <= dat_i[2]; + xc[0] 
<= dat_i[3]; + ge[0] <= dat_i[4]; + end + if (sel_i[1]) begin + ld[1] <= dat_i[8]; + ce[1] <= dat_i[9]; + ar[1] <= dat_i[10]; + xc[1] <= dat_i[11]; + ge[1] <= dat_i[12]; + end + if (sel_i[2]) begin + ld[2] <= dat_i[16]; + ce[2] <= dat_i[17]; + ar[2] <= dat_i[18]; + xc[2] <= dat_i[19]; + ge[2] <= dat_i[20]; + end + end + endcase + if (adr_i[5:4]==n) + case(adr_i[3:2]) + 2'd0: dat_o <= count[n]; + 2'd1: dat_o <= maxcount[n]; + 2'd2: dat_o <= ont[n]; + 2'd3: dat_o <= {ge[2],xc[2],ar[2],ce[2],4'b0,ge[1],xc[1],ar[1],ce[1],4'b0,ge[0],xc[0],ar[0],ce[0],1'b0}; + endcase + + if (ld[n]) + count[n] <= maxcount[n]; + else if ((xc[n] ? pulse[n] & ce[n] : ce[n]) & (ge[n] ? gate[n] : 1'b1)) begin + count[n] <= count[n] - 32'd1; + if (count[n]==ont[n]) + out[n] <= 1'b1; + else if (count[n]==32'd0) begin + out[n] <= 1'b0; + if (ar[n]) + count[n] <= maxcount[n]; + else + ce[n] <= 1'b0; + end + end + end +end + +endmodule Index: thor/trunk/FT64v7/rtl/common/FT64_pmmu.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_pmmu.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_pmmu.v (revision 60) @@ -0,0 +1,715 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2007-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_pmmu.v +// - 64 bit CPU paged memory management unit +// - 512 entry TLB, 8 way associative +// - variable page table depth +// - address short-cutting for larger page sizes (8MB) +// - hardware clearing of access bit +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
`ifndef TRUE
`define TRUE 1'b1
`define FALSE 1'b0
`endif
`define _8MBPG 5		// bit index in the PTE / TLB flags marking an 8MB page

// Paged MMU: translates vadr_i to padr_o through a set-associative TLB and
// walks the page table over the m_* master bus on a TLB miss.
module FT64_pmmu
#(
parameter
	AMSB = 31,
	pAssociativity = 8,		// number of ways (parallel compares)
	pTLB_size = 64,			// entries per way (6-bit index, vadr[18:13])
	// state machine encodings
	// NOTE(review): the state machine also uses S_AGE, S_COUNT, S_RD_PTL0,
	// S_RD_PTL1 and S_WR_PTL0, none of which are declared here.
	S_WAIT_MISS = 0,
	S_WR_PTL0L = 1,
	S_WR_PTL0H = 2,
	S_RD_PTL0L = 3,
	S_RD_PTL0H = 4,
	S_RD_PTL1L = 5,
	S_RD_PTL1H = 6,
	S_RD_PTL2 = 7,
	S_RD_PTL3 = 8,
	S_RD_PTL4 = 9,
	S_RD_PTL5 = 10,
	S_RD_PTL5_ACK = 11,
	S_RD_PTL = 12,
	S_WR_PTL = 13
)
(
// syscon
input rst_i,
input clk_i,

input age_tick_i,		// indicates when to age reference counts

// master
output reg m_cyc_o,		// valid memory address
output reg m_lock_o,		// lock the bus
input m_ack_i,			// acknowledge from memory system
output reg m_we_o,		// write enable output
output reg [15:0] m_sel_o,	// lane selects (always all active)
output reg [AMSB:0] m_adr_o,
input [127:0] m_dat_i,		// data input from memory
output reg [127:0] m_dat_o,	// data to memory

// Translation request / control
input invalidate,		// invalidate a specific entry
input invalidate_all,		// causes all entries to be invalidated
input [47:0] pta,		// page directory/table address register
output reg page_fault,

input [7:0] asid_i,
input [7:0] pl_i,
input [1:0] ol_i,		// operating level
input icl_i,			// instruction cache load
input cyc_i,
input we_i,			// cpu is performing write cycle
input [7:0] sel_i,
input [63:0] vadr_i,		// virtual address to translate

output reg cyc_o,
output reg we_o,
output reg [7:0] sel_o,
output reg [AMSB:0] padr_o,	// translated address
output reg cac_o,		// cachable
output reg prv_o,		// privilege violation
output reg exv_o,		// execute violation
output reg rdv_o,		// read violation
output reg wrv_o		// write violation
);

integer nn;
reg [8:0] tlb_wa;
reg [8:0] tlb_ra;
reg [8:0] tlb_ua;
reg [AMSB:0] tmpadr;
// Lookup results (registered one clock after vadr_i is presented)
reg pv_o;
reg v_o;
reg r_o;
reg w_o;
reg x_o;
reg c_o;
reg a_o;
reg [2:0] nnx;			// way that hit in the lookup
reg [127:0] pte;		// holding place for data
reg [AMSB-4:0] pte_adr;
reg [3:0] state;
reg [3:0] stkstate;		// return state for call()/return()
reg [2:0] cnt;			// tlb replacement counter
reg [2:0] whichSet;		// which set to update
reg dbit;			// temp dirty bit
reg miss;
reg proc;
reg [63:0] miss_adr;
wire pta_changed;
// NOTE(review): ack_o is neither a port nor a declared net in this module.
assign ack_o = !miss||page_fault;
wire pgen = pta[11];		// paging enable

wire [AMSB:0] tlb_pte_adr [pAssociativity-1:0];
wire [pAssociativity-1:0] tlb_d;
wire [ 6: 0] tlb_flags [pAssociativity-1:0];
wire [ 7: 0] tlb_pl [pAssociativity-1:0];
wire [ 7: 0] tlb_asid [pAssociativity-1:0];
wire [31: 0] tlb_refcount [pAssociativity-1:0];
wire tlb_g [pAssociativity-1:0];
wire [63:19] tlb_vadr [pAssociativity-1:0];
wire [34:0] tlb_tadr [pAssociativity-1:0];

//wire wr_tlb = state==S_WR_PTL0;
reg wr_tlb;
// Registered bus outputs, qualified by the lookup's valid / privilege results.
// The violation flags are suppressed when ol_i is 2'b00.
always @(posedge clk_i)
	cyc_o <= cyc_i & v_o & ~pv_o;
always @(posedge clk_i)
	we_o <= we_i & v_o & ~pv_o & w_o;
always @(posedge clk_i)
	sel_o <= sel_i & {8{~pv_o}};
always @(posedge clk_i)
	prv_o <= pv_o & v_o && ol_i!=2'b00;
always @(posedge clk_i)
	exv_o <= icl_i & v_o & ~x_o && ol_i!=2'b00;
always @(posedge clk_i)
	rdv_o <= ~icl_i & v_o & ~r_o && ol_i!=2'b00;
always @(posedge clk_i)
	wrv_o <= ~icl_i & v_o & ~w_o && ol_i!=2'b00;
always @(posedge clk_i)
	cac_o <= c_o & v_o;

// One set of TLB field RAMs per way.  All are read at index vadr_i[18:13]
// and written at miss_adr[18:13] for the way selected by whichSet.
genvar g;
generate
	for (g = 0; g < pAssociativity; g = g + 1)
	begin : genTLB
		ram_ar1w1r #(45,pTLB_size) tlbVadr
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(miss_adr[63:19]),
			.o(tlb_vadr[g])
		);
		// NOTE(review): pte_adr is AMSB-3 bits wide but the RAM is AMSB+1 wide.
		ram_ar1w1r #(AMSB+1,pTLB_size) tlbPteAdr
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte_adr),
			.o(tlb_pte_adr[g])
		);
		ram_ar1w1r #( 7,pTLB_size) tlbFlag
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[6:0]),
			.o(tlb_flags[g])
		);
		ram_ar1w1r #(8,pTLB_size) tlbPL
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[15:8]),
			.o(tlb_pl[g])
		);
		ram_ar1w1r #( 1,pTLB_size) tlbG
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[23]),
			.o(tlb_g[g])
		);
		ram_ar1w1r #(8,pTLB_size) tlbASID
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[31:24]),
			.o(tlb_asid[g])
		);
		// NOTE(review): the next two instances share the name tlbRefCount and
		// both drive tlb_refcount[g]; one of them presumably should be removed.
		ram_ar1w1r #(32,pTLB_size) tlbRefCount
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[63:32]),
			.o(tlb_refcount[g])
		);
		ram_ar1w1r #(32,pTLB_size) tlbRefCount
		(
			.clk(clk_i),
			.ce(wr_tlb?whichSet==g:nnx==g),
			.we(wr_tlb||state==S_WAIT_MISS && !miss && cyc_i),
			.wa(wr_tlb?miss_adr[18:13]:vadr_i[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[63:32]),
			.o(tlb_refcount[g])
		);
		ram_ar1w1r #(35,pTLB_size) tlbTadr
		(
			.clk(clk_i),
			.ce(whichSet==g),
			.we(wr_tlb),
			.wa(miss_adr[18:13]),
			.ra(vadr_i[18:13]),
			.i(pte[98:64]),
			.o(tlb_tadr[g])
		);
		// Dirty bit: written 0 together with a new entry (wr_tlb) and 1 on a hit
		// in S_WAIT_MISS.
		// NOTE(review): 'wr' is not declared in this module; we_i may be intended.
		ram_ar1w1r #( 1,pTLB_size) tlbD
		(
			.clk(clk_i),
			.ce(wr_tlb?whichSet==g:nnx==g),
			.we(wr_tlb||state==S_WAIT_MISS && wr && !miss && cyc_i),
			.wa(wr_tlb?miss_adr[18:13]:vadr_i[18:13]),
			.ra(vadr_i[18:13]),
			.i(!wr_tlb),
			.o(tlb_d[g])
		);
	end
endgenerate

reg [pAssociativity*pTLB_size-1:0] tlb_v;	// valid

// The following reg allows detection of when the page table address changes
// NOTE(review): pta_changed is not referenced elsewhere in this module.
change_det #(48) u1
(
	.rst(rst_i),
	.clk(clk_i),
	.ce(1'b1),
	.i(pta),
	.cd(pta_changed)
);

// This must be fast !!!
// Lookup the virtual address in the tlb
// Translate the address
// I/O and system BIOS addresses are not mapped
// Cxxx_xxxx_xxxx_xxxx to FFFF_FFFF_FFFF_FFFF not mapped (kernel segment)
// 0000_0000_0000_0000 to 0000_0000_0000_xxxx not mapped (kernel data segment)
//
// Defaults each clock: miss asserted, all permissions granted, not valid.
// Unmapped ranges and paging-disabled (pgen == 0) pass the address through.
// NOTE(review): padr_o is [AMSB:0] with AMSB = 31 by default, but bits 47:13
// are assigned here.
// NOTE(review): the default 'nnx <= pAssociativity' is truncated by the 3-bit
// nnx register (8 becomes 0 with the default parameter).
always @(posedge clk_i)
begin
	miss <= 1;
	nnx <= pAssociativity;
	a_o <= 1;
	c_o <= 1;
	r_o <= 1;
	x_o <= 1;
	w_o <= 1;
	v_o <= 0;
	pv_o <= 0;
	padr_o[12: 0] <= vadr_i[12: 0];
	padr_o[47:13] <= vadr_i[47:13];
	if (vadr_i[63:16]==48'h0 || vadr_i[63:20]==44'hFFFF_FFFF_FFD) begin
		miss <= 0;
		c_o <= 1;
		v_o <= 1;
	end
	else if (&vadr_i[47:46]) begin
		miss <= 0;
		c_o <= vadr_i[45:44]==2'b00;	// C000_0000_0000 to CFFF_FFFF_FFFF is cacheable
		v_o <= 1;
	end
	else begin
		if (!pgen) begin
			miss <= 0;
			v_o <= 1;
		end
		else
			// Compare every way; a later matching way overrides an earlier one.
			// NOTE(review): nn is an integer, so {nn,vadr_i[18:13]} is wider
			// than the 9-bit tlb_v index - confirm the intended index width.
			for (nn = 0; nn < pAssociativity; nn = nn + 1)
				if (tlb_v[{nn,vadr_i[18:13]}] && vadr_i[63:19]==tlb_vadr[nn]) begin
					// 8MB pages take the low page-number bits from the virtual address
					if (tlb_flags[nn][`_8MBPG])
						padr_o[47:13] <= {tlb_tadr[nn][34:10],vadr_i[22:13]};
					else
						padr_o[47:13] <= tlb_tadr[nn];
					miss <= 1'b0;
					nnx <= nn;
					a_o <= tlb_flags[nn][4];
					c_o <= tlb_flags[nn][3];
					r_o <= tlb_flags[nn][2];
					w_o <= tlb_flags[nn][1];
					x_o <= tlb_flags[nn][0];
					// entry is valid if any of r/w/x is set
					v_o <= tlb_flags[nn][2]|tlb_flags[nn][1]|tlb_flags[nn][0];
					// NOTE(review): 'pl' is not declared; the port is named pl_i.
					pv_o <= (cyc_i & icl_i) ? pl != tlb_pl[nn] && pl!=8'h00 : pl > tlb_pl[nn];
				end
	end
end

reg age_tick_r;
// NOTE(review): the wire declared here is pe_age_rtick, but the edge detector
// output and the state machine use pe_age_tick; the detector input is
// 'age_tick' while the module port is age_tick_i.
wire pe_age_rtick;
edge_det ued1(.clk(clk_i), .ce(1'b1), .i(age_tick), .pe(pe_age_tick), .ne(), .ee());

// The following state machine loads the tlb buffer on a
// miss.
// NOTE(review): this block references identifiers that are not declared in
// the module as shown: states S_AGE, S_COUNT, S_RD_PTL0, S_RD_PTL1 and
// S_WR_PTL0, and signals clock, clock_adr and tlb_wr (the declared write
// strobe is wr_tlb).  m_sel_o is 16 bits wide but is assigned 8-bit constants.
always @(posedge clk_i)
if (rst_i) begin
	nack();
	wr_tlb <= 1'b0;
	m_adr_o <= 1'b0;
	goto(S_WAIT_MISS);
	dbit <= 1'b0;
	whichSet <= 1'b0;
	for (nn = 0; nn < pAssociativity * pTLB_size; nn = nn + 1)
		tlb_v[nn] <= 1'b0;		// all entries are invalid on reset
	page_fault <= `FALSE;
	age_tick_r <= 1'b0;
end
else begin
	wr_tlb <= 1'b0;

	// page fault pulses
	page_fault <= `FALSE;

	// remember an age tick until the state machine is idle enough to service it
	if (pe_age_tick)
		age_tick_r <= 1'b1;

	// changing the address of the page table invalidates all entries
	// (as coded, this is triggered by the invalidate_all input)
	if (invalidate_all)
		for (nn = 0; nn < pAssociativity * pTLB_size; nn = nn + 1)
			tlb_v[nn] <= 1'b0;

	// handle invalidate command
	// an entry matches when it is global or belongs to the current asid
	if (invalidate)
		for (nn = 0; nn < pAssociativity; nn = nn + 1)
			if (vadr_i[63:19]==tlb_vadr[nn] && (tlb_g[nn] || tlb_asid[nn]==asid_i))
				tlb_v[{nn,vadr_i[18:13]}] <= 1'b0;

	case (state)	// synopsys full_case parallel_case

	// Wait for a miss to occur. then initiate bus cycle
	// Output either the page directory address
	// or the page table address, depending on the
	// size of the app.
	S_WAIT_MISS:
		begin
			goto(S_WAIT_MISS);
			dbit <= we_i;
			proc <= `FALSE;

			if (miss) begin
				proc <= `TRUE;
				miss_adr <= vadr_i;
				// try and pick an empty tlb entry
				// (falls back to the replacement counter when every way is valid)
				whichSet <= cnt;
				for (nn = 0; nn < pAssociativity; nn = nn + 1)
					if (!tlb_v[{nn,vadr_i[18:13]}])
						whichSet <= nn;
				goto(S_RD_PTL5);
			end
			// If there's a write cycle, check to see if the
			// dirty bit is set. If the dirty bit hasn't been
			// set yet, then set it and write the dirty status
			// to memory.
			else if (cyc_i && we_i && !tlb_d[nnx]) begin
				miss_adr <= vadr_i;
				whichSet <= nnx;
				goto(S_RD_PTL5);
			end
			else if (age_tick_r) begin
				age_tick_r <= 1'b0;
				tlb_wa <= tlb_ua + 3'd1;
				tlb_ra <= tlb_ua + 3'd1;
				tlb_ua <= tlb_ua + 3'd1;
				goto(S_AGE);
			end
			else begin
				tlb_wa <= {nnx,vadr_i[18:13]};
				tlb_ra <= {nnx,vadr_i[18:13]};
				goto(S_COUNT);
			end
		end

	// Start the first read of the table walk; pta[10:8] selects the
	// page table depth and therefore the entry state and index bits.
	S_RD_PTL5:
		if (~m_ack_i & ~m_cyc_o) begin
			tlb_ra <= {whichSet,miss_adr[18:13]};
			tlb_wa <= {whichSet,miss_adr[18:13]};
			m_cyc_o <= 1'b1;
			m_sel_o <= 8'hFF;
			m_lock_o <= 1'b0;
			m_we_o <= 1'b0;
			case(pta[10:8])
			3'd0: state <= S_RD_PTL0L;
			3'd1: state <= S_RD_PTL1L;
			3'd2: state <= S_RD_PTL2;
			3'd3: state <= S_RD_PTL3;
			3'd4: state <= S_RD_PTL4;
			3'd5: state <= S_RD_PTL5_ACK;
			default: ;
			endcase
			// Set page table address for lookup
			case(pta[10:8])
			3'b000: m_adr_o <= {pta[47:14],miss_adr[22:13],4'h0};	// 8MB translations
			3'b001: m_adr_o <= {pta[47:14],miss_adr[32:23],4'h0};	// 8GB translations
			3'b010: m_adr_o <= {pta[47:14],miss_adr[42:33],4'h8};	// 8TB translations
			3'b011: m_adr_o <= {pta[47:14],miss_adr[52:43],4'h8};	// 8XB translations
			3'b100: m_adr_o <= {pta[47:14],miss_adr[62:53],4'h8};	// translations
			3'b101: m_adr_o <= {pta[47:14],9'b00,miss_adr[63],4'h8};	// translations
			default: ;
			endcase
		end
	// Wait for ack from system
	// Setup to access page table
	// If app uses a page directory, now address the page table
	S_RD_PTL5_ACK:
		if (m_ack_i) begin
			nack();
			if (|m_dat_i[2:0]) begin	// pte valid bit
				tmpadr <= {m_dat_i[33:0],miss_adr[62:53],4'h8};
				call(S_RD_PTL,S_RD_PTL4);
			end
			else begin
				if (clock) begin
					clock_adr[64:63] <= clock_adr[64:63] + 4'h1;
					clock_adr[62:0] <= 4'h0;
					goto (S_WAIT_MISS);
				end
				else
					raise_page_fault();
				// not a valid translation
				// OS messed up ?
			end
		end

	// Wait for ack from system
	// Setup to access page table
	// If app uses a page directory, now address the page table
	S_RD_PTL4:
		if (m_ack_i) begin
			nack();
			if (|m_dat_i[2:0]) begin	// pte valid bit
				tmpadr <= {m_dat_i[50:16],miss_adr[52:43],3'b0};
				call(S_RD_PTL,S_RD_PTL3);
			end
			else begin
				if (clock) begin
					clock_adr[64:53] <= clock_adr[64:53] + 4'h1;
					clock_adr[52:0] <= 4'h0;
					goto (S_WAIT_MISS);
				end
				else
					raise_page_fault();
			end
		end

	// Wait for ack from system
	// Setup to access page table
	// If app uses a page directory, now address the page table
	S_RD_PTL3:
		if (m_ack_i) begin
			nack();
			if (|m_dat_i[2:0]) begin	// pte valid bit
				tmpadr <= {m_dat_i[50:16],miss_adr[42:33],3'b0};
				call(S_RD_PTL,S_RD_PTL2);
			end
			else begin
				if (clock) begin
					clock_adr[64:43] <= clock_adr[64:43] + 4'h1;
					// NOTE(review): the other levels clear the bits just below the
					// incremented field; [42:0] may be intended here.
					clock_adr[32:0] <= 4'h0;
					goto (S_WAIT_MISS);
				end
				else
					raise_page_fault();
			end
		end

	// Wait for ack from system
	// Setup to access page table
	// If app uses a page directory, now address the page table
	S_RD_PTL2:
		if (m_ack_i) begin
			nack();
			if (|m_dat_i[2:0]) begin	// pte valid bit
				tmpadr <= {m_dat_i[50:16],miss_adr[32:23],3'b0};
				call(S_RD_PTL,S_RD_PTL1);
			end
			else begin
				if (clock) begin
					clock_adr[64:33] <= clock_adr[64:33] + 4'h1;
					clock_adr[32:0] <= 4'h0;
					goto (S_WAIT_MISS);
				end
				else
					raise_page_fault();
			end
		end

	// Wait for ack from system
	// Setup to access page table
	// If app uses a page directory, now address the page table
	S_RD_PTL1:
		if (m_ack_i) begin
			nack();
			if (|m_dat_i[2:0]) begin	// pte valid bit
				// Shortcut 8MiB page ?
				if (m_dat_i[`_8MBPG]) begin
					pte <= m_dat_i;
					m_dat_o <= m_dat_i|{dbit,2'b00,~clock,4'b0};
					m_dat_o[4] <= ~clock;
					call(S_WR_PTL,S_WR_PTL0);
				end
				else begin
					tmpadr <= {m_dat_i[50:16],miss_adr[22:13],3'b0};
					call(S_RD_PTL,S_RD_PTL0);
				end
			end
			else begin
				if (clock) begin
					clock_adr[64:23] <= clock_adr[64:23] + 4'h1;
					clock_adr[22:0] <= 4'h0;
					goto (S_WAIT_MISS);
				end
				else
					raise_page_fault();
			end
		end

	//---------------------------------------------------
	// This section of the state machine performs a
	// read then write of a PTE
	//---------------------------------------------------
	// Perform a read cycle of page table level 0 entry
	S_RD_PTL0:
		// The tlb has been updated so the page must have been accessed
		// set the accessed bit for the page table entry
		// Also set dirty bit if a write access.
		if (m_ack_i) begin
			nack();
			tlb_wr <= 1'b1;
			pte_adr <= m_adr_o[AMSB:4];
			m_dat_o <= m_dat_i|{dbit,2'b00,1'b1,4'b0};	// This line will only set bits
			pte <= m_dat_i|{dbit,2'b00,1'b1,4'b0};
			// If the tlb entry is already marked dirty don't bother with updating
			// the pte in memory. Only write on a new dirty status.
			if (tlb_d[tlb_ra[8:6]])
				goto(S_WAIT_MISS);
			else
				call(S_WR_PTL,S_WR_PTL0);
		end

	// PTE write-back acknowledged: mark the tlb entry valid (or fault if the
	// PTE has none of its low three bits set).
	S_WR_PTL0:
		if (m_ack_i) begin
			tlb_wr <= 1'b1;
			nack();
			tlb_v[tlb_wa] <= |pte[2:0];
			if (~|pte[2:0])
				raise_page_fault();
			goto(S_WAIT_MISS);
		end

	//---------------------------------------------------
	// Take care of reference counting and aging.
	//---------------------------------------------------

	// Rebuild the entry from the TLB outputs with the reference count bumped.
	// NOTE(review): tlb_refcount is declared [31:0]; the [63:42] / [41:32]
	// selects below are outside that range.
	S_COUNT:
		begin
			pte[6:0] <= tlb_flags[tlb_ra[8:6]];
			pte[7] <= tlb_d[tlb_ra[8:6]];
			pte[15:8] <= tlb_pl[tlb_ra[8:6]];
			pte[23] <= tlb_g[tlb_ra[8:6]];
			pte[31:24] <= tlb_asid[tlb_ra[8:6]];
			pte[63:32] <= {tlb_refcount[tlb_ra[8:6]][63:42] + 4'd1,tlb_refcount[tlb_ra[8:6]][41:32]};
			pte[127:64] <= tlb_tadr[tlb_ra[8:6]];
			tlb_wr <= 1'b1;
			goto(S_WAIT_MISS);
		end

	// Rebuild the entry with the reference count shifted right by one.
	// NOTE(review): same out-of-range select on tlb_refcount as in S_COUNT.
	S_AGE:
		begin
			pte[6:0] <= tlb_flags[tlb_ra[8:6]];
			pte[7] <= tlb_d[tlb_ra[8:6]];
			pte[15:8] <= tlb_pl[tlb_ra[8:6]];
			pte[23] <= tlb_g[tlb_ra[8:6]];
			pte[31:24] <= tlb_asid[tlb_ra[8:6]];
			pte[63:32] <= {1'b0,tlb_refcount[tlb_ra[8:6]][63:33]};
			pte[127:64] <= tlb_tadr[tlb_ra[8:6]];
			tlb_wr <= 1'b1;
			goto(S_WAIT_MISS);
		end

	//---------------------------------------------------
	// Subroutine: initiate read cycle
	//---------------------------------------------------
	S_RD_PTL:
		if (~m_ack_i & ~m_cyc_o) begin
			m_cyc_o <= 1'b1;
			m_sel_o <= 8'hFF;
			m_lock_o <= 1'b0;
			m_we_o <= 1'b0;
			m_adr_o <= tmpadr;
			return();
		end

	//---------------------------------------------------
	// Subroutine: initiate write cycle
	//---------------------------------------------------
	S_WR_PTL:
		if (~m_ack_i & ~m_cyc_o) begin
			m_cyc_o <= 1'b1;
			m_sel_o <= 8'hFF;
			m_lock_o <= 1'b0;
			m_we_o <= 1'b1;
			// Address comes from a previous read address
//			m_adr_o <= tmpadr;
			return();
		end

	//---------------------------------------------------
	// This state can't happen without a hardware error
	//---------------------------------------------------
	default:
		begin
			nack();
			goto(S_WAIT_MISS);
		end

	endcase
end


// This counter is used to select the tlb entry that gets
// replaced when a new entry is entered into the buffer.
// It just increments every time an entry is updated.
// Advances (modulo pAssociativity) each clock in which a miss is seen while
// the state machine is in S_WAIT_MISS.
always @(posedge clk_i)
if (rst_i)
	cnt <= 0;
else if (state==S_WAIT_MISS && miss) begin
	if (cnt == pAssociativity-1)
		cnt <= 0;
	else
		cnt <= cnt + 1;
end

// Terminate the current master bus cycle.
task nack;
begin
	m_cyc_o <= 1'b0;
	m_sel_o <= 8'h00;
	m_lock_o <= 1'b0;
	m_we_o <= 1'b0;
end
endtask

// End the bus cycle and return to S_WAIT_MISS; page_fault is only raised
// when the walk was started by a genuine miss (proc set).
task raise_page_fault;
begin
	nack();
	if (proc)
		page_fault <= `TRUE;
	proc <= `FALSE;
	state <= S_WAIT_MISS;
end
endtask

// Set the next state.
task goto;
input [3:0] nst;
begin
	state <= nst;
end
endtask

// Enter subroutine state nst, remembering rst as the state to return to.
// Only one return state is held (stkstate), so calls cannot nest.
task call;
input [3:0] nst;
input [3:0] rst;
begin
	goto(nst);
	stkstate <= rst;
end
endtask

// Resume at the state saved by call().
task return;
begin
	state <= stkstate;
end
endtask

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_shift.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_shift.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_shift.v (revision 60) @@ -0,0 +1,135 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_shift.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
//
//
// ============================================================================
//
//`ifndef SHL
`define IVECTOR 6'h01
`define VSHL 6'h0C
`define VSHR 6'h0D
`define VASR 6'h0E
`define RR 6'h02
`define SHIFTR 6'h2F
`define SHIFT31 6'h0F
`define SHIFT63 6'h1F
`define AMO 6'h2F
`define AMOSHL 6'h0C
`define AMOSHR 6'h0D
`define AMOASR 6'h0E
`define AMOROL 6'h0F
`define AMOSHLI 6'h2C
`define AMOSHRI 6'h2D
`define AMOASRI 6'h2E
`define AMOROLI 6'h2F
`define SHL 3'h0
`define SHR 3'h1
`define ASL 3'h2
`define ASR 3'h3
`define ROL 3'h4
`define ROR 3'h5
//`endif
`define HIGHWORD 127:64

// 64-bit shifter / rotator.
//   instr : instruction word; opcode = instr[5:0], func = instr[31:26],
//           shiftop = instr[25:23]
//   a     : value to shift
//   b     : shift amount (only b[5:0] is used)
//   res   : result; zero for any instruction this unit does not decode
//   ov    : set when the upper half of the 128-bit left shift differs from
//           64 copies of a[63]
module FT64_shift(instr, a, b, res, ov);
parameter DMSB=63;
parameter SUP_VECTOR = 1;
input [47:0] instr;
input [DMSB:0] a;
input [DMSB:0] b;
output [DMSB:0] res;
reg [DMSB:0] res;
output ov;
parameter ROTATE_INSN = 1;

wire [5:0] opcode = instr[5:0];
wire [5:0] func = instr[31:26];
wire [2:0] shiftop = instr[25:23];
wire [5:0] amt = b[5:0];

// Double-width shifts: bits that fall off one half land in the other half,
// which is what the rotates OR back in.
wire [127:0] shl = {64'd0,a} << amt;
wire [127:0] shr = {a,64'd0} >> amt;

assign ov = shl[127:64] != {64{a[63]}};

// Candidate results shared by every decode arm below.
wire [DMSB:0] lsl_q = shl[DMSB:0];
wire [DMSB:0] lsr_q = shr[`HIGHWORD];
wire [DMSB:0] rol_q = ROTATE_INSN ? shl[63:0]|shl[`HIGHWORD] : 64'hDEADDEADDEAD;
wire [DMSB:0] ror_q = ROTATE_INSN ? shr[63:0]|shr[`HIGHWORD] : 64'hDEADDEADDEAD;
reg [DMSB:0] asr_q;

always @*
begin
	// Arithmetic right shift: logical shift, then fill the vacated top bits
	// with ones when the operand is negative.
	if (a[DMSB])
		asr_q = (shr[`HIGHWORD]) | ~({64{1'b1}} >> amt);
	else
		asr_q = shr[`HIGHWORD];

	// Anything not decoded below yields zero.
	res = 64'd0;
	case(opcode)
	`IVECTOR:
		if (SUP_VECTOR)
			case(func)
			`VSHL:	res = lsl_q;
			`VSHR:	res = lsr_q;
			`VASR:	res = asr_q;
			default:	;
			endcase
	`RR:
		case(func)
		`SHIFTR,
		`SHIFT31,
		`SHIFT63:
			case(shiftop)
			`SHL,`ASL:	res = lsl_q;
			`SHR:	res = lsr_q;
			`ASR:	res = asr_q;
			`ROL:	res = rol_q;
			`ROR:	res = ror_q;
			default:	;
			endcase
		default:	;
		endcase
	`AMO:
		case(func)
		`AMOSHL,`AMOSHLI:	res = lsl_q;
		`AMOSHR,`AMOSHRI:	res = lsr_q;
		`AMOASR,`AMOASRI:	res = asr_q;
		`AMOROL,`AMOROLI:	res = rol_q;
		default:	;
		endcase
	default:	;
	endcase
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_shiftb.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_shiftb.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_shiftb.v (revision 60) @@ -0,0 +1,77 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_shiftb.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
// ============================================================================
//
//`ifndef SHL
`define R2 6'h02
`define SHL 3'h0
`define SHR 3'h1
`define ASL 3'h2
`define ASR 3'h3
`define ROL 3'h4
`define ROR 3'h5
//`endif
`define HIGHWORDB 15:8

// Byte (8-bit) shifter / rotator.
//   instr : instruction word; opcode = instr[5:0], shiftop = instr[35:33].
//           instr[29] selects the shift amount: 1 = instr[15:13], 0 = b[2:0].
//   a     : value to shift
//   b     : shift amount when instr[29] is clear
//   res   : result; zero unless opcode is R2 with instr[7:6] == 2'b01
//   ov    : always zero
module FT64_shiftb(instr, a, b, res, ov);
parameter DMSB=7;
input [47:0] instr;
input [DMSB:0] a;
input [DMSB:0] b;
output [DMSB:0] res;
reg [DMSB:0] res;
output ov;
parameter ROTATE_INSN = 1;

wire [5:0] opcode = instr[5:0];
wire [2:0] shiftop = instr[35:33];	// was declared 4 bits wide for a 3-bit slice
wire [2:0] bb = instr[29] ? instr[15:13] : b[2:0];

// Double-width shifts so the rotates can OR the spilled half back in.
wire [15:0] shl = {8'd0,a} << bb[2:0];
wire [15:0] shr = {a,8'd0} >> bb[2:0];

assign ov = 1'b0;

always @*
begin
	// Assign a default first so every path drives res.  Previously the R2 arm
	// left res unassigned when instr[7:6] != 2'b01, which infers a latch in
	// what is meant to be combinational logic.
	res = 8'd0;
	case(opcode)
	`R2:
		if (instr[7:6]==2'b01)
			case(shiftop)
			`SHL,`ASL:	res = shl[DMSB:0];
			`SHR:	res = shr[`HIGHWORDB];
			`ASR:	if (a[DMSB])
						// fill the vacated top bits with the sign
						res = (shr[`HIGHWORDB]) | ~({8{1'b1}} >> bb[2:0]);
					else
						res = shr[`HIGHWORDB];
			`ROL:	res = ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDB] : 8'hDE;
			`ROR:	res = ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDB] : 8'hDE;
			default:	res = 8'd0;
			endcase
	default:	res = 8'd0;
	endcase
end

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_shiftc.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_shiftc.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_shiftc.v (revision 60) @@ -0,0 +1,76 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved.
// \/_// robfinch@finitron.ca
// ||
//
//	FT64_shiftc.v
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
// ============================================================================
//
//`ifndef SHL
`define R2 6'h02
`define SHL 3'h0
`define SHR 3'h1
`define ASL 3'h2
`define ASR 3'h3
`define ROL 3'h4
`define ROR 3'h5
//`endif
`define HIGHWORDC 31:16

// Char (16-bit) shifter / rotator.
//   instr : instruction word; opcode = instr[5:0], shiftop = instr[35:33].
//           instr[29] selects the shift amount: 1 = instr[16:13], 0 = b[3:0].
//   a     : value to shift
//   b     : shift amount when instr[29] is clear
//   res   : result; zero unless opcode is R2
//   ov    : always zero
module FT64_shiftc(instr, a, b, res, ov);
parameter DMSB=15;
input [47:0] instr;
input [DMSB:0] a;
input [DMSB:0] b;
output [DMSB:0] res;
reg [DMSB:0] res;
output ov;
parameter ROTATE_INSN = 1;

wire [5:0] opcode = instr[5:0];
wire [2:0] shiftop = instr[35:33];	// was declared 4 bits wide for a 3-bit slice
wire [3:0] bb = instr[29] ? instr[16:13] : b[3:0];
// Double-width shifts so the rotates can OR the spilled half back in.
wire [31:0] shl = {16'd0,a} << bb;
wire [31:0] shr = {a,16'd0} >> bb;

assign ov = 1'b0;

always @*
case(opcode)
// The case label previously used `RR, which this file does not define (it
// only compiled when the macro leaked in from another file).  `R2 is the
// locally defined name for the same encoding (6'h02).
`R2:
	case(shiftop)
	`SHL,`ASL:	res = shl[DMSB:0];
	`SHR:	res = shr[`HIGHWORDC];
	`ASR:	if (a[DMSB])
				// fill the vacated top bits with the sign
				res = (shr[`HIGHWORDC]) | ~({16{1'b1}} >> bb);
			else
				res = shr[`HIGHWORDC];
	`ROL:	res = ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDC] : 16'hDEAD;
	`ROR:	res = ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDC] : 16'hDEAD;
	default:	res = 16'd0;
	endcase
default:	res = 16'd0;
endcase

endmodule

Index: thor/trunk/FT64v7/rtl/common/FT64_shifth.v =================================================================== --- thor/trunk/FT64v7/rtl/common/FT64_shifth.v (nonexistent) +++ thor/trunk/FT64v7/rtl/common/FT64_shifth.v (revision 60) @@ -0,0 +1,97 @@ +`timescale 1ns / 1ps +// ============================================================================ +// __ +// \\__/ o\ (C) 2016-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_shifth.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
//
//
// ============================================================================
//
//`ifndef SHL
`define R2 6'h02
`define AMO 6'h2F
`define AMOSHL 6'h0C
`define AMOSHR 6'h0D
`define AMOASR 6'h0E
`define AMOROL 6'h0F
`define AMOSHLI 6'h2C
`define AMOSHRI 6'h2D
`define AMOASRI 6'h2E
`define AMOROLI 6'h2F
`define SHL 3'h0
`define SHR 3'h1
`define ASL 3'h2
`define ASR 3'h3
`define ROL 3'h4
`define ROR 3'h5
//`endif
`define HIGHWORDH 63:32

// Half-word (32-bit) shifter / rotator.
//   instr : instruction word; opcode = instr[5:0], func = instr[31:26],
//           shiftop = instr[35:33].
//           instr[29] selects the shift amount: 1 = instr[17:13], 0 = b[4:0].
//   a     : value to shift
//   b     : shift amount when instr[29] is clear
//   res   : result; zero for anything this unit does not decode
//   ov    : always zero
module FT64_shifth(instr, a, b, res, ov);
parameter DMSB=31;
input [47:0] instr;
input [DMSB:0] a;
input [DMSB:0] b;
output [DMSB:0] res;
reg [DMSB:0] res;
output ov;
parameter ROTATE_INSN = 1;

wire [5:0] opcode = instr[5:0];
wire [5:0] func = instr[31:26];
wire [2:0] shiftop = instr[35:33];	// was declared 4 bits wide for a 3-bit slice
wire [4:0] bb = instr[29] ? instr[17:13] : b[4:0];
// Double-width shifts so the rotates can OR the spilled half back in.
wire [63:0] shl = {32'd0,a} << bb;
wire [63:0] shr = {a,32'd0} >> bb;

assign ov = 1'b0;

always @*
case(opcode)
`R2:
	case(shiftop)
	`SHL,`ASL:	res = shl[DMSB:0];
	`SHR:	res = shr[`HIGHWORDH];
	`ASR:	if (a[DMSB])
				res = (shr[`HIGHWORDH]) | ~({32{1'b1}} >> bb);
			else
				res = shr[`HIGHWORDH];
	`ROL:	res = ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
	`ROR:	res = ROTATE_INSN ? shr[DMSB:0]|shr[`HIGHWORDH] : 32'hDEADDEAD;
	default:	res = 32'd0;
	endcase
`AMO:
	case(func)
	`AMOSHL,`AMOSHLI:	res = shl[DMSB:0];
	`AMOSHR,`AMOSHRI:	res = shr[`HIGHWORDH];
	`AMOASR,`AMOASRI:	if (a[DMSB])
				// The sign-fill mask must use the same amount (bb) as the shift
				// itself.  It previously used b[4:0], so whenever bb was taken
				// from the instruction the fill did not line up with the shift.
				res = (shr[`HIGHWORDH]) | ~({32{1'b1}} >> bb);
			else
				res = shr[`HIGHWORDH];
	`AMOROL:	res = ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
	`AMOROLI:	res = ROTATE_INSN ? shl[DMSB:0]|shl[`HIGHWORDH] : 32'hDEADDEAD;
	default:	res = 32'd0;
	endcase
default:	res = 32'd0;
endcase

endmodule

Index: thor/trunk/FT64v7/rtl/lib/BCDMath.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/BCDMath.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/BCDMath.v (revision 60) @@ -0,0 +1,291 @@ +`timescale 1ns / 1ps +//============================================================================= +// __ +// \\__/ o\ (C) 2012 Robert Finch +// \ __ / All rights reserved. +// \/_// robfinch@opencores.org +// || +// +// BCDMath.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
//
//
//=============================================================================
//

// Two-digit packed BCD adder.
//   ci  carry in
//   a,b packed BCD operands (two digits each)
//   o   packed BCD sum
//   c   carry out of the upper digit
module BCDAdd(ci,a,b,o,c);
input ci;		// carry input
input [7:0] a;
input [7:0] b;
output [7:0] o;
output c;

wire c0;	// carry between the two digits

// Binary digit sums (0..19); the adjust stage converts each back to BCD.
wire [4:0] hsN0 = a[3:0] + b[3:0] + ci;
wire [4:0] hsN1 = a[7:4] + b[7:4] + c0;

BCDAddAdjust u1 (hsN0,o[3:0],c0);
BCDAddAdjust u2 (hsN1,o[7:4],c);

endmodule

// Four-digit packed BCD adder; same scheme as BCDAdd with a ripple carry
// through four digit stages.
module BCDAdd4(ci,a,b,o,c);
input ci;		// carry input
input [15:0] a;
input [15:0] b;
output [15:0] o;
output c;

wire c0,c1,c2;	// inter-digit carries

wire [4:0] hsN0 = a[3:0] + b[3:0] + ci;
wire [4:0] hsN1 = a[7:4] + b[7:4] + c0;
wire [4:0] hsN2 = a[11:8] + b[11:8] + c1;
wire [4:0] hsN3 = a[15:12] + b[15:12] + c2;

BCDAddAdjust u1 (hsN0,o[3:0],c0);
BCDAddAdjust u2 (hsN1,o[7:4],c1);
BCDAddAdjust u3 (hsN2,o[11:8],c2);
BCDAddAdjust u4 (hsN3,o[15:12],c);

endmodule

// Two-digit packed BCD subtractor: o = a - b - ci, c = borrow out.
module BCDSub(ci,a,b,o,c);
input ci;		// carry (borrow) input
input [7:0] a;
input [7:0] b;
output [7:0] o;
output c;

wire c0;	// borrow between the two digits

// Five-bit two's complement digit differences (-10..9).
wire [4:0] hdN0 = a[3:0] - b[3:0] - ci;
wire [4:0] hdN1 = a[7:4] - b[7:4] - c0;

BCDSubAdjust u1 (hdN0,o[3:0],c0);
BCDSubAdjust u2 (hdN1,o[7:4],c);

endmodule

// Converts a binary digit sum to a BCD digit plus carry.
//   0..9   -> digit unchanged, no carry
//   10..19 -> digit minus ten, carry
//   20..31 -> cannot be produced from valid BCD inputs; yields 9 with carry
module BCDAddAdjust(i,o,c);
input [4:0] i;
output [3:0] o;
reg [3:0] o;
output c;
reg c;

wire [4:0] less_ten = i - 5'd10;

always @*
if (i <= 5'd9) begin
	o = i[3:0];
	c = 1'b0;
end
else if (i <= 5'd19) begin
	o = less_ten[3:0];
	c = 1'b1;
end
else begin
	o = 4'h9;
	c = 1'b1;
end

endmodule

// Converts a five-bit two's complement digit difference to a BCD digit plus
// borrow.
//   0..9           -> digit unchanged, no borrow
//   22..31 (-10..-1) -> difference plus ten, borrow
//   10..21         -> cannot be produced from valid BCD inputs; yields 9
//                     with borrow
module BCDSubAdjust(i,o,c);
input [4:0] i;
output [3:0] o;
reg [3:0] o;
output c;
reg c;

wire [4:0] plus_ten = i - 5'd22;	// same as i + 10 modulo 32

always @*
if (i <= 5'd9) begin
	o = i[3:0];
	c = 1'b0;
end
else if (i >= 5'd22) begin
	o = plus_ten[3:0];
	c = 1'b1;
end
else begin
	o = 4'h9;
	c = 1'b1;
end

endmodule

// Multiply two BCD digits.
// The product of two valid digits (0..9) is returned as two packed BCD
// digits; if either input is not a valid BCD digit the result is 00.
// This replaces a casex lookup table: casex treats X/Z bits of the selector
// as wildcards, which made unknown inputs silently read back as zero.
module BCDMul1(a,b,o);
input [3:0] a;
input [3:0] b;
output [7:0] o;
reg [7:0] o;

wire digits_ok = (a <= 4'd9) && (b <= 4'd9);
wire [6:0] prod = a * b;			// at most 81 for valid digits
wire [6:0] tens = prod / 7'd10;
wire [6:0] ones = prod % 7'd10;

always @*
	o = digits_ok ? {tens[3:0],ones[3:0]} : 8'h00;

endmodule


// Multiply two pairs of BCD digits
// handles from 0x0 to 99x99
// The four digit-by-digit partial products are aligned and summed with two
// BCD additions. The largest result is 9801, so neither addition can carry
// out; the carry ports are left open explicitly.
module BCDMul2(a,b,o);
input [7:0] a;
input [7:0] b;
output [15:0] o;

wire [7:0] p1,p2,p3,p4;
wire [15:0] s1;

BCDMul1 u1 (a[3:0],b[3:0],p1);
BCDMul1 u2 (a[7:4],b[3:0],p2);
BCDMul1 u3 (a[3:0],b[7:4],p3);
BCDMul1 u4 (a[7:4],b[7:4],p4);

BCDAdd4 u5 (1'b0,{p4,p1},{4'h0,p2,4'h0},s1,);
BCDAdd4 u6 (1'b0,s1,{4'h0,p3,4'h0},o,);

endmodule

// Minimal self-checking test bench for BCDMul2.
module BCDMul_tb();

wire [15:0] o1,o2,o3,o4;

BCDMul2 u1 (8'h00,8'h00,o1);
BCDMul2 u2 (8'h99,8'h99,o2);
BCDMul2 u3 (8'h25,8'h18,o3);
BCDMul2 u4 (8'h37,8'h21,o4);

initial begin
	#1;
	if (o1 !== 16'h0000) $display("BCDMul_tb: 00*00 gave %h", o1);
	if (o2 !== 16'h9801) $display("BCDMul_tb: 99*99 gave %h", o2);
	if (o3 !== 16'h0450) $display("BCDMul_tb: 25*18 gave %h", o3);
	if (o4 !== 16'h0777) $display("BCDMul_tb: 37*21 gave %h", o4);
end

endmodule

Index: thor/trunk/FT64v7/rtl/lib/ack_gen.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/ack_gen.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/ack_gen.v (revision 60) @@ -0,0 +1,61 @@
//
// ============================================================================
//        __
//   \\__/ o\    (C) 2018  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch@finitron.ca
//       ||
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ack_gen.v
// - generates a acknowledge signal after a specified number of clocks.
// - separate stages for read and write
//
// Ports:
//   clk_i  clock
//   ce_i   clock enable, passed to the ready_gen delay stages
//   i      request / select input
//   we_i   write enable; selects the write path when i is also high
//   o      acknowledge output
//
// Parameters:
//   READ_STAGES      stage count handed to the read-path ready_gen
//                    (0 = i is passed straight through)
//   WRITE_STAGES     stage count handed to the write-path ready_gen
//                    (0 = i is passed straight through)
//   ACK_LEVEL        value driven on o while i is low
//   REGISTER_OUTPUT  when non-zero, o is registered on clk_i
//
// ready_gen is defined elsewhere in the library.
// ============================================================================
//
module ack_gen(clk_i, ce_i, i, we_i, o);
input clk_i;
input ce_i;
input i;
input we_i;
output reg o;
parameter READ_STAGES = 3;
parameter WRITE_STAGES = 0;
parameter ACK_LEVEL = 1'b0;
parameter REGISTER_OUTPUT = 1'b0;

wire ro, wo;
generate begin : gRdy
if (READ_STAGES==0)
assign ro = i;
else begin
ready_gen #(READ_STAGES) urrdy (clk_i, ce_i, i, ro);
end
if (WRITE_STAGES==0)
assign wo = i;
else begin
// The write path takes its depth from WRITE_STAGES. It was previously
// instantiated with READ_STAGES, so the write delay could not be set
// independently of the read delay.
ready_gen #(WRITE_STAGES) uwrdy (clk_i, ce_i, we_i, wo);
end
if (REGISTER_OUTPUT) begin
always @(posedge clk_i)
	o <= (i & we_i) ? wo : i ? ro : ACK_LEVEL;
end
else begin
always @*
	o = (i & we_i) ? wo : i ? ro : ACK_LEVEL;
end
end
endgenerate

endmodule

Index: thor/trunk/FT64v7/rtl/lib/cntlz.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/cntlz.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/cntlz.v (revision 60) @@ -0,0 +1,613 @@
/* ===============================================================
	(C) 2006  Robert Finch
	All rights reserved.
	rob@birdcomputer.ca

	cntlz.v
		- count number of leading zeros in a byte
		- count number of leading ones in a byte
		- simple fast approach - lookup table

	This source code is free for use and modification for
	non-commercial or evaluation purposes, provided this
	copyright statement and disclaimer remains present in
	the file.

	If the code is modified, please state the origin and
	note that the code has been modified.

	NO WARRANTY.
	THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF
	ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
	the entire risk of using the Work.

	IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
	ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
	WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
	RELATIONSHIP WITH THE AUTHOR.

	IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
	TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
	WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
	TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
	OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
	AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
	FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
	USE.
+ + Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256 + 11 LUTs / 6 slices / 12.2 ns + +=============================================================== */ + +module cntlz8( + input [7:0] i, + output reg [3:0] o +); + + always @(i) + case (i) + 8'b00000000: o = 8; + 8'b00000001: o = 7; + 8'b00000010: o = 6; + 8'b00000011: o = 6; + 8'b00000100: o = 5; + 8'b00000101: o = 5; + 8'b00000110: o = 5; + 8'b00000111: o = 5; + 8'b00001000: o = 4; + 8'b00001001: o = 4; + 8'b00001010: o = 4; + 8'b00001011: o = 4; + 8'b00001100: o = 4; + 8'b00001101: o = 4; + 8'b00001110: o = 4; + 8'b00001111: o = 4; + + 8'b00010000: o = 3; + 8'b00010001: o = 3; + 8'b00010010: o = 3; + 8'b00010011: o = 3; + 8'b00010100: o = 3; + 8'b00010101: o = 3; + 8'b00010110: o = 3; + 8'b00010111: o = 3; + 8'b00011000: o = 3; + 8'b00011001: o = 3; + 8'b00011010: o = 3; + 8'b00011011: o = 3; + 8'b00011100: o = 3; + 8'b00011101: o = 3; + 8'b00011110: o = 3; + 8'b00011111: o = 3; + + 8'b00100000: o = 2; + 8'b00100001: o = 2; + 8'b00100010: o = 2; + 8'b00100011: o = 2; + 8'b00100100: o = 2; + 8'b00100101: o = 2; + 8'b00100110: o = 2; + 8'b00100111: o = 2; + 8'b00101000: o = 2; + 8'b00101001: o = 2; + 8'b00101010: o = 2; + 8'b00101011: o = 2; + 8'b00101100: o = 2; + 8'b00101101: o = 2; + 8'b00101110: o = 2; + 8'b00101111: o = 2; + + 8'b00110000: o = 2; + 8'b00110001: o = 2; + 8'b00110010: o = 2; + 8'b00110011: o = 2; + 8'b00110100: o = 2; + 8'b00110101: o = 2; + 8'b00110110: o = 2; + 8'b00110111: o = 2; + 8'b00111000: o = 2; + 8'b00111001: o = 2; + 8'b00111010: o = 2; + 8'b00111011: o = 2; + 8'b00111100: o = 2; + 8'b00111101: o = 2; + 8'b00111110: o = 2; + 8'b00111111: o = 2; + + // 44 - 1 + 8'b01000000: o = 1; + 8'b01000001: o = 1; + 8'b01000010: o = 1; + 8'b01000011: o = 1; + 8'b01000100: o = 1; + 8'b01000101: o = 1; + 8'b01000110: o = 1; + 8'b01000111: o = 1; + 8'b01001000: o = 1; + 8'b01001001: o = 1; + 8'b01001010: o = 1; + 8'b01001011: o = 1; + 8'b01001100: o = 1; + 8'b01001101: o = 1; + 8'b01001110: o = 1; + 
8'b01001111: o = 1; + + 8'b01010000: o = 1; + 8'b01010001: o = 1; + 8'b01010010: o = 1; + 8'b01010011: o = 1; + 8'b01010100: o = 1; + 8'b01010101: o = 1; + 8'b01010110: o = 1; + 8'b01010111: o = 1; + 8'b01011000: o = 1; + 8'b01011001: o = 1; + 8'b01011010: o = 1; + 8'b01011011: o = 1; + 8'b01011100: o = 1; + 8'b01011101: o = 1; + 8'b01011110: o = 1; + 8'b01011111: o = 1; + + 8'b01100000: o = 1; + 8'b01100001: o = 1; + 8'b01100010: o = 1; + 8'b01100011: o = 1; + 8'b01100100: o = 1; + 8'b01100101: o = 1; + 8'b01100110: o = 1; + 8'b01100111: o = 1; + 8'b01101000: o = 1; + 8'b01101001: o = 1; + 8'b01101010: o = 1; + 8'b01101011: o = 1; + 8'b01101100: o = 1; + 8'b01101101: o = 1; + 8'b01101110: o = 1; + 8'b01101111: o = 1; + + 8'b01110000: o = 1; + 8'b01110001: o = 1; + 8'b01110010: o = 1; + 8'b01110011: o = 1; + 8'b01110100: o = 1; + 8'b01110101: o = 1; + 8'b01110110: o = 1; + 8'b01110111: o = 1; + 8'b01111000: o = 1; + 8'b01111001: o = 1; + 8'b01111010: o = 1; + 8'b01111011: o = 1; + 8'b01111100: o = 1; + 8'b01111101: o = 1; + 8'b01111110: o = 1; + 8'b01111111: o = 1; + + // - 2 + 8'b10000000: o = 0; + 8'b10000001: o = 0; + 8'b10000010: o = 0; + 8'b10000011: o = 0; + 8'b10000100: o = 0; + 8'b10000101: o = 0; + 8'b10000110: o = 0; + 8'b10000111: o = 0; + 8'b10001000: o = 0; + 8'b10001001: o = 0; + 8'b10001010: o = 0; + 8'b10001011: o = 0; + 8'b10001100: o = 0; + 8'b10001101: o = 0; + 8'b10001110: o = 0; + 8'b10001111: o = 0; + + 8'b10010000: o = 0; + 8'b10010001: o = 0; + 8'b10010010: o = 0; + 8'b10010011: o = 0; + 8'b10010100: o = 0; + 8'b10010101: o = 0; + 8'b10010110: o = 0; + 8'b10010111: o = 0; + 8'b10011000: o = 0; + 8'b10011001: o = 0; + 8'b10011010: o = 0; + 8'b10011011: o = 0; + 8'b10011100: o = 0; + 8'b10011101: o = 0; + 8'b10011110: o = 0; + 8'b10011111: o = 0; + + 8'b10100000: o = 0; + 8'b10100001: o = 0; + 8'b10100010: o = 0; + 8'b10100011: o = 0; + 8'b10100100: o = 0; + 8'b10100101: o = 0; + 8'b10100110: o = 0; + 8'b10100111: o = 0; + 8'b10101000: o = 0; 
+ 8'b10101001: o = 0; + 8'b10101010: o = 0; + 8'b10101011: o = 0; + 8'b10101100: o = 0; + 8'b10101101: o = 0; + 8'b10101110: o = 0; + 8'b10101111: o = 0; + + 8'b10110000: o = 0; + 8'b10110001: o = 0; + 8'b10110010: o = 0; + 8'b10110011: o = 0; + 8'b10110100: o = 0; + 8'b10110101: o = 0; + 8'b10110110: o = 0; + 8'b10110111: o = 0; + 8'b10111000: o = 0; + 8'b10111001: o = 0; + 8'b10111010: o = 0; + 8'b10111011: o = 0; + 8'b10111100: o = 0; + 8'b10111101: o = 0; + 8'b10111110: o = 0; + 8'b10111111: o = 0; + + // 44 - 3 + 8'b11000000: o = 0; + 8'b11000001: o = 0; + 8'b11000010: o = 0; + 8'b11000011: o = 0; + 8'b11000100: o = 0; + 8'b11000101: o = 0; + 8'b11000110: o = 0; + 8'b11000111: o = 0; + 8'b11001000: o = 0; + 8'b11001001: o = 0; + 8'b11001010: o = 0; + 8'b11001011: o = 0; + 8'b11001100: o = 0; + 8'b11001101: o = 0; + 8'b11001110: o = 0; + 8'b11001111: o = 0; + + 8'b11010000: o = 0; + 8'b11010001: o = 0; + 8'b11010010: o = 0; + 8'b11010011: o = 0; + 8'b11010100: o = 0; + 8'b11010101: o = 0; + 8'b11010110: o = 0; + 8'b11010111: o = 0; + 8'b11011000: o = 0; + 8'b11011001: o = 0; + 8'b11011010: o = 0; + 8'b11011011: o = 0; + 8'b11011100: o = 0; + 8'b11011101: o = 0; + 8'b11011110: o = 0; + 8'b11011111: o = 0; + + 8'b11100000: o = 0; + 8'b11100001: o = 0; + 8'b11100010: o = 0; + 8'b11100011: o = 0; + 8'b11100100: o = 0; + 8'b11100101: o = 0; + 8'b11100110: o = 0; + 8'b11100111: o = 0; + 8'b11101000: o = 0; + 8'b11101001: o = 0; + 8'b11101010: o = 0; + 8'b11101011: o = 0; + 8'b11101100: o = 0; + 8'b11101101: o = 0; + 8'b11101110: o = 0; + 8'b11101111: o = 0; + + 8'b11110000: o = 0; + 8'b11110001: o = 0; + 8'b11110010: o = 0; + 8'b11110011: o = 0; + 8'b11110100: o = 0; + 8'b11110101: o = 0; + 8'b11110110: o = 0; + 8'b11110111: o = 0; + 8'b11111000: o = 0; + 8'b11111001: o = 0; + 8'b11111010: o = 0; + 8'b11111011: o = 0; + 8'b11111100: o = 0; + 8'b11111101: o = 0; + 8'b11111110: o = 0; + 8'b11111111: o = 0; + + endcase + + +endmodule + + +module cntlz16( + input [15:0] 
i, + output [4:0] o +); + + wire [3:0] cnt1, cnt2; + + cntlz8 u1 (i[ 7:0],cnt1); + cntlz8 u2 (i[15:8],cnt2); + + assign o = cnt2[3] ? cnt1 + 4'h8 : cnt2; + +endmodule + + +// 39 slices / 67 LUTs / 19.3ns +module cntlz24( + input [23:0] i, + output [4:0] o +); + + wire [3:0] cnt1, cnt2, cnt3; + + // cntlz8 results in faster result than cntlz16 + cntlz8 u1 (i[ 7: 0],cnt1); + cntlz8 u2 (i[15: 8],cnt2); + cntlz8 u3 (i[23:16],cnt3); + + assign o = + !cnt3[3] ? cnt3 : + !cnt2[3] ? cnt2 + 5'd8 : + cnt1 + 5'd16; + +endmodule + +// 39 slices / 67 LUTs / 19.3ns +module cntlz32( + input [31:0] i, + output [5:0] o +); + + wire [3:0] cnt1, cnt2, cnt3, cnt4; + + // cntlz8 results in faster result than cntlz16 + cntlz8 u1 (i[ 7: 0],cnt1); + cntlz8 u2 (i[15: 8],cnt2); + cntlz8 u3 (i[23:16],cnt3); + cntlz8 u4 (i[31:24],cnt4); + + assign o = + !cnt4[3] ? cnt4 : + !cnt3[3] ? cnt3 + 6'd8 : + !cnt2[3] ? cnt2 + 6'd16 : + cnt1 + 6'd24; + +endmodule + + +// 88 slices / 154 LUTs / 22.5 ns +module cntlz48( + input [47:0] i, + output [5:0] o +); + + wire [4:0] cnt1, cnt2, cnt3; + + cntlz16 u1 (i[15: 0],cnt1); + cntlz16 u2 (i[31:16],cnt2); + cntlz16 u3 (i[47:32],cnt3); + + assign o = + !cnt3[4] ? cnt3 : + !cnt2[4] ? cnt2 + 7'd16 : + cnt1 + 7'd32; + +endmodule + + +// 88 slices / 154 LUTs / 22.5 ns +module cntlz64( + input [63:0] i, + output [6:0] o +); + + wire [4:0] cnt1, cnt2, cnt3, cnt4; + + cntlz16 u1 (i[15: 0],cnt1); + cntlz16 u2 (i[31:16],cnt2); + cntlz16 u3 (i[47:32],cnt3); + cntlz16 u4 (i[63:48],cnt4); + + assign o = + !cnt4[4] ? cnt4 : + !cnt3[4] ? cnt3 + 7'd16 : + !cnt2[4] ? cnt2 + 7'd32 : + cnt1 + 7'd48; + +endmodule + + +module cntlz80( + input [79:0] i, + output [6:0] o +); + + wire [4:0] cnt3; + wire [5:0] cnt1, cnt2; + + cntlz32 u1 (i[31: 0],cnt1); + cntlz32 u2 (i[63:32],cnt2); + cntlz16 u3 (i[79:64],cnt3); + + assign o = + !cnt3[4] ? {2'b0,cnt3} : + !cnt2[5] ? 
{1'b0,cnt2} + 8'd16 : + {1'b0,cnt1} + 7'd48; + +endmodule + + +module cntlz96( + input [95:0] i, + output [7:0] o +); + + wire [5:0] cnt1, cnt2, cnt3; + + cntlz32 u1 (i[31: 0],cnt1); + cntlz32 u2 (i[63:32],cnt2); + cntlz32 u3 (i[95:64],cnt3); + + assign o = + !cnt3[5] ? cnt3 : + !cnt2[5] ? cnt2 + 8'd32 : + cnt1 + 8'd64; + +endmodule + + +module cntlz128( + input [127:0] i, + output [7:0] o +); + + wire [5:0] cnt1, cnt2, cnt3, cnt4; + + cntlz32 u1 (i[31: 0],cnt1); + cntlz32 u2 (i[63:32],cnt2); + cntlz32 u3 (i[95:64],cnt3); + cntlz32 u4 (i[127:96],cnt4); + + assign o = + !cnt4[5] ? cnt4 : + !cnt3[5] ? cnt3 + 8'd32 : + !cnt2[5] ? cnt2 + 8'd64 : + cnt1 + 8'd96; + +endmodule + + +module cntlz32Reg( + input clk, + input ce, + input [31:0] i, + output reg [5:0] o +); + + wire [5:0] o1; + cntlz32 u1 (i,o1); + always @(posedge clk) + if (ce) o <= o1; + +endmodule + + +module cntlz64Reg( + input clk, + input ce, + input [63:0] i, + output reg [6:0] o +); + + wire [6:0] o1; + cntlz64 u1 (i,o1); + always @(posedge clk) + if (ce) o <= o1; + +endmodule + +module cntlz80Reg( + input clk, + input ce, + input [79:0] i, + output reg [6:0] o +); + + wire [6:0] o1; + cntlz80 u1 (i,o1); + always @(posedge clk) + if (ce) o <= o1; + +endmodule + +module cntlz96Reg( + input clk, + input ce, + input [95:0] i, + output reg [7:0] o +); + + wire [7:0] o1; + cntlz96 u1 (i,o1); + always @(posedge clk) + if (ce) o <= o1; + +endmodule + +module cntlz128Reg( + input clk, + input ce, + input [127:0] i, + output reg [7:0] o +); + + wire [7:0] o1; + cntlz128 u1 (i,o1); + always @(posedge clk) + if (ce) o <= o1; + +endmodule + +// 5 slices / 10 LUTs / 7.702 ns +module cntlo8( + input [7:0] i, + output [3:0] o +); + + cntlz8 u1 (~i,o); + +endmodule + + +module cntlo16( + input [15:0] i, + output [4:0] o +); + + cntlz16 u1 (~i,o); + +endmodule + + +module cntlo32( + input [31:0] i, + output [5:0] o +); + + cntlz32 u1 (~i,o); + +endmodule + + +module cntlo48( + input [47:0] i, + output [5:0] o +); + + 
cntlz48 u1 (~i,o); + +endmodule + + +// 59 slices / 99 LUTs / 14.065 ns +module cntlo64( + input [63:0] i, + output [6:0] o +); + + cntlz64 u1 (~i,o); + +endmodule + + Index: thor/trunk/FT64v7/rtl/lib/cntpop.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/cntpop.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/cntpop.v (revision 60) @@ -0,0 +1,381 @@ +/* =============================================================== + (C) 2006 Robert Finch + All rights reserved. + rob@birdcomputer.ca + + cntpop.v + - count number of one bits in a byte + - simple fast approach - lookup table + + This source code is free for use and modification for + non-commercial or evaluation purposes, provided this + copyright statement and disclaimer remains present in + the file. + + If the code is modified, please state the origin and + note that the code has been modified. + + NO WARRANTY. + THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF + ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume + the entire risk of using the Work. + + IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES + WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR + RELATIONSHIP WITH THE AUTHOR. + + IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU + TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE + WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED + TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS + OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, + AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS + FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED + USE. 
+ + Ref: Webpack 8.1i Spartan3-4 xc3s1000-4ft256 + 11 LUTs / 6 slices / 12.2 ns + +=============================================================== */ + +module cntpop8( + input [7:0] i, + output reg [3:0] o +); + + always @(i) + case (i) + 8'b00000000: o = 0; + 8'b00000001: o = 1; + 8'b00000010: o = 1; + 8'b00000011: o = 2; + 8'b00000100: o = 1; + 8'b00000101: o = 2; + 8'b00000110: o = 2; + 8'b00000111: o = 3; + 8'b00001000: o = 1; + 8'b00001001: o = 2; + 8'b00001010: o = 2; + 8'b00001011: o = 3; + 8'b00001100: o = 2; + 8'b00001101: o = 3; + 8'b00001110: o = 3; + 8'b00001111: o = 4; + + 8'b00010000: o = 1; + 8'b00010001: o = 2; + 8'b00010010: o = 2; + 8'b00010011: o = 3; + 8'b00010100: o = 2; + 8'b00010101: o = 3; + 8'b00010110: o = 3; + 8'b00010111: o = 4; + 8'b00011000: o = 2; + 8'b00011001: o = 3; + 8'b00011010: o = 3; + 8'b00011011: o = 4; + 8'b00011100: o = 3; + 8'b00011101: o = 4; + 8'b00011110: o = 4; + 8'b00011111: o = 5; + + 8'b00100000: o = 1; + 8'b00100001: o = 2; + 8'b00100010: o = 2; + 8'b00100011: o = 3; + 8'b00100100: o = 2; + 8'b00100101: o = 3; + 8'b00100110: o = 3; + 8'b00100111: o = 4; + 8'b00101000: o = 2; + 8'b00101001: o = 3; + 8'b00101010: o = 3; + 8'b00101011: o = 4; + 8'b00101100: o = 3; + 8'b00101101: o = 4; + 8'b00101110: o = 4; + 8'b00101111: o = 5; + + 8'b00110000: o = 2; + 8'b00110001: o = 3; + 8'b00110010: o = 3; + 8'b00110011: o = 4; + 8'b00110100: o = 3; + 8'b00110101: o = 4; + 8'b00110110: o = 4; + 8'b00110111: o = 5; + 8'b00111000: o = 3; + 8'b00111001: o = 4; + 8'b00111010: o = 4; + 8'b00111011: o = 5; + 8'b00111100: o = 4; + 8'b00111101: o = 5; + 8'b00111110: o = 5; + 8'b00111111: o = 6; + + // 44 - 1 + 8'b01000000: o = 1; + 8'b01000001: o = 2; + 8'b01000010: o = 2; + 8'b01000011: o = 3; + 8'b01000100: o = 2; + 8'b01000101: o = 3; + 8'b01000110: o = 3; + 8'b01000111: o = 4; + 8'b01001000: o = 2; + 8'b01001001: o = 3; + 8'b01001010: o = 3; + 8'b01001011: o = 4; + 8'b01001100: o = 3; + 8'b01001101: o = 4; + 8'b01001110: o = 4; + 
8'b01001111: o = 5; + + 8'b01010000: o = 2; + 8'b01010001: o = 3; + 8'b01010010: o = 3; + 8'b01010011: o = 4; + 8'b01010100: o = 3; + 8'b01010101: o = 4; + 8'b01010110: o = 4; + 8'b01010111: o = 5; + 8'b01011000: o = 3; + 8'b01011001: o = 4; + 8'b01011010: o = 4; + 8'b01011011: o = 5; + 8'b01011100: o = 4; + 8'b01011101: o = 5; + 8'b01011110: o = 5; + 8'b01011111: o = 6; + + 8'b01100000: o = 2; + 8'b01100001: o = 3; + 8'b01100010: o = 3; + 8'b01100011: o = 4; + 8'b01100100: o = 3; + 8'b01100101: o = 4; + 8'b01100110: o = 4; + 8'b01100111: o = 5; + 8'b01101000: o = 3; + 8'b01101001: o = 4; + 8'b01101010: o = 4; + 8'b01101011: o = 5; + 8'b01101100: o = 4; + 8'b01101101: o = 5; + 8'b01101110: o = 5; + 8'b01101111: o = 6; + + 8'b01110000: o = 3; + 8'b01110001: o = 4; + 8'b01110010: o = 4; + 8'b01110011: o = 5; + 8'b01110100: o = 4; + 8'b01110101: o = 5; + 8'b01110110: o = 5; + 8'b01110111: o = 6; + 8'b01111000: o = 4; + 8'b01111001: o = 5; + 8'b01111010: o = 5; + 8'b01111011: o = 6; + 8'b01111100: o = 5; + 8'b01111101: o = 6; + 8'b01111110: o = 6; + 8'b01111111: o = 7; + + // - 2 + 8'b10000000: o = 1; + 8'b10000001: o = 2; + 8'b10000010: o = 2; + 8'b10000011: o = 3; + 8'b10000100: o = 2; + 8'b10000101: o = 3; + 8'b10000110: o = 3; + 8'b10000111: o = 4; + 8'b10001000: o = 2; + 8'b10001001: o = 3; + 8'b10001010: o = 3; + 8'b10001011: o = 4; + 8'b10001100: o = 3; + 8'b10001101: o = 4; + 8'b10001110: o = 4; + 8'b10001111: o = 5; + + 8'b10010000: o = 2; + 8'b10010001: o = 3; + 8'b10010010: o = 3; + 8'b10010011: o = 4; + 8'b10010100: o = 3; + 8'b10010101: o = 4; + 8'b10010110: o = 4; + 8'b10010111: o = 5; + 8'b10011000: o = 3; + 8'b10011001: o = 4; + 8'b10011010: o = 4; + 8'b10011011: o = 5; + 8'b10011100: o = 4; + 8'b10011101: o = 5; + 8'b10011110: o = 5; + 8'b10011111: o = 6; + + 8'b10100000: o = 2; + 8'b10100001: o = 3; + 8'b10100010: o = 3; + 8'b10100011: o = 4; + 8'b10100100: o = 3; + 8'b10100101: o = 4; + 8'b10100110: o = 4; + 8'b10100111: o = 5; + 8'b10101000: o = 3; 
+ 8'b10101001: o = 4; + 8'b10101010: o = 4; + 8'b10101011: o = 5; + 8'b10101100: o = 4; + 8'b10101101: o = 5; + 8'b10101110: o = 5; + 8'b10101111: o = 6; + + 8'b10110000: o = 3; + 8'b10110001: o = 4; + 8'b10110010: o = 4; + 8'b10110011: o = 5; + 8'b10110100: o = 4; + 8'b10110101: o = 5; + 8'b10110110: o = 5; + 8'b10110111: o = 6; + 8'b10111000: o = 4; + 8'b10111001: o = 5; + 8'b10111010: o = 5; + 8'b10111011: o = 6; + 8'b10111100: o = 5; + 8'b10111101: o = 6; + 8'b10111110: o = 6; + 8'b10111111: o = 7; + + // 44 - 3 + 8'b11000000: o = 2; + 8'b11000001: o = 3; + 8'b11000010: o = 3; + 8'b11000011: o = 4; + 8'b11000100: o = 3; + 8'b11000101: o = 4; + 8'b11000110: o = 4; + 8'b11000111: o = 5; + 8'b11001000: o = 3; + 8'b11001001: o = 4; + 8'b11001010: o = 4; + 8'b11001011: o = 5; + 8'b11001100: o = 4; + 8'b11001101: o = 5; + 8'b11001110: o = 5; + 8'b11001111: o = 6; + + 8'b11010000: o = 3; + 8'b11010001: o = 4; + 8'b11010010: o = 4; + 8'b11010011: o = 5; + 8'b11010100: o = 4; + 8'b11010101: o = 5; + 8'b11010110: o = 5; + 8'b11010111: o = 6; + 8'b11011000: o = 4; + 8'b11011001: o = 5; + 8'b11011010: o = 5; + 8'b11011011: o = 6; + 8'b11011100: o = 5; + 8'b11011101: o = 6; + 8'b11011110: o = 6; + 8'b11011111: o = 7; + + 8'b11100000: o = 3; + 8'b11100001: o = 4; + 8'b11100010: o = 4; + 8'b11100011: o = 5; + 8'b11100100: o = 4; + 8'b11100101: o = 5; + 8'b11100110: o = 5; + 8'b11100111: o = 6; + 8'b11101000: o = 4; + 8'b11101001: o = 5; + 8'b11101010: o = 5; + 8'b11101011: o = 6; + 8'b11101100: o = 5; + 8'b11101101: o = 6; + 8'b11101110: o = 6; + 8'b11101111: o = 7; + + 8'b11110000: o = 4; + 8'b11110001: o = 5; + 8'b11110010: o = 5; + 8'b11110011: o = 6; + 8'b11110100: o = 5; + 8'b11110101: o = 6; + 8'b11110110: o = 6; + 8'b11110111: o = 7; + 8'b11111000: o = 5; + 8'b11111001: o = 6; + 8'b11111010: o = 6; + 8'b11111011: o = 7; + 8'b11111100: o = 6; + 8'b11111101: o = 7; + 8'b11111110: o = 7; + 8'b11111111: o = 8; + + endcase + + +endmodule + + +module cntpop16( + input [15:0] 
i, + output [4:0] o +); + + wire [3:0] cnt1, cnt2; + + cntpop8 u1 (i[ 7:0],cnt1); + cntpop8 u2 (i[15:8],cnt2); + + assign o = cnt1 + cnt2; + +endmodule + + +// 76 slices / 147 LUTs / 19 ns +module cntpop32( + input [31:0] i, + output [5:0] o +); + + wire [3:0] cnt1, cnt2, cnt3, cnt4; + + // cntpop8 results in faster result than cntpop16 + cntpop8 u1 (i[ 7: 0],cnt1); + cntpop8 u2 (i[15: 8],cnt2); + cntpop8 u3 (i[23:16],cnt3); + cntpop8 u4 (i[31:24],cnt4); + + assign o = cnt1+cnt2+cnt3+cnt4; + +endmodule + + +// 156 slices / 300 LUTs / 22.2 ns +module cntpop64( + input [63:0] i, + output [6:0] o +); + + wire [4:0] cnt1, cnt2, cnt3, cnt4; + + cntpop16 u1 (i[15: 0],cnt1); + cntpop16 u2 (i[31:16],cnt2); + cntpop16 u3 (i[47:32],cnt3); + cntpop16 u4 (i[63:48],cnt4); + + assign o = cnt1+cnt2+cnt3+cnt4; + +endmodule + + Index: thor/trunk/FT64v7/rtl/lib/delay.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/delay.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/delay.v (revision 60) @@ -0,0 +1,160 @@ +/* =============================================================== + (C) 2006 Robert Finch + All rights reserved. + rob@birdcomputer.ca + + delay.v + - delays signals by so many clock cycles + + + This source code is free for use and modification for + non-commercial or evaluation purposes, provided this + copyright statement and disclaimer remains present in + the file. + + If you do modify the code, please state the origin and + note that you have modified the code. + + NO WARRANTY. + THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF + ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume + the entire risk of using the Work. + + IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES + WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR + RELATIONSHIP WITH THE AUTHOR. 
+ + IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU + TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE + WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED + TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS + OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK, + AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS + FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED + USE. + +=============================================================== */ + +module delay1 + #(parameter WID = 1) + ( + input clk, + input ce, + input [WID:1] i, + output reg [WID:1] o + ); + + always @(posedge clk) + if (ce) + o <= i; + +endmodule + + +module delay2 + #(parameter WID = 1) + ( + input clk, + input ce, + input [WID:1] i, + output reg [WID:1] o + ); + + + reg [WID:1] r1; + + always @(posedge clk) + if (ce) + r1 <= i; + + always @(posedge clk) + if (ce) + o <= r1; + +endmodule + + +module delay3 + #(parameter WID = 1) + ( + input clk, + input ce, + input [WID:1] i, + output reg [WID:1] o + ); + + reg [WID:1] r1, r2; + + always @(posedge clk) + if (ce) + r1 <= i; + + always @(posedge clk) + if (ce) + r2 <= r1; + + always @(posedge clk) + if (ce) + o <= r2; + +endmodule + +module delay4 + #(parameter WID = 1) + ( + input clk, + input ce, + input [WID-1:0] i, + output reg [WID-1:0] o + ); + + reg [WID:1] r1, r2, r3; + + always @(posedge clk) + if (ce) + r1 <= i; + + always @(posedge clk) + if (ce) + r2 <= r1; + + always @(posedge clk) + if (ce) + r3 <= r2; + + always @(posedge clk) + if (ce) + o <= r3; + +endmodule + + +module delay5 +#(parameter WID = 1) +( + input clk, + input ce, + input [WID:1] i, + output reg [WID:1] o +); + + reg [WID:1] r1, r2, r3, r4; + + always @(posedge clk) + if (ce) r1 <= i; + + always @(posedge clk) + if (ce) r2 <= r1; + + always @(posedge clk) + if (ce) r3 <= r2; + + always @(posedge clk) + if (ce) r4 <= r3; + + always @(posedge clk) + if (ce) o <= r4; + +endmodule + Index: thor/trunk/FT64v7/rtl/lib/edge_det.v 
=================================================================== --- thor/trunk/FT64v7/rtl/lib/edge_det.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/edge_det.v (revision 60) @@ -0,0 +1,49 @@ +// ============================================================================ +// (C) 2007 Robert Finch +// All Rights Reserved. +// +// edge_det.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// Notes: +// +// Edge detector +// This little core detects an edge (positive, negative, and +// either) in the input signal. 
//
// ============================================================================
//
// edge_det
//   Compares the input with a copy sampled on the previous enabled clock
//   edge. The outputs are combinational, so each stays asserted until the
//   sampled copy catches up with the input.
module edge_det(rst, clk, ce, i, pe, ne, ee);
input rst;		// reset
input clk;		// clock
input ce;		// clock enable
input i;		// input signal
output pe;		// positive transition detected
output ne;		// negative transition detected
output ee;		// either edge (positive or negative) transition detected

reg ed;			// input as sampled on the last enabled clock edge
always @(posedge clk)
	if (rst)
		ed <= 1'b0;
	else if (ce)
		ed <= i;

assign pe = ~ed & i;	// positive: was low and is now high
assign ne = ed & ~i;	// negative: was high and is now low
assign ee = ed ^ i;		// either: signal is now opposite to what it was

endmodule

Index: thor/trunk/FT64v7/rtl/lib/ffo.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/ffo.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/ffo.v (revision 60) @@ -0,0 +1,85 @@

// Find first one
// Searching from the most significant bit down, returns the bit index of the
// first set bit. An all-ones output code means no bit is set.
// casez is used rather than casex so that an X in the input is not treated
// as a wildcard.
module ffo6(i, o);
input [5:0] i;
output reg [2:0] o;
always @*
casez(i)
6'b1?????:  o = 3'd5;
6'b01????:  o = 3'd4;
6'b001???:  o = 3'd3;
6'b0001??:  o = 3'd2;
6'b00001?:  o = 3'd1;
6'b000001:  o = 3'd0;
default:    o = 3'd7;
endcase
endmodule

// The wider variants split the input in two and prefer the upper half.
module ffo12(i, o);
input [11:0] i;
output reg [3:0] o;

wire [2:0] o1,o2;
ffo6 u1 (i[11:6],o1);
ffo6 u2 (i[5:0],o2);
always @*
if (o1==3'd7 && o2==3'd7)
    o = 4'd15;
else if (o1==3'd7)
    o = o2;
else
    o = 3'd6 + o1;

endmodule

module ffo24(i, o);
input [23:0] i;
output reg [4:0] o;

wire [3:0] o1,o2;
ffo12 u1 (i[23:12],o1);
ffo12 u2 (i[11:0],o2);
always @*
if (o1==4'd15 && o2==4'd15)
    o = 5'd31;
else if (o1==4'd15)
    o = o2;
else
    o = 4'd12 + o1;

endmodule

module ffo48(i, o);
input [47:0] i;
output reg [5:0] o;

wire [4:0] o1,o2;
ffo24 u1 (i[47:24],o1);
ffo24 u2 (i[23:0],o2);
always @*
if (o1==5'd31 && o2==5'd31)
    o = 6'd63;
else if (o1==5'd31)
    o = o2;
else
    o = 5'd24 + o1;

endmodule

module ffo96(i, o);
input [95:0] i;
output reg [6:0] o;

wire [5:0] o1,o2;
ffo48 u1 (i[95:48],o1);
ffo48 u2 (i[47:0],o2);
always @*
if (o1==6'd63 && o2==6'd63)
    o = 7'd127;
else if (o1==6'd63)
    o = o2;
else
    o = 6'd48 + o1;

endmodule

Index: thor/trunk/FT64v7/rtl/lib/ffz.v =================================================================== --- thor/trunk/FT64v7/rtl/lib/ffz.v (nonexistent) +++ thor/trunk/FT64v7/rtl/lib/ffz.v (revision 60) @@ -0,0 +1,174 @@

// Find first zero
// Searching from the most significant bit down, returns the bit index of the
// first clear bit. An all-ones output code means no bit is clear.
module ffz6(i, o);
input [5:0] i;
output reg [2:0] o;
always @*
casez(i)
6'b0?????:  o = 3'd5;
6'b10????:  o = 3'd4;
6'b110???:  o = 3'd3;
6'b1110??:  o = 3'd2;
6'b11110?:  o = 3'd1;
6'b111110:  o = 3'd0;
default:    o = 3'd7;
endcase
endmodule

module ffz12(i, o);
input [11:0] i;
output reg [3:0] o;

wire [2:0] o1,o2;
ffz6 u1 (i[11:6],o1);
ffz6 u2 (i[5:0],o2);
always @*
if (o1==3'd7 && o2==3'd7)
    o = 4'd15;
else if (o1==3'd7)
    o = o2;
else
    o = 3'd6 + o1;

endmodule

module ffz24(i, o);
input [23:0] i;
output reg [4:0] o;

wire [3:0] o1,o2;
ffz12 u1 (i[23:12],o1);
ffz12 u2 (i[11:0],o2);
always @*
if (o1==4'd15 && o2==4'd15)
    o = 5'd31;
else if (o1==4'd15)
    o = o2;
else
    o = 4'd12 + o1;

endmodule

module ffz48(i, o);
input [47:0] i;
output reg [5:0] o;

wire [4:0] o1,o2;
ffz24 u1 (i[47:24],o1);
ffz24 u2 (i[23:0],o2);
always @*
if (o1==5'd31 && o2==5'd31)
    o = 6'd63;
else if (o1==5'd31)
    o = o2;
else
    o = 5'd24 + o1;

endmodule

module ffz96(i, o);
input [95:0] i;
output reg [6:0] o;

wire [5:0] o1,o2;
ffz48 u1 (i[95:48],o1);
ffz48 u2 (i[47:0],o2);
always @*
if (o1==6'd63 && o2==6'd63)
    o = 7'd127;
else if (o1==6'd63)
    o = o2;
else
    o = 6'd48 + o1;

endmodule

// Find last zero
// Searching from the least significant bit up, returns the bit index of the
// first clear bit. An all-ones output code means no bit is clear.
module flz6(i, o);
input [5:0] i;
output reg [2:0] o;
always @*
casez(i)
6'b?????0:  o = 3'd0;
6'b????01:  o = 3'd1;
6'b???011:  o = 3'd2;
6'b??0111:  o = 3'd3;
6'b?01111:  o = 3'd4;
6'b011111:  o = 3'd5;
default:    o = 3'd7;
endcase

endmodule

// flzN ("find last zero") returns the index of the least significant 0 bit
// of i, or all ones when every bit of i is 1.  Each width is built from two
// half-width instances; the lower half has priority.
module flz12(i, o);
input [11:0] i;
output reg [3:0] o;

wire [2:0] hi, lo;
flz6 u1 (i[11:6],hi);
flz6 u2 (i[5:0],lo);

always @*
if (hi==3'd7 && lo==3'd7)
    o = 4'd15;          // no zero in either half
else if (lo==3'd7)
    o = 4'd6 + hi;      // only the upper half has a zero
else
    o = lo;

endmodule

module flz24(i, o);
input [23:0] i;
output reg [4:0] o;

wire [3:0] hi, lo;
flz12 u1 (i[23:12],hi);
flz12 u2 (i[11:0],lo);

always @*
if (hi==4'd15 && lo==4'd15)
    o = 5'd31;
else if (lo==4'd15)
    o = 5'd12 + hi;
else
    o = lo;

endmodule

module flz48(i, o);
input [47:0] i;
output reg [5:0] o;

wire [4:0] hi, lo;
flz24 u1 (i[47:24],hi);
flz24 u2 (i[23:0],lo);

always @*
if (hi==5'd31 && lo==5'd31)
    o = 6'd63;
else if (lo==5'd31)
    o = 6'd24 + hi;
else
    o = lo;

endmodule

module flz96(i, o);
input [95:0] i;
output reg [6:0] o;

wire [5:0] hi, lo;
flz48 u1 (i[95:48],hi);
flz48 u2 (i[47:0],lo);

always @*
if (hi==6'd63 && lo==6'd63)
    o = 7'd127;
else if (lo==6'd63)
    o = 7'd48 + hi;
else
    o = lo;

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/lfsr.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/lfsr.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/lfsr.v (revision 60)
// @@ -0,0 +1,88 @@
// ============================================================================
//        __
//   \\__/ o\    (C) 2003-2018  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch@finitron.ca
//       ||
//
//  lfsr.v
//  - linear feedback shift register
//  - parameterized
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
// ============================================================================
//
// Parameters:
//   WID     - number of output bits (tap table covers 3..31)
//   RST_VAL - value loaded into the shift register on reset
// Ports:
//   rst - synchronous reset, clk - clock, ce - clock enable
//   cyc - XORed into the feedback term
//   o   - bits WID..1 of the shift register
//
// The shift register c is WID+1 bits wide; the XNOR feedback bit is shifted
// in at c[0] and c[WID] is always one of the feedback terms.  The tap word
// holds up to three further bit indices, one per byte; an index of 0 selects
// c[0], and unused entries come in pairs so they cancel in the XOR.
//
// NOTE(review): because the feedback enters at c[0] rather than c[1], the
// recurrence spans WID+1 stages - confirm that the resulting sequence length
// is what callers expect.
module lfsr(rst, clk, ce, cyc, o);
    parameter WID=17;
    parameter RST_VAL=0;
    localparam MSB=WID-1;

    input rst;
    input clk;
    input ce;
    input cyc;          // shorten the feedback cycle
    output [WID:1] o;

    // Tap selection for a given register width.
    // The entries for widths 3 and 4 previously named the top bit itself
    // (3 and 4), which cancelled against c[WID] and reduced the feedback to
    // ~cyc; they now use the second tap of the usual maximal-length table
    // (3,2 and 4,3), consistent with every other entry.
    function [23:0] taps;
    input integer w;
    begin
        case (w)
        3:      taps = 24'h00_0002;
        4:      taps = 24'h00_0003;
        5:      taps = 24'h00_0003;
        6:      taps = 24'h00_0005;
        7:      taps = 24'h00_0006;
        8:      taps = 24'h06_0504;
        9:      taps = 24'h00_0005;
        10:     taps = 24'h00_0007;
        11:     taps = 24'h00_0009;
        12:     taps = 24'h06_0401;
        13:     taps = 24'h04_0301;
        14:     taps = 24'h05_0301;
        15:     taps = 24'h00_000E;
        16:     taps = 24'h0F_0D04;
        17:     taps = 24'h00_000E;
        18:     taps = 24'h00_000B;
        19:     taps = 24'h06_0201;
        20:     taps = 24'h00_0011;
        21:     taps = 24'h00_0013;
        22:     taps = 24'h00_0015;
        23:     taps = 24'h00_0012;
        24:     taps = 24'h17_1611;
        25:     taps = 24'h00_0016;
        26:     taps = 24'h06_0201;
        27:     taps = 24'h05_0201;
        28:     taps = 24'h00_0019;
        29:     taps = 24'h00_001B;
        30:     taps = 24'h06_0401;
        31:     taps = 24'h00_001C;
        default:
                taps = 24'h00_0000;
        endcase
    end
    endfunction

    // The tap word depends only on WID, so it is an elaboration-time
    // constant rather than a register reloaded on every clock.
    localparam [23:0] TAPS = taps(WID);
    wire [23:0] n = TAPS;

    reg [WID:0] c;
    assign o = c[WID:1];

    always @(posedge clk)
        if (rst)
            c <= RST_VAL;
        else if (ce)
            c <= {c[MSB:0],~(c[WID]^c[n[23:16]]^c[n[15:8]]^c[n[7:0]]^cyc)};

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/ready_gen.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/ready_gen.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/ready_gen.v (revision 60)
// @@ -0,0 +1,45 @@
// ============================================================================
//        __
//
//   \\__/ o\    (C) 2018  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch@opencores.org
//       ||
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ready_gen.v
// - generates a ready signal after a specified number of clocks.
// - this is not a simple delay line. Output is set low as soon as the
//   input goes low.
//
// ============================================================================
//
// Every stage is ANDed with the live input, so a low input clears the whole
// chain on the next enabled clock.  The output goes high once the input has
// been seen high on STAGES+1 consecutive enabled clock edges.
module ready_gen(clk_i, ce_i, i, o);
input clk_i;
input ce_i;
input i;
output reg o = 1'd0;
parameter STAGES = 3;

integer k;
reg rdy[STAGES-1:0];        // rdy[k] = input has been high for k+1 clocks
always @(posedge clk_i)
if (ce_i) begin
    // non-blocking updates: every right-hand side uses the pre-edge values
    o <= i & rdy[STAGES-1];
    for (k = STAGES-1; k > 0; k = k - 1)
        rdy[k] <= i & rdy[k-1];
    rdy[0] <= i;
end

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/redor128.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/redor128.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/redor128.v (revision 60)
// @@ -0,0 +1,141 @@
//
// redor128 - selectable-range reduction OR: o = |b[a:0].
// Bits above position a are masked off before the reduction; the mask is
// the complement of "all ones shifted left by a+1".
module redor128
(
    input [6:0] a,
    input [127:0] b,
    output reg o
);

    always @(a or b)
        o = |(b & ~(({128{1'b1}} << 1) << a));

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/redor32.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/redor32.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/redor32.v (revision 60)
// @@ -0,0 +1,44 @@
//
// redor32 - selectable-range reduction OR: o = |b[a:0].
module redor32
(
    input [4:0] a,
    input [31:0] b,
    output reg o
);

    always @(a or b)
        o = |(b & ~(({32{1'b1}} << 1) << a));

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/redor64.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/redor64.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/redor64.v (revision 60)
// @@ -0,0
// +1,76 @@
//
// redor64 - selectable-range reduction OR: o = |b[a:0].
// Bits above position a are masked off before the reduction; the mask is
// the complement of "all ones shifted left by a+1".
module redor64
(
    input [5:0] a,
    input [63:0] b,
    output reg o
);

    wire [63:0] keep = ~(({64{1'b1}} << 1) << a);   // ones in bits a..0

    always @*
        o = |(b & keep);

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/redor80.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/redor80.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/redor80.v (revision 60)
// @@ -0,0 +1,93 @@
//
// redor80 - selectable-range reduction OR: o = |b[a:0] for a = 0..79.
//
// The selector is 7 bits wide but b has only 80 bits.  The previous case
// statement listed a = 0..79 only, so for a = 80..127 nothing was assigned
// and a latch was inferred on o.  The mask form below is purely
// combinational: for a >= 79 the shifted value is zero, the mask is all
// ones, and o = |b[79:0].
module redor80
(
    input [6:0] a,
    input [79:0] b,
    output reg o
);

    wire [79:0] keep = ~(({80{1'b1}} << 1) << a);   // ones in bits min(a,79)..0

    always @*
        o = |(b & keep);

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/redor96.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/redor96.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/redor96.v (revision 60)
// @@ -0,0
// +1,109 @@
//
// redor96 - selectable-range reduction OR: o = |b[a:0] for a = 0..95.
//
// The selector is 7 bits wide but b has only 96 bits.  The previous case
// statement listed a = 0..95 only, so for a = 96..127 nothing was assigned
// and a latch was inferred on o.  The mask form below is purely
// combinational: for a >= 95 the shifted value is zero, the mask is all
// ones, and o = |b[95:0].
module redor96
(
    input [6:0] a,
    input [95:0] b,
    output reg o
);

    wire [95:0] keep = ~(({96{1'b1}} << 1) << a);   // ones in bits min(a,95)..0

    always @*
        o = |(b & keep);

endmodule

// Index: thor/trunk/FT64v7/rtl/lib/round_robin.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/round_robin.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/round_robin.v (revision 60)
// @@ -0,0 +1,85 @@
// ============================================================================
//        __
//   \\__/ o\    (C) 2005-2018  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch@finitron.ca
//       ||
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ============================================================================
//
// round_robin
// - rotating-priority selector for N request lines
// - the request vector is rotated right by `rot`, a grant is formed in the
//   rotated domain and rotated back left by `rot` to give the next select
//   value; on each enabled clock where no currently selected line is locked,
//   `sel` takes that value and `rot` advances by `amt`
//
// Clocked state (rot, sel) is updated with non-blocking assignments: the two
// clocked blocks read each other's state on the same edge, so blocking
// assignments there would make the result depend on simulator scheduling.
// Combinational blocks use blocking assignments.
module round_robin(rst, clk, ce, req, lock, sel);
parameter N=12;
localparam B=$clog2(N);
input rst;              // reset
input clk;              // clock
input ce;               // clock enable
input [N-1:0] req;      // request
input [N-1:0] lock;     // lock selection
output [N-1:0] sel;     // select

integer n;
reg [N-1:0] sel;

reg [B-1:0] rot;            // forward rotate applied to request lines
reg [B-1:0] amt;            // how much to rotate forward after a grant
reg [N-1:0] rgrnt;          // rotated value of grant
wire [N-1:0] nextGrant;     // unrotated value of grant
wire [N-1:0] rreq;          // rotated request

// rotate the request lines to set priority
// NOTE(review): rot is B bits wide and wraps at 2**B, not at N; when N is
// not a power of two rot can exceed N-1 and this shift then discards
// request bits - confirm this is intended.
wire [2*N-1:0] rreq1 = {req,{N{1'b0}}} >> rot;
assign rreq = rreq1[2*N-1:N]|rreq1[N-1:0];

// rotate the rotated grant value back into place
wire [2*N-1:0] rgnt1 = {{N{1'b0}},rgrnt} << rot;
assign nextGrant = rgnt1[2*N-1:N]|rgnt1[N-1:0];

// If there is a request, determine how far the request
// lines should be rotated when there is a grant.
// The loop runs from the top bit down, so the last assignment that takes
// effect is the lowest-numbered set bit of rreq (0 when nothing is set).
always @*
begin
    amt = 0;
    for (n = N-1; n >= 0; n = n - 1)
        if (rreq[n])
            amt = n;
end

// set grant (if request present) based on which request
// was honored.
// NOTE(review): amt is unsigned, so for amt == 0 the term (amt-1) is
// evaluated as a large unsigned value before the modulo rather than as -1;
// the shift distance is then (2**32-1) % N, which equals N-1 only when N is
// a power of two - confirm the intended grant position.
always @*
    rgrnt = {{N{1'b0}},|rreq} << ((amt-1) % N);

// rotate the priorities on a grant
always @(posedge clk)
if (rst)
    rot <= 0;
else if (ce) begin
    if (!(lock & sel))
        rot <= rot + amt;
end

// Assign the next owner, if bus isn't locked
always @(posedge clk)
if (rst)
    sel <= 0;
else if (ce) begin
    if (!(lock & sel))
        sel <= nextGrant;
end

endmodule


// Index: thor/trunk/FT64v7/rtl/lib/vtdl.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/lib/vtdl.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/lib/vtdl.v (revision 60)
// @@ -0,0 +1,59 @@
//=============================================================================
//  (C) 2007,2012  Robert Finch, Stratford
//  robfinch@opencores.org
//
//
//  vtdl - variable tap delay line
//      (dynamic shift register)
//
//
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
//  Notes:
//
//  This module acts like a clocked delay line with a variable tap.
//  Miscellaneous usage in rate control circuitry such as fifo's.
//  Capable of delaying a signal bus.
//  Signal bus width is specified with the WID parameter.
//
//  Verilog 1995
//  Ref: Webpack9.1i xc3s1000-4ft256
//  4 slices / 8 LUTs / < 10ns
//=============================================================================
//
// Parameters:
//   WID - width of the delayed bus
//   DEP - number of stages in the delay line
// Ports:
//   clk - clock, ce - shift enable
//   a   - tap select (stage 0 holds the most recently shifted-in value)
//   d   - data in, q - data out from the selected stage (combinational read)
module vtdl(clk, ce, a, d, q);
parameter WID = 8;
parameter DEP = 16;
// index of the top address bit needed to select one of DEP stages
localparam AMSB = (DEP > 64) ? 6 :
                  (DEP > 32) ? 5 :
                  (DEP > 16) ? 4 :
                  (DEP > 8)  ? 3 :
                  (DEP > 4)  ? 2 :
                  (DEP > 2)  ? 1 : 0;
input clk;
input ce;
input [AMSB:0] a;
input [WID-1:0] d;
output [WID-1:0] q;

reg [WID-1:0] m [DEP-1:0];  // delay line storage, m[0] is the newest entry
integer k;

// On every enabled clock each stage takes the value of the stage before it
// and stage 0 takes the new input.  All updates are non-blocking, so the
// order in which they are written does not matter.
always @(posedge clk)
    if (ce) begin
        m[0] <= d;
        for (k = DEP-1; k >= 1; k = k - 1)
            m[k] <= m[k-1];
    end

assign q = m[a];

endmodule

// Index: thor/trunk/FT64v7/rtl/twoway/FT64.v
// ===================================================================
// --- thor/trunk/FT64v7/rtl/twoway/FT64.v (nonexistent)
// +++ thor/trunk/FT64v7/rtl/twoway/FT64.v (revision 60)
// @@ -0,0 +1,10642 @@
// ============================================================================
//        __
//   \\__/ o\    (C) 2017-2018  Robert Finch, Waterloo
//    \  __ /    All rights reserved.
//     \/_//     robfinch@finitron.ca
//       ||
//
//  FT64.v
//  Features include:
//  - 16/32/48 bit instructions
//  - vector instruction set,
//  - SIMD instructions
//  - data width of 64 bits
//  - 32 general purpose registers
//  - 32 floating point registers
//  - 32 vector registers, length 63
//  - powerful branch prediction
//  - branch target buffer (BTB)
//  - return address predictor (RSB)
//  - bus interface unit
//  - instruction and data caches
//  - fine-grained simultaneous multi-threading (SMT)
//  - bus randomizer on exceptional conditions
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
+// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// Approx 41,000 LUTs. 66,000 LC's. +// ============================================================================ +// +`include "FT64_config.vh" +`include "FT64_defines.vh" + +module FT64(hartid, rst, clk_i, clk4x, tm_clk_i, irq_i, vec_i, bte_o, cti_o, bok_i, cyc_o, stb_o, ack_i, err_i, we_o, sel_o, adr_o, dat_o, dat_i, + ol_o, pcr_o, pcr2_o, pkeys_o, icl_o, sr_o, cr_o, rbi_i, signal_i); +input [63:0] hartid; +input rst; +input clk_i; +input clk4x; +input tm_clk_i; +input [3:0] irq_i; +input [7:0] vec_i; +output reg [1:0] bte_o; +output reg [2:0] cti_o; +input bok_i; +output cyc_o; +output reg stb_o; +input ack_i; +input err_i; +output we_o; +output reg [7:0] sel_o; +output [`ABITS] adr_o; +output reg [63:0] dat_o; +input [63:0] dat_i; +output reg [1:0] ol_o; +output [31:0] pcr_o; +output [63:0] pcr2_o; +output [63:0] pkeys_o; +output reg icl_o; +output reg cr_o; +output reg sr_o; +input rbi_i; +input [31:0] signal_i; + +parameter TM_CLKFREQ = 20000000; +parameter QENTRIES = `QENTRIES; +parameter RSTPC = 64'hFFFFFFFFFFFC0100; +parameter BRKPC = 64'hFFFFFFFFFFFC0000; +`ifdef SUPPORT_SMT +parameter PREGS = 256; // number of physical registers - 1 +parameter AREGS = 256; // number of architectural registers +`else +parameter PREGS = 128; +parameter AREGS = 128; +`endif +parameter RBIT = 11; +parameter DEBUG = 1'b0; +parameter NMAP = QENTRIES; +parameter BRANCH_PRED = 1'b0; +parameter SUP_TXE = 1'b0; +`ifdef SUPPORT_VECTOR +parameter SUP_VECTOR = 1'b1; +`else +parameter SUP_VECTOR = 1'b0; +`endif +parameter DBW = 64; +parameter ABW = 64; +parameter AMSB = ABW-1; +parameter NTHREAD = 1; +reg 
[7:0] i; +integer n; +integer j, k; +genvar g, h; +parameter TRUE = 1'b1; +parameter FALSE = 1'b0; +// Memory access sizes +parameter byt = 3'd0; +parameter wyde = 3'd1; +parameter tetra = 3'd2; +parameter octa = 3'd3; +// IQ states +parameter IQS_INVALID = 3'd0; +parameter IQS_QUEUED = 3'd1; +parameter IQS_OUT = 3'd2; +parameter IQS_AGEN = 3'd3; +parameter IQS_MEM = 3'd4; +parameter IQS_DONE = 3'd5; +parameter IQS_CMT = 3'd6; + +wire clk; +//BUFG uclkb1 +//( +// .I(clk_i), +// .O(clk) +//); +assign clk = clk_i; + +wire exv_i; +wire rdv_i; +wire wrv_i; +reg [ABW-1:0] vadr; +reg cyc; +reg we; + +wire dc_ack; +wire acki = ack_i|dc_ack; +wire [RBIT:0] Ra0, Ra1, Ra2; +wire [RBIT:0] Rb0, Rb1, Rb2; +wire [RBIT:0] Rc0, Rc1, Rc2; +wire [RBIT:0] Rt0, Rt1, Rt2; +wire [63:0] rfoa0,rfob0,rfoc0,rfoc0a,rfot0; +wire [63:0] rfoa1,rfob1,rfoc1,rfoc1a,rfot1; +wire [63:0] rfoa2,rfob2,rfoc2,rfoc2a,rfot2; +`ifdef SUPPORT_SMT +wire [7:0] Ra0s = {Ra0[7:0]}; +wire [7:0] Ra1s = {Ra1[7:0]}; +wire [7:0] Ra2s = {Ra2[7:0]}; +wire [7:0] Rb0s = {Rb0[7:0]}; +wire [7:0] Rb1s = {Rb1[7:0]}; +wire [7:0] Rb2s = {Rb2[7:0]}; +wire [7:0] Rc0s = {Rc0[7:0]}; +wire [7:0] Rc1s = {Rc1[7:0]}; +wire [7:0] Rc2s = {Rc2[7:0]}; +wire [7:0] Rt0s = {Rt0[7:0]}; +wire [7:0] Rt1s = {Rt1[7:0]}; +wire [7:0] Rt2s = {Rt2[7:0]}; +`else +wire [6:0] Ra0s = {Ra0[7],Ra0[5:0]}; +wire [6:0] Ra1s = {Ra1[7],Ra1[5:0]}; +wire [6:0] Ra2s = {Ra2[7],Ra2[5:0]}; +wire [6:0] Rb0s = {Rb0[7],Rb0[5:0]}; +wire [6:0] Rb1s = {Rb1[7],Rb1[5:0]}; +wire [6:0] Rb2s = {Rb2[7],Rb2[5:0]}; +wire [6:0] Rc0s = {Rc0[7],Rc0[5:0]}; +wire [6:0] Rc1s = {Rc1[7],Rc1[5:0]}; +wire [6:0] Rc2s = {Rc2[7],Rc2[5:0]}; +wire [6:0] Rt0s = {Rt0[7],Rt0[5:0]}; +wire [6:0] Rt1s = {Rt1[7],Rt1[5:0]}; +wire [6:0] Rt2s = {Rt2[7],Rt2[5:0]}; +/* +wire [5:0] Ra0s = {Ra0[5:0]}; +wire [5:0] Ra1s = {Ra1[5:0]}; +wire [5:0] Rb0s = {Rb0[5:0]}; +wire [5:0] Rb1s = {Rb1[5:0]}; +wire [5:0] Rc0s = {Rc0[5:0]}; +wire [5:0] Rc1s = {Rc1[5:0]}; +wire [5:0] Rt0s = {Rt0[5:0]}; +wire [5:0] Rt1s = 
{Rt1[5:0]}; +*/ +`endif + +`ifdef SUPPORT_PREDICATION +reg [3:0] pregs [0:1023]; +`endif + +reg [63:0] wbrcd; +wire [5:0] brgs; +`ifdef SUPPORT_SEGMENTATION +reg [23:0] currentCSSelector; +reg [63:0] zs_base [0:63]; +reg [63:0] ds_base [0:63]; +reg [63:0] es_base [0:63]; +reg [63:0] fs_base [0:63]; +reg [63:0] gs_base [0:63]; +reg [63:0] hs_base [0:63]; +reg [63:0] ss_base [0:63]; +reg [63:0] cs_base [0:63]; +reg [63:0] zsx_base; +reg [63:0] dsx_base; +reg [63:0] esx_base; +reg [63:0] fsx_base; +reg [63:0] gsx_base; +reg [63:0] hsx_base; +reg [63:0] ssx_base; +reg [63:0] csx_base; +reg [63:0] zs_lb [0:63]; +reg [63:0] ds_lb [0:63]; +reg [63:0] es_lb [0:63]; +reg [63:0] fs_lb [0:63]; +reg [63:0] gs_lb [0:63]; +reg [63:0] hs_lb [0:63]; +reg [63:0] ss_lb [0:63]; +reg [63:0] cs_lb [0:63]; +reg [63:0] zslb; +reg [63:0] dslb; +reg [63:0] eslb; +reg [63:0] fslb; +reg [63:0] gslb; +reg [63:0] hslb; +reg [63:0] sslb; +reg [63:0] cslb; +reg [63:0] zs_ub [0:63]; +reg [63:0] ds_ub [0:63]; +reg [63:0] es_ub [0:63]; +reg [63:0] fs_ub [0:63]; +reg [63:0] gs_ub [0:63]; +reg [63:0] hs_ub [0:63]; +reg [63:0] ss_ub [0:63]; +reg [63:0] cs_ub [0:63]; +reg [63:0] zsub; +reg [63:0] dsub; +reg [63:0] esub; +reg [63:0] fsub; +reg [63:0] gsub; +reg [63:0] hsub; +reg [63:0] ssub; +reg [63:0] csub; +reg [23:0] zs_sel [0:63]; +reg [23:0] ds_sel [0:63]; +reg [23:0] es_sel [0:63]; +reg [23:0] fs_sel [0:63]; +reg [23:0] gs_sel [0:63]; +reg [23:0] hs_sel [0:63]; +reg [23:0] ss_sel [0:63]; +reg [23:0] cs_sel [0:63]; +reg [15:0] zs_acr [0:63]; +reg [15:0] ds_acr [0:63]; +reg [15:0] es_acr [0:63]; +reg [15:0] fs_acr [0:63]; +reg [15:0] gs_acr [0:63]; +reg [15:0] hs_acr [0:63]; +reg [15:0] ss_acr [0:63]; +reg [15:0] cs_acr [0:63]; +initial begin + for (n = 0; n < 64; n = n + 1) begin + zs_base[n] <= 64'h0; + ds_base[n] <= 64'h0; + es_base[n] <= 64'h0; + fs_base[n] <= 64'h0; + gs_base[n] <= 64'h0; + hs_base[n] <= 64'h0; + ss_base[n] <= 64'h0; + cs_base[n] <= 64'h0; + zs_lb[n] <= 64'h0; + ds_lb[n] <= 
64'h0; + es_lb[n] <= 64'h0; + fs_lb[n] <= 64'h0; + gs_lb[n] <= 64'h0; + hs_lb[n] <= 64'h0; + ss_lb[n] <= 64'h0; + cs_lb[n] <= 64'h0; + zs_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + ds_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + es_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + fs_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + gs_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + hs_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + ss_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + cs_ub[n] <= 64'hFFFFFFFFFFFFFFFF; + zs_sel[n] <= 24'h0; + ds_sel[n] <= 24'h0; + es_sel[n] <= 24'h0; + fs_sel[n] <= 24'h0; + gs_sel[n] <= 24'h0; + hs_sel[n] <= 24'h0; + ss_sel[n] <= 24'h0; + cs_sel[n] <= 24'h0; + zs_acr[n] <= 16'h8000; + ds_acr[n] <= 16'h9200; + es_acr[n] <= 16'h8000; + fs_acr[n] <= 16'h8000; + gs_acr[n] <= 16'h8000; + hs_acr[n] <= 16'h8000; + ss_acr[n] <= 16'h9600; + cs_acr[n] <= 16'h9A00; + end +end +always @(posedge clk_i) +begin + zsx_base <= zs_base[brgs]; + dsx_base <= ds_base[brgs]; + esx_base <= es_base[brgs]; + fsx_base <= fs_base[brgs]; + gsx_base <= gs_base[brgs]; + hsx_base <= hs_base[brgs]; + ssx_base <= ss_base[brgs]; + csx_base <= cs_base[brgs]; + zsub <= zs_ub[brgs]; + dsub <= ds_ub[brgs]; + esub <= es_ub[brgs]; + fsub <= fs_ub[brgs]; + gsub <= gs_ub[brgs]; + hsub <= hs_ub[brgs]; + ssub <= ss_ub[brgs]; + csub <= cs_ub[brgs]; + zslb <= zs_lb[brgs]; + dslb <= ds_lb[brgs]; + eslb <= es_lb[brgs]; + fslb <= fs_lb[brgs]; + gslb <= gs_lb[brgs]; + hslb <= hs_lb[brgs]; + sslb <= ss_lb[brgs]; + cslb <= cs_lb[brgs]; + currentCSSelector <= cs_sel[brgs]; +end +`endif +`ifdef SUPPORT_BBMS +reg [15:0] thrd_handle [0:63]; +reg [63:0] prg_base [0:63]; +reg [63:0] prg_limit [0:63]; +reg [63:0] en_barrier [0:63]; // environment bound +reg [63:0] cl_barrier [0:63]; +reg [63:0] cu_barrier [0:63]; +reg [63:0] ro_barrier [0:63]; +reg [63:0] dl_barrier [0:63]; +reg [63:0] du_barrier [0:63]; +reg [63:0] sl_barrier [0:63]; +reg [63:0] su_barrier [0:63]; +reg [7:0] env_priv [0:63]; +reg [7:0] cod_priv [0:63]; +reg [7:0] rdo_priv [0:63]; +reg [7:0] dat_priv [0:63]; +reg [7:0] 
stk_priv [0:63]; +reg [15:0] th; +reg [63:0] pb; +reg [63:0] cbl; +reg [63:0] cbu; +reg [63:0] ro; +reg [63:0] dbl; +reg [63:0] dbu; +reg [63:0] sbl; +reg [63:0] sbu; +reg [63:0] en; +reg [7:0] env_pl; +reg [7:0] cod_pl; +reg [7:0] rdo_pl; +reg [7:0] dat_pl; +reg [7:0] stk_pl; +initial begin + for (n = 0; n < 64; n = n + 1) + begin + thrd_handle[n] <= 1'd0; + prg_base[n] <= 1'd0; + cl_barrier[n] <= 1'd0; + cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + ro_barrier[n] <= 1'd0; + dl_barrier[n] <= 1'd0; + du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + sl_barrier[n] <= 1'd0; + su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + env_priv[n] <= 8'h00; + cod_priv[n] <= 8'h00; + rdo_priv[n] <= 8'h00; + dat_priv[n] <= 8'h00; + stk_priv[n] <= 8'h00; + end +end +always @(posedge clk_i) +begin + th <= thrd_handle[brgs]; + pb <= prg_base[brgs]; + cbl <= cl_barrier[brgs]; + cbu <= cu_barrier[brgs]; + ro <= ro_barrier[brgs]; + dbl <= dl_barrier[brgs]; + dbu <= du_barrier[brgs]; + sbl <= sl_barrier[brgs]; + sbu <= su_barrier[brgs]; + en <= en_barrier[brgs]; + env_pl <= env_priv[brgs]; + cod_pl <= cod_priv[brgs]; + rdo_pl <= rdo_priv[brgs]; + dat_pl <= dat_priv[brgs]; + stk_pl <= stk_priv[brgs]; +end +//wire [23:0] currentPrgSelector = prg_selector[brgs]; +`else +wire [63:0] pb = 1'd0; +wire [63:0] cbl = 1'd0; +wire [63:0] cbu = 64'hFFFFFFFFFFFFFFFF; +wire [63:0] ro = 1'd0; +wire [63:0] dbl = 1'd0; +wire [63:0] dbu = 64'hFFFFFFFFFFFFFFFF; +wire [63:0] sbl = 1'd0; +wire [63:0] sbu = 64'hFFFFFFFFFFFFFFFF; +wire [63:0] en = 1'd0; +wire [7:0] env_pl = 8'h00; +wire [7:0] cod_pl = 8'h00; +wire [7:0] rdo_pl = 8'h00; +wire [7:0] dat_pl = 8'h00; +wire [7:0] stk_pl = 8'h00; +`endif + +reg [PREGS-1:0] rf_v; +reg [`QBITSP1] rf_source[0:AREGS-1]; +reg [15:0] prf_v; +reg [`QBITSP1] prf_source[0:15]; +initial begin +for (n = 0; n < AREGS; n = n + 1) + rf_source[n] = 1'b0; +for (n = 0; n < 16; n = n + 1) + prf_source[n] <= 1'b0; +end +wire [`ABITS] pc0a; +wire [`ABITS] pc1a; +wire [`ABITS] pc2a; +`ifdef 
SUPPORT_SEGMENTATION +wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {csx_base[50:0],13'd0} + pc0a[47:0]; +wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {csx_base[50:0],13'd0} + pc1a[47:0]; +wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {csx_base[50:0],13'd0} + pc2a[47:0]; +`else +`ifdef SUPPORT_BBMS +wire [`ABITS] pc0 = (pc0a[47:40]==8'hFF||ol==2'b00) ? pc0a : {pb[50:0],13'd0} + pc0a[47:0]; +wire [`ABITS] pc1 = (pc1a[47:40]==8'hFF||ol==2'b00) ? pc1a : {pb[50:0],13'd0} + pc1a[47:0]; +wire [`ABITS] pc2 = (pc2a[47:40]==8'hFF||ol==2'b00) ? pc2a : {pb[50:0],13'd0} + pc2a[47:0]; +`else +wire [`ABITS] pc0 = pc0a; +wire [`ABITS] pc1 = pc1a; +wire [`ABITS] pc2 = pc2a; +`endif +`endif + +reg excmiss; +reg [`ABITS] excmisspc; +reg excthrd; +reg exception_set; +reg rdvq; // accumulated read violation +reg errq; // accumulated err_i input status +reg exvq; + +// Vector +reg [5:0] vqe0, vqe1, vqe2; // vector element being queued +reg [5:0] vqet0, vqet1, vqet2; +reg [7:0] vl; // vector length +reg [63:0] vm [0:7]; // vector mask registers +reg [1:0] m2; + +reg [31:0] wb_merges; +// CSR's +reg [63:0] cr0; +wire snr = cr0[17]; // sequence number reset +wire dce = cr0[30]; // data cache enable +wire bpe = cr0[32]; // branch predictor enable +wire wbm = cr0[34]; +wire sple = cr0[35]; // speculative load enable +wire ctgtxe = cr0[33]; +`ifdef SUPPORT_PREDICATION +wire pred_on = cr0[36]; // predicated execution mode on +`else +wire pred_on = 1'b0; +`endif +reg [63:0] pmr; +wire id1_available = pmr[0]; +wire id2_available = pmr[1]; +wire id3_available = pmr[2]; +wire alu0_available = pmr[8]; +wire alu1_available = pmr[9]; +wire fpu1_available = pmr[16]; +wire fpu2_available = pmr[17]; +wire mem1_available = pmr[24]; +wire mem2_available = pmr[25]; +wire mem3_available = pmr[26]; +wire fcu_available = pmr[32]; +// Simply setting this flag to zero should strip out almost all the logic +// associated SMT. 
+`ifdef SUPPORT_SMT +wire thread_en = cr0[16]; +`else +wire thread_en = 1'b0; +`endif +wire vechain = cr0[18]; +reg [39:0] iq_ctr; +reg [39:0] irq_ctr; // count of number of interrupts +reg [39:0] bm_ctr; // branch miss counter +reg [39:0] icl_ctr; // instruction cache load counter + +reg [7:0] fcu_timeout; +reg [63:0] tick; +reg [63:0] wc_time; +reg [31:0] pcr; +reg [63:0] pcr2; +assign pcr_o = pcr; +assign pcr2_o = pcr2; +reg [63:0] aec; +reg [15:0] cause[0:15]; +`ifdef SUPPORT_SMT +reg [`ABITS] epc [0:NTHREAD]; +reg [`ABITS] epc0 [0:NTHREAD]; +reg [`ABITS] epc1 [0:NTHREAD]; +reg [`ABITS] epc2 [0:NTHREAD]; +reg [`ABITS] epc3 [0:NTHREAD]; +reg [`ABITS] epc4 [0:NTHREAD]; +reg [`ABITS] epc5 [0:NTHREAD]; +reg [`ABITS] epc6 [0:NTHREAD]; +reg [`ABITS] epc7 [0:NTHREAD]; +reg [`ABITS] epc8 [0:NTHREAD]; // exception pc and stack +reg [63:0] mstatus [0:NTHREAD]; // machine status +wire [3:0] im = mstatus[0][3:0]; +wire [1:0] ol [0:NTHREAD]; +wire [1:0] dl [0:NTHREAD]; +assign ol[0] = mstatus[0][5:4]; // operating level +assign dl[0] = mstatus[0][21:20]; +wire [7:0] cpl [0:NTHREAD]; +assign cpl[0] = mstatus[0][13:6]; // current privilege level +wire [5:0] rgs [0:NTHREAD]; +assign ol[1] = mstatus[1][5:4]; // operating level +assign cpl[1] = mstatus[1][13:6]; // current privilege level +assign dl[1] = mstatus[1][21:20]; +wire [7:0] ASID = mstatus[0][47:40]; +reg [15:0] ol_stack [0:NTHREAD]; +reg [15:0] dl_stack [0:NTHREAD]; +reg [31:0] im_stack [0:NTHREAD]; +reg [63:0] pl_stack [0:NTHREAD]; +reg [63:0] rs_stack [0:NTHREAD]; +reg [63:0] brs_stack [0:NTHREAD]; +reg [63:0] fr_stack [0:NTHREAD]; +assign rgs[0] = rs_stack[0][5:0]; +assign rgs[1] = rs_stack[1][5:0]; +wire mprv = mstatus[0][55]; +wire [5:0] fprgs = mstatus[0][25:20]; +//assign ol_o = mprv ? 
ol_stack[0][2:0] : ol[0]; +wire vca = mstatus[0][32]; // vector chaining active +`else +reg [`ABITS] epc ; +reg [`ABITS] epc0 ; +reg [`ABITS] epc1 ; +reg [`ABITS] epc2 ; +reg [`ABITS] epc3 ; +reg [`ABITS] epc4 ; +reg [`ABITS] epc5 ; +reg [`ABITS] epc6 ; +reg [`ABITS] epc7 ; +reg [`ABITS] epc8 ; // exception pc and stack +reg [63:0] mstatus ; // machine status +wire [3:0] im = mstatus[3:0]; +wire [1:0] ol ; +wire [1:0] dl; +assign ol = mstatus[5:4]; // operating level +assign dl = mstatus[21:20]; +wire [7:0] cpl ; +assign cpl = mstatus[13:6]; // current privilege level +wire [5:0] rgs ; +reg [15:0] ol_stack ; +reg [15:0] dl_stack ; +reg [31:0] im_stack ; +reg [63:0] pl_stack ; +reg [63:0] rs_stack ; +reg [63:0] brs_stack ; +reg [63:0] fr_stack ; +assign rgs = rs_stack[5:0]; +assign brgs = brs_stack[5:0]; +wire mprv = mstatus[55]; +wire [7:0] ASID = mstatus[47:40]; +wire [5:0] fprgs = mstatus[25:20]; +//assign ol_o = mprv ? ol_stack[2:0] : ol; +wire vca = mstatus[32]; // vector chaining active +`endif +reg [63:0] keys; +assign pkeys_o = keys; +reg [63:0] tcb; +reg [47:0] bad_instr[0:15]; +reg [`ABITS] badaddr[0:15]; +reg [`ABITS] tvec[0:7]; +reg [63:0] sema; +reg [63:0] vm_sema; +reg [63:0] cas; // compare and swap +reg [63:0] ve_hold; +reg isCAS, isAMO, isInc, isSpt, isRMW; +reg [`QBITS] casid; +reg [4:0] regLR = 5'd29; + + +reg [2:0] fp_rm; +reg fp_inexe; +reg fp_dbzxe; +reg fp_underxe; +reg fp_overxe; +reg fp_invopxe; +reg fp_giopxe; +reg fp_nsfp = 1'b0; +reg fp_fractie; +reg fp_raz; + +reg fp_neg; +reg fp_pos; +reg fp_zero; +reg fp_inf; + +reg fp_inex; // inexact exception +reg fp_dbzx; // divide by zero exception +reg fp_underx; // underflow exception +reg fp_overx; // overflow exception +reg fp_giopx; // global invalid operation exception +reg fp_sx; // summary exception +reg fp_swtx; // software triggered exception +reg fp_gx; +reg fp_invopx; + +reg fp_infzerox; +reg fp_zerozerox; +reg fp_subinfx; +reg fp_infdivx; +reg fp_NaNCmpx; +reg fp_cvtx; +reg fp_sqrtx; 
+reg fp_snanx; + +wire [31:0] fp_status = { + + fp_rm, + fp_inexe, + fp_dbzxe, + fp_underxe, + fp_overxe, + fp_invopxe, + fp_nsfp, + + fp_fractie, + fp_raz, + 1'b0, + fp_neg, + fp_pos, + fp_zero, + fp_inf, + + fp_swtx, + fp_inex, + fp_dbzx, + fp_underx, + fp_overx, + fp_giopx, + fp_gx, + fp_sx, + + fp_cvtx, + fp_sqrtx, + fp_NaNCmpx, + fp_infzerox, + fp_zerozerox, + fp_infdivx, + fp_subinfx, + fp_snanx + }; + +reg [63:0] fpu_csr; +wire [5:0] fp_rgs = fpu_csr[37:32]; + +//reg [25:0] m[0:8191]; +reg [3:0] panic; // indexes the message structure +// NOTE(review): message entries are declared 129 bits wide ([128:0]) - confirm +// that [127:0] was not intended. +reg [128:0] message [0:15]; // indexed by panic + +wire int_commit; +reg StatusHWI; +(* mark_debug = "true" *) +reg [55:0] insn0, insn1, insn2; +wire [55:0] insn0a, insn1b, insn2b; +reg [55:0] insn1a, insn2a; +// Only need enough bits in the sequence number to cover the instructions in +// the queue plus an extra count for skipping on branch misses. In this case +// that would be four bits minimum (count 0 to 8). +wire [63:0] rdat0,rdat1,rdat2; +reg [63:0] xdati; + +reg canq1, canq2, canq3; +(* mark_debug = "true" *) +reg queued1; +reg queued2; +reg queued3; +(* mark_debug = "true" *) +reg queuedNop; + +reg [47:0] codebuf[0:63]; +reg [QENTRIES-1:0] setpred; + +// instruction queue (ROB) +// State and state decodes +reg [2:0] iqentry_state [0:QENTRIES-1]; +reg [QENTRIES-1:0] iqentry_v; // entry valid? 
-- this should be the first bit +reg [QENTRIES-1:0] iqentry_done; +reg [QENTRIES-1:0] iqentry_out; +reg [QENTRIES-1:0] iqentry_agen; +reg [`SNBITS] iqentry_sn [0:QENTRIES-1]; // instruction sequence number +reg [QENTRIES-1:0] iqentry_iv; // instruction is valid +reg [`QBITSP1] iqentry_is [0:QENTRIES-1]; // source of instruction +reg [QENTRIES-1:0] iqentry_thrd; // which thread the instruction is in +reg [QENTRIES-1:0] iqentry_pt; // predict taken +reg [QENTRIES-1:0] iqentry_bt; // update branch target buffer +reg [QENTRIES-1:0] iqentry_takb; // take branch record +reg [QENTRIES-1:0] iqentry_jal; +reg [2:0] iqentry_sz [0:QENTRIES-1]; +reg [QENTRIES-1:0] iqentry_alu = 8'h00; // alu type instruction +reg [QENTRIES-1:0] iqentry_alu0; // only valid on alu #0 +reg [QENTRIES-1:0] iqentry_fpu; // floating point instruction +reg [QENTRIES-1:0] iqentry_fc; // flow control instruction +reg [QENTRIES-1:0] iqentry_canex = 8'h00; // true if it's an instruction that can exception +reg [QENTRIES-1:0] iqentry_oddball = 8'h00; // writes to register file +reg [QENTRIES-1:0] iqentry_load; // is a memory load instruction +reg [QENTRIES-1:0] iqentry_loadv; // is a volatile memory load instruction +reg [QENTRIES-1:0] iqentry_store; // is a memory store instruction +reg [QENTRIES-1:0] iqentry_preload; // is a memory preload instruction +reg [QENTRIES-1:0] iqentry_ldcmp; +reg [QENTRIES-1:0] iqentry_mem; // touches memory: 1 if LW/SW +reg [QENTRIES-1:0] iqentry_memndx; // indexed memory operation +reg [2:0] iqentry_memsz [0:QENTRIES-1]; // size of memory op +reg [QENTRIES-1:0] iqentry_rmw; // memory RMW op +reg [QENTRIES-1:0] iqentry_push; +reg [QENTRIES-1:0] iqentry_memdb; +reg [QENTRIES-1:0] iqentry_memsb; +reg [QENTRIES-1:0] iqentry_rtop; +reg [QENTRIES-1:0] iqentry_sei; +reg [QENTRIES-1:0] iqentry_aq; // memory aquire +reg [QENTRIES-1:0] iqentry_rl; // memory release +reg [QENTRIES-1:0] iqentry_shft; +reg [QENTRIES-1:0] iqentry_jmp; // changes control flow: 1 if BEQ/JALR +reg 
[QENTRIES-1:0] iqentry_br; // Bcc (for predictor) +reg [QENTRIES-1:0] iqentry_ret; +reg [QENTRIES-1:0] iqentry_irq; +reg [QENTRIES-1:0] iqentry_brk; +reg [QENTRIES-1:0] iqentry_rti; +reg [QENTRIES-1:0] iqentry_sync; // sync instruction +reg [QENTRIES-1:0] iqentry_fsync; +reg [QENTRIES-1:0] iqentry_tlb; +reg [QENTRIES-1:0] iqentry_cmp; +reg [QENTRIES-1:0] iqentry_rfw = 1'b0; // writes to register file +reg [QENTRIES-1:0] iqentry_prfw = 1'b0; +reg [7:0] iqentry_we [0:QENTRIES-1]; // enable strobe +reg [63:0] iqentry_res [0:QENTRIES-1]; // instruction result +reg [63:0] iqentry_ares [0:QENTRIES-1]; // alternate instruction result +reg [47:0] iqentry_instr[0:QENTRIES-1]; // instruction opcode +reg [2:0] iqentry_insln[0:QENTRIES-1]; // instruction length +reg [7:0] iqentry_exc [0:QENTRIES-1]; // only for branches ... indicates a HALT instruction +reg [RBIT:0] iqentry_tgt[0:QENTRIES-1]; // Rt field or ZERO -- this is the instruction's target (if any) +reg [7:0] iqentry_vl [0:QENTRIES-1]; +reg [5:0] iqentry_ven [0:QENTRIES-1]; // vector element number +reg [AMSB:0] iqentry_ma [0:QENTRIES-1]; // memory address +reg [63:0] iqentry_a0 [0:QENTRIES-1]; // argument 0 (immediate) +reg [63:0] iqentry_a1 [0:QENTRIES-1]; // argument 1 +reg [QENTRIES-1:0] iqentry_a1_v; // arg1 valid +reg [`QBITSP1] iqentry_a1_s [0:QENTRIES-1]; // arg1 source (iq entry # with top bit representing ALU/DRAM bus) +reg [63:0] iqentry_a2 [0:QENTRIES-1]; // argument 2 +reg iqentry_a2_v [0:QENTRIES-1]; // arg2 valid +reg [`QBITSP1] iqentry_a2_s [0:QENTRIES-1]; // arg2 source (iq entry # with top bit representing ALU/DRAM bus) +reg [63:0] iqentry_a3 [0:QENTRIES-1]; // argument 3 +reg iqentry_a3_v [0:QENTRIES-1]; // arg3 valid +reg [`QBITSP1] iqentry_a3_s [0:QENTRIES-1]; // arg3 source (iq entry # with top bit representing ALU/DRAM bus) +reg [`ABITS] iqentry_pc [0:QENTRIES-1]; // program counter for this instruction +reg [RBIT:0] iqentry_Ra [0:QENTRIES-1]; +reg [RBIT:0] iqentry_Rb [0:QENTRIES-1]; +reg 
[RBIT:0] iqentry_Rc [0:QENTRIES-1]; + +// debugging +//reg [4:0] iqentry_ra [0:7]; // Ra +// Clear the argument source tags of every queue entry at start-up. The loop +// body must be wrapped in begin/end: without it only iqentry_a1_s is cleared +// per entry, and iqentry_a2_s / iqentry_a3_s are written once at the +// out-of-range index QENTRIES after the loop finishes. +initial begin +for (n = 0; n < QENTRIES; n = n + 1) begin + iqentry_a1_s[n] <= 5'd0; + iqentry_a2_s[n] <= 5'd0; + iqentry_a3_s[n] <= 5'd0; +end +end + +reg [QENTRIES-1:0] iqentry_source = {QENTRIES{1'b0}}; +reg [QENTRIES-1:0] iqentry_imm; +reg [QENTRIES-1:0] iqentry_memready; +reg [QENTRIES-1:0] iqentry_memopsvalid; + +reg [QENTRIES-1:0] memissue = {QENTRIES{1'b0}}; +reg [1:0] missued; +reg [7:0] last_issue0, last_issue1, last_issue2; +reg [QENTRIES-1:0] iqentry_memissue; +reg [QENTRIES-1:0] iqentry_stomp; +reg [3:0] stompedOnRets; +reg [QENTRIES-1:0] iqentry_alu0_issue; +reg [QENTRIES-1:0] iqentry_alu1_issue; +reg [QENTRIES-1:0] iqentry_alu2_issue; +reg [QENTRIES-1:0] iqentry_id1issue; +reg [QENTRIES-1:0] iqentry_id2issue; +reg [QENTRIES-1:0] iqentry_id3issue; +reg [1:0] iqentry_mem_islot [0:QENTRIES-1]; +reg [QENTRIES-1:0] iqentry_fcu_issue; +reg [QENTRIES-1:0] iqentry_fpu1_issue; +reg [QENTRIES-1:0] iqentry_fpu2_issue; + +reg [PREGS-1:1] livetarget; +reg [PREGS-1:1] iqentry_livetarget [0:QENTRIES-1]; +reg [PREGS-1:1] iqentry_latestID [0:QENTRIES-1]; +reg [PREGS-1:1] iqentry_cumulative [0:QENTRIES-1]; +`ifdef SUPPORT_PREDICATION +reg [QENTRIES-1:0] iqentry_psource = {QENTRIES{1'b0}}; +reg [15:0] plivetarget; +reg [15:0] iqentry_plivetarget [0:QENTRIES-1]; +reg [15:0] iqentry_platestID [0:QENTRIES-1]; +reg [15:0] iqentry_pcumulative [0:QENTRIES-1]; +`endif +wire [PREGS-1:1] iq_out [0:QENTRIES-1]; + +reg [`QBITS] tail0; +reg [`QBITS] tail1; +reg [`QBITS] tail2; +reg [`QBITS] heads[0:QENTRIES-1]; + +// To detect a head change at time of commit. Some values need to be pulsed +// with a single pulse. 
+reg [`QBITS] ohead[0:2]; +reg ocommit0_v, ocommit1_v, ocommit2_v; +reg [11:0] cmt_timer; + +wire take_branch0; +wire take_branch1; + +reg [3:0] nop_fetchbuf; +wire fetchbuf; // determines which pair to read from & write to +wire [3:0] fb_panic; + +wire [47:0] fetchbuf0_instr; +wire [2:0] fetchbuf0_insln; +wire [`ABITS] fetchbuf0_pc; +(* mark_debug = "true" *) +wire fetchbuf0_v; +wire fetchbuf0_thrd; +wire fetchbuf0_mem; +wire fetchbuf0_rfw; +wire [47:0] fetchbuf1_instr; +wire [2:0] fetchbuf1_insln; +wire [`ABITS] fetchbuf1_pc; +wire fetchbuf1_v; +wire fetchbuf1_thrd; +wire fetchbuf1_mem; +wire fetchbuf1_rfw; +wire [47:0] fetchbuf2_instr; +wire [2:0] fetchbuf2_insln; +wire [`ABITS] fetchbuf2_pc; +wire fetchbuf2_v; +wire fetchbuf2_thrd; +wire fetchbuf2_mem; +wire fetchbuf2_rfw; +`ifdef SUPPORT_PREDICATION +wire fetchbuf0_prfw; +wire [7:0] fetchbuf0_pbyte; +wire fetchbuf1_prfw; +wire [7:0] fetchbuf1_pbyte; +wire fetchbuf2_prfw; +wire [7:0] fetchbuf2_pbyte; +`endif +wire [47:0] fetchbufA_instr; +wire [`ABITS] fetchbufA_pc; +wire fetchbufA_v; +wire [47:0] fetchbufB_instr; +wire [`ABITS] fetchbufB_pc; +wire fetchbufB_v; +wire [47:0] fetchbufC_instr; +wire [`ABITS] fetchbufC_pc; +wire fetchbufC_v; +wire [47:0] fetchbufD_instr; +wire [`ABITS] fetchbufD_pc; +wire fetchbufD_v; +wire [47:0] fetchbufE_instr; +wire [`ABITS] fetchbufE_pc; +wire fetchbufE_v; +wire [47:0] fetchbufF_instr; +wire [`ABITS] fetchbufF_pc; +wire fetchbufF_v; + +//reg did_branchback0; +//reg did_branchback1; + +reg id1_v; +reg [`QBITSP1] id1_id; +reg [47:0] id1_instr; +reg [5:0] id1_ven; +reg [7:0] id1_vl; +reg id1_thrd; +reg id1_pt; +reg [4:0] id1_Rt; +wire [143:0] id1_bus; + +reg id2_v; +reg [`QBITSP1] id2_id; +reg [47:0] id2_instr; +reg [5:0] id2_ven; +reg [7:0] id2_vl; +reg id2_thrd; +reg id2_pt; +reg [4:0] id2_Rt; +wire [143:0] id2_bus; + +reg id3_v; +reg [`QBITSP1] id3_id; +reg [47:0] id3_instr; +reg [5:0] id3_ven; +reg [7:0] id3_vl; +reg id3_thrd; +reg id3_pt; +reg [4:0] id3_Rt; +wire [143:0] 
id3_bus; + +reg [63:0] alu0_xs = 64'd0; +reg [63:0] alu1_xs = 64'd0; + +reg [3:0] alu0_pred; +reg alu0_cmt; +wire alu0_abort; +reg alu0_ld; +reg alu0_dataready; +wire alu0_done; +wire alu0_idle; +reg [`QBITSP1] alu0_sourceid; +reg [47:0] alu0_instr; +reg alu0_tlb; +reg alu0_mem; +reg alu0_load; +reg alu0_store; +reg alu0_push; +reg alu0_shft; +reg [RBIT:0] alu0_Ra; +reg [63:0] alu0_argA; +reg [63:0] alu0_argB; +reg [63:0] alu0_argC; +reg [63:0] alu0_argT; +reg [63:0] alu0_argI; // only used by BEQ +reg [2:0] alu0_sz; +reg [RBIT:0] alu0_tgt; +reg [5:0] alu0_ven; +reg alu0_thrd; +reg [`ABITS] alu0_pc; +reg [63:0] alu0_bus; +wire [63:0] alu0b_bus; +wire [63:0] alu0_out; +wire [`QBITSP1] alu0_id; +wire [`XBITS] alu0_exc; +wire alu0_v; +wire alu0_branchmiss; +wire [`ABITS] alu0_misspc; + +reg [3:0] alu1_pred; +reg alu1_cmt; +wire alu1_abort; +reg alu1_ld; +reg alu1_dataready; +wire alu1_done; +wire alu1_idle; +reg [`QBITSP1] alu1_sourceid; +reg [47:0] alu1_instr; +reg alu1_mem; +reg alu1_load; +reg alu1_store; +reg alu1_push; +reg alu1_shft; +reg [RBIT:0] alu1_Ra; +reg [63:0] alu1_argA; +reg [63:0] alu1_argB; +reg [63:0] alu1_argC; +reg [63:0] alu1_argT; +reg [63:0] alu1_argI; // only used by BEQ +reg [2:0] alu1_sz; +reg [RBIT:0] alu1_tgt; +reg [5:0] alu1_ven; +reg [`ABITS] alu1_pc; +reg alu1_thrd; +reg [63:0] alu1_bus; +wire [63:0] alu1b_bus; +wire [63:0] alu1_out; +wire [`QBITSP1] alu1_id; +wire [`XBITS] alu1_exc; +wire alu1_v; +wire alu1_branchmiss; +wire [`ABITS] alu1_misspc; + +wire [`XBITS] fpu_exc; +reg [3:0] fpu1_pred; +reg fpu1_cmt; +reg fpu1_ld; +reg fpu1_dataready = 1'b1; +wire fpu1_done = 1'b1; +wire fpu1_idle; +reg [`QBITSP1] fpu1_sourceid; +reg [47:0] fpu1_instr; +reg [63:0] fpu1_argA; +reg [63:0] fpu1_argB; +reg [63:0] fpu1_argC; +reg [63:0] fpu1_argT; +reg [63:0] fpu1_argI; // only used by BEQ +reg [RBIT:0] fpu1_tgt; +reg [`ABITS] fpu1_pc; +wire [63:0] fpu1_out = 64'h0; +reg [63:0] fpu1_bus = 64'h0; +wire [`QBITSP1] fpu1_id; +wire [`XBITS] fpu1_exc = 
9'h000; +wire fpu1_v; +wire [31:0] fpu1_status; + +reg [3:0] fpu2_pred; +reg fpu2_cmt; +reg fpu2_ld; +reg fpu2_dataready = 1'b1; +wire fpu2_done = 1'b1; +wire fpu2_idle; +reg [`QBITSP1] fpu2_sourceid; +reg [47:0] fpu2_instr; +reg [63:0] fpu2_argA; +reg [63:0] fpu2_argB; +reg [63:0] fpu2_argC; +reg [63:0] fpu2_argT; +reg [63:0] fpu2_argI; // only used by BEQ +reg [RBIT:0] fpu2_tgt; +reg [`ABITS] fpu2_pc; +wire [63:0] fpu2_out = 64'h0; +reg [63:0] fpu2_bus = 64'h0; +wire [`QBITSP1] fpu2_id; +wire [`XBITS] fpu2_exc = 9'h000; +wire fpu2_v; +wire [31:0] fpu2_status; + +reg [7:0] fccnt; +reg [47:0] waitctr; +reg [3:0] fcu_pred; +reg fcu_cmt; +reg fcu_ld; +reg fcu_dataready; +reg fcu_done; +reg fcu_idle = 1'b1; +reg [`QBITSP1] fcu_sourceid; +reg [47:0] fcu_instr; +reg [47:0] fcu_prevInstr; +reg [2:0] fcu_insln; +reg fcu_pt; // predict taken +reg fcu_branch; +reg fcu_call; +reg fcu_ret; +reg fcu_jal; +reg fcu_brk; +reg fcu_rti; +reg [63:0] fcu_argA; +reg [63:0] fcu_argB; +reg [63:0] fcu_argC; +reg [63:0] fcu_argI; // only used by BEQ +reg [63:0] fcu_argT; +reg [63:0] fcu_argT2; +reg [`ABITS] fcu_pc; +reg [`ABITS] fcu_nextpc; +reg [`ABITS] fcu_brdisp; +wire [63:0] fcu_out; +reg [63:0] fcu_bus; +wire [`QBITSP1] fcu_id; +reg [`XBITS] fcu_exc; +wire fcu_v; +reg fcu_thrd; +reg fcu_branchmiss; +reg fcu_clearbm; +reg [`ABITS] fcu_misspc; + +reg [63:0] rmw_argA; +reg [63:0] rmw_argB; +reg [63:0] rmw_argC; +wire [63:0] rmw_res; +reg [47:0] rmw_instr; + +// write buffer +reg [63:0] wb_data [0:`WB_DEPTH-1]; +reg [`ABITS] wb_addr [0:`WB_DEPTH-1]; +reg [1:0] wb_ol [0:`WB_DEPTH-1]; +reg [`WB_DEPTH-1:0] wb_v; +reg [`WB_DEPTH-1:0] wb_rmw; +reg [QENTRIES-1:0] wb_id [0:`WB_DEPTH-1]; +reg [QENTRIES-1:0] wbo_id; +reg [7:0] wb_sel [0:`WB_DEPTH-1]; +reg wb_en; +reg wb_shift; + +reg branchmiss = 1'b0; +reg branchmiss_thrd = 1'b0; +reg [`ABITS] misspc; +reg [`QBITS] missid; + +wire take_branch; +wire take_branchA; +wire take_branchB; +wire take_branchC; +wire take_branchD; + +wire dram_avail; 
+reg [2:0] dram0; // state of the DRAM request (latency = 4; can have three in pipeline) +reg [2:0] dram1; // state of the DRAM request (latency = 4; can have three in pipeline) +reg [2:0] dram2; // state of the DRAM request (latency = 4; can have three in pipeline) +reg [63:0] dram0_data; +reg [`ABITS] dram0_addr; +reg [47:0] dram0_instr; +reg dram0_rmw; +reg dram0_preload; +reg [RBIT:0] dram0_tgt; +reg [`QBITSP1] dram0_id; +reg [`XBITS] dram0_exc; +reg dram0_unc; +reg [2:0] dram0_memsize; +reg dram0_load; // is a load operation +reg dram0_store; +reg [1:0] dram0_ol; +reg [63:0] dram1_data; +reg [`ABITS] dram1_addr; +reg [47:0] dram1_instr; +reg dram1_rmw; +reg dram1_preload; +reg [RBIT:0] dram1_tgt; +reg [`QBITSP1] dram1_id; +reg [`XBITS] dram1_exc; +reg dram1_unc; +reg [2:0] dram1_memsize; +reg dram1_load; +reg dram1_store; +reg [1:0] dram1_ol; +reg [63:0] dram2_data; +reg [`ABITS] dram2_addr; +reg [47:0] dram2_instr; +reg dram2_rmw; +reg dram2_preload; +reg [RBIT:0] dram2_tgt; +reg [`QBITSP1] dram2_id; +reg [`XBITS] dram2_exc; +reg dram2_unc; +reg [2:0] dram2_memsize; +reg dram2_load; +reg dram2_store; +reg [1:0] dram2_ol; + +reg dramA_v; +reg [`QBITSP1] dramA_id; +reg [63:0] dramA_bus; +reg [`XBITS] dramA_exc; +reg dramB_v; +reg [`QBITSP1] dramB_id; +reg [63:0] dramB_bus; +reg [`XBITS] dramB_exc; +reg dramC_v; +reg [`QBITSP1] dramC_id; +reg [63:0] dramC_bus; +reg [`XBITS] dramC_exc; + +wire outstanding_stores; +reg [63:0] I; // instruction count +reg [63:0] CC; // commit count + +reg commit0_v; +reg [`QBITSP1] commit0_id; +reg [RBIT:0] commit0_tgt; +reg [7:0] commit0_we = 8'h00; +reg [63:0] commit0_bus; +reg commit1_v; +reg [`QBITSP1] commit1_id; +reg [RBIT:0] commit1_tgt; +reg [7:0] commit1_we = 8'h00; +reg [63:0] commit1_bus; +reg commit2_v; +reg [`QBITSP1] commit2_id; +reg [RBIT:0] commit2_tgt; +reg [7:0] commit2_we = 8'h00; +reg [63:0] commit2_bus; + +reg StoreAck1; +reg [4:0] bstate; +parameter BIDLE = 5'd0; +parameter B_StoreAck = 5'd1; +parameter 
B_DCacheLoadStart = 5'd2; +parameter B_DCacheLoadStb = 5'd3; +parameter B_DCacheLoadWait1 = 5'd4; +parameter B_DCacheLoadWait2 = 5'd5; +parameter B_DCacheLoadResetBusy = 5'd6; +parameter B_ICacheAck = 5'd7; +parameter B8 = 5'd8; +parameter B_ICacheNack = 5'd9; +parameter B_ICacheNack2 = 5'd10; +parameter B11 = 5'd11; +parameter B12 = 5'd12; +parameter B_DLoadAck = 5'd13; +parameter B14 = 5'd14; +parameter B15 = 5'd15; +parameter B16 = 5'd16; +parameter B17 = 5'd17; +parameter B18 = 5'd18; +parameter B19 = 5'd19; +parameter B2a = 5'd20; +parameter B2b = 5'd21; +parameter B2c = 5'd22; +parameter B_DCacheLoadAck = 5'd23; +parameter B20 = 5'd24; +parameter B21 = 5'd25; +parameter B_DCacheLoadWait3 = 5'd26; +reg [1:0] bwhich; +reg [3:0] icstate,picstate; +parameter IDLE = 4'd0; +parameter IC1 = 4'd1; +parameter IC2 = 4'd2; +parameter IC3 = 4'd3; +parameter IC_WaitL2 = 4'd4; +parameter IC5 = 4'd5; +parameter IC6 = 4'd6; +parameter IC7 = 4'd7; +parameter IC_Next = 4'd8; +parameter IC9 = 4'd9; +parameter IC10 = 4'd10; +parameter IC3a = 4'd11; +reg invic, invdc; +reg [1:0] icwhich; +reg icnxt,L2_nxt; +wire ihit0,ihit1,ihit2,ihitL2; +wire ihit = ihit0&ihit1&ihit2; +reg phit; +wire threadx; +always @* + phit <= ihit&&icstate==IDLE; +reg [2:0] iccnt; +reg L1_wr0,L1_wr1,L1_wr2; +reg L1_invline; +wire [1:0] ic0_fault,ic1_fault,ic2_fault; +reg [8:0] L1_en; +reg [71:0] L1_adr, L2_adr; +reg [297:0] L2_rdat; +wire [297:0] L2_dato; +reg L2_xsel; + +generate begin : gRegfileInst +if (`WAYS > 2) begin : gb1 +FT64_regfile2w9r_oc #(.RBIT(RBIT)) urf1 +( + .clk(clk), + .clk4x(clk4x), + .wr0(commit0_v), + .wr1(commit1_v), + .we0(commit0_we), + .we1(commit1_we), + .wa0(commit0_tgt), + .wa1(commit1_tgt), + .i0(commit0_bus), + .i1(commit1_bus), + .rclk(~clk), + .ra0(Ra0), + .ra1(Rb0), + .ra2(Rc0), + .o0(rfoa0), + .o1(rfob0), + .o2(rfoc0a), + .ra3(Ra1), + .ra4(Rb1), + .ra5(Rc1), + .o3(rfoa1), + .o4(rfob1), + .o5(rfoc1a), + .ra6(Ra2), + .ra7(Rb2), + .ra8(Rc2), + .o6(rfoa2), + .o7(rfob2), + 
.o8(rfoc2a) +); +assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a; +assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a; +assign rfoc2 = Rc2[11:6]==6'h3F ? vm[Rc2[2:0]] : rfoc2a; +end +else if (`WAYS > 1) begin : gb1 +FT64_regfile2w6r_oc #(.RBIT(RBIT)) urf1 +( + .clk(clk), + .clk4x(clk4x), + .wr0(commit0_v), + .wr1(commit1_v), + .we0(commit0_we), + .we1(commit1_we), + .wa0(commit0_tgt), + .wa1(commit1_tgt), + .i0(commit0_bus), + .i1(commit1_bus), + .rclk(~clk), + .ra0(Ra0), + .ra1(Rb0), + .ra2(Rc0), + .o0(rfoa0), + .o1(rfob0), + .o2(rfoc0a), + .ra3(Ra1), + .ra4(Rb1), + .ra5(Rc1), + .o3(rfoa1), + .o4(rfob1), + .o5(rfoc1a) +); +assign rfoc0 = Rc0[11:6]==6'h3F ? vm[Rc0[2:0]] : rfoc0a; +assign rfoc1 = Rc1[11:6]==6'h3F ? vm[Rc1[2:0]] : rfoc1a; +end +else begin : gb1 +FT64_regfile1w4r_oc #(.RBIT(RBIT)) urf1 +( + .clk(clk), + .wr0(commit0_v), + .wa0(commit0_tgt), + .we0(8'hFF), + .i0(commit0_bus), + .rclk(~clk), + .ra0(Ra0), + .ra1(Rb0), + .ra2(Rc0), + .ra3(Rt0), + .o0(rfoa0), + .o1(rfob0), + .o2(rfoc0a), + .o3(rfot0) +); +// rfoc0 is driven inside this branch, as in the other two branches, so it +// has exactly one driver whichever branch is elaborated. +assign rfoc0 = Rc0[11:6]==6'h3F ? 
vm[Rc0[2:0]] : rfoc0a; +end +end +endgenerate + +// Returns the instruction length selected by ins[7:6] (4, 6 or 2; always 2 +// for CMPRSSD opcodes when SUPPORT_DCI is defined), OR'ed with pred_on. +function [3:0] fnInsLength; +input [47:0] ins; +`ifdef SUPPORT_DCI +if (ins[`INSTRUCTION_OP]==`CMPRSSD) + fnInsLength = 4'd2 | pred_on; +else +`endif + case(ins[7:6]) + 2'd0: fnInsLength = 4'd4 | pred_on; + 2'd1: fnInsLength = 4'd6 | pred_on; + default: fnInsLength = 4'd2 | pred_on; + endcase +endfunction + +// NOTE(review): the names say +6/+12 but the constants added are 7 and 14 - +// confirm which is intended. +wire [`ABITS] pc0plus6 = pc0 + 32'd7; +wire [`ABITS] pc0plus12 = pc0 + 32'd14; + +// With threading enabled each way uses its own cache output directly; +// otherwise the cache outputs are concatenated and shifted right by eight +// times the length of the preceding instruction(s). +generate begin : gInsnVar + if (`WAYS > 1) begin + always @* + if (thread_en) + insn1a <= insn1b; + else + insn1a <= {insn1b,insn0a} >> {fnInsLength(insn0a),3'b0}; + end + if (`WAYS > 2) begin + always @* + if (thread_en) + insn2a <= insn2b; + else + insn2a <= {insn2b,insn1b,insn0a} >> {fnInsLength(insn0a) + fnInsLength(insn1a),3'b0}; + end +end +endgenerate + +FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic0 +( + .rst(rst), + .clk(clk), + .nxt(icnxt), + .wr(L1_wr0), + .wr_ack(), + .en(L1_en), + .adr((icstate==IDLE||icstate==IC_Next) ? {pcr[7:0],pc0} : L1_adr), + .wadr(L1_adr), + .i(L2_rdat), + .o(insn0a), + .fault(ic0_fault), + .hit(ihit0), + .invall(invic), + .invline(L1_invline) +); +generate begin : gICacheInst +if (`WAYS > 1) begin +FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic1 +( + .rst(rst), + .clk(clk), + .nxt(icnxt), + .wr(L1_wr1), + .wr_ack(), + .en(L1_en), + .adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? 
{pcr[7:0],pc1}: {pcr[7:0],pc0plus6} ): L1_adr), + .wadr(L1_adr), + .i(L2_rdat), + .o(insn1b), + .fault(ic1_fault), + .hit(ihit1), + .invall(invic), + .invline(L1_invline) +); +end +else begin +assign ihit1 = 1'b1; +end +if (`WAYS > 2) begin +FT64_L1_icache #(.pSize(`L1_ICACHE_SIZE)) uic2 +( + .rst(rst), + .clk(clk), + .nxt(icnxt), + .wr(L1_wr2), + .wr_ack(), + .en(L1_en), + .adr((icstate==IDLE||icstate==IC_Next) ? (thread_en ? 
{pcr[7:0],pc2} : {pcr[7:0],pc0plus12}) : L1_adr), + .wadr(L1_adr), + .i(L2_rdat), + .o(insn2b), + .fault(ic2_fault), + .hit(ihit2), + .invall(invic), + .invline(L1_invline) +); +end +else +assign ihit2 = 1'b1; +end +endgenerate + +FT64_L2_icache uic2 +( + .rst(rst), + .clk(clk), + .nxt(L2_nxt), + .wr(bstate==B_ICacheAck && (ack_i|err_i)), + .xsel(L2_xsel), + .adr(L2_adr), + .cnt(iccnt), + .exv_i(exvq), + .i(dat_i), + .err_i(errq), + .o(L2_dato), + .hit(ihitL2), + .invall(invic), + .invline() +); + +wire predict_taken; +wire predict_taken0; +wire predict_taken1; +wire predict_taken2; +wire predict_takenA; +wire predict_takenB; +wire predict_takenC; +wire predict_takenD; +wire predict_takenE; +wire predict_takenF; +wire predict_takenA1; +wire predict_takenB1; +wire predict_takenC1; +wire predict_takenD1; + +wire [`ABITS] btgtA, btgtB, btgtC, btgtD, btgtE, btgtF; +// BTB write strobes: asserted when the entry at the given head is valid, in +// the IQS_CMT state and is a flow control instruction. +wire btbwr0 = iqentry_v[heads[0]] && iqentry_state[heads[0]]==IQS_CMT && + (iqentry_fc[heads[0]]); +// btbwr1/btbwr2 are declared at module scope so the BTB instances in +// gBTBInst can reference them; a wire declared inside a generate block is +// local to that block and the outside reference would become an undriven +// implicit net. They are forced low when the corresponding way is not built. +wire btbwr1 = (`WAYS > 1) && iqentry_v[heads[1]] && iqentry_state[heads[1]]==IQS_CMT && + (iqentry_fc[heads[1]]); +wire btbwr2 = (`WAYS > 2) && iqentry_v[heads[2]] && iqentry_state[heads[2]]==IQS_CMT && + (iqentry_fc[heads[2]]); + +// fcu_clk is currently just clk_i; the gated clock buffer is commented out. 
+wire fcu_clk; +`ifdef FCU_ENH +//BUFGCE ufcuclk +//( +// .I(clk_i), +// .CE(fcu_available), +// .O(fcu_clk) +//); +`endif +assign fcu_clk = clk_i; + +generate begin: gBTBInst +if (`WAYS > 2) begin +`ifdef FCU_ENH +FT64_BTB #(.AMSB(AMSB)) ubtb1 +( + .rst(rst), + .wclk(fcu_clk), + .wr0(btbwr0), + .wadr0(iqentry_pc[heads[0]]), + .wdat0(iqentry_ma[heads[0]]), + .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]), + .wr1(btbwr1), + .wadr1(iqentry_pc[heads[1]]), + .wdat1(iqentry_ma[heads[1]]), + .valid1((iqentry_br[heads[1]] ? 
iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]), + .wr2(btbwr2), + .wadr2(iqentry_pc[heads[2]]), + .wdat2(iqentry_ma[heads[2]]), + .valid2((iqentry_br[heads[2]] ? iqentry_takb[heads[2]] : iqentry_bt[heads[2]]) & iqentry_v[heads[2]]), + .rclk(~clk), + .pcA(fetchbufA_pc), + .btgtA(btgtA), + .pcB(fetchbufB_pc), + .btgtB(btgtB), + .pcC(fetchbufC_pc), + .btgtC(btgtC), + .pcD(fetchbufD_pc), + .btgtD(btgtD), + .pcE(fetchbufE_pc), + .btgtE(btgtE), + .pcF(fetchbufF_pc), + .btgtF(btgtF), + .npcA(BRKPC), + .npcB(BRKPC), + .npcC(BRKPC), + .npcD(BRKPC), + .npcE(BRKPC), + .npcF(BRKPC) +); +`else +// Branch tergets are picked up by fetchbuf logic and need to be present. +// Without a target predictor they are just set to the reset address. +// This virtually guarentees a miss. +assign btgtA = RSTPC; +assign btgtB = RSTPC; +assign btgtC = RSTPC; +assign btgtD = RSTPC; +assign btgtE = RSTPC; +assign btgtF = RSTPC; +`endif +end +else if (`WAYS > 1) begin +`ifdef FCU_ENH +FT64_BTB #(.AMSB(AMSB)) ubtb1 +( + .rst(rst), + .wclk(fcu_clk), + .wr0(btbwr0), + .wadr0(iqentry_pc[heads[0]]), + .wdat0(iqentry_ma[heads[0]]), + .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]), + .wr1(btbwr1), + .wadr1(iqentry_pc[heads[1]]), + .wdat1(iqentry_ma[heads[1]]), + .valid1((iqentry_br[heads[1]] ? iqentry_takb[heads[1]] : iqentry_bt[heads[1]]) & iqentry_v[heads[1]]), + .rclk(~clk), + .pcA(fetchbufA_pc), + .btgtA(btgtA), + .pcB(fetchbufB_pc), + .btgtB(btgtB), + .pcC(fetchbufC_pc), + .btgtC(btgtC), + .pcD(fetchbufD_pc), + .btgtD(btgtD), + .pcE(32'd0), + .btgtE(), + .pcF(32'd0), + .btgtF(), + .npcA(BRKPC), + .npcB(BRKPC), + .npcC(BRKPC), + .npcD(BRKPC), + .npcE(BRKPC), + .npcF(BRKPC) +); +`else +// Branch tergets are picked up by fetchbuf logic and need to be present. +// Without a target predictor they are just set to the reset address. +// This virtually guarentees a miss. 
+assign btgtA = RSTPC; +assign btgtB = RSTPC; +assign btgtC = RSTPC; +assign btgtD = RSTPC; +`endif +end +else begin +`ifdef FCU_ENH +// Single-way build: only write port 0 of the BTB is used. Write ports 1 and +// 2 are tied off and only lookups A and B are connected. +FT64_BTB #(.AMSB(AMSB)) ubtb1 +( + .rst(rst), + .wclk(fcu_clk), + .wr0(btbwr0), + .wadr0(iqentry_pc[heads[0]]), + .wdat0(iqentry_ma[heads[0]]), + .valid0((iqentry_br[heads[0]] ? iqentry_takb[heads[0]] : iqentry_bt[heads[0]]) & iqentry_v[heads[0]]), + .wr1(1'b0), + .wadr1(RSTPC), + .wdat1(RSTPC), + .valid1(1'b0), + .wr2(1'b0), + .wadr2(RSTPC), + .wdat2(RSTPC), + .valid2(1'b0), + .rclk(~clk), + .pcA(fetchbufA_pc), + .btgtA(btgtA), + .pcB(fetchbufB_pc), + .btgtB(btgtB), + .pcC(32'd0), + .btgtC(), + .pcD(32'd0), + .btgtD(), + .pcE(32'd0), + .btgtE(), + .pcF(32'd0), + .btgtF(), + .hitA(), + .hitB(), + .hitC(), + .hitD(), + .hitE(), + .hitF(), + .npcA(BRKPC), + .npcB(BRKPC), + .npcC(BRKPC), + .npcD(BRKPC), + .npcE(BRKPC), + .npcF(BRKPC) +); +`else +// Branch targets are picked up by fetchbuf logic and need to be present. +// Without a target predictor they are just set to the reset address. +// This virtually guarantees a miss. 
assign btgtA = RSTPC;
assign btgtB = RSTPC;
`endif
end
end
endgenerate

// ----------------------------------------------------------------------------
// Branch direction prediction.
// With FCU_ENH defined a FT64_BranchPredictor is instantiated; the number of
// fetch-buffer PCs wired to it depends on `WAYS (unused lookup PCs are tied to
// zero and their predictions left unconnected).  Without FCU_ENH the
// prediction is taken directly from an instruction bit: bit 47 when
// instruction bit 6 is set, otherwise bit 31.
// ----------------------------------------------------------------------------
generate begin: gBPInst
if (`WAYS > 2) begin
`ifdef FCU_ENH
FT64_BranchPredictor ubp1
(
  .rst(rst),
  .clk(fcu_clk),
  .en(bpe),
  .xisBranch0(iqentry_br[heads[0]] & commit0_v),
  .xisBranch1(iqentry_br[heads[1]] & commit1_v),
  .xisBranch2(iqentry_br[heads[2]] & commit2_v),
  .pcA(fetchbufA_pc),
  .pcB(fetchbufB_pc),
  .pcC(fetchbufC_pc),
  .pcD(fetchbufD_pc),
  .pcE(fetchbufE_pc),
  .pcF(fetchbufF_pc),
  .xpc0(iqentry_pc[heads[0]]),
  .xpc1(iqentry_pc[heads[1]]),
  .xpc2(iqentry_pc[heads[2]]),
  .takb0(commit0_v & iqentry_takb[heads[0]]),
  .takb1(commit1_v & iqentry_takb[heads[1]]),
  .takb2(commit2_v & iqentry_takb[heads[2]]),
  .predict_takenA(predict_takenA),
  .predict_takenB(predict_takenB),
  .predict_takenC(predict_takenC),
  .predict_takenD(predict_takenD),
  .predict_takenE(predict_takenE),
  .predict_takenF(predict_takenF)
);
`else
// Predict based on sign of displacement
assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31];
assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31];
assign predict_takenE = fetchbufE_instr[6] ? fetchbufE_instr[47] : fetchbufE_instr[31];
assign predict_takenF = fetchbufF_instr[6] ? fetchbufF_instr[47] : fetchbufF_instr[31];
`endif
end
else if (`WAYS > 1) begin
`ifdef FCU_ENH
FT64_BranchPredictor ubp1
(
  .rst(rst),
  .clk(fcu_clk),
  .en(bpe),
  .xisBranch0(iqentry_br[heads[0]] & commit0_v),
  .xisBranch1(iqentry_br[heads[1]] & commit1_v),
  .xisBranch2(iqentry_br[heads[2]] & commit2_v),
  .pcA(fetchbufA_pc),
  .pcB(fetchbufB_pc),
  .pcC(fetchbufC_pc),
  .pcD(fetchbufD_pc),
  .pcE(32'd0),
  .pcF(32'd0),
  .xpc0(iqentry_pc[heads[0]]),
  .xpc1(iqentry_pc[heads[1]]),
  .xpc2(iqentry_pc[heads[2]]),
  .takb0(commit0_v & iqentry_takb[heads[0]]),
  .takb1(commit1_v & iqentry_takb[heads[1]]),
  .takb2(commit2_v & iqentry_takb[heads[2]]),
  .predict_takenA(predict_takenA),
  .predict_takenB(predict_takenB),
  .predict_takenC(predict_takenC),
  .predict_takenD(predict_takenD),
  .predict_takenE(),
  .predict_takenF()
);
`else
// Predict based on sign of displacement
assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
assign predict_takenC = fetchbufC_instr[6] ? fetchbufC_instr[47] : fetchbufC_instr[31];
assign predict_takenD = fetchbufD_instr[6] ? fetchbufD_instr[47] : fetchbufD_instr[31];
`endif
end
else begin
`ifdef FCU_ENH
FT64_BranchPredictor ubp1
(
  .rst(rst),
  .clk(fcu_clk),
  .en(bpe),
  .xisBranch0(iqentry_br[heads[0]] & commit0_v),
  .xisBranch1(iqentry_br[heads[1]] & commit1_v),
  .xisBranch2(iqentry_br[heads[2]] & commit2_v),
  .pcA(fetchbufA_pc),
  .pcB(fetchbufB_pc),
  .pcC(32'd0),
  .pcD(32'd0),
  .pcE(32'd0),
  .pcF(32'd0),
  .xpc0(iqentry_pc[heads[0]]),
  .xpc1(iqentry_pc[heads[1]]),
  .xpc2(iqentry_pc[heads[2]]),
  .takb0(commit0_v & iqentry_takb[heads[0]]),
  .takb1(commit1_v & iqentry_takb[heads[1]]),
  .takb2(commit2_v & iqentry_takb[heads[2]]),
  .predict_takenA(predict_takenA),
  .predict_takenB(predict_takenB),
  .predict_takenC(),
  .predict_takenD(),
  .predict_takenE(),
  .predict_takenF()
);
`else
// Predict based on sign of displacement
assign predict_takenA = fetchbufA_instr[6] ? fetchbufA_instr[47] : fetchbufA_instr[31];
assign predict_takenB = fetchbufB_instr[6] ? fetchbufB_instr[47] : fetchbufB_instr[31];
`endif
end
end
endgenerate

//-----------------------------------------------------------------------------
// Debug
//
// Four address comparators (dbg_adr0..3).  For comparator k the fields of
// dbg_ctrl used here are:
//   dbg_ctrl[k]               comparator enable
//   dbg_ctrl[17+4k:16+4k]     match kind  (2'b00 tested by the instruction
//                             matchers, 2'b11 by the data matchers)
//   dbg_ctrl[19+4k:18+4k]     number of low address bits ignored by the data
//                             matchers (00: none, 01: bit 0, 10: bits 1:0,
//                             11: bits 2:0)
// Signal naming: dbg_?matchKM = comparator K against memory channel M
// (dram0/1/2_addr); dbg_imatchAK / dbg_imatchBK = comparator K against
// fetchbuf0_pc / fetchbuf1_pc.
//-----------------------------------------------------------------------------
`ifdef SUPPORT_DBG

wire [DBW-1:0] dbg_stat1x;
reg [DBW-1:0] dbg_stat;
reg [DBW-1:0] dbg_ctrl;
reg [ABW-1:0] dbg_adr0;
reg [ABW-1:0] dbg_adr1;
reg [ABW-1:0] dbg_adr2;
reg [ABW-1:0] dbg_adr3;
reg dbg_imatchA0,dbg_imatchA1,dbg_imatchA2,dbg_imatchA3,dbg_imatchA;
reg dbg_imatchB0,dbg_imatchB1,dbg_imatchB2,dbg_imatchB3,dbg_imatchB;

// Data-address match for one comparator against one memory address.
//   en   : comparator enable bit
//   kind : 2-bit match-kind field; must equal 2'b11 for a data match
//   sz   : 2-bit field selecting how many low address bits are ignored
//   adr  : memory address being checked
//   bp   : comparator (breakpoint) address
// Address bits AMSB..3 must always be equal.
function fnDbgAdrMatch;
input en;
input [1:0] kind;
input [1:0] sz;
input [AMSB:0] adr;
input [AMSB:0] bp;
fnDbgAdrMatch =
    en && kind==2'b11 && adr[AMSB:3]==bp[AMSB:3] &&
    ((sz==2'b00 && adr[2:0]==bp[2:0]) ||
     (sz==2'b01 && adr[2:1]==bp[2:1]) ||
     (sz==2'b10 && adr[2]==bp[2]) ||
      sz==2'b11);
endfunction

// Load-address matches.
wire dbg_lmatch00 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram0_addr, dbg_adr0);
wire dbg_lmatch01 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram1_addr, dbg_adr0);
wire dbg_lmatch02 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram2_addr, dbg_adr0);
wire dbg_lmatch10 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram0_addr, dbg_adr1);
wire dbg_lmatch11 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram1_addr, dbg_adr1);
wire dbg_lmatch12 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram2_addr, dbg_adr1);
wire dbg_lmatch20 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram0_addr, dbg_adr2);
wire dbg_lmatch21 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram1_addr, dbg_adr2);
wire dbg_lmatch22 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram2_addr, dbg_adr2);
wire dbg_lmatch30 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram0_addr, dbg_adr3);
wire dbg_lmatch31 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram1_addr, dbg_adr3);
wire dbg_lmatch32 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram2_addr, dbg_adr3);
wire dbg_lmatch0 = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30;
wire dbg_lmatch1 = dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31;
wire dbg_lmatch2 = dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32;
wire dbg_lmatch = dbg_lmatch00|dbg_lmatch10|dbg_lmatch20|dbg_lmatch30|
                  dbg_lmatch01|dbg_lmatch11|dbg_lmatch21|dbg_lmatch31|
                  dbg_lmatch02|dbg_lmatch12|dbg_lmatch22|dbg_lmatch32
                    ;

// Store-address matches.
// NOTE(review): these test exactly the same kind code (2'b11) and the same
// addresses as the load matches above, so dbg_smatchKM == dbg_lmatchKM.
// Confirm whether stores were meant to use a different kind encoding.
wire dbg_smatch00 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram0_addr, dbg_adr0);
wire dbg_smatch01 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram1_addr, dbg_adr0);
wire dbg_smatch02 = fnDbgAdrMatch(dbg_ctrl[0], dbg_ctrl[17:16], dbg_ctrl[19:18], dram2_addr, dbg_adr0);
wire dbg_smatch10 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram0_addr, dbg_adr1);
wire dbg_smatch11 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram1_addr, dbg_adr1);
wire dbg_smatch12 = fnDbgAdrMatch(dbg_ctrl[1], dbg_ctrl[21:20], dbg_ctrl[23:22], dram2_addr, dbg_adr1);
wire dbg_smatch20 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram0_addr, dbg_adr2);
wire dbg_smatch21 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram1_addr, dbg_adr2);
wire dbg_smatch22 = fnDbgAdrMatch(dbg_ctrl[2], dbg_ctrl[25:24], dbg_ctrl[27:26], dram2_addr, dbg_adr2);
wire dbg_smatch30 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram0_addr, dbg_adr3);
wire dbg_smatch31 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram1_addr, dbg_adr3);
wire dbg_smatch32 = fnDbgAdrMatch(dbg_ctrl[3], dbg_ctrl[29:28], dbg_ctrl[31:30], dram2_addr, dbg_adr3);
wire dbg_smatch0 = dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30;
wire dbg_smatch1 = dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31;
wire dbg_smatch2 = dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32;

wire dbg_smatch  =   dbg_smatch00|dbg_smatch10|dbg_smatch20|dbg_smatch30|
                     dbg_smatch01|dbg_smatch11|dbg_smatch21|dbg_smatch31|
                     dbg_smatch02|dbg_smatch12|dbg_smatch22|dbg_smatch32
                    ;

// Per-comparator status: any instruction, load or store match on comparator k.
wire dbg_stat0 = dbg_imatchA0 | dbg_imatchB0 | dbg_lmatch00 | dbg_lmatch01 | dbg_lmatch02 | dbg_smatch00 | dbg_smatch01 | dbg_smatch02;
wire dbg_stat1 = dbg_imatchA1 | dbg_imatchB1 | dbg_lmatch10 | dbg_lmatch11 | dbg_lmatch12 | dbg_smatch10 | dbg_smatch11 | dbg_smatch12;
wire dbg_stat2 = dbg_imatchA2 | dbg_imatchB2 | dbg_lmatch20 | dbg_lmatch21 | dbg_lmatch22 | dbg_smatch20 | dbg_smatch21 | dbg_smatch22;
wire dbg_stat3 = dbg_imatchA3 | dbg_imatchB3 | dbg_lmatch30 | dbg_lmatch31 | dbg_lmatch32 | dbg_smatch30 | dbg_smatch31 | dbg_smatch32;
assign dbg_stat1x = {dbg_stat3,dbg_stat2,dbg_stat1,dbg_stat0};
wire debug_on = |dbg_ctrl[3:0]|dbg_ctrl[7]|dbg_ctrl[63];

// Instruction-address matches against fetchbuf0_pc.
// Every output is assigned on every evaluation.  The previous form only ever
// assigned `TRUE inside an if, which inferred latches that could never
// return to zero once a match had occurred.
always @*
begin
	dbg_imatchA0 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf0_pc==dbg_adr0;
	dbg_imatchA1 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf0_pc==dbg_adr1;
	dbg_imatchA2 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf0_pc==dbg_adr2;
	dbg_imatchA3 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf0_pc==dbg_adr3;
	dbg_imatchA  = dbg_imatchA0|dbg_imatchA1|dbg_imatchA2|dbg_imatchA3;
end

// Instruction-address matches against fetchbuf1_pc (same latch fix as above).
always @*
begin
	dbg_imatchB0 = dbg_ctrl[0] && dbg_ctrl[17:16]==2'b00 && fetchbuf1_pc==dbg_adr0;
	dbg_imatchB1 = dbg_ctrl[1] && dbg_ctrl[21:20]==2'b00 && fetchbuf1_pc==dbg_adr1;
	dbg_imatchB2 = dbg_ctrl[2] && dbg_ctrl[25:24]==2'b00 && fetchbuf1_pc==dbg_adr2;
	dbg_imatchB3 = dbg_ctrl[3] && dbg_ctrl[29:28]==2'b00 && fetchbuf1_pc==dbg_adr3;
	dbg_imatchB  = dbg_imatchB0|dbg_imatchB1|dbg_imatchB2|dbg_imatchB3;
end
`endif

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

// freezePC
// squashes the pc increment if there's an irq.
// If there is a segment prefix present then defer the freezing of the pc.
// If a hardware interrupt instruction is encountered in the instruction stream
// flag it as a privilege violation.
// (The privilege-violation substitution is currently commented out in each
// mux below.)
//
// Instruction selection for each fetch slot:
//   - interrupt pending (freezePC): substitute a BRK built from irq_i / vec_i
//     (slots 1 and 2 only do this when thread_en is clear)
//   - otherwise, when phit is set: pass the fetched instruction (insnNa)
//   - otherwise: substitute a NOP
// The muxes are purely combinational, so blocking assignments are used, and
// all three slots spell out the 8'h00 upper-byte padding explicitly (insn2
// previously relied on implicit zero-extension).
wire freezePC = (irq_i > im) && !int_commit;

always @*
if (freezePC)
	insn0 = {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
else if (phit) begin
//	if (insn0a[`INSTRUCTION_OP]==`BRK && insn0a[25:21]==5'd0 && insn0a[`INSTRUCTION_L2]==2'b00)
//		insn0 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
//	else
	insn0 = insn0a;
end
else
	insn0 = {8'h00,`NOP_INSN};

generate begin : gInsnMux
if (`WAYS > 1) begin
always @*
if (freezePC && !thread_en)
	insn1 = {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
else if (phit) begin
//	if (insn1a[`INSTRUCTION_OP]==`BRK && insn1a[25:21]==5'd0 && insn1a[`INSTRUCTION_L2]==2'b00)
//		insn1 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
//	else
	insn1 = insn1a;
end
else
	insn1 = {8'h00,`NOP_INSN};
end
if (`WAYS > 2) begin
always @*
if (freezePC && !thread_en)
	insn2 = {8'h00,6'd0,5'd0,irq_i,1'b0,vec_i,2'b00,`BRK};
else if (phit) begin
//	if (insn2a[`INSTRUCTION_OP]==`BRK && insn1a[25:21]==5'd0 && insn2a[`INSTRUCTION_L2]==2'b00)
//		insn2 <= {6'd1,5'd0,4'b0,1'b0,`FLT_PRIV,2'b00,`BRK};
//	else
	insn2 = insn2a;
end
else
	insn2 = {8'h00,`NOP_INSN};
end
end
endgenerate

// Load data per memory channel: the external bus data (xdati) when the
// channel's dramN_unc flag is set, otherwise the data cache output.
wire [63:0] dc0_out, dc1_out, dc2_out;
assign rdat0 = dram0_unc ? xdati : dc0_out;
assign rdat1 = dram1_unc ? xdati : dc1_out;
assign rdat2 = dram2_unc ? xdati : dc2_out;

reg preload;
reg [1:0] dccnt;
wire dhit0, dhit1, dhit2;
wire dhit0a, dhit1a, dhit2a;
wire dhit00, dhit10, dhit20;
wire dhit01, dhit11, dhit21;
reg [`ABITS] dc_wadr;
reg [63:0] dc_wdat;
reg isStore;

// If the data is in the write buffer, give the buffer a chance to
// write out the data before trying to load from the cache.
// wb_hitN is set when any valid write-buffer entry addresses the same
// 8-byte-aligned location (address bits AMSB..3) as memory channel N.
// Channels 1 and 2 are only checked when `NUM_MEM is large enough.
// Combinational block: defaults first, then blocking overrides in the loop.
reg wb_hit0, wb_hit1, wb_hit2;
always @*
begin
	wb_hit0 = FALSE;
	wb_hit1 = FALSE;
	wb_hit2 = FALSE;
	for (n = 0; n < `WB_DEPTH; n = n + 1) begin
		if (wb_v[n] && wb_addr[n][AMSB:3]==dram0_addr[AMSB:3])
			wb_hit0 = TRUE;
		if (`NUM_MEM > 1 && wb_v[n] && wb_addr[n][AMSB:3]==dram1_addr[AMSB:3])
			wb_hit1 = TRUE;
		if (`NUM_MEM > 2 && wb_v[n] && wb_addr[n][AMSB:3]==dram2_addr[AMSB:3])
			wb_hit2 = TRUE;
	end
end

// A cache read hit only counts when the write buffer holds nothing for the
// same location.
assign dhit0 = dhit0a && !wb_hit0;
assign dhit1 = dhit1a && !wb_hit1;
assign dhit2 = dhit2a && !wb_hit2;
wire whit0, whit1, whit2;

// Cache write enables: always on a line-load acknowledge; on a store
// (B_StoreAck with StoreAck1, or B19 with isStore) only when that cache
// reports a write hit.
wire wr_dcache0 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit0);
wire wr_dcache1 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit1);
wire wr_dcache2 = (bstate==B_DCacheLoadAck && ack_i)||(((bstate==B_StoreAck && StoreAck1) || (bstate==B19 && isStore)) && whit2);

// One data cache per memory channel.  All share the same write side
// (address {pcr[7:0],vadr}, data dat_i during a line load else dat_o) and
// differ only in their read address/size and hit/data outputs.
FT64_dcache udc0
(
  .rst(rst),
  .wclk(clk),
  .dce(dce),
  .wr(wr_dcache0),
  .sel(sel_o),
  .wadr({pcr[7:0],vadr}),
  .whit(whit0),
  .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
  .rclk(clk),
  .rdsize(dram0_memsize),
  .radr({pcr[7:0],dram0_addr}),
  .o(dc0_out),
  .rhit(dhit0a)
);
generate begin : gDCacheInst
if (`NUM_MEM > 1) begin
FT64_dcache udc1
(
  .rst(rst),
  .wclk(clk),
  .dce(dce),
  .wr(wr_dcache1),
  .sel(sel_o),
  .wadr({pcr[7:0],vadr}),
  .whit(whit1),
  .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
  .rclk(clk),
  .rdsize(dram1_memsize),
  .radr({pcr[7:0],dram1_addr}),
  .o(dc1_out),
  .rhit(dhit1a)
);
end
if (`NUM_MEM > 2) begin
FT64_dcache udc2
(
  .rst(rst),
  .wclk(clk),
  .dce(dce),
  .wr(wr_dcache2),
  .sel(sel_o),
  .wadr({pcr[7:0],vadr}),
  .whit(whit2),
  .i((bstate==B_DCacheLoadAck) ? dat_i : dat_o),
  .rclk(clk),
  .rdsize(dram2_memsize),
  .radr({pcr[7:0],dram2_addr}),
  .o(dc2_out),
  .rhit(dhit2a)
);
end
end
endgenerate

`ifdef SUPPORT_SMT
// fnRa (SMT variant): register-file specifier for source operand A.
//   isn  : instruction word
//   vqei : vector element index used for vector-register operands
//   vli  : vector length, used to bound the VSHLV/VSHRV element index
//   thrd : thread select, indexes rgs[] / fp_rgs[] / rs_stack[]
// Result layout as built below: {6-bit set or element, 1-bit flag (1 on the
// vector / mask-register paths), 5-bit register number}.
function [RBIT:0] fnRa;
input [47:0] isn;
input [5:0] vqei;
input [5:0] vli;
input thrd;
case(isn[`INSTRUCTION_OP])
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VCIDX,`VSCAN:	fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
	`VMxx:
		case(isn[25:23])
		`VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST:
				fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
		`VMFILL:fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]};
		default:fnRa = {6'h3F,1'b1,2'b0,isn[10:8]};
		endcase
	`VSHLV:	fnRa = (vqei+1+isn[15:11] >= vli) ? 11'h000 : {vli-vqei-isn[15:11]-1,1'b1,isn[`INSTRUCTION_RA]};
	`VSHRV:	fnRa = (vqei+isn[15:11] >= vli) ? 11'h000 : {vqei+isn[15:11],1'b1,isn[`INSTRUCTION_RA]};
	`VSxx,`VSxxU,`VSxxS,`VSxxSU:	fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
	default:	fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]};
	endcase
`R2:
	casez(isn[`INSTRUCTION_S2])
	`MOV:
		case(isn[25:23])
		3'd0:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		3'd1:	fnRa = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RA]};
		3'd2:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		3'd3:	fnRa = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RA]};
		3'd4:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		3'd5:	fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		3'd6:	fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		default:fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		endcase
	`VMOV:
		case (isn[`INSTRUCTION_S1])
		5'h0:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		5'h1:	fnRa = {6'h3F,1'b1,isn[`INSTRUCTION_RA]};
		// Previously missing: without a default the function result was
		// left unassigned for any other S1 code.  Mirrors the non-SMT
		// variant of fnRa.
		default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
		endcase
	default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
	endcase
`FLOAT:		fnRa = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
default:	fnRa = {rgs[thrd],1'b0,isn[`INSTRUCTION_RA]};
endcase
endfunction

// fnRb (SMT variant): register-file specifier for source operand B.
//   fb selects between rfoa1i (fb = 1) and rfoa0i (fb = 0) for `VEX.
function [RBIT:0] fnRb;
input [47:0] isn;
input fb;
input [5:0] vqei;
input [5:0] rfoa0i;
input [5:0] rfoa1i;
input thrd;
// Body of fnRb (SMT variant): selects the operand-B register specifier.
case(isn[`INSTRUCTION_OP])
`R2:
	case(isn[`INSTRUCTION_S2])
	`VEX:       fnRb = fb ? {rfoa1i,1'b1,isn[`INSTRUCTION_RB]} : {rfoa0i,1'b1,isn[`INSTRUCTION_RB]};
	`LVX,`SVX:  fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
	default:    fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
	endcase
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VMxx:
		case(isn[25:23])
		`VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP:
			fnRb = {6'h3F,1'b1,2'b0,isn[20:18]};
		default:	fnRb = 12'h000;
		endcase
	`VXCHG:        fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
	`VSxx,`VSxxU:  fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
	// NOTE(review): element index combined with a cleared flag bit, unlike
	// the other vector-scalar ops below which use rgs[thrd] - confirm.
	`VSxxS,`VSxxSU:    fnRb = {vqei,1'b0,isn[`INSTRUCTION_RB]};
	// Vector-scalar arithmetic (`VXORS was listed twice; duplicate label
	// removed).
	`VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS:
		fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
	`VSHL,`VSHR,`VASR:
		fnRb = {isn[25],isn[22]}==2'b00 ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]};
	default:	fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]};
	endcase
`FLOAT:		fnRb = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
default:	fnRb = {rgs[thrd],1'b0,isn[`INSTRUCTION_RB]};
endcase
endfunction

// fnRc (SMT variant): register-file specifier for source operand C.
//   isn  : instruction word
//   vqei : vector element index used for vector-register operands
//   thrd : thread select, indexes rgs[] / fp_rgs[]
function [RBIT:0] fnRc;
input [47:0] isn;
input [5:0] vqei;
input thrd;
case(isn[`INSTRUCTION_OP])
`R2:		fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
`MEMNDX:	fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};	// SVX not implemented
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VSxx,`VSxxS,`VSxxU,`VSxxSU:	fnRc = {6'h3F,1'b1,2'b0,isn[25:23]};
	default:	fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]};
	endcase
`FLOAT:		fnRc = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
default:	fnRc = {rgs[thrd],1'b0,isn[`INSTRUCTION_RC]};
endcase
endfunction

// fnRt (SMT variant): register-file specifier for the target register.
// A result of zero is returned for the cases below that assign 12'd0 /
// 12'h000.
//   isn  : instruction word
//   vqei : vector element index used for vector-register targets
//   vli  : vector length, used to bound the VSHLV/VSHRV element index
//   thrd : thread select, indexes rgs[] / fp_rgs[] / rs_stack[]
function [RBIT:0] fnRt;
input [47:0] isn;
input [5:0] vqei;
input [5:0] vli;
input thrd;
casez(isn[`INSTRUCTION_OP])
`IVECTOR:
	case(isn[`INSTRUCTION_S2])
	`VMxx:
		case(isn[25:23])
		`VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL:
			fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
		`VMPOP:	fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]};
		default:
			fnRt = {6'h3F,1'b1,2'b0,isn[15:13]};
		endcase
`VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + `VSHLV: fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]}; + `VSHRV: fnRt = (vqei >= vli) ? 11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]}; + `VEINS: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; // ToDo: add element # from Ra + `V2BITS: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = {vqei,1'b1,isn[`INSTRUCTION_Rt]}; + endcase + +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `CMOVEZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `CMOVNZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'd0; + endcase + else + casez(isn[`INSTRUCTION_S2]) + `MOV: + case(isn[25:23]) + 3'd0: fnRt = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RT]}; + 3'd1: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + 3'd2: fnRt = {rs_stack[thrd][5:0],1'b0,isn[`INSTRUCTION_RT]}; + 3'd3: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + 3'd4: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + 3'd5: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + 3'd6: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + default:fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + endcase + `VMOV: + case (isn[`INSTRUCTION_S1]) + 5'h0: fnRt = {6'h3F,1'b1,isn[`INSTRUCTION_RT]}; + 5'h1: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'h000; + endcase + `R1: + case(isn[22:18]) + `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH: + fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `MEMDB,`MEMSB,`SYNC: + fnRt = 12'd0; + default: fnRt = 12'd0; + endcase + `CMOVEZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `CMOVNZ: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `MUX: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `MIN: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `MAX: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `LVX: fnRt = {vqei,1'b1,isn[20:16]}; + `SHIFTR: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `SHIFT31,`SHIFT63: + fnRt = 
{rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `SEI: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `WAIT,`RTI,`CHK: + fnRt = 12'd0; + default: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + endcase +`MEMNDX: + begin + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `LVX, + `CACHEX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: + fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'd0; + endcase + else + case({isn[31:28],isn[17:16]}) + `PUSH: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + `SBX,`SCX,`SHX,`SWX,`SWCX,`CACHEX: + fnRt = 12'd0; + default: fnRt = 12'd0; + endcase + end +`FLOAT: + case(isn[31:26]) + `FTX,`FCX,`FEX,`FDX,`FRM: + fnRt = 12'd0; + `FSYNC: fnRt = 12'd0; + default: fnRt = {fp_rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; + endcase +`BRK: fnRt = 12'd0; +`REX: fnRt = 12'd0; +`CHK: fnRt = 12'd0; +`EXEC: fnRt = 12'd0; +`Bcc: fnRt = 12'd0; +`BBc: fnRt = 12'd0; +`NOP: fnRt = 12'd0; +`BEQI: fnRt = 12'd0; +`SB,`Sx,`SWC,`CACHE: + fnRt = 12'd0; +`JMP: fnRt = 12'd0; +`CALL: fnRt = {rgs[thrd],1'b0,5'd29}; // regLR +`LV: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; +`AMO: fnRt = isn[31] ? {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]} : {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; +`AUIPC,`LUI: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; +default: fnRt = {rgs[thrd],1'b0,isn[`INSTRUCTION_RT]}; +endcase +endfunction +`else +function [RBIT:0] fnRa; +input [47:0] isn; +input [5:0] vqei; +input [5:0] vli; +input thrd; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VCIDX,`VSCAN: fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]}; + `VMxx: + case(isn[25:23]) + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP,`VMFIRST,`VMLAST: + fnRa = {6'h3F,1'b1,2'b0,isn[10:8]}; + `VMFILL:fnRa = {6'd0,1'b1,isn[`INSTRUCTION_RA]}; + default:fnRa = {6'h3F,1'b1,2'b0,isn[10:8]}; + endcase + `VSHLV: fnRa = (vqei+1+isn[15:11] >= vli) ? 11'h000 : {vli-vqei-isn[15:11]-1,1'b1,isn[`INSTRUCTION_RA]}; + `VSHRV: fnRa = (vqei+isn[15:11] >= vli) ? 
11'h000 : {vqei+isn[15:11],1'b1,isn[`INSTRUCTION_RA]}; + `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]}; + default: fnRa = {vqei,1'b1,isn[`INSTRUCTION_RA]}; + endcase +`R2: + casez(isn[`INSTRUCTION_S2]) + `MOV: + case(isn[25:23]) + 3'd0: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + 3'd1: fnRa = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RA]}; + 3'd2: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + 3'd3: fnRa = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RA]}; + 3'd4: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + 3'd5: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; + 3'd6: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; + default:fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + endcase + `VMOV: + case (isn[`INSTRUCTION_S1]) + 5'h0: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + 5'h1: fnRa = {6'h3F,1'b1,isn[`INSTRUCTION_RA]}; + default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + endcase + default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; + endcase +`FLOAT: fnRa = {fp_rgs,1'b0,isn[`INSTRUCTION_RA]}; +default: fnRa = {rgs,1'b0,isn[`INSTRUCTION_RA]}; +endcase +endfunction + +function [RBIT:0] fnRb; +input [47:0] isn; +input fb; +input [5:0] vqei; +input [5:0] rfoa0i; +input [5:0] rfoa1i; +input thrd; +case(isn[`INSTRUCTION_OP]) +`RR: case(isn[`INSTRUCTION_S2]) + `VEX: fnRb = fb ? {rfoa1i,1'b1,isn[`INSTRUCTION_RB]} : {rfoa0i,1'b1,isn[`INSTRUCTION_RB]}; + `LVX,`SVX: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; + default: fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]}; + endcase +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMxx: + case(isn[25:23]) + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: + fnRb = {6'h3F,1'b1,2'b0,isn[20:18]}; + default: fnRb = 12'h000; + endcase + `VXCHG: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; + `VSxx,`VSxxU: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; + `VSxxS,`VSxxSU: fnRb = {vqei,1'b0,isn[`INSTRUCTION_RB]}; + `VADDS,`VSUBS,`VMULS,`VANDS,`VORS,`VXORS,`VXORS: + fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]}; + `VSHL,`VSHR,`VASR: + fnRb = {isn[25],isn[22]}==2'b00 ? 
{rgs,1'b0,isn[`INSTRUCTION_RB]} : {vqei,1'b1,isn[`INSTRUCTION_RB]}; + default: fnRb = {vqei,1'b1,isn[`INSTRUCTION_RB]}; + endcase +`FLOAT: fnRb = {fp_rgs,1'b0,isn[`INSTRUCTION_RB]}; +default: fnRb = {rgs,1'b0,isn[`INSTRUCTION_RB]}; +endcase +endfunction + +function [RBIT:0] fnRc; +input [47:0] isn; +input [5:0] vqei; +input thrd; +case(isn[`INSTRUCTION_OP]) +`R2: fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]}; +`MEMNDX: fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]}; // SVX not implemented +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VSxx,`VSxxS,`VSxxU,`VSxxSU: fnRc = {6'h3F,1'b1,2'b0,isn[25:23]}; + default: fnRc = {vqei,1'b1,isn[`INSTRUCTION_RC]}; + endcase +`FLOAT: fnRc = {fp_rgs,1'b0,isn[`INSTRUCTION_RC]}; +default: fnRc = {rgs,1'b0,isn[`INSTRUCTION_RC]}; +endcase +endfunction + +function [RBIT:0] fnRt; +input [47:0] isn; +input [5:0] vqei; +input [5:0] vli; +input thrd; +casez(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMxx: + case(isn[25:23]) + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL: + fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + `VMPOP: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: + fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + endcase + `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + `VSHLV: fnRt = (vqei+1 >= vli) ? 11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]}; + `VSHRV: fnRt = (vqei >= vli) ? 11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]}; + `VEINS: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; // ToDo: add element # from Ra + `V2BITS: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; + endcase + +`FVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMxx: + case(isn[25:23]) + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMFILL: + fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + `VMPOP: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RB]}; + default: + fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + endcase + `VSxx,`VSxxU,`VSxxS,`VSxxSU: fnRt = {6'h3F,1'b1,2'b0,isn[15:13]}; + `VSHLV: fnRt = (vqei+1 >= vli) ? 
11'h000 : {vli-vqei-1,1'b1,isn[`INSTRUCTION_RT]}; + `VSHRV: fnRt = (vqei >= vli) ? 11'h000 : {vqei,1'b1,isn[`INSTRUCTION_RT]}; + `VEINS: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; // ToDo: add element # from Ra + `V2BITS: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; + endcase + +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `CMOVEZ: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `CMOVNZ: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'd0; + endcase + else + casez(isn[`INSTRUCTION_S2]) + `MOV: + case(isn[25:23]) + 3'd0: fnRt = {isn[26],isn[22:18],1'b0,isn[`INSTRUCTION_RT]}; + 3'd1: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + 3'd2: fnRt = {rs_stack[5:0],1'b0,isn[`INSTRUCTION_RT]}; + 3'd3: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + 3'd4: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]}; + 3'd5: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + 3'd6: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]}; + default:fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + endcase + `VMOV: + case (isn[`INSTRUCTION_S1]) + 5'h0: fnRt = {6'h3F,1'b1,isn[`INSTRUCTION_RT]}; + 5'h1: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'h000; + endcase + `R1: + case(isn[22:18]) + `CNTLO,`CNTLZ,`CNTPOP,`ABS,`NOT,`NEG,`REDOR,`ZXB,`ZXC,`ZXH,`SXB,`SXC,`SXH: + fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `MEMDB,`MEMSB,`SYNC: + fnRt = 12'd0; + default: fnRt = 12'd0; + endcase + `MUX: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `MIN: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `MAX: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `LVX: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; + `SHIFTR: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `SHIFT31,`SHIFT63: + fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `SEI: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `WAIT,`RTI,`CHK: + fnRt = 12'd0; + default: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + endcase +`MEMNDX: + begin + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `LVX, + `CACHEX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + 
`LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: + fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + default: fnRt = 12'd0; + endcase + else + case({isn[31:28],isn[17:16]}) + `PUSH: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; + `SBX,`SCX,`SHX,`SWX,`SWCX,`CACHEX: + fnRt = 12'd0; + default: fnRt = 12'd0; + endcase + end +`FLOAT: + case(isn[31:26]) + `FTX,`FCX,`FEX,`FDX,`FRM: + fnRt = 12'd0; + `FSYNC: fnRt = 12'd0; + default: fnRt = {fp_rgs,1'b0,isn[`INSTRUCTION_RT]}; + endcase +`BRK: fnRt = 12'd0; +`REX: fnRt = 12'd0; +`CHK: fnRt = 12'd0; +`EXEC: fnRt = 12'd0; +`Bcc: fnRt = 12'd0; +`BBc: fnRt = 12'd0; +`NOP: fnRt = 12'd0; +`BEQI: fnRt = 12'd0; +`SB,`Sx,`SWC,`CACHE: + fnRt = 12'd0; +`JMP: fnRt = 12'd0; +`CALL: fnRt = {rgs,1'b0,5'd29}; // regLR +`LV: fnRt = {vqei,1'b1,isn[`INSTRUCTION_RT]}; +`AMO: fnRt = isn[31] ? {rgs,1'b0,isn[`INSTRUCTION_RT]} : {rgs,1'b0,isn[`INSTRUCTION_RT]}; +`AUIPC,`LUI: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; +default: fnRt = {rgs,1'b0,isn[`INSTRUCTION_RT]}; +endcase +endfunction +`endif + +// Determines which lanes of the target register get updated. +// Duh, all the lanes. +function [7:0] fnWe; +input [47:0] isn; +casez(isn[`INSTRUCTION_OP]) +`R2: + case(isn[`INSTRUCTION_S2]) + `CMP: fnWe = 8'h00; + default: fnWe = 8'hFF; + endcase +`CMPI: fnWe = 8'h00; +default: fnWe = 8'hFF; +endcase +/* +casez(isn[`INSTRUCTION_OP]) +`R2: + case(isn[`INSTRUCTION_S2]) + `R1: + case(isn[22:18]) + `ABS,`CNTLZ,`CNTLO,`CNTPOP: + case(isn[25:23]) + 3'b000: fnWe = 8'h01; + 3'b001: fnWe = 8'h03; + 3'b010: fnWe = 8'h0F; + 3'b011: fnWe = 8'hFF; + default: fnWe = 8'hFF; + endcase + default: fnWe = 8'hFF; + endcase + `SHIFT31: fnWe = (~isn[25] & isn[21]) ? 8'hFF : 8'hFF; + `SHIFT63: fnWe = (~isn[25] & isn[21]) ? 
8'hFF : 8'hFF; + `SLT,`SLTU,`SLE,`SLEU, + `ADD,`SUB, + `AND,`OR,`XOR, + `NAND,`NOR,`XNOR, + `DIV,`DIVU,`DIVSU, + `MOD,`MODU,`MODSU, + `MUL,`MULU,`MULSU, + `MULH,`MULUH,`MULSUH, + `FXMUL: + case(isn[25:23]) + 3'b000: fnWe = 8'h01; + 3'b001: fnWe = 8'h03; + 3'b010: fnWe = 8'h0F; + 3'b011: fnWe = 8'hFF; + default: fnWe = 8'hFF; + endcase + default: fnWe = 8'hFF; + endcase +default: fnWe = 8'hFF; +endcase +*/ +endfunction + +// Detect if a source is automatically valid +function Source1Valid; +input [47:0] isn; +casez(isn[`INSTRUCTION_OP]) +`BRK: Source1Valid = isn[16] ? isn[`INSTRUCTION_RA]==5'd0 : TRUE; +`Bcc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`BBc: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`BEQI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`CHK: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`RR: case(isn[`INSTRUCTION_S2]) + `SHIFT31: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; + `SHIFT63: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; + `SHIFTR: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; + default: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; + endcase +`MEMNDX:Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`ADDI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SLTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SLTUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SGTI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SGTUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`ANDI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`ORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`XORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`XNORI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`MULI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`MULUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`MULFI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`DIVI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`DIVUI: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`AMO: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LB: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LBU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`Lx: 
Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LxU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LWR: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LV: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LVx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`LVxU: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SB: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`Sx: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SWC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`SV: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`INC: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`CAS: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`CACHE: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`JAL: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`RET: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`CSRRW: Source1Valid = isn[`INSTRUCTION_RA]==5'd0; +`BITFIELD: case(isn[47:44]) + `BFINSI: Source1Valid = TRUE; + default: Source1Valid = isn[`INSTRUCTION_RA]==5'd0 || isn[30]==1'b0; + endcase +`IVECTOR: + Source1Valid = FALSE; +default: Source1Valid = TRUE; +endcase +endfunction + +function Source2Valid; +input [47:0] isn; +casez(isn[`INSTRUCTION_OP]) +`BRK: Source2Valid = TRUE; +`Bcc: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`BBc: Source2Valid = TRUE; +`BEQI: Source2Valid = TRUE; +`CHK: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`R2: casez(isn[`INSTRUCTION_S2]) + `TLB: Source2Valid = TRUE; + `R1: Source2Valid = TRUE; + `MOV: Source2Valid = TRUE; + `SHIFTR: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0; + `SHIFT31: Source2Valid = isn[25] ? 1'b1 : isn[`INSTRUCTION_RB]==5'd0; + `SHIFT63: Source2Valid = isn[25] ? 
1'b1 : isn[`INSTRUCTION_RB]==5'd0; + `LVX,`SVX: Source2Valid = FALSE; + default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; + endcase +`MEMNDX: + begin + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `LVX: Source2Valid = FALSE; + `CACHEX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: Source2Valid = TRUE; + default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; + endcase + else + case({isn[31:28],isn[17:16]}) + `SVX: Source2Valid = FALSE; + default: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; + endcase + end +`ADDI: Source2Valid = TRUE; +`SLTI: Source2Valid = TRUE; +`SLTUI: Source2Valid = TRUE; +`SGTI: Source2Valid = TRUE; +`SGTUI: Source2Valid = TRUE; +`ANDI: Source2Valid = TRUE; +`ORI: Source2Valid = TRUE; +`XORI: Source2Valid = TRUE; +`XNORI: Source2Valid = TRUE; +`MULUI: Source2Valid = TRUE; +`MULFI: Source2Valid = TRUE; +`LB: Source2Valid = TRUE; +`LBU: Source2Valid = TRUE; +`Lx: Source2Valid = TRUE; +`LxU: Source2Valid = TRUE; +`LWR: Source2Valid = TRUE; +`LVx: Source2Valid = TRUE; +`LVxU: Source2Valid = TRUE; +`INC: Source2Valid = TRUE; +`SB: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`Sx: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`SWC: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`CAS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`JAL: Source2Valid = TRUE; +`RET: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VABS: Source2Valid = TRUE; + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: + Source2Valid = FALSE; + `VADDS,`VSUBS,`VANDS,`VORS,`VXORS: + Source2Valid = isn[`INSTRUCTION_RB]==5'd0; + `VBITS2V: Source2Valid = TRUE; + `V2BITS: Source2Valid = isn[`INSTRUCTION_RB]==5'd0; + `VSHL,`VSHR,`VASR: Source2Valid = isn[22:21]==2'd2; + default: Source2Valid = FALSE; + endcase +`LV: Source2Valid = TRUE; +`SV: Source2Valid = FALSE; +`AMO: Source2Valid = isn[31] || isn[`INSTRUCTION_RB]==5'd0; +`BITFIELD: Source2Valid = isn[`INSTRUCTION_RB]==5'd0 || isn[31]==1'b0; +default: Source2Valid = TRUE; 
+endcase +endfunction + +function Source3Valid; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VEX: Source3Valid = TRUE; + default: Source3Valid = TRUE; + endcase +`CHK: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `CMOVEZ,`CMOVNZ: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + default: Source3Valid = TRUE; + endcase + else + case(isn[`INSTRUCTION_S2]) + `MAJ: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + default: Source3Valid = TRUE; + endcase +`MEMNDX: + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `CACHEX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: + Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + default: Source3Valid = TRUE; + endcase + else + case({isn[31:28],isn[17:16]}) + `PUSH: Source3Valid = TRUE; + `SBX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + `SCX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + `SHX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + `SWX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + `SWCX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + `CASX: Source3Valid = isn[`INSTRUCTION_RC]==5'd0; + default: Source3Valid = TRUE; + endcase +`BITFIELD: Source3Valid = isn[`INSTRUCTION_RC]==5'd0 || isn[32]==1'b0; +default: Source3Valid = TRUE; +endcase +endfunction + +// For predication logic +function SourceTValid; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`BRK: SourceTValid = TRUE; +`Bcc: SourceTValid = TRUE; +`BBc: SourceTValid = TRUE; +`BEQI: SourceTValid = TRUE; +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VEX: SourceTValid = TRUE; + default: SourceTValid = TRUE; + endcase +`CHK: SourceTValid = isn[`INSTRUCTION_RT]==5'd0; +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `CMOVEZ,`CMOVNZ: SourceTValid = isn[`INSTRUCTION_RT]==5'd0; + default: SourceTValid = TRUE; + endcase + else + case(isn[`INSTRUCTION_S2]) + `MAJ: SourceTValid = isn[`INSTRUCTION_RT]==5'd0; + default: SourceTValid = TRUE; 
+ endcase +`MEMNDX: + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `CACHEX, + `LVBX,`LVBUX,`LVCX,`LVCUX,`LVHX,`LVHUX,`LVWX, + `LBX,`LBUX,`LCX,`LCUX,`LHX,`LHUX,`LWX,`LWRX: + SourceTValid = isn[`INSTRUCTION_RT]==5'd0; + default: SourceTValid = TRUE; + endcase + else + SourceTValid = TRUE; +`SB: SourceTValid = TRUE; +`Sx: SourceTValid = TRUE; +`SWC: SourceTValid = TRUE; +`CAS: SourceTValid = TRUE; +`BITFIELD: SourceTValid = isn[`INSTRUCTION_RT]==5'd0 || isn[32]==1'b0; +default: SourceTValid = isn[`INSTRUCTION_RT]==5'd0; +endcase +endfunction + +// Used to indicate to the queue logic that the instruction needs to be +// recycled to the queue VL number of times. +function IsVector; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + case(isn[`INSTRUCTION_S2]) + `LVX,`SVX: IsVector = TRUE; + default: IsVector = FALSE; + endcase +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VMxx: + case(isn[25:23]) + `VMAND,`VMOR,`VMXOR,`VMXNOR,`VMPOP: + IsVector = FALSE; + default: IsVector = TRUE; + endcase + `VEINS: IsVector = FALSE; + `VEX: IsVector = FALSE; + default: IsVector = TRUE; + endcase +`LV,`SV: IsVector = TRUE; +default: IsVector = FALSE; +endcase +endfunction + +function IsVeins; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: IsVeins = isn[`INSTRUCTION_S2]==`VEINS; +default: IsVeins = FALSE; +endcase +endfunction + +function IsVex; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: IsVex = isn[`INSTRUCTION_S2]==`VEX; +default: IsVex = FALSE; +endcase +endfunction + +function IsVCmprss; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: IsVCmprss = isn[`INSTRUCTION_S2]==`VCMPRSS || isn[`INSTRUCTION_S2]==`VCIDX; +default: IsVCmprss = FALSE; +endcase +endfunction + +function IsVShifti; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`IVECTOR: + case(isn[`INSTRUCTION_S2]) + `VSHL,`VSHR,`VASR: + IsVShifti = {isn[25],isn[22]}==2'd2; + default: IsVShifti = FALSE; + endcase +default: IsVShifti = FALSE; +endcase +endfunction + +function IsVLS; +input 
[47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (IsLoad(isn)) + case({isn[31:28],isn[22:21]}) + `LVX,`LVWS: IsVLS = TRUE; + default: IsVLS = FALSE; + endcase + else + case({isn[31:28],isn[17:16]}) + `SVX,`SVWS: IsVLS = TRUE; + default: IsVLS = FALSE; + endcase +`LV,`SV: IsVLS = TRUE; +default: IsVLS = FALSE; +endcase +endfunction + +function [1:0] fnM2; +input [31:0] isn; +case(isn[`INSTRUCTION_OP]) +`RR: fnM2 = isn[24:23]; +default: fnM2 = 2'b00; +endcase +endfunction + +function IsCmp; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b00) + case(isn[31:26]) + `CMP: IsCmp = TRUE; + default: IsCmp = FALSE; + endcase + else + IsCmp = FALSE; +`CMPI: IsCmp = TRUE; +default: IsCmp = FALSE; +endcase +endfunction + +function [0:0] IsMem; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: IsMem = TRUE; +`AMO: IsMem = TRUE; +`LB: IsMem = TRUE; +`LBU: IsMem = TRUE; +`Lx: IsMem = TRUE; +`LxU: IsMem = TRUE; +`LWR: IsMem = TRUE; +`LV,`SV: IsMem = TRUE; +`INC: IsMem = TRUE; +`SB: IsMem = TRUE; +`Sx: IsMem = TRUE; +`SWC: IsMem = TRUE; +`CAS: IsMem = TRUE; +`LVx: IsMem = TRUE; +`LVxU: IsMem = TRUE; +default: IsMem = FALSE; +endcase +endfunction + +function IsMemNdx; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: IsMemNdx = TRUE; +default: IsMemNdx = FALSE; +endcase +endfunction + +function IsLoad; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: IsLoad = !isn[31]; +`LB: IsLoad = TRUE; +`LBU: IsLoad = TRUE; +`Lx: IsLoad = TRUE; +`LxU: IsLoad = TRUE; +`LWR: IsLoad = TRUE; +`LV: IsLoad = TRUE; +`LVx: IsLoad = TRUE; +`LVxU: IsLoad = TRUE; +default: IsLoad = FALSE; +endcase +endfunction + +function IsInc; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b00) + case({isn[31:28],isn[17:16]}) + `INC: IsInc = TRUE; + default: IsInc = FALSE; + endcase + else + IsInc = FALSE; +`INC: IsInc = TRUE; +default: IsInc = FALSE; +endcase +endfunction + +function IsSWC; +input [47:0] isn; 
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	// Indexed form: decoded only when the L2 field is 2'b00; the function
+	// code is assembled from isn[31:28] and isn[17:16].
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[17:16]})
+		`SWCX:	IsSWC = TRUE;
+		default:	IsSWC = FALSE;
+		endcase
+	else
+		IsSWC = FALSE;
+`SWC:	IsSWC = TRUE;
+default:	IsSWC = FALSE;
+endcase
+endfunction
+
+// Acquire / release bits are only available on indexed SWC / LWR
+// IsSWCX: TRUE only for the indexed (MEMNDX) encoding whose function code is
+// `SWCX; unlike IsSWC it does not match the plain `SWC opcode.
+function IsSWCX;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[17:16]})
+		`SWCX:	IsSWCX = TRUE;
+		default:	IsSWCX = FALSE;
+		endcase
+	else
+		IsSWCX = FALSE;
+default:	IsSWCX = FALSE;
+endcase
+endfunction
+
+// IsLWR: TRUE for the `LWR opcode, or for a MEMNDX instruction (L2 == 2'b00)
+// whose function code {isn[31:28],isn[22:21]} is `LWRX.
+function IsLWR;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[22:21]})
+		`LWRX:	IsLWR = TRUE;
+		default:	IsLWR = FALSE;
+		endcase
+	else
+		IsLWR = FALSE;
+`LWR:	IsLWR = TRUE;
+default:	IsLWR = FALSE;
+endcase
+endfunction
+
+// IsLWRX: TRUE only for the indexed (MEMNDX) `LWRX encoding; the plain `LWR
+// opcode returns FALSE here.
+function IsLWRX;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[22:21]})
+		`LWRX:	IsLWRX = TRUE;
+		default:	IsLWRX = FALSE;
+		endcase
+	else
+		IsLWRX = FALSE;
+default:	IsLWRX = FALSE;
+endcase
+endfunction
+
+// IsCAS: TRUE for the `CAS opcode, or for a MEMNDX instruction (L2 == 2'b00)
+// whose function code {isn[31:28],isn[17:16]} is `CASX.
+function IsCAS;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[17:16]})
+		`CASX:	IsCAS = TRUE;
+		default:	IsCAS = FALSE;
+		endcase
+	else
+		IsCAS = FALSE;
+`CAS:	IsCAS = TRUE;
+default:	IsCAS = FALSE;
+endcase
+endfunction
+
+// IsAMO: TRUE when the major opcode field is `AMO.
+function IsAMO;
+input [47:0] isn;
+case(isn[`INSTRUCTION_OP])
+`AMO:	IsAMO = TRUE;
+default:	IsAMO = FALSE;
+endcase
+endfunction
+
+// Really IsPredictableBranch
+// Does not include BccR's
+// TRUE for the `Bcc, `BBc, `BEQI and `CHK opcodes. casez is used, so any
+// z/? bits in those opcode constants act as wildcards.
+function IsBranch;
+input [47:0] isn;
+casez(isn[`INSTRUCTION_OP])
+`Bcc:	IsBranch = TRUE;
+`BBc:	IsBranch = TRUE;
+`BEQI:	IsBranch = TRUE;
+`CHK:	IsBranch = TRUE;
+default:	IsBranch = FALSE;
+endcase
+endfunction
+
+// IsWait: TRUE for an `R2 instruction with L2 == 2'b00 whose S2 function
+// field is `WAIT.
+function IsWait;
+input [47:0] isn;
+IsWait = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 &&
isn[`INSTRUCTION_S2]==`WAIT; +endfunction + +function IsCall; +input [47:0] isn; +IsCall = isn[`INSTRUCTION_OP]==`CALL && isn[7]==1'b0; +endfunction + +function IsJmp; +input [47:0] isn; +IsJmp = isn[`INSTRUCTION_OP]==`JMP && isn[7]==1'b0; +endfunction + +function IsFlowCtrl; +input [47:0] isn; +casez(isn[`INSTRUCTION_OP]) +`BRK: IsFlowCtrl = TRUE; +`R2: case(isn[`INSTRUCTION_S2]) + `RTI: IsFlowCtrl = TRUE; + default: IsFlowCtrl = FALSE; + endcase +`Bcc: IsFlowCtrl = TRUE; +`BBc: IsFlowCtrl = TRUE; +`BEQI: IsFlowCtrl = TRUE; +`CHK: IsFlowCtrl = TRUE; +`JAL: IsFlowCtrl = TRUE; +`JMP: IsFlowCtrl = TRUE; +`CALL: IsFlowCtrl = TRUE; +`RET: IsFlowCtrl = TRUE; +default: IsFlowCtrl = FALSE; +endcase +endfunction + +function IsCache; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b00) + case({isn[31:28],isn[22:21]}) + `CACHEX: IsCache = TRUE; + default: IsCache = FALSE; + endcase + else + IsCache = FALSE; +`CACHE: IsCache = TRUE; +default: IsCache = FALSE; +endcase +endfunction + +function [4:0] CacheCmd; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b00) + case({isn[31:28],isn[22:21]}) + `CACHEX: CacheCmd = isn[17:13]; + default: CacheCmd = 5'd0; + endcase + else + CacheCmd = 5'd0; +`CACHE: CacheCmd = isn[15:11]; +default: CacheCmd = 5'd0; +endcase +endfunction + +function IsMemsb; +input [47:0] isn; +IsMemsb = (isn[`INSTRUCTION_OP]==`RR && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`R1 && isn[22:18]==`MEMSB); +endfunction + +function IsSEI; +input [47:0] isn; +IsSEI = (isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_L2]==2'b00 && isn[`INSTRUCTION_S2]==`SEI); +endfunction + +function IsLV; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b00) + case({isn[31:28],isn[22:21]}) + `LVX: IsLV = TRUE; + default: IsLV = FALSE; + endcase + else + IsLV = FALSE; +`LV: IsLV = TRUE; +default: IsLV = FALSE; +endcase +endfunction + +function IsRFW; +input [47:0] 
isn; +input [5:0] vqei; +input [5:0] vli; +input thrd; +if (fnRt(isn,vqei,vli,thrd)==12'd0) + IsRFW = FALSE; +else +casez(isn[`INSTRUCTION_OP]) +`IVECTOR: IsRFW = TRUE; +`FVECTOR: IsRFW = TRUE; +`R2: + if (isn[`INSTRUCTION_L2]==2'b00) + casez(isn[`INSTRUCTION_S2]) + `TLB: IsRFW = TRUE; + `R1: + case(isn[22:18]) + `MEMDB,`MEMSB,`SYNC,`SETWB,5'h14,5'h15: IsRFW = FALSE; + default: IsRFW = TRUE; + endcase + `ADD: IsRFW = TRUE; + `SUB: IsRFW = TRUE; + `SLT: IsRFW = TRUE; + `SLTU: IsRFW = TRUE; + `SLE: IsRFW = TRUE; + `SLEU: IsRFW = TRUE; + `AND: IsRFW = TRUE; + `OR: IsRFW = TRUE; + `XOR: IsRFW = TRUE; + `NAND: IsRFW = TRUE; + `NOR: IsRFW = TRUE; + `XNOR: IsRFW = TRUE; + `MULU: IsRFW = TRUE; + `MULSU: IsRFW = TRUE; + `MUL: IsRFW = TRUE; + `MULUH: IsRFW = TRUE; + `MULSUH: IsRFW = TRUE; + `MULH: IsRFW = TRUE; + `MULF: IsRFW = TRUE; + `FXMUL: IsRFW = TRUE; + `DIVU: IsRFW = TRUE; + `DIVSU: IsRFW = TRUE; + `DIV:IsRFW = TRUE; + `MODU: IsRFW = TRUE; + `MODSU: IsRFW = TRUE; + `MOD:IsRFW = TRUE; + `MOV: IsRFW = TRUE; + `VMOV: IsRFW = TRUE; + `SHIFTR,`SHIFT31,`SHIFT63: + IsRFW = TRUE; + `MIN,`MAX: IsRFW = TRUE; + `SEI: IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + else if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `CMOVEZ: IsRFW = TRUE; + `CMOVNZ: IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + else if (isn[7]==1'b1) + // The following instructions might come from a compressed version. 
+ casez(isn[`INSTRUCTION_S2]) + `ADD: IsRFW = TRUE; + `SUB: IsRFW = TRUE; + `AND: IsRFW = TRUE; + `OR: IsRFW = TRUE; + `XOR: IsRFW = TRUE; + `MOV: IsRFW = TRUE; + `SHIFTR,`SHIFT31,`SHIFT63: + IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + else + IsRFW = FALSE; +`MEMNDX: + if (isn[`INSTRUCTION_L2]==2'b10) begin + if (!isn[31]) + IsRFW = TRUE; + else + case({isn[31:28],isn[17:16]}) + `PUSH: IsRFW = TRUE; + `CASX: IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + end + else if (isn[`INSTRUCTION_L2]==2'b00) begin + if (!isn[31]) + case({isn[31:28],isn[22:21]}) + `LBX: IsRFW = TRUE; + `LBUX: IsRFW = TRUE; + `LCX: IsRFW = TRUE; + `LCUX: IsRFW = TRUE; + `LHX: IsRFW = TRUE; + `LHUX: IsRFW = TRUE; + `LWX: IsRFW = TRUE; + `LVBX: IsRFW = TRUE; + `LVBUX: IsRFW = TRUE; + `LVCX: IsRFW = TRUE; + `LVCUX: IsRFW = TRUE; + `LVHX: IsRFW = TRUE; + `LVHUX: IsRFW = TRUE; + `LVWX: IsRFW = TRUE; + `LWRX: IsRFW = TRUE; + `LVX: IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + else + case({isn[31:28],isn[17:16]}) + `PUSH: IsRFW = TRUE; + `CASX: IsRFW = TRUE; + default: IsRFW = FALSE; + endcase + end + else + IsRFW = FALSE; +`BBc: IsRFW = FALSE; +`BITFIELD: IsRFW = TRUE; +`ADDI: IsRFW = TRUE; +`SLTI: IsRFW = TRUE; +`SLTUI: IsRFW = TRUE; +`SGTI: IsRFW = TRUE; +`SGTUI: IsRFW = TRUE; +`ANDI: IsRFW = TRUE; +`ORI: IsRFW = TRUE; +`XORI: IsRFW = TRUE; +`XNORI: IsRFW = TRUE; +`MULUI: IsRFW = TRUE; +`MULI: IsRFW = TRUE; +`MULFI: IsRFW = TRUE; +`DIVUI: IsRFW = TRUE; +`DIVI: IsRFW = TRUE; +`MODI: IsRFW = TRUE; +`JAL: IsRFW = TRUE; +`CALL: IsRFW = TRUE; +`RET: IsRFW = TRUE; +`LB: IsRFW = TRUE; +`LBU: IsRFW = TRUE; +`Lx: IsRFW = TRUE; +`LxU: IsRFW = TRUE; +`LWR: IsRFW = TRUE; +`LV: IsRFW = TRUE; +`LVx: IsRFW = TRUE; +`LVxU: IsRFW = TRUE; +`CAS: IsRFW = TRUE; +`AMO: IsRFW = TRUE; +`CSRRW: IsRFW = TRUE; +`AUIPC: IsRFW = TRUE; +`LUI: IsRFW = TRUE; +default: IsRFW = FALSE; +endcase +endfunction + +function IsShifti; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if 
(isn[`INSTRUCTION_L2]==2'b00) + case(isn[`INSTRUCTION_S2]) + `SHIFT31,`SHIFT63: + IsShifti = TRUE; + default: IsShifti = FALSE; + endcase + else + IsShifti = FALSE; +default: IsShifti = FALSE; +endcase +endfunction + +function IsShift; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b00) + case(isn[31:26]) + `SHIFTR: IsShift = TRUE; + `SHIFT31: IsShift = TRUE; + `SHIFT63: IsShift = TRUE; + default: IsShift = FALSE; + endcase + else + IsShift = FALSE; +default: IsShift = FALSE; +endcase +endfunction + +function IsShift48; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `SHIFTR: IsShift48 = TRUE; + default: IsShift48 = FALSE; + endcase + else + IsShift48 = FALSE; +default: IsShift48 = FALSE; +endcase +endfunction + +function IsRtop; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b01) + case(isn[47:42]) + `RTOP: IsRtop = TRUE; + default: IsRtop = FALSE; + endcase + else + IsRtop = FALSE; +default: IsRtop = FALSE; +endcase +endfunction + +function IsMul; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b00) + case(isn[`INSTRUCTION_S2]) + `MULU,`MULSU,`MUL: IsMul = TRUE; + `MULUH,`MULSUH,`MULH: IsMul = TRUE; + default: IsMul = FALSE; + endcase + else + IsMul = FALSE; +`MULUI,`MULI: IsMul = TRUE; +default: IsMul = FALSE; +endcase +endfunction + +function IsDivmod; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + if (isn[`INSTRUCTION_L2]==2'b00) + case(isn[`INSTRUCTION_S2]) + `DIVU,`DIVSU,`DIV: IsDivmod = TRUE; + `MODU,`MODSU,`MOD: IsDivmod = TRUE; + default: IsDivmod = FALSE; + endcase + else + IsDivmod = FALSE; +`DIVUI,`DIVI,`MODI: IsDivmod = TRUE; +default: IsDivmod = FALSE; +endcase +endfunction + +function IsExec; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`EXEC: IsExec = TRUE; +default: IsExec = FALSE; +endcase +endfunction + +function [7:0] fnSelect; +input [47:0] ins; +input [`ABITS] adr; 
+begin + case(ins[`INSTRUCTION_OP]) + `MEMNDX: + if (ins[`INSTRUCTION_L2]==2'b10) begin + if (ins[31]) begin + case({ins[31:28],ins[17:16]}) + `PUSH: fnSelect = 8'hFF; + default: fnSelect = 8'h00; + endcase + end + else + fnSelect = 8'h00; + end + else if (ins[`INSTRUCTION_L2]==2'b00) begin + if (!ins[31]) + case({ins[31:28],ins[22:21]}) + `LBX,`LBUX,`LVBX,`LVBUX: + case(adr[2:0]) + 3'd0: fnSelect = 8'h01; + 3'd1: fnSelect = 8'h02; + 3'd2: fnSelect = 8'h04; + 3'd3: fnSelect = 8'h08; + 3'd4: fnSelect = 8'h10; + 3'd5: fnSelect = 8'h20; + 3'd6: fnSelect = 8'h40; + 3'd7: fnSelect = 8'h80; + endcase + `LCX,`LCUX,`LVCX,`LVCUX: + case(adr[2:1]) + 2'd0: fnSelect = 8'h03; + 2'd1: fnSelect = 8'h0C; + 2'd2: fnSelect = 8'h30; + 2'd3: fnSelect = 8'hC0; + endcase + `LHX,`LHUX,`LVHX,`LVHUX: + case(adr[2]) + 1'b0: fnSelect = 8'h0F; + 1'b1: fnSelect = 8'hF0; + endcase + `INC,`LVWX, + `LWX,`LWRX,`LVX: + fnSelect = 8'hFF; + default:fnSelect = 8'hFF; + endcase + else + case({ins[31:28],ins[17:16]}) + `SBX: + case(adr[2:0]) + 3'd0: fnSelect = 8'h01; + 3'd1: fnSelect = 8'h02; + 3'd2: fnSelect = 8'h04; + 3'd3: fnSelect = 8'h08; + 3'd4: fnSelect = 8'h10; + 3'd5: fnSelect = 8'h20; + 3'd6: fnSelect = 8'h40; + 3'd7: fnSelect = 8'h80; + endcase + `SCX: + case(adr[2:1]) + 2'd0: fnSelect = 8'h03; + 2'd1: fnSelect = 8'h0C; + 2'd2: fnSelect = 8'h30; + 2'd3: fnSelect = 8'hC0; + endcase + `SHX: + case(adr[2]) + 1'b0: fnSelect = 8'h0F; + 1'b1: fnSelect = 8'hF0; + endcase + `INC, + `SWX,`SWCX,`SVX,`CASX,`PUSH: + fnSelect = 8'hFF; + default: fnSelect = 8'h00; + endcase + end + else + fnSelect = 8'h00; + `LB,`LBU,`SB: + case(adr[2:0]) + 3'd0: fnSelect = 8'h01; + 3'd1: fnSelect = 8'h02; + 3'd2: fnSelect = 8'h04; + 3'd3: fnSelect = 8'h08; + 3'd4: fnSelect = 8'h10; + 3'd5: fnSelect = 8'h20; + 3'd6: fnSelect = 8'h40; + 3'd7: fnSelect = 8'h80; + endcase + `Lx,`LxU,`LVx,`LVxU: + casez(ins[20:18]) + 3'b100: fnSelect = 8'hFF; + 3'b?10: fnSelect = adr[2] ? 
8'hF0 : 8'h0F;
+			3'b??1:
+				case(adr[2:1])
+				2'd0:	fnSelect = 8'h03;
+				2'd1:	fnSelect = 8'h0C;
+				2'd2:	fnSelect = 8'h30;
+				2'd3:	fnSelect = 8'hC0;
+				endcase
+			default: fnSelect = 8'h00;
+			endcase
+	`Sx:
+			// Same size decode as the loads above, but the size field of a
+			// store is taken from ins[15:13] instead of ins[20:18].
+			casez(ins[15:13])
+			3'b100:	fnSelect = 8'hFF;
+			3'b?10:	fnSelect = adr[2] ? 8'hF0 : 8'h0F;
+			3'b??1:
+				case(adr[2:1])
+				2'd0:	fnSelect = 8'h03;
+				2'd1:	fnSelect = 8'h0C;
+				2'd2:	fnSelect = 8'h30;
+				2'd3:	fnSelect = 8'hC0;
+				endcase
+			default: fnSelect = 8'h00;
+			endcase
+	`INC,
+	`LWR,`SWC,`CAS:	fnSelect = 8'hFF;
+	`LV,`SV:	fnSelect = 8'hFF;
+	`AMO:
+		// The byte-lane mask is shifted to the naturally aligned position
+		// given by the low address bits; ins[23:21] selects the width.
+		case(ins[23:21])
+		3'd0:	fnSelect = {8'h01 << adr[2:0]};
+		3'd1:	fnSelect = {8'h03 << {adr[2:1],1'b0}};
+		3'd2:	fnSelect = {8'h0F << {adr[2],2'b00}};
+		3'd3:	fnSelect = 8'hFF;
+		default:	fnSelect = 8'hFF;
+		endcase
+	default:	fnSelect = 8'h00;
+	endcase
+end
+endfunction
+/*
+function [63:0] fnDatc;
+input [47:0] ins;
+input [63:0] dat;
+case(ins[`INSTRUCTION_OP])
+`R2:
+	if (isn[`INSTRUCTION_L2]==2'b01)
+		case(ins[47:42])
+		`FINDB:		fnDatc = dat[7:0];
+		`FINDC:		fnDatc = dat[15:0];
+		`FINDH:		fnDatc = dat[31:0];
+		`FINDW:		fnDatc = dat[63:0];
+		default:	fnDatc = dat[63:0];
+		endcase
+	else
+		fnDatc = dat[63:0];
+default:	fnDatc = dat[63:0];
+endcase
+endfunction
+*/
+/*
+function [63:0] fnMemInc;
+input [47:0] ins;
+case(ins[`INSTRUCTION_OP])
+`R2:
+	if (isn[`INSTRUCTION_L2]==2'b01)
+		case(ins[47:42])
+		`FINDB:		fnMemInc = 32'd1;
+		`FINDC:		fnMemInc = 32'd2;
+		`FINDH:		fnMemInc = 32'd4;
+		`FINDW:		fnMemInc = 32'd8;
+		default:	fnMemInc = 32'd8;
+		endcase
+	else
+		fnMemInc = 32'd8;
+default:	fnMemInc = 32'd8;
+endcase
+endfunction
+*/
+// fnDatiAlign: align incoming load data.
+//   ins - instruction word; selects load width and signed/unsigned extension
+//   adr - address; its low three bits pick the lane(s) of the 64-bit bus
+//   dat - raw 64-bit data
+// Returns the addressed byte / 16-bit / 32-bit field moved down to bit 0 and
+// sign- or zero-extended to 64 bits. Full-width accesses and every opcode
+// not listed return dat unchanged.
+function [63:0] fnDatiAlign;
+input [47:0] ins;
+input [`ABITS] adr;
+input [63:0] dat;
+case(ins[`INSTRUCTION_OP])
+`MEMNDX:
+	if (ins[`INSTRUCTION_L2]==2'b00)
+		case({ins[31:28],ins[22:21]})
+		// byte, sign extended
+		`LBX,`LVBX:
+			case(adr[2:0])
+			3'd0:	fnDatiAlign = {{56{dat[7]}},dat[7:0]};
+			3'd1:	fnDatiAlign = {{56{dat[15]}},dat[15:8]};
+			3'd2:	fnDatiAlign = {{56{dat[23]}},dat[23:16]};
+			3'd3:	fnDatiAlign = {{56{dat[31]}},dat[31:24]};
+			3'd4:	fnDatiAlign = {{56{dat[39]}},dat[39:32]};
+			3'd5:	fnDatiAlign = {{56{dat[47]}},dat[47:40]};
+			3'd6:	fnDatiAlign = {{56{dat[55]}},dat[55:48]};
+			3'd7:	fnDatiAlign = {{56{dat[63]}},dat[63:56]};
+			endcase
+		// byte, zero extended
+		`LBUX,`LVBUX:
+			case(adr[2:0])
+			3'd0:	fnDatiAlign = {{56{1'b0}},dat[7:0]};
+			3'd1:	fnDatiAlign = {{56{1'b0}},dat[15:8]};
+			3'd2:	fnDatiAlign = {{56{1'b0}},dat[23:16]};
+			3'd3:	fnDatiAlign = {{56{1'b0}},dat[31:24]};
+			3'd4:	fnDatiAlign = {{56{1'b0}},dat[39:32]};
+			3'd5:	fnDatiAlign = {{56{1'b0}},dat[47:40]};
+			3'd6:	fnDatiAlign = {{56{1'b0}},dat[55:48]};
+			// Was {56{2'b0}}: a 120-bit concatenation that only produced
+			// the right value because the assignment truncates to 64 bits.
+			3'd7:	fnDatiAlign = {{56{1'b0}},dat[63:56]};
+			endcase
+		// 16-bit, sign extended
+		`LCX,`LVCX:
+			case(adr[2:1])
+			2'd0:	fnDatiAlign = {{48{dat[15]}},dat[15:0]};
+			2'd1:	fnDatiAlign = {{48{dat[31]}},dat[31:16]};
+			2'd2:	fnDatiAlign = {{48{dat[47]}},dat[47:32]};
+			2'd3:	fnDatiAlign = {{48{dat[63]}},dat[63:48]};
+			endcase
+		// 16-bit, zero extended
+		`LCUX,`LVCUX:
+			case(adr[2:1])
+			2'd0:	fnDatiAlign = {{48{1'b0}},dat[15:0]};
+			2'd1:	fnDatiAlign = {{48{1'b0}},dat[31:16]};
+			2'd2:	fnDatiAlign = {{48{1'b0}},dat[47:32]};
+			2'd3:	fnDatiAlign = {{48{1'b0}},dat[63:48]};
+			endcase
+		// 32-bit, sign extended
+		`LHX,`LVHX:
+			case(adr[2])
+			1'b0:	fnDatiAlign = {{32{dat[31]}},dat[31:0]};
+			1'b1:	fnDatiAlign = {{32{dat[63]}},dat[63:32]};
+			endcase
+		// 32-bit, zero extended
+		`LHUX,`LVHUX:
+			case(adr[2])
+			1'b0:	fnDatiAlign = {{32{1'b0}},dat[31:0]};
+			1'b1:	fnDatiAlign = {{32{1'b0}},dat[63:32]};
+			endcase
+		// NOTE(review): `CAS is compared against the indexed function code
+		// here; other decoders in this file use `CASX for that field. The
+		// result is the same as the default arm either way - confirm.
+		`LWX,`LWRX,`LVX,`CAS,`LVWX:	fnDatiAlign = dat;
+		default:	fnDatiAlign = dat;
+		endcase
+	else
+		fnDatiAlign = dat;
+`LB:
+	case(adr[2:0])
+	3'd0:	fnDatiAlign = {{56{dat[7]}},dat[7:0]};
+	3'd1:	fnDatiAlign = {{56{dat[15]}},dat[15:8]};
+	3'd2:	fnDatiAlign = {{56{dat[23]}},dat[23:16]};
+	3'd3:	fnDatiAlign = {{56{dat[31]}},dat[31:24]};
+	3'd4:	fnDatiAlign = {{56{dat[39]}},dat[39:32]};
+	3'd5:	fnDatiAlign = {{56{dat[47]}},dat[47:40]};
+	3'd6:	fnDatiAlign = {{56{dat[55]}},dat[55:48]};
+	3'd7:	fnDatiAlign = {{56{dat[63]}},dat[63:56]};
+	endcase
+`LBU:
+	case(adr[2:0])
+	3'd0:	fnDatiAlign = {{56{1'b0}},dat[7:0]};
+	3'd1:	fnDatiAlign = {{56{1'b0}},dat[15:8]};
+	3'd2:	fnDatiAlign = {{56{1'b0}},dat[23:16]};
+	3'd3:	fnDatiAlign = {{56{1'b0}},dat[31:24]};
+	3'd4:	fnDatiAlign = {{56{1'b0}},dat[39:32]};
+	3'd5:	fnDatiAlign = {{56{1'b0}},dat[47:40]};
+	3'd6:	fnDatiAlign = {{56{1'b0}},dat[55:48]};
+	// Was {56{2'b0}}; see the matching note in the `LBUX arm.
+	3'd7:	fnDatiAlign = {{56{1'b0}},dat[63:56]};
+	endcase
+`Lx,`LVx:
+	// ins[20:18] encodes the width: 100 = full 64 bits, ?10 = 32 bits,
+	// ??1 = 16 bits. Arms are tested in order, so 100 wins over ?10.
+	casez(ins[20:18])
+	3'b100:	fnDatiAlign = dat;
+	3'b?10:
+		case(adr[2])
+		1'b0:	fnDatiAlign = {{32{dat[31]}},dat[31:0]};
+		1'b1:	fnDatiAlign = {{32{dat[63]}},dat[63:32]};
+		endcase
+	3'b??1:
+		case(adr[2:1])
+		2'd0:	fnDatiAlign = {{48{dat[15]}},dat[15:0]};
+		2'd1:	fnDatiAlign = {{48{dat[31]}},dat[31:16]};
+		2'd2:	fnDatiAlign = {{48{dat[47]}},dat[47:32]};
+		2'd3:	fnDatiAlign = {{48{dat[63]}},dat[63:48]};
+		endcase
+	default:	fnDatiAlign = dat;
+	endcase
+`LxU,`LVxU:
+	casez(ins[20:18])
+	3'b100:	fnDatiAlign = dat;
+	3'b?10:
+		case(adr[2])
+		1'b0:	fnDatiAlign = {{32{1'b0}},dat[31:0]};
+		1'b1:	fnDatiAlign = {{32{1'b0}},dat[63:32]};
+		endcase
+	3'b??1:
+		case(adr[2:1])
+		2'd0:	fnDatiAlign = {{48{1'b0}},dat[15:0]};
+		2'd1:	fnDatiAlign = {{48{1'b0}},dat[31:16]};
+		2'd2:	fnDatiAlign = {{48{1'b0}},dat[47:32]};
+		2'd3:	fnDatiAlign = {{48{1'b0}},dat[63:48]};
+		endcase
+	default:	fnDatiAlign = dat;
+	endcase
+`LWR,`LV,`CAS,`AMO:	fnDatiAlign = dat;
+default:    fnDatiAlign = dat;
+endcase
+endfunction
+
+// fnDato: build the 64-bit store data for an instruction. Sub-word data is
+// replicated into every lane of its size ({8{byte}}, {4{16-bit}},
+// {2{32-bit}}); full-width stores and unlisted opcodes pass dat through.
+function [63:0] fnDato;
+input [47:0] isn;
+input [63:0] dat;
+case(isn[`INSTRUCTION_OP])
+`MEMNDX:
+	if (isn[`INSTRUCTION_L2]==2'b00)
+		case({isn[31:28],isn[17:16]})
+		`SBX:   fnDato = {8{dat[7:0]}};
+		`SCX:   fnDato = {4{dat[15:0]}};
+		`SHX:   fnDato = {2{dat[31:0]}};
+		default:    fnDato = dat;
+		endcase
+	else
+		fnDato = dat;
+`SB:   fnDato = {8{dat[7:0]}};
+`Sx:
+	casez(isn[15:13])
+	3'b100:	fnDato = dat;
+	3'b?10:	fnDato = {2{dat[31:0]}};
+	3'b??1:	fnDato = {4{dat[15:0]}};
+	default:	fnDato = dat;
+	endcase
+`AMO:
+	case(isn[23:21])
+	3'd0:	fnDato = {8{dat[7:0]}};
+	3'd1:	fnDato = {4{dat[15:0]}};
+	3'd2:	fnDato = 
{2{dat[31:0]}}; + 3'd3: fnDato = dat; + default: fnDato = dat; + endcase +default: fnDato = dat; +endcase +endfunction + +function IsTLB; +input [47:0] isn; +case(isn[`INSTRUCTION_OP]) +`R2: + case(isn[`INSTRUCTION_S2]) + `TLB: IsTLB = TRUE; + default: IsTLB = FALSE; + endcase +default: IsTLB = FALSE; +endcase +endfunction + +// Indicate if the ALU instruction is valid immediately (single cycle operation) +function IsSingleCycle; +input [47:0] isn; +IsSingleCycle = !(IsMul(isn)|IsDivmod(isn)|IsTLB(isn)|IsShift48(isn)); +endfunction + + +generate begin : gDecocderInst +for (g = 0; g < QENTRIES; g = g + 1) begin +`ifdef SUPPORT_SMT +decoder8 iq0(.num({iqentry_tgt[g][8:7],iqentry_tgt[g][5:0]}), .out(iq_out[g])); +`else +decoder7 iq0(.num({iqentry_tgt[g][7],iqentry_tgt[g][5:0]}), .out(iq_out[g])); +`endif +end +end +endgenerate + +initial begin: Init + // + // + // set up panic messages + message[ `PANIC_NONE ] = "NONE "; + message[ `PANIC_FETCHBUFBEQ ] = "FETCHBUFBEQ "; + message[ `PANIC_INVALIDISLOT ] = "INVALIDISLOT "; + message[ `PANIC_IDENTICALDRAMS ] = "IDENTICALDRAMS "; + message[ `PANIC_OVERRUN ] = "OVERRUN "; + message[ `PANIC_HALTINSTRUCTION ] = "HALTINSTRUCTION "; + message[ `PANIC_INVALIDMEMOP ] = "INVALIDMEMOP "; + message[ `PANIC_INVALIDFBSTATE ] = "INVALIDFBSTATE "; + message[ `PANIC_INVALIDIQSTATE ] = "INVALIDIQSTATE "; + message[ `PANIC_BRANCHBACK ] = "BRANCHBACK "; + message[ `PANIC_MEMORYRACE ] = "MEMORYRACE "; + message[ `PANIC_ALU0ONLY ] = "ALU0 Only "; + + for (n = 0; n < 64; n = n + 1) + codebuf[n] <= 48'h0; + +end + +// --------------------------------------------------------------------------- +// FETCH +// --------------------------------------------------------------------------- +// +assign fetchbuf0_mem = IsMem(fetchbuf0_instr);// & IsLoad(fetchbuf0_instr); +assign fetchbuf0_rfw = IsRFW(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd); +`ifdef SUPPORT_PREDICATION +assign fetchbuf0_prfw = IsCmp(fetchbuf0_instr); +`endif + +generate begin: 
gFetchbufDec +if (`WAYS > 1) begin +assign fetchbuf1_mem = IsMem(fetchbuf1_instr);// & IsLoad(fetchbuf1_instr); +assign fetchbuf1_rfw = IsRFW(fetchbuf1_instr,vqe1,vl,fetchbuf1_thrd); +`ifdef SUPPORT_PREDICATION +assign fetchbuf1_prfw = IsCmp(fetchbuf1_instr); +`endif +end +if (`WAYS > 2) begin +assign fetchbuf2_mem = IsMem(fetchbuf2_instr);// & IsLoad(fetchbuf2_instr); +assign fetchbuf2_rfw = IsRFW(fetchbuf2_instr,vqe2,vl,fetchbuf2_thrd); +`ifdef SUPPORT_PREDICATION +assign fetchbuf2_prfw = IsCmp(fetchbuf2_instr); +`endif +end +end +endgenerate + +generate begin : gFetchbufInst +if (`WAYS > 2) begin : gb1 +FT64_fetchbuf_x3 #(AMSB,RSTPC) ufb1 +( + .rst(rst), + .clk4x(clk4x), + .clk(clk), + .fcu_clk(fcu_clk), + .cs_i(vadr[31:16]==16'hFFFF), + .cyc_i(cyc), + .stb_i(stb_o), + .ack_o(dc_ack), + .we_i(we), + .adr_i(vadr[15:0]), + .dat_i(dat_o[47:0]), + .cmpgrp(cr0[10:8]), + .freezePC(freezePC), + .regLR(regLR), + .thread_en(thread_en), + .insn0(insn0), + .insn1(insn1), + .insn1(insn2), + .phit(phit), + .threadx(threadx), + .branchmiss(branchmiss), + .misspc(misspc), + .branchmiss_thrd(branchmiss_thrd), + .predict_takenA(predict_takenA), + .predict_takenB(predict_takenB), + .predict_takenC(predict_takenC), + .predict_takenD(predict_takenD), + .predict_takenE(predict_takenE), + .predict_takenF(predict_takenF), + .predict_taken0(predict_taken0), + .predict_taken1(predict_taken1), + .predict_taken2(predict_taken2), + .queued1(queued1), + .queued2(queued2), + .queued2(queued3), + .queuedNop(queuedNop), + .pc0(pc0a), + .pc1(pc1a), + .fetchbuf(fetchbuf), + .fetchbufA_v(fetchbufA_v), + .fetchbufB_v(fetchbufB_v), + .fetchbufC_v(fetchbufC_v), + .fetchbufD_v(fetchbufD_v), + .fetchbufD_v(fetchbufE_v), + .fetchbufD_v(fetchbufF_v), + .fetchbufA_pc(fetchbufA_pc), + .fetchbufB_pc(fetchbufB_pc), + .fetchbufC_pc(fetchbufC_pc), + .fetchbufD_pc(fetchbufD_pc), + .fetchbufD_pc(fetchbufE_pc), + .fetchbufD_pc(fetchbufF_pc), + .fetchbufA_instr(fetchbufA_instr), + 
.fetchbufB_instr(fetchbufB_instr), + .fetchbufC_instr(fetchbufC_instr), + .fetchbufD_instr(fetchbufD_instr), + .fetchbufE_instr(fetchbufE_instr), + .fetchbufF_instr(fetchbufF_instr), + .fetchbuf0_instr(fetchbuf0_instr), + .fetchbuf1_instr(fetchbuf1_instr), + .fetchbuf0_thrd(fetchbuf0_thrd), + .fetchbuf1_thrd(fetchbuf1_thrd), + .fetchbuf2_thrd(fetchbuf2_thrd), + .fetchbuf0_pc(fetchbuf0_pc), + .fetchbuf1_pc(fetchbuf1_pc), + .fetchbuf2_pc(fetchbuf2_pc), + .fetchbuf0_v(fetchbuf0_v), + .fetchbuf1_v(fetchbuf1_v), + .fetchbuf2_v(fetchbuf2_v), + .fetchbuf0_insln(fetchbuf0_insln), + .fetchbuf1_insln(fetchbuf1_insln), + .fetchbuf2_insln(fetchbuf2_insln), + .codebuf0(codebuf[insn0[21:16]]), + .codebuf1(codebuf[insn1[21:16]]), + .codebuf2(codebuf[insn2[21:16]]), + .btgtA(btgtA), + .btgtB(btgtB), + .btgtC(btgtC), + .btgtD(btgtD), + .btgtE(btgtE), + .btgtF(btgtF), + .nop_fetchbuf(nop_fetchbuf), + .take_branch0(take_branch0), + .take_branch1(take_branch1), + .take_branch2(take_branch2), + .stompedRets(stompedOnRets), + .pred_on(pred_on), + .panic(fb_panic) +); +end +else if (`WAYS > 1) begin : gb1 +FT64_fetchbuf #(AMSB,RSTPC) ufb1 +( + .rst(rst), + .clk4x(clk4x), + .clk(clk), + .fcu_clk(fcu_clk), + .cs_i(vadr[31:16]==16'hFFFF), + .cyc_i(cyc), + .stb_i(stb_o), + .ack_o(dc_ack), + .we_i(we), + .adr_i(vadr[15:0]), + .dat_i(dat_o[47:0]), + .cmpgrp(cr0[10:8]), + .freezePC(freezePC), + .regLR(regLR), + .thread_en(thread_en), + .insn0(insn0), + .insn1(insn1), + .phit(phit), + .threadx(threadx), + .branchmiss(branchmiss), + .misspc(misspc), + .branchmiss_thrd(branchmiss_thrd), + .predict_takenA(predict_takenA), + .predict_takenB(predict_takenB), + .predict_takenC(predict_takenC), + .predict_takenD(predict_takenD), + .predict_taken0(predict_taken0), + .predict_taken1(predict_taken1), + .queued1(queued1), + .queued2(queued2), + .queuedNop(queuedNop), + .pc0(pc0a), + .pc1(pc1a), + .fetchbuf(fetchbuf), + .fetchbufA_v(fetchbufA_v), + .fetchbufB_v(fetchbufB_v), + .fetchbufC_v(fetchbufC_v), + 
.fetchbufD_v(fetchbufD_v), + .fetchbufA_pc(fetchbufA_pc), + .fetchbufB_pc(fetchbufB_pc), + .fetchbufC_pc(fetchbufC_pc), + .fetchbufD_pc(fetchbufD_pc), + .fetchbufA_instr(fetchbufA_instr), + .fetchbufB_instr(fetchbufB_instr), + .fetchbufC_instr(fetchbufC_instr), + .fetchbufD_instr(fetchbufD_instr), + .fetchbuf0_instr(fetchbuf0_instr), + .fetchbuf1_instr(fetchbuf1_instr), + .fetchbuf0_thrd(fetchbuf0_thrd), + .fetchbuf1_thrd(fetchbuf1_thrd), + .fetchbuf0_pc(fetchbuf0_pc), + .fetchbuf1_pc(fetchbuf1_pc), + .fetchbuf0_v(fetchbuf0_v), + .fetchbuf1_v(fetchbuf1_v), + .fetchbuf0_insln(fetchbuf0_insln), + .fetchbuf1_insln(fetchbuf1_insln), + .codebuf0(codebuf[insn0[21:16]]), + .codebuf1(codebuf[insn1[21:16]]), + .btgtA(btgtA), + .btgtB(btgtB), + .btgtC(btgtC), + .btgtD(btgtD), + .nop_fetchbuf(nop_fetchbuf), + .take_branch0(take_branch0), + .take_branch1(take_branch1), + .stompedRets(stompedOnRets), + .pred_on(pred_on), + .panic(fb_panic) +); +end +else begin : gb1 +FT64_fetchbuf_x1 #(AMSB,RSTPC) ufb1 +( + .rst(rst), + .clk4x(clk4x), + .clk(clk), + .fcu_clk(fcu_clk), + .cs_i(vadr[31:16]==16'hFFFF), + .cyc_i(cyc), + .stb_i(stb_o), + .ack_o(dc_ack), + .we_i(we), + .adr_i(vadr[15:0]), + .dat_i(dat_o[47:0]), + .cmpgrp(cr0[10:8]), + .freezePC(freezePC), + .regLR(regLR), + .thread_en(thread_en), + .insn0(insn0), + .phit(phit), + .threadx(threadx), + .branchmiss(branchmiss), + .misspc(misspc), + .branchmiss_thrd(branchmiss_thrd), + .predict_takenA(predict_takenA), + .predict_takenB(predict_takenB), + .predict_taken0(predict_taken0), + .queued1(queued1), + .queuedNop(queuedNop), + .pc0(pc0a), + .fetchbuf(fetchbuf), + .fetchbufA_v(fetchbufA_v), + .fetchbufB_v(fetchbufB_v), + .fetchbufA_pc(fetchbufA_pc), + .fetchbufB_pc(fetchbufB_pc), + .fetchbufA_instr(fetchbufA_instr), + .fetchbufB_instr(fetchbufB_instr), + .fetchbuf0_instr(fetchbuf0_instr), + .fetchbuf0_thrd(fetchbuf0_thrd), + .fetchbuf0_pc(fetchbuf0_pc), + .fetchbuf0_v(fetchbuf0_v), + .fetchbuf0_insln(fetchbuf0_insln), + 
.fetchbuf0_pbyte(fetchbuf0_pbyte), + .codebuf0(codebuf[insn0[21:16]]), + .btgtA(btgtA), + .btgtB(btgtB), + .nop_fetchbuf(nop_fetchbuf), + .take_branch0(take_branch0), + .stompedRets(stompedOnRets), + .pred_on(pred_on), + .panic(fb_panic) +); +assign fetchbuf1_v = `INV; +end +end +endgenerate + +wire cmt_head1 = (!iqentry_rfw[heads[1]] && !iqentry_oddball[heads[1]] && ~|iqentry_exc[heads[1]]); +wire cmt_head2 = (!iqentry_rfw[heads[2]] && !iqentry_oddball[heads[2]] && ~|iqentry_exc[heads[2]]); + +// Determine the head increment amount, this must match code later on. +reg [2:0] hi_amt; +always @* +begin + hi_amt <= 4'd0; + casez ({ iqentry_v[heads[0]], + iqentry_state[heads[0]]==IQS_CMT, + iqentry_v[heads[1]], + iqentry_state[heads[1]]==IQS_CMT, + iqentry_v[heads[2]], + iqentry_state[heads[2]]==IQS_CMT}) + + // retire 3 + 6'b0?_0?_0?: + if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0) + hi_amt <= 3'd3; + else if (heads[0] != tail0 && heads[1] != tail0) + hi_amt <= 3'd2; + else if (heads[0] != tail0) + hi_amt <= 3'd1; + 6'b0?_0?_10: + if (heads[0] != tail0 && heads[1] != tail0) + hi_amt <= 3'd2; + else if (heads[0] != tail0) + hi_amt <= 3'd1; + else + hi_amt <= 3'd0; + 6'b0?_0?_11: + if (`NUM_CMT > 2 || cmt_head2) + hi_amt <= 3'd3; + else + hi_amt <= 3'd2; + + // retire 1 (wait for regfile for heads[1]) + 6'b0?_10_??: + hi_amt <= 3'd1; + + // retire 2 + 6'b0?_11_0?, + 6'b0?_11_10: + if (`NUM_CMT > 1 || cmt_head1) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b0?_11_11: + if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) + hi_amt <= 3'd3; + else if (`NUM_CMT > 1 || cmt_head1) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b10_??_??: ; + 6'b11_0?_0?: + if (heads[1] != tail0 && heads[2] != tail0) + hi_amt <= 3'd3; + else if (heads[1] != tail0) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b11_0?_10: + if (heads[1] != tail0) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b11_0?_11: + if (heads[1] != tail0) begin + if (`NUM_CMT > 2 || cmt_head2) + hi_amt <= 
3'd3; + else + hi_amt <= 3'd2; + end + else + hi_amt <= 3'd1; + 6'b11_10_??: + hi_amt <= 3'd1; + 6'b11_11_0?: + if (`NUM_CMT > 1 && heads[2] != tail0) + hi_amt <= 3'd3; + else if (cmt_head1 && heads[2] != tail0) + hi_amt <= 3'd3; + else if (`NUM_CMT > 1 || cmt_head1) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b11_11_10: + if (`NUM_CMT > 1 || cmt_head1) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + 6'b11_11_11: + if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) + hi_amt <= 3'd3; + else if (`NUM_CMT > 1 || cmt_head1) + hi_amt <= 3'd2; + else + hi_amt <= 3'd1; + default: + begin + hi_amt <= 3'd0; + $display("hi_amt: Uncoded case %h",{ iqentry_v[heads[0]], + iqentry_state[heads[0]], + iqentry_v[heads[1]], + iqentry_state[heads[1]], + iqentry_v[heads[2]], + iqentry_state[heads[2]]}); + end + endcase +end + +// Amount subtracted from sequence numbers +reg [`SNBITS] tosub; +always @* +case(hi_amt) +3'd3: tosub <= (iqentry_v[heads[2]] ? iqentry_sn[heads[2]] + : iqentry_v[heads[1]] ? iqentry_sn[heads[1]] + : iqentry_v[heads[0]] ? iqentry_sn[heads[0]] + : 4'b0); +3'd2: tosub <= (iqentry_v[heads[1]] ? iqentry_sn[heads[1]] + : iqentry_v[heads[0]] ? iqentry_sn[heads[0]] + : 4'b0); +3'd1: tosub <= (iqentry_v[heads[0]] ? 
iqentry_sn[heads[0]] + : 4'b0); +default: tosub <= 4'd0; +endcase + +//initial begin: stop_at +//#1000000; panic <= `PANIC_OVERRUN; +//end + +// +// BRANCH-MISS LOGIC: livetarget +// +// livetarget implies that there is a not-to-be-stomped instruction that targets the register in question +// therefore, if it is zero it implies the rf_v value should become VALID on a branchmiss +// + +always @* +for (j = 1; j < PREGS; j = j + 1) begin + livetarget[j] = 1'b0; + for (n = 0; n < QENTRIES; n = n + 1) + livetarget[j] = livetarget[j] | iqentry_livetarget[n][j]; +end + +always @* + for (n = 0; n < QENTRIES; n = n + 1) +`ifdef SUPPORT_PREDICATION + iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & ~{PREGS{iqentry_cmp[n]}}; +`else + iqentry_livetarget[n] = {PREGS {iqentry_v[n]}} & {PREGS {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n]; +`endif + +`ifdef SUPPORT_PREDICATION +always @* +for (j = 1; j < 16; j = j + 1) begin + plivetarget[j] = 1'b0; + for (n = 0; n < QENTRIES; n = n + 1) + plivetarget[j] = plivetarget[j] | iqentry_plivetarget[n][j]; +end + +always @* + for (n = 0; n < QENTRIES; n = n + 1) + iqentry_plivetarget[n] = {16 {iqentry_v[n]}} & {16 {~iqentry_stomp[n] && iqentry_thrd[n]==branchmiss_thrd}} & iq_out[n] & {16{iqentry_cmp[n]}}; +`endif + +// +// BRANCH-MISS LOGIC: latestID +// +// latestID is the instruction queue ID of the newest instruction (latest) that targets +// a particular register. looks a lot like scheduling logic, but in reverse. 
+// +always @* + for (n = 0; n < QENTRIES; n = n + 1) begin + iqentry_cumulative[n] = 1'b0; + for (j = n; j < n + QENTRIES; j = j + 1) begin + if (missid==(j % QENTRIES)) + for (k = n; k <= j; k = k + 1) + iqentry_cumulative[n] = iqentry_cumulative[n] | iqentry_livetarget[k % QENTRIES]; + end + end + +always @* + for (n = 0; n < QENTRIES; n = n + 1) + iqentry_latestID[n] = (missid == n || ((iqentry_livetarget[n] & iqentry_cumulative[(n+1)%QENTRIES]) == {PREGS{1'b0}})) + ? iqentry_livetarget[n] + : {PREGS{1'b0}}; + +always @* + for (n = 0; n < QENTRIES; n = n + 1) + iqentry_source[n] = | iqentry_latestID[n]; + +`ifdef SUPPORT_PREDICATION +always @* + for (n = 0; n < QENTRIES; n = n + 1) begin + iqentry_pcumulative[n] = 1'b0; + for (j = n; j < n + QENTRIES; j = j + 1) begin + if (missid==(j % QENTRIES)) + for (k = n; k <= j; k = k + 1) + iqentry_pcumulative[n] = iqentry_pcumulative[n] | iqentry_plivetarget[k % QENTRIES]; + end + end + +always @* + for (n = 0; n < QENTRIES; n = n + 1) + iqentry_platestID[n] = (missid == n || ((iqentry_plivetarget[n] & iqentry_pcumulative[(n+1)%QENTRIES]) == {16{1'b0}})) + ? iqentry_plivetarget[n] + : {16{1'b0}}; + +always @* + for (n = 0; n < QENTRIES; n = n + 1) + iqentry_psource[n] = | iqentry_platestID[n]; + +`endif + +reg vqueued2; +assign Ra0 = fnRa(fetchbuf0_instr,vqe0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0}; +assign Rb0 = fnRb(fetchbuf0_instr,1'b0,vqe0,rfoa0[5:0],rfoa1[5:0],fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0}; +assign Rc0 = fnRc(fetchbuf0_instr,vqe0,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0}; +assign Rt0 = fnRt(fetchbuf0_instr,vqet0,vl,fetchbuf0_thrd) | {fetchbuf0_thrd,7'b0}; +assign Ra1 = fnRa(fetchbuf1_instr,vqueued2 ? vqe0 + 1 : vqe1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0}; +assign Rb1 = fnRb(fetchbuf1_instr,1'b1,vqueued2 ? vqe0 + 1 : vqe1,rfoa0[5:0],rfoa1[5:0],fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0}; +assign Rc1 = fnRc(fetchbuf1_instr,vqueued2 ? 
vqe0 + 1 : vqe1,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0}; +assign Rt1 = fnRt(fetchbuf1_instr,vqueued2 ? vqet0 + 1 : vqet1,vl,fetchbuf1_thrd) | {fetchbuf1_thrd,7'b0}; + +// +// additional logic for ISSUE +// +// for the moment, we look at ALU-input buffers to allow back-to-back issue of +// dependent instructions ... we do not, however, look ahead for DRAM requests +// that will become valid in the next cycle. instead, these have to propagate +// their results into the IQ entry directly, at which point it becomes issue-able +// + +// note that, for all intents & purposes, iqentry_done == iqentry_agen ... no need to duplicate + +wire [QENTRIES-1:0] args_valid; +wire [QENTRIES-1:0] could_issue; +wire [QENTRIES-1:0] could_issueid; + +// Note that bypassing is provided only from the first fpu. +generate begin : issue_logic +for (g = 0; g < QENTRIES; g = g + 1) +begin +assign args_valid[g] = + (iqentry_a1_v[g] +`ifdef FU_BYPASS + || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push)) + || ((iqentry_a1_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1)) + || ((iqentry_a1_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0)) +`endif + ) + && (iqentry_a2_v[g] || iqentry_mem[g] // a2 does not need to be valid immediately for a mem op (agen), it is checked by iqentry_memready logic +`ifdef FU_BYPASS + || (iqentry_a2_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push)) + || ((iqentry_a2_s[g] == alu1_sourceid && alu1_dataready && (~alu1_mem | alu1_push)) && (`NUM_ALU > 1)) + || ((iqentry_a2_s[g] == fpu1_sourceid && fpu1_dataready) && (`NUM_FPU > 0)) +`endif + ) + && (iqentry_a3_v[g] + || (iqentry_mem[g] & ~iqentry_agen[g] & ~iqentry_memndx[g]) // a3 needs to be valid for indexed instruction +// || (iqentry_mem[g] & ~iqentry_agen[g]) +`ifdef FU_BYPASS + || (iqentry_a3_s[g] == alu0_sourceid && alu0_dataready && (~alu0_mem | alu0_push)) + || ((iqentry_a3_s[g] == alu1_sourceid && alu1_dataready && 
(~alu1_mem | alu1_push)) && (`NUM_ALU > 1)) +`endif + ) + ; + +assign could_issue[g] = iqentry_v[g] && iqentry_state[g]==IQS_QUEUED + && args_valid[g] + && iqentry_iv[g]; + //&& (iqentry_mem[g] ? !iqentry_agen[g] : 1'b1); + +assign could_issueid[g] = (iqentry_v[g])// || (g==tail0 && canq1))// || (g==tail1 && canq2)) + && !iqentry_iv[g]; +// && (iqentry_a1_v[g] +// || (iqentry_a1_s[g] == alu0_sourceid && alu0_dataready) +// || (iqentry_a1_s[g] == alu1_sourceid && alu1_dataready)); + +end +end +endgenerate + +// The (old) simulator didn't handle the asynchronous race loop properly in the +// original code. It would issue two instructions to the same islot. So the +// issue logic has been re-written to eliminate the asynchronous loop. +// Can't issue to the ALU if it's busy doing a long running operation like a +// divide. +// ToDo: fix the memory synchronization, see fp_issue below +`ifndef INLINE_DECODE +always @* +begin + iqentry_id1issue = {QENTRIES{1'b0}}; + if (id1_available) begin + for (n = 0; n < QENTRIES; n = n + 1) + if (could_issueid[heads[n]] && iqentry_id1issue=={QENTRIES{1'b0}}) + iqentry_id1issue[heads[n]] = `TRUE; + end +end +generate begin : gIDUIssue + if (`NUM_IDU > 1) begin + always @* + begin + iqentry_id2issue = {QENTRIES{1'b0}}; + if (id2_available) begin + for (n = 0; n < QENTRIES; n = n + 1) + if (could_issueid[heads[n]] && !iqentry_id1issue[heads[n]] && iqentry_id2issue=={QENTRIES{1'b0}}) + iqentry_id2issue[heads[n]] = `TRUE; + end + end + end + if (`NUM_IDU > 2) begin + always @* + begin + iqentry_id3issue = {QENTRIES{1'b0}}; + if (id3_available) begin + for (n = 0; n < QENTRIES; n = n + 1) + if (could_issueid[heads[n]] + && !iqentry_id1issue[heads[n]] + && !iqentry_id2issue[heads[n]] + && iqentry_id3issue=={QENTRIES{1'b0}}) + iqentry_id3issue[heads[n]] = `TRUE; + end + end + end +end +endgenerate +`endif // not INLINE_DECODE + +// Detect if there are any valid queue entries prior to the given queue entry. 
// prior_valid[heads[j]] is set when any entry at queue positions heads[0] ..
// heads[j-1] (i.e. ahead of position j, counting from the head) is valid.
reg [QENTRIES-1:0] prior_valid;
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
	prior_valid[heads[j]] = 1'b0;
	// OR-accumulate over every position in front of j. The loop body does not
	// execute for j == 0, so the head entry never reports a prior valid entry.
	for (n = 0; n < j; n = n + 1)
		prior_valid[heads[j]] = iqentry_v[heads[n]] | prior_valid[heads[j]];
end

// prior_sync[heads[j]] is set when a valid entry flagged iqentry_sync sits at
// one of the positions ahead of j.
reg [QENTRIES-1:0] prior_sync;
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
	prior_sync[heads[j]] = 1'b0;
	for (n = 0; n < j; n = n + 1)
		prior_sync[heads[j]] = (iqentry_sync[heads[n]] & iqentry_v[heads[n]]) | prior_sync[heads[j]];
end

// prior_fsync[heads[j]] is set when a valid entry flagged iqentry_fsync sits
// at one of the positions ahead of j.
reg [QENTRIES-1:0] prior_fsync;
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
	prior_fsync[heads[j]] = 1'b0;
	for (n = 0; n < j; n = n + 1)
		prior_fsync[heads[j]] = (iqentry_fsync[heads[n]] & iqentry_v[heads[n]]) | prior_fsync[heads[j]];
end

// ALU issue selection.
// The queue is scanned from heads[0] upward and at most one entry is marked
// per ALU: once a bit has been set in an issue vector the "nothing picked
// yet" term blocks every later position.
always @*
begin
	iqentry_alu0_issue = {QENTRIES{1'b0}};
	iqentry_alu1_issue = {QENTRIES{1'b0}};

	if (alu0_available & alu0_idle)
		for (n = 0; n < QENTRIES; n = n + 1)
			if ((~|iqentry_alu0_issue)					// nothing picked yet
			 && could_issue[heads[n]]
			 && iqentry_alu[heads[n]]
			 // A prior sync only blocks issue while a prior entry is still valid.
			 && !(prior_sync[heads[n]] && prior_valid[heads[n]]))
				iqentry_alu0_issue[heads[n]] = `TRUE;

	if ((`NUM_ALU > 1) && alu1_available && alu1_idle)
		for (n = 0; n < QENTRIES; n = n + 1)
			if ((~|iqentry_alu1_issue)					// nothing picked yet
			 && could_issue[heads[n]]
			 && iqentry_alu[heads[n]]
			 && !iqentry_alu0[heads[n]]					// entry is not restricted to alu0
			 && !iqentry_alu0_issue[heads[n]]			// entry was not just given to alu0
			 && !(prior_sync[heads[n]] && prior_valid[heads[n]]))
				iqentry_alu1_issue[heads[n]] = `TRUE;
end


// FPU issue selection.
// Same head-first, one-entry-per-unit search as for the ALUs, except that a
// prior fsync blocks issue in the same way a prior sync does.
always @*
begin
	iqentry_fpu1_issue = {QENTRIES{1'b0}};
	iqentry_fpu2_issue = {QENTRIES{1'b0}};

	if ((`NUM_FPU > 0) && fpu1_available && fpu1_idle)
		for (n = 0; n < QENTRIES; n = n + 1)
			if ((~|iqentry_fpu1_issue)					// nothing picked yet
			 && could_issue[heads[n]]
			 && iqentry_fpu[heads[n]]
			 && !((prior_sync[heads[n]] | prior_fsync[heads[n]]) && prior_valid[heads[n]]))
				iqentry_fpu1_issue[heads[n]] = `TRUE;

	if ((`NUM_FPU > 1) && fpu2_available && fpu2_idle)
		for (n = 0; n < QENTRIES; n = n + 1)
			if ((~|iqentry_fpu2_issue)					// nothing picked yet
			 && could_issue[heads[n]]
			 && iqentry_fpu[heads[n]]
			 && !iqentry_fpu1_issue[heads[n]]			// entry was not just given to fpu1
			 && !((prior_sync[heads[n]] | prior_fsync[heads[n]]) && prior_valid[heads[n]]))
				iqentry_fpu2_issue[heads[n]] = `TRUE;
end

// One bit per queue entry; driven by the combinational block that follows the
// nid search below.
reg [QENTRIES-1:0] nextqd;

// Next queue id.
// nids[j] is the index of the nearest entry after j, in circular queue order,
// whose iqentry_thrd matches entry j's. The scan begins at j itself (which
// always matches, giving nids[j] = j when no other entry matches) and steps
// backwards around the ring; every match overwrites the previous one, so the
// match closest after j is the one that survives. The scan has to stop before
// index j+1 because the loop cannot both start and stop at j, so that last
// index is tested separately afterwards.
reg [`QBITS] nids [0:QENTRIES-1];
always @*
for (j = 0; j < QENTRIES; j = j + 1)
begin
	for (n = j; n != (j+1)%QENTRIES; n = (n + (QENTRIES-1)) % QENTRIES)
	begin
		if (iqentry_thrd[j]==iqentry_thrd[n])
			nids[j] = n;
	end
	// Do the last one
	if (iqentry_thrd[j]==iqentry_thrd[(j+1)%QENTRIES])
		nids[j] = (j+1)%QENTRIES;
end

// Search the queue for the next entry on the same thread.
// nid: index of the nearest queue entry after fcu_id, in circular order, whose
// iqentry_thrd equals fcu_thrd. The offset n counts down from QENTRIES-1 to 1
// and every match overwrites the previous one, so the smallest matching offset
// wins. If no other entry matches, nid stays at fcu_id.
reg [`QBITS] nid;
always @*
begin
	nid = fcu_id;
	for (n = QENTRIES-1; n > 0; n = n - 1)
		if (iqentry_thrd[(fcu_id + n) % QENTRIES]==fcu_thrd)
			nid = (fcu_id + n) % QENTRIES;
end

// nextqd[n]: set when the entry at nids[n] carries a larger sequence number
// than entry n, or when entry n itself is valid.
// This is combinational logic, so a blocking assignment is used; the issue
// logic below reads nextqd from another combinational block.
// NOTE(review): the "|| iqentry_v[n]" term looks like it makes nextqd true for
// every valid entry, which would leave the gate in the fcu issue logic below
// permanently open -- confirm whether that is intended.
always @*
for (n = 0; n < QENTRIES; n = n + 1)
	nextqd[n] = (iqentry_sn[nids[n]] > iqentry_sn[n]) || iqentry_v[n];

//assign nextqd = 8'hFF;

// Don't issue to the fcu until the following instruction is enqueued.
// However, if the queue is full then issue anyway. A branch miss will likely occur.
// Start search for instructions at head of queue (oldest instruction).
// At most one entry is selected: once a bit of iqentry_fcu_issue is set, the
// compare-with-zero term blocks all later queue positions.
always @*
begin
	iqentry_fcu_issue = {QENTRIES{1'b0}};

	if (fcu_done & ~branchmiss) begin
		for (n = 0; n < QENTRIES; n = n + 1) begin
			if (could_issue[heads[n]] && iqentry_fc[heads[n]]
			// branches (iqentry_br) bypass the nextqd gate
			&& (nextqd[heads[n]] || iqentry_br[heads[n]])
			&& iqentry_fcu_issue == {QENTRIES{1'b0}}
			// a prior sync only blocks issue while a prior entry is still valid
			&& (!prior_sync[heads[n]] || !prior_valid[heads[n]])
			)
				iqentry_fcu_issue[heads[n]] = `TRUE;
		end
	end
end


// Test if a given address is in the write buffer. This is done only for the
// first two queue slots to save logic on comparators.
+reg inwb0; +always @* +begin + inwb0 = FALSE; +`ifdef HAS_WB + for (n = 0; n < `WB_DEPTH; n = n + 1) + if (iqentry_ma[heads[0]][AMSB:3]==wb_addr[n][AMSB:3] && wb_v[n]) + inwb0 = TRUE; +`endif +end + +reg inwb1; +always @* +begin + inwb1 = FALSE; +`ifdef HAS_WB + for (n = 0; n < `WB_DEPTH; n = n + 1) + if (iqentry_ma[heads[1]][AMSB:3]==wb_addr[n][AMSB:3] && wb_v[n]) + inwb1 = TRUE; +`endif +end + +always @* +begin + for (n = 0; n < QENTRIES; n = n + 1) begin + iqentry_v[n] <= iqentry_state[n] != IQS_INVALID; + iqentry_done[n] <= iqentry_state[n]==IQS_DONE || iqentry_state[n]==IQS_CMT; + iqentry_out[n] <= iqentry_state[n]==IQS_OUT; + iqentry_agen[n] <= iqentry_state[n]==IQS_AGEN; + end +end + +// +// determine if the instructions ready to issue can, in fact, issue. +// "ready" means that the instruction has valid operands but has not gone yet +reg [1:0] issue_count, missue_count; +generate begin : gMemIssue +always @* +begin + issue_count = 0; + memissue[ heads[0] ] = iqentry_memready[ heads[0] ] && !(iqentry_load[heads[0]] && inwb0); // first in line ... go as soon as ready + if (memissue[heads[0]]) + issue_count = issue_count + 1; + + memissue[ heads[1] ] = ~iqentry_stomp[heads[1]] && iqentry_memready[ heads[1] ] // addr and data are valid + && issue_count < `NUM_MEM + // ... and no preceding instruction is ready to go + //&& ~iqentry_memready[heads[0]] + // ... and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[1]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + // ... if a release, any prior memory ops must be done before this one + && (iqentry_rl[heads[1]] ? iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]] : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + // ... 
and there's nothing in the write buffer during a load + && !(iqentry_load[heads[1]] && (inwb1 || iqentry_store[heads[0]])) + // ... and, if it is a store, there is no chance of it being undone + && ((iqentry_load[heads[1]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]])); + if (memissue[heads[1]]) + issue_count = issue_count + 1; + + memissue[ heads[2] ] = ~iqentry_stomp[heads[2]] && iqentry_memready[ heads[2] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + // ... and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[2]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + // ... if a release, any prior memory ops must be done before this one + && (iqentry_rl[heads[2]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[2]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + // ... and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[2]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]])); + if (memissue[heads[2]]) + issue_count = issue_count + 1; + + memissue[ heads[3] ] = ~iqentry_stomp[heads[3]] && iqentry_memready[ heads[3] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + // ... and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[3]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + // ... if a release, any prior memory ops must be done before this one + && (iqentry_rl[heads[3]] ? 
(iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[3]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]])) + // ... and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + // ... and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[3]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]])); + if (memissue[heads[3]]) + issue_count = issue_count + 1; + + if (QENTRIES > 4) begin + memissue[ heads[4] ] = ~iqentry_stomp[heads[4]] && iqentry_memready[ heads[4] ] // addr and data are valid + // ... 
and no preceding instruction is ready to go (now expressed as a budget: fewer than `NUM_MEM grants so far; issue_count is bumped after each granted entry) + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + // ... and there is no address-overlap with any preceding instruction (addresses are compared above bit 3) + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[4]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) + // ... if a release, any prior memory ops must be done before this one + && (iqentry_rl[heads[4]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + : 1'b1) + // ... and no valid previous op has the acquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + // ... 
and, for a load, there's nothing in the write buffer and no store queued ahead of it + && !(iqentry_load[heads[4]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]])) + // ... and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + // ... same test for a memdb barrier, except that only memory ops ahead of the barrier must be done + // (barrier guard is qualified by iqentry_iv, matching every other memsb/memdb term) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + // ... 
and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[4]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]])); + if (memissue[heads[4]]) + issue_count = issue_count + 1; + end + + if (QENTRIES > 5) begin + memissue[ heads[5] ] = ~iqentry_stomp[heads[5]] && iqentry_memready[ heads[5] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + //&& ~iqentry_memready[heads[4]] + // ... and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) + && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] + || ((iqentry_ma[heads[5]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) + // ... 
if a release, any prior memory ops must be done before this one + && (iqentry_rl[heads[5]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[5]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] + || iqentry_store[heads[4]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + // ... 
and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[5]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) + && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]])); + if (memissue[heads[5]]) + issue_count = issue_count + 1; + end + +`ifdef FULL_ISSUE_LOGIC +if (QENTRIES > 6) begin + memissue[ heads[6] ] = ~iqentry_stomp[heads[6]] && iqentry_memready[ heads[6] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + //&& ~iqentry_memready[heads[4]] + //&& ~iqentry_memready[heads[5]] + // ... and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3]))) + && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] + || ((iqentry_ma[heads[6]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3]))) + && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] + || ((iqentry_ma[heads[6]][AMSB:3] != 
iqentry_ma[heads[5]][AMSB:3]))) + && (iqentry_rl[heads[6]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) + && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[6]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] + || iqentry_store[heads[4]] || iqentry_store[heads[5]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + 
&& (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + // ... and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[6]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) + && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) + && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]])); + if (memissue[heads[6]]) + issue_count = issue_count + 1; + end + + if (QENTRIES > 7) begin + memissue[ heads[7] ] = ~iqentry_stomp[heads[7]] && iqentry_memready[ heads[7] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + //&& ~iqentry_memready[heads[4]] + //&& ~iqentry_memready[heads[5]] + //&& ~iqentry_memready[heads[6]] + // ... 
and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) + && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) + && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) + && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] + || ((iqentry_ma[heads[7]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) + && (iqentry_rl[heads[7]] ? 
(iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) + && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) + && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) + && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[7]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] + || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) 
|| + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + // ... 
and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[7]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) + && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) + && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) + && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]])); + if (memissue[heads[7]]) + issue_count = issue_count + 1; + end + + if (QENTRIES > 8) begin + memissue[ heads[8] ] = ~iqentry_stomp[heads[8]] && iqentry_memready[ heads[8] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + //&& ~iqentry_memready[heads[4]] + //&& ~iqentry_memready[heads[5]] + //&& ~iqentry_memready[heads[6]] + // ... 
and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) + && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) + && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) + && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) + && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]] + || ((iqentry_ma[heads[8]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]]))) + && (iqentry_rl[heads[8]] ? 
(iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) + && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) + && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) + && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) + && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) + && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[8]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] + || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (iqentry_done[heads[6]] || 
!iqentry_v[heads[6]]) + ) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + 
&& (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]]) + ) + ) + // ... and, if it is a SW, there is no chance of it being undone + && ((iqentry_load[heads[8]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) + && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) + && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) + && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]]) + && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]]) + ); + if (memissue[heads[8]]) + issue_count = issue_count + 1; + end + + if (QENTRIES > 9) begin + memissue[ heads[9] ] = ~iqentry_stomp[heads[9]] && iqentry_memready[ heads[9] ] // addr and data are valid + // ... and no preceding instruction is ready to go + && issue_count < `NUM_MEM + //&& ~iqentry_memready[heads[0]] + //&& ~iqentry_memready[heads[1]] + //&& ~iqentry_memready[heads[2]] + //&& ~iqentry_memready[heads[3]] + //&& ~iqentry_memready[heads[4]] + //&& ~iqentry_memready[heads[5]] + //&& ~iqentry_memready[heads[6]] + // ... 
and there is no address-overlap with any preceding instruction + && (!iqentry_mem[heads[0]] || (iqentry_agen[heads[0]] & iqentry_out[heads[0]]) || iqentry_done[heads[0]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[0]][AMSB:3] || iqentry_out[heads[0]] || iqentry_done[heads[0]]))) + && (!iqentry_mem[heads[1]] || (iqentry_agen[heads[1]] & iqentry_out[heads[1]]) || iqentry_done[heads[1]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[1]][AMSB:3] || iqentry_out[heads[1]] || iqentry_done[heads[1]]))) + && (!iqentry_mem[heads[2]] || (iqentry_agen[heads[2]] & iqentry_out[heads[2]]) || iqentry_done[heads[2]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[2]][AMSB:3] || iqentry_out[heads[2]] || iqentry_done[heads[2]]))) + && (!iqentry_mem[heads[3]] || (iqentry_agen[heads[3]] & iqentry_out[heads[3]]) || iqentry_done[heads[3]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[3]][AMSB:3] || iqentry_out[heads[3]] || iqentry_done[heads[3]]))) + && (!iqentry_mem[heads[4]] || (iqentry_agen[heads[4]] & iqentry_out[heads[4]]) || iqentry_done[heads[4]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[4]][AMSB:3] || iqentry_out[heads[4]] || iqentry_done[heads[4]]))) + && (!iqentry_mem[heads[5]] || (iqentry_agen[heads[5]] & iqentry_out[heads[5]]) || iqentry_done[heads[5]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[5]][AMSB:3] || iqentry_out[heads[5]] || iqentry_done[heads[5]]))) + && (!iqentry_mem[heads[6]] || (iqentry_agen[heads[6]] & iqentry_out[heads[6]]) || iqentry_done[heads[6]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[6]][AMSB:3] || iqentry_out[heads[6]] || iqentry_done[heads[6]]))) + && (!iqentry_mem[heads[7]] || (iqentry_agen[heads[7]] & iqentry_out[heads[7]]) || iqentry_done[heads[7]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[7]][AMSB:3] || iqentry_out[heads[7]] || iqentry_done[heads[7]]))) + && (!iqentry_mem[heads[8]] || (iqentry_agen[heads[8]] & iqentry_out[heads[8]]) || 
iqentry_done[heads[8]] + || ((iqentry_ma[heads[9]][AMSB:3] != iqentry_ma[heads[8]][AMSB:3] || iqentry_out[heads[8]] || iqentry_done[heads[8]]))) + && (iqentry_rl[heads[9]] ? (iqentry_done[heads[0]] || !iqentry_v[heads[0]] || !iqentry_mem[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]] || !iqentry_mem[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]] || !iqentry_mem[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]] || !iqentry_mem[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]] || !iqentry_mem[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]] || !iqentry_mem[heads[5]]) + && (iqentry_done[heads[6]] || !iqentry_v[heads[6]] || !iqentry_mem[heads[6]]) + && (iqentry_done[heads[7]] || !iqentry_v[heads[7]] || !iqentry_mem[heads[7]]) + && (iqentry_done[heads[8]] || !iqentry_v[heads[8]] || !iqentry_mem[heads[8]]) + : 1'b1) + // ... if a preivous op has the aquire bit set + && !(iqentry_aq[heads[0]] && iqentry_v[heads[0]]) + && !(iqentry_aq[heads[1]] && iqentry_v[heads[1]]) + && !(iqentry_aq[heads[2]] && iqentry_v[heads[2]]) + && !(iqentry_aq[heads[3]] && iqentry_v[heads[3]]) + && !(iqentry_aq[heads[4]] && iqentry_v[heads[4]]) + && !(iqentry_aq[heads[5]] && iqentry_v[heads[5]]) + && !(iqentry_aq[heads[6]] && iqentry_v[heads[6]]) + && !(iqentry_aq[heads[7]] && iqentry_v[heads[7]]) + && !(iqentry_aq[heads[8]] && iqentry_v[heads[8]]) + // ... and there's nothing in the write buffer during a load + && !(iqentry_load[heads[9]] && (wb_v!=1'b0 + || iqentry_store[heads[0]] || iqentry_store[heads[1]] || iqentry_store[heads[2]] || iqentry_store[heads[3]] + || iqentry_store[heads[4]] || iqentry_store[heads[5]] || iqentry_store[heads[6]] || iqentry_store[heads[7]] + || iqentry_store[heads[8]])) + // ... 
and there isn't a barrier, or everything before the barrier is done or invalid + && (!(iqentry_iv[heads[1]] && iqentry_memsb[heads[1]]) || (iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memsb[heads[2]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memsb[heads[3]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memsb[heads[4]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memsb[heads[5]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memsb[heads[6]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + && (!(iqentry_iv[heads[7]] && iqentry_memsb[heads[7]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (iqentry_done[heads[6]] || 
!iqentry_v[heads[6]])) + ) + && (!(iqentry_iv[heads[8]] && iqentry_memsb[heads[8]]) || + ((iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (iqentry_done[heads[6]] || !iqentry_v[heads[6]]) + && (iqentry_done[heads[7]] || !iqentry_v[heads[7]]) + ) + ) + && (!(iqentry_iv[heads[1]] && iqentry_memdb[heads[1]]) || (!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]])) + && (!(iqentry_iv[heads[2]] && iqentry_memdb[heads[2]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]])) + ) + && (!(iqentry_iv[heads[3]] && iqentry_memdb[heads[3]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]])) + ) + && (!(iqentry_iv[heads[4]] && iqentry_memdb[heads[4]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]])) + ) + && (!(iqentry_iv[heads[5]] && iqentry_memdb[heads[5]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || 
iqentry_done[heads[4]] || !iqentry_v[heads[4]])) + ) + && (!(iqentry_iv[heads[6]] && iqentry_memdb[heads[6]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]])) + ) + && (!(iqentry_iv[heads[7]] && iqentry_memdb[heads[7]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]])) + ) + && (!(iqentry_iv[heads[8]] && iqentry_memdb[heads[8]]) || + ((!iqentry_mem[heads[0]] || iqentry_done[heads[0]] || !iqentry_v[heads[0]]) + && (!iqentry_mem[heads[1]] || iqentry_done[heads[1]] || !iqentry_v[heads[1]]) + && (!iqentry_mem[heads[2]] || iqentry_done[heads[2]] || !iqentry_v[heads[2]]) + && (!iqentry_mem[heads[3]] || iqentry_done[heads[3]] || !iqentry_v[heads[3]]) + && (!iqentry_mem[heads[4]] || iqentry_done[heads[4]] || !iqentry_v[heads[4]]) + && (!iqentry_mem[heads[5]] || iqentry_done[heads[5]] || !iqentry_v[heads[5]]) + && (!iqentry_mem[heads[6]] || iqentry_done[heads[6]] || !iqentry_v[heads[6]]) + && (!iqentry_mem[heads[7]] || iqentry_done[heads[7]] || !iqentry_v[heads[7]]) + ) + ) + // ... 
and, if it is a store, there is no chance of it being undone + && ((iqentry_load[heads[9]] && sple) || + !(iqentry_fc[heads[0]]||iqentry_canex[heads[0]]) + && !(iqentry_fc[heads[1]]||iqentry_canex[heads[1]]) + && !(iqentry_fc[heads[2]]||iqentry_canex[heads[2]]) + && !(iqentry_fc[heads[3]]||iqentry_canex[heads[3]]) + && !(iqentry_fc[heads[4]]||iqentry_canex[heads[4]]) + && !(iqentry_fc[heads[5]]||iqentry_canex[heads[5]]) + && !(iqentry_fc[heads[6]]||iqentry_canex[heads[6]]) + && !(iqentry_fc[heads[7]]||iqentry_canex[heads[7]]) + && !(iqentry_fc[heads[8]]||iqentry_canex[heads[8]]) + ); + if (memissue[heads[9]]) + issue_count = issue_count + 1; + end +end +end +endgenerate +`endif + +// Starts search for instructions to issue at the head of the queue and +// progresses from there. This ensures that the oldest instructions are +// selected first for processing. +always @* +begin + last_issue0 = QENTRIES; + last_issue1 = QENTRIES; + last_issue2 = QENTRIES; + for (n = 0; n < QENTRIES; n = n + 1) + if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin + if (mem1_available && dram0 == `DRAMSLOT_AVAIL) begin + last_issue0 = heads[n]; + end + end + for (n = 0; n < QENTRIES; n = n + 1) + if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin + if (mem2_available && heads[n] != last_issue0 && `NUM_MEM > 1) begin + if (dram1 == `DRAMSLOT_AVAIL) begin + last_issue1 = heads[n]; + end + end + end + for (n = 0; n < QENTRIES; n = n + 1) + if (~iqentry_stomp[heads[n]] && iqentry_memissue[heads[n]]) begin + if (mem3_available && heads[n] != last_issue0 && heads[n] != last_issue1 && `NUM_MEM > 2) begin + if (dram2 == `DRAMSLOT_AVAIL) begin + last_issue2 = heads[n]; + end + end + end +end + +reg [2:0] wbptr; +// Stomp logic for branch miss. 
+/* +FT64_stomp #(QENTRIES) ustmp1 +( + .branchmiss(branchmiss), + .branchmiss_thrd(branchmiss_thrd), + .missid(missid), + .head0(heads[0]), + .thrd(iqentry_thrd), + .iqentry_v(iqentry_v), + .stomp(iqentry_stomp) +); +*/ +always @* +begin + iqentry_stomp <= 1'b0; + if (branchmiss) begin + for (n = 0; n < QENTRIES; n = n + 1) begin + if (iqentry_v[n] && iqentry_thrd[n]==branchmiss_thrd) begin + if (iqentry_sn[n] > iqentry_sn[missid[`QBITS]]) + iqentry_stomp[n] <= `TRUE; + end + end + end + /* + if (fcu_branchmiss) begin + for (n = 0; n < QENTRIES; n = n + 1) begin + if (iqentry_v[n] && iqentry_thrd[n]==fcu_thrd) begin + if (iqentry_sn[n] > iqentry_sn[fcu_id[`QBITS]]) + iqentry_stomp[n] <= `TRUE; + end + end + end + */ +end + +always @* +begin + stompedOnRets = 1'b0; + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_stomp[n] && iqentry_ret[n]) + stompedOnRets = stompedOnRets + 4'd1; +end + +reg id1_vi, id2_vi, id3_vi; +wire [4:0] id1_ido, id2_ido, id3_ido; +wire id1_vo, id2_vo, id3_vo; +wire id1_clk, id2_clk, id3_clk; + +// Always at least one decoder +assign id1_clk = clk_i; +//BUFGCE uclkb2 +//( +// .I(clk_i), +// .CE(id1_available), +// .O(id1_clk) +//); + +FT64_idecoder uid1 +( + .clk(id1_clk), + .idv_i(id1_vi), + .id_i(id1_id), +`ifdef INLINE_DECODE + .instr(fetchbuf0_instr), + .Rt(Rt0[4:0]), + .predict_taken(predict_taken0), + .thrd(fetchbuf0_thrd), + .vl(vl), +`else + .instr(id1_instr), + .Rt(id1_Rt), + .predict_taken(id1_pt), + .thrd(id1_thrd), + .vl(id1_vl), +`endif +//ToDo: fix for vectors length and element number + .ven(id1_ven), + .bus(id1_bus), + .id_o(id1_ido), + .idv_o(id1_vo), + .debug_on(debug_on), + .pred_on(pred_on) +); +/* +`ifdef INLINE_DECODE + id1_Rt <= Rt0[4:0]; + id1_vl <= vl; + id1_ven <= venno; + id1_id <= tail; + id1_pt <= predict_taken0; + id1_thrd <= fetchbuf0_thrd; + setinsn1(tail,id1_bus); +`endif +*/ +generate begin : gIDUInst +if (`NUM_IDU > 1) begin +//BUFGCE uclkb3 +//( +// .I(clk_i), +// .CE(id2_available), +// .O(id2_clk) 
+//); +assign id2_clk = clk_i; + +FT64_idecoder uid2 +( + .clk(id2_clk), + .idv_i(id2_vi), + .id_i(id2_id), +`ifdef INLINE_DECODE + .instr(fetchbuf1_instr), + .Rt(Rt1[4:0]), + .predict_taken(predict_taken1), + .thrd(fetchbuf1_thrd), + .vl(vl), +`else + .instr(id2_instr), + .Rt(id2_Rt), + .predict_taken(id2_pt), + .thrd(id2_thrd), + .vl(id2_vl), +`endif + .ven(id2_ven), + .bus(id2_bus), + .id_o(id2_ido), + .idv_o(id2_vo), + .debug_on(debug_on), + .pred_on(pred_on) +); +end +if (`NUM_IDU > 2) begin +//BUFGCE uclkb4 +//( +// .I(clk_i), +// .CE(id3_available), +// .O(id3_clk) +//); +assign id3_clk = clk_i; + +FT64_idecoder uid2 +( + .clk(id3_clk), + .idv_i(id3_vi), + .id_i(id3_id), +`ifdef INLINE_DECODE + .instr(fetchbuf2_instr), + .Rt(Rt2[4:0]), + .predict_taken(predict_taken2), + .thrd(fetchbuf2_thrd), + .vl(vl), +`else + .instr(id3_instr), + .Rt(id3_Rt), + .predict_taken(id3_pt), + .thrd(id3_thrd), + .vl(id3_vl), +`endif + .ven(id3_ven), + .bus(id3_bus), + .id_o(id3_ido), + .idv_o(id3_vo), + .debug_on(debug_on), + .pred_on(pred_on) +); +end +end +endgenerate + +// +// EXECUTE +// +wire [15:0] lfsro; +lfsr #(16,16'hACE4) u1 (rst, clk, 1'b1, 1'b0, lfsro); + +reg [63:0] csr_r; +wire [11:0] csrno = alu0_instr[29:18]; +always @* +begin +`ifdef SUPPORT_SMT + if (csrno[11:10] >= ol[alu0_thrd]) +`else + if (csrno[11:10] >= ol) +`endif + casez(csrno[9:0]) + `CSR_CR0: csr_r <= cr0; + `CSR_HARTID: csr_r <= hartid; + `CSR_TICK: csr_r <= tick; + `CSR_PCR: csr_r <= pcr; + `CSR_PCR2: csr_r <= pcr2; + `CSR_PMR: csr_r <= pmr; + `CSR_WBRCD: csr_r <= wbrcd; + `CSR_SEMA: csr_r <= sema; + `CSR_KEYS: csr_r <= keys; + `CSR_TCB: csr_r <= tcb; + `CSR_FSTAT: csr_r <= {fp_rgs,fp_status}; +`ifdef SUPPORT_DBG + `CSR_DBAD0: csr_r <= dbg_adr0; + `CSR_DBAD1: csr_r <= dbg_adr1; + `CSR_DBAD2: csr_r <= dbg_adr2; + `CSR_DBAD3: csr_r <= dbg_adr3; + `CSR_DBCTRL: csr_r <= dbg_ctrl; + `CSR_DBSTAT: csr_r <= dbg_stat; +`endif + `CSR_CAS: csr_r <= cas; + `CSR_TVEC: csr_r <= tvec[csrno[2:0]]; + `CSR_BADADR: 
csr_r <= badaddr[{alu0_thrd,csrno[11:10]}]; + `CSR_BADINSTR: csr_r <= bad_instr[{alu0_thrd,csrno[11:10]}]; + `CSR_CAUSE: csr_r <= {48'd0,cause[{alu0_thrd,csrno[11:10]}]}; +`ifdef SUPPORT_SMT + `CSR_IM_STACK: csr_r <= im_stack[alu0_thrd]; + `CSR_OL_STACK: csr_r <= {dl_stack[alu0_thrd],ol_stack[alu0_thrd]}; + `CSR_PL_STACK: csr_r <= pl_stack[alu0_thrd]; + `CSR_RS_STACK: csr_r <= rs_stack[alu0_thrd]; + `CSR_STATUS: csr_r <= mstatus[alu0_thrd][63:0]; + `CSR_BRS_STACK: csr_r <= brs_stack[alu0_thrd]; + `CSR_EPC0: csr_r <= epc0[alu0_thrd]; + `CSR_EPC1: csr_r <= epc1[alu0_thrd]; + `CSR_EPC2: csr_r <= epc2[alu0_thrd]; + `CSR_EPC3: csr_r <= epc3[alu0_thrd]; + `CSR_EPC4: csr_r <= epc4[alu0_thrd]; + `CSR_EPC5: csr_r <= epc5[alu0_thrd]; + `CSR_EPC6: csr_r <= epc6[alu0_thrd]; + `CSR_EPC7: csr_r <= epc7[alu0_thrd]; +`else + `CSR_IM_STACK: csr_r <= im_stack; + `CSR_OL_STACK: csr_r <= {dl_stack,ol_stack}; + `CSR_PL_STACK: csr_r <= pl_stack; + `CSR_RS_STACK: csr_r <= rs_stack; + `CSR_STATUS: csr_r <= mstatus[63:0]; + `CSR_BRS_STACK: csr_r <= brs_stack; + `CSR_EPC0: csr_r <= epc0; + `CSR_EPC1: csr_r <= epc1; + `CSR_EPC2: csr_r <= epc2; + `CSR_EPC3: csr_r <= epc3; + `CSR_EPC4: csr_r <= epc4; + `CSR_EPC5: csr_r <= epc5; + `CSR_EPC6: csr_r <= epc6; + `CSR_EPC7: csr_r <= epc7; +`endif + `CSR_CODEBUF: csr_r <= codebuf[csrno[5:0]]; +`ifdef SUPPORT_BBMS + `CSR_TB: csr_r <= tb; + `CSR_CBL: csr_r <= cbl; + `CSR_CBU: csr_r <= cbu; + `CSR_RO: csr_r <= ro; + `CSR_DBL: csr_r <= dbl; + `CSR_DBU: csr_r <= dbu; + `CSR_SBL: csr_r <= sbl; + `CSR_SBU: csr_r <= sbu; + `CSR_ENU: csr_r <= en; +`endif +`ifdef SUPPORT_PREDICATION + `CSR_PREGS: read_pregs(csr_r); +`endif + `CSR_Q_CTR: csr_r <= iq_ctr; + `CSR_BM_CTR: csr_r <= bm_ctr; + `CSR_ICL_CTR: csr_r <= icl_ctr; + `CSR_IRQ_CTR: csr_r <= irq_ctr; + `CSR_TIME: csr_r <= wc_times; + `CSR_INFO: + case(csrno[3:0]) + 4'd0: csr_r <= "Finitron"; // manufacturer + 4'd1: csr_r <= " "; + 4'd2: csr_r <= "64 bit "; // CPU class + 4'd3: csr_r <= " "; + 4'd4: csr_r <= 
"FT64 "; // Name + 4'd5: csr_r <= " "; + 4'd6: csr_r <= 64'd1; // model # + 4'd7: csr_r <= 64'd1; // serial number + 4'd8: csr_r <= {32'd16384,32'd16384}; // cache sizes instruction,csr_ra + 4'd9: csr_r <= 64'd0; + default: csr_r <= 64'd0; + endcase + default: begin + $display("Unsupported CSR:%h",csrno[10:0]); + csr_r <= 64'hEEEEEEEEEEEEEEEE; + end + endcase + else + csr_r <= 64'h0; +end + +reg [63:0] alu0_xu = 1'd0, alu1_xu = 1'd0; + +`ifdef SUPPORT_BBMS + +`else +// This always block didn't work, it left the signals as X's. +// So they are set to zero where the reg declaration is. +// I'm guessing the @* says there's no variables on the right +// hand side, so I'm not going to evaluate it. +always @* + alu0_xs <= 64'd0; +always @* + alu1_xs <= 64'd0; +`endif + +//always @* +// read_csr(alu0_instr[29:18],csr_r,alu0_thrd); +FT64_alu #(.BIG(1'b1),.SUP_VECTOR(SUP_VECTOR)) ualu0 ( + .rst(rst), + .clk(clk), + .ld(alu0_ld), + .abort(alu0_abort), + .instr(alu0_instr), + .sz(alu0_sz), + .tlb(alu0_tlb), + .store(alu0_store), + .a(alu0_argA), + .b(alu0_argB), + .c(alu0_argC), + .pc(alu0_pc), +// .imm(alu0_argI), + .tgt(alu0_tgt), + .ven(alu0_ven), + .vm(vm[alu0_instr[25:23]]), + .csr(csr_r), + .o(alu0_out), + .ob(alu0b_bus), + .done(alu0_done), + .idle(alu0_idle), + .excen(aec[4:0]), + .exc(alu0_exc), + .thrd(alu0_thrd), + .mem(alu0_mem), + .shift(alu0_shft), // 48 bit shift inst. 
+ .ol(ol), + .ASID(ASID), + .icl_i(icl_o), + .cyc_i(cyc), + .we_i(we), + .vadr_i(vadr), + .cyc_o(cyc_o), + .we_o(we_o), + .padr_o(adr_o), + .uncached(), + .tlb_miss(tlb_miss), + .exv_o(exv_i), + .wrv_o(wrv_i), + .rdv_o(rdv_i) +`ifdef SUPPORT_SEGMENTATION + , + .zs_base(zsx_base), + .ds_base(dsx_base), + .es_base(esx_base), + .fs_base(fsx_base), + .gs_base(gsx_base), + .hs_base(hsx_base), + .ss_base(ssx_base), + .cs_base(csx_base), + .zsub(zsub), + .dsub(dsub), + .esub(esub), + .fsub(fsub), + .gsub(gsub), + .hsub(hsub), + .ssub(ssub), + .csub(csub), + .zslb(zslb), + .dslb(dslb), + .eslb(eslb), + .fslb(fslb), + .gslb(gslb), + .hslb(hslb), + .sslb(sslb), + .cslb(cslb) +`endif +`ifdef SUPPORT_BBMS + .pb(dl==2'b00 ? 64'd0 : pb), + .cbl(cbl), + .cbu(cbu), + .ro(ro), + .dbl(dbl), + .dbu(dbu), + .sbl(sbl), + .sbu(sbu), + .en(en) +`endif +); +generate begin : gAluInst +if (`NUM_ALU > 1) begin +FT64_alu #(.BIG(1'b0),.SUP_VECTOR(SUP_VECTOR)) ualu1 ( + .rst(rst), + .clk(clk), + .ld(alu1_ld), + .abort(alu1_abort), + .instr(alu1_instr), + .sz(alu1_sz), + .tlb(1'b0), + .store(alu1_store), + .a(alu1_argA), + .b(alu1_argB), + .c(alu1_argC), + .pc(alu1_pc), + //.imm(alu1_argI), + .tgt(alu1_tgt), + .ven(alu1_ven), + .vm(vm[alu1_instr[25:23]]), + .csr(64'd0), + .o(alu1_out), + .ob(alu1b_bus), + .done(alu1_done), + .idle(alu1_idle), + .excen(aec[4:0]), + .exc(alu1_exc), + .thrd(1'b0), + .mem(alu1_mem), + .shift(alu1_shft), + .ol(2'b0), + .ASID(8'h0), + .cyc_i(1'b0), + .we_i(1'b0), + .vadr_i(64'd0), + .cyc_o(), + .we_o(), + .padr_o(), + .uncached(), + .tlb_miss(), + .exv_o(), + .wrv_o(), + .rdv_o() +`ifdef SUPPORT_SEGMENTATION + , + .zs_base(zsx_base), + .ds_base(dsx_base), + .es_base(esx_base), + .fs_base(fsx_base), + .gs_base(gsx_base), + .hs_base(hsx_base), + .ss_base(ssx_base), + .cs_base(csx_base), + .zsub(zsub), + .dsub(dsub), + .esub(esub), + .fsub(fsub), + .gsub(gsub), + .hsub(hsub), + .ssub(ssub), + .csub(csub), + .zslb(zslb), + .dslb(dslb), + .eslb(eslb), + .fslb(fslb), + 
.gslb(gslb), + .hslb(hslb), + .sslb(sslb), + .cslb(cslb) +`endif +`ifdef SUPPORT_BBMS + .pb(dl==2'b00 ? 64'd0 : pb), + .cbl(cbl), + .cbu(cbu), + .ro(ro), + .dbl(dbl), + .dbu(dbu), + .sbl(sbl), + .sbu(sbu), + .en(en) +`endif +); +end +end +endgenerate + +always @* +begin + alu0_cmt <= 1'b1; + alu1_cmt <= 1'b1; + fpu1_cmt <= 1'b1; + fpu2_cmt <= 1'b1; + fcu_cmt <= 1'b1; + + alu0_bus <= alu0_out; + alu1_bus <= alu1_out; + fpu1_bus <= fpu1_out; + fpu2_bus <= fpu2_out; + fcu_bus <= fcu_out; +end + +assign alu0_abort = 1'b0; +assign alu1_abort = 1'b0; + +generate begin : gFPUInst +if (`NUM_FPU > 0) begin +wire fpu1_clk; +//BUFGCE ufpc1 +//( +// .I(clk_i), +// .CE(fpu1_available), +// .O(fpu1_clk) +//); +assign fpu1_clk = clk_i; + +fpUnit ufp1 +( + .rst(rst), + .clk(fpu1_clk), + .clk4x(clk4x), + .ce(1'b1), + .ir(fpu1_instr), + .ld(fpu1_ld), + .a(fpu1_argA), + .b(fpu1_argB), + .imm(fpu1_argI), + .o(fpu1_out), + .csr_i(), + .status(fpu1_status), + .exception(), + .done(fpu1_done) +); +end +if (`NUM_FPU > 1) begin +wire fpu2_clk; +//BUFGCE ufpc2 +//( +// .I(clk_i), +// .CE(fpu2_available), +// .O(fpu2_clk) +//); +assign fpu2_clk = clk_i; +fpUnit ufp1 +( + .rst(rst), + .clk(fpu2_clk), + .clk4x(clk4x), + .ce(1'b1), + .ir(fpu2_instr), + .ld(fpu2_ld), + .a(fpu2_argA), + .b(fpu2_argB), + .imm(fpu2_argI), + .o(fpu2_out), + .csr_i(), + .status(fpu2_status), + .exception(), + .done(fpu2_done) +); +end +end +endgenerate + +assign fpu1_exc = (fpu1_available) ? + ((|fpu1_status[15:0]) ? `FLT_FLT : `FLT_NONE) : `FLT_UNIMP; +assign fpu2_exc = (fpu2_available) ? + ((|fpu2_status[15:0]) ? 
`FLT_FLT : `FLT_NONE) : `FLT_UNIMP; + +assign alu0_v = alu0_dataready, + alu1_v = alu1_dataready; +assign alu0_id = alu0_sourceid, + alu1_id = alu1_sourceid; +assign fpu1_v = fpu1_dataready; +assign fpu1_id = fpu1_sourceid; +assign fpu2_v = fpu2_dataready; +assign fpu2_id = fpu2_sourceid; + +`ifdef SUPPORT_SMT +wire [1:0] olm = ol[fcu_thrd]; +`else +wire [1:0] olm = ol; +`endif + +reg [`SNBITS] maxsn [0:`WAYS-1]; +always @* +begin + for (j = 0; j < `WAYS; j = j + 1) begin + maxsn[j] = 8'd0; + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_sn[n] > maxsn[j] && iqentry_thrd[n]==j && iqentry_v[n]) + maxsn[j] = iqentry_sn[n]; + maxsn[j] = maxsn[j] - tosub; + end +end + +assign fcu_v = fcu_dataready; +assign fcu_id = fcu_sourceid; + +wire [4:0] fcmpo; +wire fnanx; +fp_cmp_unit #(64) ufcmp1 (fcu_argA, fcu_argB, fcmpo, fnanx); + +wire fcu_takb; + +always @* +begin + fcu_exc <= `FLT_NONE; + casez(fcu_instr[`INSTRUCTION_OP]) +`ifdef SUPPORT_SEGMENTATION + `LDCS: fcu_exc <= fcu_instr[31:8] != fcu_pc[63:40] ? `FLT_CS : `FLT_NONE; + `RET: fcu_exc <= fcu_argB[63:40] != fcu_pc[63:40] ? `FLT_RET : `FLT_NONE; +`endif +`ifdef SUPPORT_BBMS + `LFCS: fcu_exc <= currentCSSelector != fcu_instr[31:8] ? `FLT_CS : `FLT_NONE; + `RET: fcu_exc <= fcu_argB[63:40] != currentCSSelector ? `FLT_RET : `FLT_NONE; +`endif + `CHK: begin + if (fcu_instr[21]) + fcu_exc <= fcu_argA >= fcu_argB && fcu_argA < fcu_argC ? 
`FLT_NONE : `FLT_CHK; + end + `REX: + case(olm) + `OL_USER: fcu_exc <= `FLT_PRIV; + default: ; + endcase + default: fcu_exc <= `FLT_NONE; + endcase +end + +FT64_EvalBranch ube1 +( + .instr(fcu_instr), + .a(fcu_argA), + .b(fcu_argB), + .c(fcu_argC), + .takb(fcu_takb) +); + +FT64_FCU_Calc #(.AMSB(AMSB)) ufcuc1 +( + .ol(olm), + .instr(fcu_instr), + .tvec(tvec[fcu_instr[14:13]]), + .a(fcu_argA), + .pc(fcu_pc), + .nextpc(fcu_nextpc), + .im(im), + .waitctr(waitctr), + .bus(fcu_out) +); + +wire will_clear_branchmiss = branchmiss && ((fetchbuf0_v && fetchbuf0_pc==misspc) || (fetchbuf1_v && fetchbuf1_pc==misspc)); + +always @* +begin +case(fcu_instr[`INSTRUCTION_OP]) +`R2: fcu_misspc = fcu_argB; // RTI (we don't bother fully decoding this as it's the only R2) +`RET: fcu_misspc = fcu_argB; +`REX: fcu_misspc = fcu_bus; +`BRK: fcu_misspc = {tvec[0][AMSB:8], 1'b0, olm, 5'h0}; +`JAL: fcu_misspc = fcu_argA + fcu_argI; +//`CHK: fcu_misspc = fcu_nextpc + fcu_argI; // Handled as an instruction exception +// Default: branch +default: fcu_misspc = fcu_takb ? {fcu_pc[31:8] + fcu_brdisp[31:8],fcu_brdisp[7:0]} : fcu_nextpc; +endcase +fcu_misspc[0] = 1'b0; +end + +// To avoid false branch mispredicts the branch isn't evaluated until the +// following instruction queues. The address of the next instruction is +// looked at to see if the BTB predicted correctly. + +wire fcu_brk_miss = fcu_brk || fcu_rti; +`ifdef FCU_ENH +wire fcu_ret_miss = fcu_ret && (fcu_argB != iqentry_pc[nid]); +wire fcu_jal_miss = fcu_jal && (fcu_argA + fcu_argI != iqentry_pc[nid]); +wire fcu_followed = iqentry_sn[nid] > iqentry_sn[fcu_id[`QBITS]]; +`else +wire fcu_ret_miss = fcu_ret; +wire fcu_jal_miss = fcu_jal; +wire fcu_followed = `TRUE; +`endif +always @* +if (fcu_v) begin + // Break and RTI switch register sets, and so are always treated as a branch miss in order to + // flush the pipeline. 
Hardware interrupts also stream break instructions so they need to + // flushed from the queue so the interrupt is recognized only once. + // BRK and RTI are handled as excmiss types which are processed during the commit stage. + if (fcu_brk_miss) + fcu_branchmiss = TRUE; + else if (fcu_branch && (fcu_takb ^ fcu_pt)) + fcu_branchmiss = TRUE; + else +`ifdef SUPPORT_SMT + if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol[fcu_thrd])) +`else + if (fcu_instr[`INSTRUCTION_OP] == `REX && (im < ~ol)) +`endif + fcu_branchmiss = TRUE; + else if (fcu_ret_miss) + fcu_branchmiss = TRUE; + else if (fcu_jal_miss) + fcu_branchmiss = TRUE; + else if (fcu_instr[`INSTRUCTION_OP] == `CHK && ~fcu_takb) + fcu_branchmiss = TRUE; + else + fcu_branchmiss = FALSE; +end +else + fcu_branchmiss = FALSE; + +FT64_RMW_alu urmwalu0 (rmw_instr, rmw_argA, rmw_argB, rmw_argC, rmw_res); + + +// +// additional DRAM-enqueue logic + +assign dram_avail = (dram0 == `DRAMSLOT_AVAIL || dram1 == `DRAMSLOT_AVAIL || dram2 == `DRAMSLOT_AVAIL); + +always @* +for (n = 0; n < QENTRIES; n = n + 1) + iqentry_memopsvalid[n] <= (iqentry_mem[n] && (iqentry_store[n] ? 
iqentry_a2_v[n] : 1'b1) && iqentry_state[n]==IQS_AGEN); + +always @* +for (n = 0; n < QENTRIES; n = n + 1) + iqentry_memready[n] <= (iqentry_v[n] & iqentry_iv[n] & iqentry_memopsvalid[n] & ~iqentry_memissue[n] & ~iqentry_stomp[n]); + +assign outstanding_stores = (dram0 && dram0_store) || + (dram1 && dram1_store) || + (dram2 && dram2_store); + +// +// additional COMMIT logic +// +always @* +begin + commit0_v <= (iqentry_state[heads[0]] == IQS_CMT && ~|panic); + commit0_id <= {iqentry_mem[heads[0]], heads[0]}; // if a memory op, it has a DRAM-bus id + commit0_tgt <= iqentry_tgt[heads[0]]; + commit0_we <= iqentry_we[heads[0]]; + commit0_bus <= iqentry_res[heads[0]]; + if (`NUM_CMT > 1) begin + commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10 + && iqentry_state[heads[1]] == IQS_CMT + && ~|panic); + commit1_id <= {iqentry_mem[heads[1]], heads[1]}; + commit1_tgt <= iqentry_tgt[heads[1]]; + commit1_we <= iqentry_we[heads[1]]; + commit1_bus <= iqentry_res[heads[1]]; + // Need to set commit1, and commit2 valid bits for the branch predictor. + if (`NUM_CMT > 2) begin + end + else begin + commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10 + && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10 + && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111 + && iqentry_tgt[heads[2]][4:0]==5'd0 && ~|panic); // watch out for dbnz and ibne + commit2_tgt <= 12'h000; + commit2_we <= 8'h00; + end + end + else begin + commit1_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10 + && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} == 2'b11 + && !iqentry_rfw[heads[1]] && ~|panic); // watch out for dbnz and ibne + commit1_id <= {iqentry_mem[heads[1]], heads[1]}; // if a memory op, it has a DRAM-bus id + commit1_tgt <= 12'h000; + commit1_we <= 8'h00; + // We don't really need the bus value since nothing is being written. 
+ commit1_bus <= iqentry_res[heads[1]]; + commit2_v <= ({iqentry_v[heads[0]], iqentry_state[heads[0]] == IQS_CMT} != 2'b10 + && {iqentry_v[heads[1]], iqentry_state[heads[1]] == IQS_CMT} != 2'b10 + && {iqentry_v[heads[2]], iqentry_br[heads[2]], iqentry_state[heads[2]] == IQS_CMT}==3'b111 + && !iqentry_rfw[heads[2]] && ~|panic); // watch out for dbnz and ibne + commit2_id <= {iqentry_mem[heads[2]], heads[2]}; // if a memory op, it has a DRAM-bus id + commit2_tgt <= 12'h000; + commit2_we <= 8'h00; + commit2_bus <= iqentry_res[heads[2]]; + end +end + +assign int_commit = (commit0_v && iqentry_irq[heads[0]]) + || (commit0_v && commit1_v && iqentry_irq[heads[1]] && `NUM_CMT > 1) + || (commit0_v && commit1_v && commit2_v && iqentry_irq[heads[2]] && `NUM_CMT > 2); + +// Detect if a given register will become valid during the current cycle. +// We want a signal that is active during the current clock cycle for the read +// through register file, which trims a cycle off register access for every +// instruction. But two different kinds of assignment statements can't be +// placed under the same always block, it's a bad practice and may not work. +// So a signal is created here with it's own always block. 
// regIsValid[n]: combinational "register n is valid during this cycle" flag.
// It starts from the registered valid bit rf_v[n] and is additionally raised
//   - on a branch miss, for registers that are not live targets, in the half
//     of the register file selected by branchmiss_thrd;
//   - when a commit port is writing register n this cycle and that commit is
//     the register's recorded source (or, during a branch miss on the same
//     thread, the committing queue entry is flagged in iqentry_source).
reg [AREGS-1:0] regIsValid;
always @*
begin
    for (n = 1; n < AREGS; n = n + 1)
    begin
        regIsValid[n] = rf_v[n];
        // Branch-miss recovery: registers 128 and up are handled when
        // branchmiss_thrd is set, registers below 128 otherwise.
        if (branchmiss)
            if (~livetarget[n]) begin
                if (branchmiss_thrd) begin
                    if (n >= 128)
                        regIsValid[n] = `VAL;
                end
                else begin
                    if (n < 128)
                        regIsValid[n] = `VAL;
                end
            end
        // Commit port 0 targets this register.
        if (commit0_v && n=={commit0_tgt[7:0]})
            regIsValid[n] = regIsValid[n] | ((rf_source[ {commit0_tgt[7:0]} ] == commit0_id)
                || (branchmiss && branchmiss_thrd == iqentry_thrd[commit0_id[`QBITS]] && iqentry_source[ commit0_id[`QBITS] ]));
        // Commit port 1 (only when more than one commit port is configured).
        if (commit1_v && n=={commit1_tgt[7:0]} && `NUM_CMT > 1)
            regIsValid[n] = regIsValid[n] | ((rf_source[ {commit1_tgt[7:0]} ] == commit1_id)
                || (branchmiss && branchmiss_thrd == iqentry_thrd[commit1_id[`QBITS]] && iqentry_source[ commit1_id[`QBITS] ]));
        // Commit port 2 (only when more than two commit ports are configured).
        if (commit2_v && n=={commit2_tgt[7:0]} && `NUM_CMT > 2)
            regIsValid[n] = regIsValid[n] | ((rf_source[ {commit2_tgt[7:0]} ] == commit2_id)
                || (branchmiss && branchmiss_thrd == iqentry_thrd[commit2_id[`QBITS]] && iqentry_source[ commit2_id[`QBITS] ]));
    end
    // Register 0 of each 32-register set is forced valid.
    regIsValid[0] = `VAL;
    regIsValid[32] = `VAL;
    regIsValid[64] = `VAL;
    regIsValid[96] = `VAL;
    // The same for the upper half of the register file. The guard accepts
    // either SMT (the macro originally tested here) or SUPPORT_SMT (the macro
    // the other SMT-conditional code in this file tests), so the two cannot
    // disagree.
    // NOTE(review): confirm whether SMT is defined anywhere; if not, the
    // first branch is dead and can be dropped.
`ifdef SMT
    regIsValid[128] = `VAL;
    regIsValid[160] = `VAL;
    regIsValid[192] = `VAL;
    regIsValid[224] = `VAL;
`elsif SUPPORT_SMT
    regIsValid[128] = `VAL;
    regIsValid[160] = `VAL;
    regIsValid[192] = `VAL;
    regIsValid[224] = `VAL;
`endif
end

// Wait until the cycle after Ra becomes valid to give time to read
// the vector element from the register file.
// NOTE(review): the always blocks that would load rf_vra0/rf_vra1 are commented
// out just below; confirm they are driven elsewhere, otherwise they stay X
// in simulation.
reg rf_vra0, rf_vra1;
/*always @(posedge clk)
    rf_vra0 <= regIsValid[Ra0s];
always @(posedge clk)
    rf_vra1 <= regIsValid[Ra1s];
*/
// Check how many instructions can be queued. This might be fewer than the
// number ready to queue from the fetch stage if queue slots aren't
// available or if there are no more physical registers left for remapping.
// The fetch stage needs to know how many instructions will queue so this
// logic is placed here.
// NOPs are filtered out and do not enter the instruction queue.
// The core will stream NOPs on a cache miss and they would mess up the queue order
// if there are immediate prefixes in the queue.
// For the VEX instruction, the instruction can't queue until register Ra
// is valid, because register Ra is used to specify the vector element to
// read.
wire q2open = iqentry_v[tail0]==`INV && iqentry_v[tail1]==`INV;
wire q3open = iqentry_v[tail0]==`INV && iqentry_v[tail1]==`INV && iqentry_v[(tail1 + 2'd1) % QENTRIES]==`INV;

// Enqueue decision logic (combinational).
//   canq1 / canq2     - the slot at tail0 / tail1 is free and may accept an entry
//                       (a VEX instruction is held until rf_vra0 / rf_vra1 is set).
//   queued1 / queued2 - the first / second fetched instruction is considered queued;
//                       with SUP_VECTOR set, a vector instruction is not reported as
//                       queued here unless freezePC forces it.
//   queuedNop         - the fetch buffer(s) held only NOPs, which are dropped.
//   vqueued2          - a further element of the vector instruction can take tail1.
// Every output defaults to FALSE on each evaluation, so branches that assign
// nothing leave it FALSE.
// While branchmiss is asserted, only an instruction belonging to a thread other
// than branchmiss_thrd is considered, and at most one fetch buffer is examined.
always @*
begin
    canq1 <= FALSE;
    canq2 <= FALSE;
    queued1 <= FALSE;
    queued2 <= FALSE;
    queuedNop <= FALSE;
    vqueued2 <= FALSE;
    if (!branchmiss) begin
        // Two available
        if (fetchbuf1_v & fetchbuf0_v) begin
            // Is there a pair of NOPs ? (cache miss)
            if ((fetchbuf0_instr[`INSTRUCTION_OP]==`NOP) && (fetchbuf1_instr[`INSTRUCTION_OP]==`NOP))
                queuedNop <= TRUE;
            else begin
                // If it's a predicted branch queue only the first instruction, the second
                // instruction will be stomped on.
                if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin
                    if (iqentry_v[tail0]==`INV) begin
                        canq1 <= TRUE;
                        queued1 <= TRUE;
                    end
                end
                // This is where a single NOP is allowed through to simplify the code. A
                // single NOP can't be a cache miss. Otherwise it would be necessary to queue
                // fetchbuf1 on tail0 it would add a nightmare to the enqueue code.
                // Not a branch and there are two instructions fetched, see whether or not
                // both instructions can be queued.
                else begin
                    if (iqentry_v[tail0]==`INV) begin
                        canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
                        queued1 <= (
                            ((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
                        if (iqentry_v[tail1]==`INV) begin
                            canq2 <= ((!IsVex(fetchbuf1_instr) || rf_vra1)) || !SUP_VECTOR;
                            queued2 <= (
                                (!IsVector(fetchbuf1_instr) && (!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
                            vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
                        end
                    end
                    // If an irq is active during a vector instruction fetch, claim the vector instruction
                    // is finished queueing even though it may not be. It'll pick up where it left off after
                    // the exception is processed.
                    if (freezePC) begin
                        if (IsVector(fetchbuf0_instr) && IsVector(fetchbuf1_instr) && vechain) begin
                            queued1 <= TRUE;
                            queued2 <= TRUE;
                        end
                        else if (IsVector(fetchbuf0_instr)) begin
                            queued1 <= TRUE;
                            if (vqe0 < vl-2)
                                queued2 <= TRUE;
                            else
                                queued2 <= iqentry_v[tail1]==`INV;
                        end
                    end
                end
            end
        end
        // One available
        else if (fetchbuf0_v) begin
            if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
                if (iqentry_v[tail0]==`INV) begin
                    canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
                    queued1 <=
                        (((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
                end
                if (iqentry_v[tail1]==`INV) begin
                    canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
                    vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
                end
                if (freezePC) begin
                    if (IsVector(fetchbuf0_instr)) begin
                        queued1 <= TRUE;
                        if (vqe0 < vl-2)
                            queued2 <= iqentry_v[tail1]==`INV;
                    end
                end
            end
            else
                queuedNop <= TRUE;
        end
        else if (fetchbuf1_v) begin
            if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
                if (iqentry_v[tail0]==`INV) begin
                    canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
                    queued1 <= (
                        ((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
                end
                if (iqentry_v[tail1]==`INV) begin
                    canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
                    // NOTE(review): unlike the fetchbuf0 cases this term has no
                    // "&& !vechain"; left as found - confirm which is intended.
                    vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2;
                end
                if (freezePC) begin
                    if (IsVector(fetchbuf1_instr)) begin
                        queued1 <= TRUE;
                        if (vqe1 < vl-2)
                            queued2 <= iqentry_v[tail1]==`INV;
                    end
                end
            end
            else
                queuedNop <= TRUE;
        end
        //else no instructions available to queue
    end
    else begin
        // One available
        if (fetchbuf0_v && fetchbuf0_thrd != branchmiss_thrd) begin
            if (fetchbuf0_instr[`INSTRUCTION_OP]!=`NOP) begin
                if (iqentry_v[tail0]==`INV) begin
                    canq1 <= !IsVex(fetchbuf0_instr) || rf_vra0 || !SUP_VECTOR;
                    queued1 <= (
                        ((!IsVex(fetchbuf0_instr) || rf_vra0) && (!IsVector(fetchbuf0_instr))) || !SUP_VECTOR);
                end
                if (iqentry_v[tail1]==`INV) begin
                    canq2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && SUP_VECTOR;
                    vqueued2 <= IsVector(fetchbuf0_instr) && vqe0 < vl-2 && !vechain;
                end
            end
            else
                queuedNop <= TRUE;
        end
        else if (fetchbuf1_v && fetchbuf1_thrd != branchmiss_thrd) begin
            if (fetchbuf1_instr[`INSTRUCTION_OP]!=`NOP) begin
                if (iqentry_v[tail0]==`INV) begin
                    canq1 <= !IsVex(fetchbuf1_instr) || rf_vra1 || !SUP_VECTOR;
                    queued1 <= (
                        ((!IsVex(fetchbuf1_instr) || rf_vra1) && (!IsVector(fetchbuf1_instr))) || !SUP_VECTOR);
                end
                if (iqentry_v[tail1]==`INV) begin
                    canq2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && SUP_VECTOR;
                    // Fixed: this branch queues fetchbuf1, so the vector test must
                    // look at fetchbuf1_instr / vqe1 (as canq2 does on the line
                    // above), not at fetchbuf0_instr / vqe0.
                    vqueued2 <= IsVector(fetchbuf1_instr) && vqe1 < vl-2 && !vechain;
                end
            end
            else
                queuedNop <= TRUE;
        end
//      else
//          queuedNop <= TRUE;
    end
end

//
// Branchmiss seems to be sticky sometimes during simulation. For instance branch miss
// and cache miss at same time. The branchmiss should clear before the core continues
// so the positive edge is detected to avoid incrementing the sequence number too many
// times.
+wire pebm; +edge_det uedbm (.rst(rst), .clk(clk), .ce(1'b1), .i(branchmiss), .pe(pebm), .ne(), .ee() ); + +reg [5:0] ld_time; +reg [63:0] wc_time_dat; +reg [63:0] wc_times; +always @(posedge tm_clk_i) +begin + if (|ld_time) + wc_time <= wc_time_dat; + else begin + wc_time[31:0] <= wc_time[31:0] + 32'd1; + if (wc_time[31:0] >= TM_CLKFREQ-1) begin + wc_time[31:0] <= 32'd0; + wc_time[63:32] <= wc_time[63:32] + 32'd1; + end + end +end + +wire writing_wb = + (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]] && wbptr<`WB_DEPTH-1) + || (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1 && wbptr<`WB_DEPTH-1) + || (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2 && wbptr<`WB_DEPTH-1) + ; + +// Monster clock domain. +// Like to move some of this to clocking under different always blocks in order +// to help out the toolset's synthesis, but it ain't gonna be easy. +// Simulation doesn't like it if things are under separate always blocks. +// Synthesis doesn't like it if things are under the same always block. + +//always @(posedge clk) +//begin +// branchmiss <= excmiss|fcu_branchmiss; +// misspc <= excmiss ? excmisspc : fcu_misspc; +// missid <= excmiss ? (|iqentry_exc[heads[0]] ? heads[0] : heads[1]) : fcu_sourceid; +// branchmiss_thrd <= excmiss ? excthrd : fcu_thrd; +//end +wire alu0_done_pe, alu1_done_pe, pe_wait; +edge_det uedalu0d (.clk(clk), .ce(1'b1), .i(alu0_done), .pe(alu0_done_pe), .ne(), .ee()); +edge_det uedalu1d (.clk(clk), .ce(1'b1), .i(alu1_done), .pe(alu1_done_pe), .ne(), .ee()); +edge_det uedwait1 (.clk(clk), .ce(1'b1), .i((waitctr==48'd1) || signal_i[fcu_argA[4:0]|fcu_argI[4:0]]), .pe(pe_wait), .ne(), .ee()); + +// Bus randomization to mitigate meltdown attacks +wire [63:0] ralu0_bus = |alu0_exc ? {4{lfsro}} : alu0_bus; +wire [63:0] ralu1_bus = |alu1_exc ? 
{4{lfsro}} : alu1_bus; +wire [63:0] rfpu1_bus = |fpu1_exc ? {4{lfsro}} : fpu1_bus; +wire [63:0] rfpu2_bus = |fpu2_exc ? {4{lfsro}} : fpu2_bus; +wire [63:0] rfcu_bus = |fcu_exc ? {4{lfsro}} : fcu_bus; +wire [63:0] rdramA_bus = |dramA_exc ? {4{lfsro}} : dramA_bus; +wire [63:0] rdramB_bus = |dramB_exc ? {4{lfsro}} : dramB_bus; +wire [63:0] rdramC_bus = |dramC_exc ? {4{lfsro}} : dramC_bus; + +always @(posedge clk) +if (rst) begin +`ifdef SUPPORT_SMT + mstatus[0] <= 64'h4000F; // select register set #16 for thread 0 + mstatus[1] <= 64'h4800F; // select register set #18 for thread 1 + rs_stack[0] <= 64'd16; + brs_stack[0] <= 64'd16; + rs_stack[1] <= 64'd18; + brs_stack[1] <= 64'd18; +`else + mstatus <= 64'h4000F; // select register set #16 for thread 0 + rs_stack <= 64'd16; + brs_stack <= 64'd16; +`endif + for (n = 0; n < QENTRIES; n = n + 1) begin + iqentry_state[n] <= IQS_INVALID; + iqentry_iv[n] <= `INV; + iqentry_is[n] <= 3'b00; + iqentry_sn[n] <= 4'd0; + iqentry_pt[n] <= FALSE; + iqentry_bt[n] <= FALSE; + iqentry_br[n] <= FALSE; + iqentry_aq[n] <= FALSE; + iqentry_rl[n] <= FALSE; + iqentry_alu0[n] <= FALSE; + iqentry_alu[n] <= FALSE; + iqentry_fpu[n] <= FALSE; + iqentry_fsync[n] <= FALSE; + iqentry_fc[n] <= FALSE; + iqentry_takb[n] <= FALSE; + iqentry_jmp[n] <= FALSE; + iqentry_jal[n] <= FALSE; + iqentry_ret[n] <= FALSE; + iqentry_brk[n] <= FALSE; + iqentry_irq[n] <= FALSE; + iqentry_rti[n] <= FALSE; + iqentry_ldcmp[n] <= FALSE; + iqentry_load[n] <= FALSE; + iqentry_rtop[n] <= FALSE; + iqentry_sei[n] <= FALSE; + iqentry_shft[n] <= FALSE; + iqentry_sync[n] <= FALSE; + iqentry_ven[n] <= 6'd0; + iqentry_vl[n] <= 8'd0; + iqentry_we[n] <= 8'h00; + iqentry_rfw[n] <= FALSE; + iqentry_rmw[n] <= FALSE; + iqentry_pc[n] <= RSTPC; + iqentry_instr[n] <= `NOP_INSN; + iqentry_insln[n] <= 3'd4; + iqentry_preload[n] <= FALSE; + iqentry_mem[n] <= FALSE; + iqentry_memndx[n] <= FALSE; + iqentry_memissue[n] <= FALSE; + iqentry_mem_islot[n] <= 3'd0; + iqentry_memdb[n] <= FALSE; + 
iqentry_memsb[n] <= FALSE; + iqentry_tgt[n] <= 6'd0; + iqentry_imm[n] <= 1'b0; + iqentry_ma[n] <= 1'b0; + iqentry_a0[n] <= 64'd0; + iqentry_a1[n] <= 64'd0; + iqentry_a2[n] <= 64'd0; + iqentry_a3[n] <= 64'd0; + iqentry_a1_v[n] <= `INV; + iqentry_a2_v[n] <= `INV; + iqentry_a3_v[n] <= `INV; + iqentry_a1_s[n] <= 5'd0; + iqentry_a2_s[n] <= 5'd0; + iqentry_a3_s[n] <= 5'd0; +`ifdef SUPPORT_PREDICATION + iqentry_aT[n] <= 64'd0; + iqentry_aT_s[n] <= 1'd0; +`endif + iqentry_canex[n] <= FALSE; + end + bwhich <= 2'b00; + dram0 <= `DRAMSLOT_AVAIL; + dram1 <= `DRAMSLOT_AVAIL; + dram2 <= `DRAMSLOT_AVAIL; + dram0_instr <= `NOP_INSN; + dram1_instr <= `NOP_INSN; + dram2_instr <= `NOP_INSN; + dram0_addr <= 32'h0; + dram1_addr <= 32'h0; + dram2_addr <= 32'h0; + dram0_id <= 1'b0; + dram1_id <= 1'b0; + dram2_id <= 1'b0; + L1_adr <= RSTPC; + invic <= FALSE; + tail0 <= 3'd0; + tail1 <= 3'd1; + for (n = 0; n < QENTRIES; n = n + 1) + heads[n] <= n; + panic = `PANIC_NONE; + alu0_dataready <= 1'b0; + alu1_dataready <= 1'b0; + alu0_sourceid <= 5'd0; + alu1_sourceid <= 5'd0; +`define SIM_ +`ifdef SIM_ + alu0_pc <= RSTPC; + alu0_instr <= `NOP_INSN; + alu0_argA <= 64'h0; + alu0_argB <= 64'h0; + alu0_argC <= 64'h0; + alu0_argI <= 64'h0; + alu0_mem <= 1'b0; + alu0_shft <= 1'b0; + alu0_thrd <= 1'b0; + alu0_tgt <= 6'h00; + alu0_ven <= 6'd0; + alu1_pc <= RSTPC; + alu1_instr <= `NOP_INSN; + alu1_argA <= 64'h0; + alu1_argB <= 64'h0; + alu1_argC <= 64'h0; + alu1_argI <= 64'h0; + alu1_mem <= 1'b0; + alu1_shft <= 1'b0; + alu1_thrd <= 1'b0; + alu1_tgt <= 6'h00; + alu1_ven <= 6'd0; +`endif + fcu_dataready <= 0; + fcu_instr <= `NOP_INSN; + dramA_v <= 0; + dramB_v <= 0; + dramC_v <= 0; + I <= 0; + CC <= 0; + icstate <= IDLE; + bstate <= BIDLE; + tick <= 64'd0; + ol_o <= 2'b0; + bte_o <= 2'b00; + cti_o <= 3'b000; + cyc <= `LOW; + stb_o <= `LOW; + we <= `LOW; + sel_o <= 8'h00; + dat_o <= 64'hFFFFFFFFFFFFFFFF; + sr_o <= `LOW; + cr_o <= `LOW; + vadr <= RSTPC; + icl_o <= `LOW; // instruction cache load + cr0 <= 
64'd0; + cr0[13:8] <= 6'd0; // select compressed instruction group #0 + cr0[30] <= TRUE; // enable data caching + cr0[32] <= TRUE; // enable branch predictor + cr0[16] <= 1'b0; // disable SMT + cr0[17] <= 1'b0; // sequence number reset = 1 + cr0[34] <= FALSE; // write buffer merging enable + cr0[35] <= TRUE; // load speculation enable + pcr <= 32'd0; + pcr2 <= 64'd0; + for (n = 0; n < PREGS; n = n + 1) begin + rf_v[n] <= `VAL; + rf_source[n] <= {`QBIT{1'b1}}; + end + fp_rm <= 3'd0; // round nearest even - default rounding mode + fpu_csr[37:32] <= 5'd31; // register set #31 + waitctr <= 48'd0; + for (n = 0; n < 16; n = n + 1) begin + badaddr[n] <= 64'd0; + bad_instr[n] <= `NOP_INSN; + end + // Vector + vqe0 <= 6'd0; + vqet0 <= 6'd0; + vqe1 <= 6'd0; + vqet1 <= 6'd0; + vl <= 7'd62; + for (n = 0; n < 8; n = n + 1) + vm[n] <= 64'h7FFFFFFFFFFFFFFF; + nop_fetchbuf <= 4'h0; + fcu_done <= `TRUE; + sema <= 64'h0; + tvec[0] <= RSTPC; + pmr <= 64'hFFFFFFFFFFFFFFFF; + pmr[0] <= `ID1_AVAIL; + pmr[1] <= `ID2_AVAIL; + pmr[2] <= `ID3_AVAIL; + pmr[8] <= `ALU0_AVAIL; + pmr[9] <= `ALU1_AVAIL; + pmr[16] <= `FPU1_AVAIL; + pmr[17] <= `FPU2_AVAIL; + pmr[24] <= `MEM1_AVAIL; + pmr[25] <= `MEM2_AVAIL; + pmr[26] <= `MEM3_AVAIL; + pmr[32] <= `FCU_AVAIL; + for (n = 0; n < `WB_DEPTH; n = n + 1) begin + wb_v[n] <= 1'b0; + wb_rmw[n] <= 1'b0; + wb_id[n] <= {QENTRIES{1'b0}}; + wb_ol[n] <= 2'b00; + wb_sel[n] <= 8'h00; + wb_addr[n] <= 32'd0; + wb_data[n] <= 64'd0; + end + wb_en <= `TRUE; + wbo_id <= {QENTRIES{1'b0}}; + wbptr <= 2'd0; +`ifdef SIM + wb_merges <= 32'd0; +`endif + iq_ctr <= 40'd0; + icl_ctr <= 40'd0; + bm_ctr <= 40'd0; + irq_ctr <= 40'd0; + cmt_timer <= 9'd0; + StoreAck1 <= `FALSE; + keys <= 64'h0; +`ifdef SUPPORT_DBG + dbg_ctrl <= 64'h0; +`endif +/* Initialized with initial begin above +`ifdef SUPPORT_BBMS + for (n = 0; n < 64; n = n + 1) begin + thrd_handle[n] <= 16'h0; + prg_base[n] <= 64'h0; + cl_barrier[n] <= 64'h0; + cu_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + ro_barrier[n] <= 64'h0; + 
dl_barrier[n] <= 64'h0; + du_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + sl_barrier[n] <= 64'h0; + su_barrier[n] <= 64'hFFFFFFFFFFFFFFFF; + end +`endif +*/ +end +else begin + if (|fb_panic) + panic <= fb_panic; + + // Only one branchmiss is allowed to be processed at a time. If a second + // branchmiss occurs while the first is being processed, it would have + // to of occurred as a speculation in the branch shadow of the first. + // The second instruction would be stomped on by the first branchmiss so + // there is no need to process it. + // The branchmiss has to be latched, then cleared later as there could + // be a cache miss at the same time meaning the switch to the new pc + // does not take place immediately. + if (!branchmiss) begin + if (excmiss) begin + branchmiss <= `TRUE; + misspc <= excmisspc; + missid <= (|iqentry_exc[heads[0]] ? heads[0] : heads[1]); + branchmiss_thrd <= excthrd; + end + else if (fcu_branchmiss) begin + branchmiss <= `TRUE; + misspc <= fcu_misspc; + missid <= fcu_sourceid; + branchmiss_thrd <= fcu_thrd; + end + end + // Clear a branch miss when target instruction is fetched. + if (will_clear_branchmiss) begin + branchmiss <= `FALSE; + end + + // The following signals only pulse + + // Instruction decode output should only pulse once for a queue entry. We + // want the decode to be invalidated after a clock cycle so that it isn't + // inadvertently used to update the queue at a later point. + dramA_v <= `INV; + dramB_v <= `INV; + dramC_v <= `INV; + id1_vi <= `INV; + if (`NUM_IDU > 1) + id2_vi <= `INV; + if (`NUM_IDU > 2) + id3_vi <= `INV; + wb_shift <= FALSE; + ld_time <= {ld_time[4:0],1'b0}; + wc_times <= wc_time; + rf_vra0 <= regIsValid[Ra0s]; + rf_vra1 <= regIsValid[Ra1s]; + if (vqe0 >= vl) begin + vqe0 <= 6'd0; + vqet0 <= 6'h0; + end + if (vqe1 >= vl) begin + vqe1 <= 6'd0; + vqet1 <= 6'h0; + end + // Turn off vector chaining indicator when chained instructions are done. 
+ if ((vqe0 >= vl || vqe0==6'd0) && (vqe1 >= vl || vqe1==6'd0)) +`ifdef SUPPORT_SMT + mstatus[0][32] <= 1'b0; +`else + mstatus[32] <= 1'b0; +`endif + + nop_fetchbuf <= 4'h0; + excmiss <= FALSE; + invic <= FALSE; + tick <= tick + 64'd1; + alu0_ld <= FALSE; + alu1_ld <= FALSE; + fpu1_ld <= FALSE; + fpu2_ld <= FALSE; + fcu_ld <= FALSE; + cr0[17] <= 1'b0; + if (waitctr != 48'd0) + waitctr <= waitctr - 4'd1; + + + if (iqentry_fc[fcu_id[`QBITS]] && iqentry_v[fcu_id[`QBITS]] && !iqentry_done[fcu_id[`QBITS]] && iqentry_out[fcu_id[`QBITS]]) + fcu_timeout <= fcu_timeout + 8'd1; + + if (branchmiss) begin + for (n = 1; n < PREGS; n = n + 1) + if (~livetarget[n]) begin + if (branchmiss_thrd) begin + if (n >= 128) + rf_v[n] <= `VAL; + end + else begin + if (n < 128) + rf_v[n] <= `VAL; + end + end + for (n = 0; n < QENTRIES; n = n + 1) + if (|iqentry_latestID[n]) + if (iqentry_thrd[n]==branchmiss_thrd) rf_source[ {iqentry_tgt[n][7:0]} ] <= { 1'b0, iqentry_mem[n], n[`QBITS] }; + end + + // The source for the register file data might have changed since it was + // placed on the commit bus. So it's needed to check that the source is + // still as expected to validate the register. 
+ if (commit0_v) begin + if (!rf_v[ {commit0_tgt[7:0]} ]) begin +// rf_v[ {commit0_tgt[7:0]} ] <= rf_source[ commit0_tgt[7:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]); + rf_v[ {commit0_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}];//rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ]); + if (regIsValid[{commit0_tgt[7:0]}]) + rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + if (commit0_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit0_tgt, commit0_bus, regIsValid[commit0_tgt[5:0]], + rf_source[ {commit0_tgt[7:0]} ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ])); + if (commit0_tgt[5:0]==6'd30 && commit0_bus==64'd0) + $display("FP <= 0"); + end + if (commit1_v && `NUM_CMT > 1) begin + if (!rf_v[ {commit1_tgt[7:0]} ]) begin + if ({commit1_tgt[7:0]}=={commit0_tgt[7:0]}) begin + rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}]; + if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}]) + rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}}; + /* + (rf_source[ commit0_tgt[4:0] ] == commit0_id || (branchmiss && iqentry_source[ commit0_id[`QBITS] ])) || + (rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ])); + */ + end + else begin + rf_v[ {commit1_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]); + if (regIsValid[{commit1_tgt[7:0]}]) + rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + end + if (commit1_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit1_tgt, commit1_bus, regIsValid[commit1_tgt[5:0]], + rf_source[ {commit1_tgt[7:0]} ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ])); + if (commit1_tgt[5:0]==6'd30 && commit1_bus==64'd0) + $display("FP <= 0"); + end + if (commit2_v && `NUM_CMT > 2) begin + if (!rf_v[ 
{commit2_tgt[7:0]} ]) begin + if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]} && {commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin + rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; + if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]) + rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + else if ({commit2_tgt[7:0]}=={commit0_tgt[7:0]}) begin + rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; + if (regIsValid[{commit0_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]) + rf_source[{commit0_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + else if ({commit2_tgt[7:0]}=={commit1_tgt[7:0]}) begin + rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]; + if (regIsValid[{commit1_tgt[7:0]}] | regIsValid[{commit2_tgt[7:0]}]) + rf_source[{commit1_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + else begin + rf_v[ {commit2_tgt[7:0]} ] <= regIsValid[{commit2_tgt[7:0]}];//rf_source[ commit1_tgt[4:0] ] == commit1_id || (branchmiss && iqentry_source[ commit1_id[`QBITS] ]); + if (regIsValid[{commit2_tgt[7:0]}]) + rf_source[{commit2_tgt[7:0]}] <= {`QBIT{1'b1}}; + end + end + if (commit2_tgt[5:0] != 6'd0) $display("r%d <- %h v[%d]<-%d", commit2_tgt, commit2_bus, regIsValid[commit2_tgt[5:0]], + rf_source[ {commit2_tgt[7:0]} ] == commit2_id || (branchmiss && iqentry_source[ commit2_id[`QBITS] ])); + if (commit2_tgt[5:0]==6'd30 && commit2_bus==64'd0) + $display("FP <= 0"); + end + rf_v[0] <= 1; + + // + // ENQUEUE + // + // place up to two instructions from the fetch buffer into slots in the IQ. + // note: they are placed in-order, and they are expected to be executed + // 0, 1, or 2 of the fetch buffers may have valid data + // 0, 1, or 2 slots in the instruction queue may be available. 
+ // if we notice that one of the instructions in the fetch buffer is a predicted branch, + // (set branchback/backpc and delete any instructions after it in fetchbuf) + // + + // enqueue fetchbuf0 and fetchbuf1, but only if there is room, + // and ignore fetchbuf1 if fetchbuf0 has a backwards branch in it. + // + // also, do some instruction-decode ... set the operand_valid bits in the IQ + // appropriately so that the DATAINCOMING stage does not have to look at the opcode + // + if (!branchmiss) // don't bother doing anything if there's been a branch miss + + case ({fetchbuf0_v, fetchbuf1_v}) + + 2'b00: ; // do nothing + + 2'b01: + if (canq1) begin + if (fetchbuf1_rfw) begin + rf_source[ Rt1s ] <= { 1'b0, fetchbuf1_mem, tail0 }; // top bit indicates ALU/MEM bus + rf_v [Rt1s] <= `INV; + end + if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin + vqe1 <= vqe1 + 4'd1; + if (IsVCmprss(fetchbuf1_instr)) begin + if (vm[fetchbuf1_instr[25:23]][vqe1]) + vqet1 <= vqet1 + 4'd1; + end + else + vqet1 <= vqet1 + 4'd1; + if (vqe1 >= vl-2) + nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001; + enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe1); + iq_ctr = iq_ctr + 4'd1; + if (canq2 && vqe1 < vl-2) begin + vqe1 <= vqe1 + 4'd2; + if (IsVCmprss(fetchbuf1_instr)) begin + if (vm[fetchbuf1_instr[25:23]][vqe1+6'd1]) + vqet1 <= vqet1 + 4'd2; + end + else + vqet1 <= vqet1 + 4'd2; + enque1(tail1, fetchbuf1_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe1 + 6'd1); + iq_ctr = iq_ctr + 4'd2; + end + end + else begin + enque1(tail0, fetchbuf1_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0); + iq_ctr = iq_ctr + 4'd1; + end + end + + 2'b10: + if (canq1) begin + enque0x(); + end + + 2'b11: + if (canq1) begin + // + // if the first instruction is a predicted branch, enqueue it & stomp on all following instructions + // but only if the following instruction is in the same thread. Otherwise we want to queue two. 
+ // + if (take_branch0 && fetchbuf1_thrd==fetchbuf0_thrd) begin + enque0x(); + end + + else begin // fetchbuf0 doesn't contain a predicted branch + // + // so -- we can enqueue 1 or 2 instructions, depending on space in the IQ + // update the rf_v and rf_source bits separately (at end) + // the problem is that if we do have two instructions, + // they may interact with each other, so we have to be + // careful about where things point. + // + // enqueue the first instruction ... + // + if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin + vqe0 <= vqe0 + 4'd1; + if (IsVCmprss(fetchbuf0_instr)) begin + if (vm[fetchbuf0_instr[25:23]][vqe0]) + vqet0 <= vqet0 + 4'd1; + end + else + vqet0 <= vqet0 + 4'd1; + if (vqe0 >= vl-2) + nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010; + end + if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin + enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0); + iq_ctr = iq_ctr + 4'd1; + // + // if there is room for a second instruction, enqueue it + // + if (canq2) begin + if (vechain && IsVector(fetchbuf1_instr) + && Ra1s != Rt0s // And there is no dependency + && Rb1s != Rt0s + && Rc1s != Rt0s + ) begin +`ifdef SUPPORT_SMT + mstatus[0][32] <= 1'b1; +`else + mstatus[32] <= 1'b1; +`endif + vqe1 <= vqe1 + 4'd1; + if (IsVCmprss(fetchbuf1_instr)) begin + if (vm[fetchbuf1_instr[25:23]][vqe1]) + vqet1 <= vqet1 + 4'd1; + end + else + vqet1 <= vqet1 + 4'd1; + if (vqe1 >= vl-2) + nop_fetchbuf <= fetchbuf ? 4'b0100 : 4'b0001; + enque1(tail1, + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 : + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 : + fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0); + iq_ctr = iq_ctr + 4'd2; + + // SOURCE 1 ... + a1_vs(); + + // SOURCE 2 ... + a2_vs(); + + // SOURCE 3 ... + a3_vs(); + + // if the two instructions enqueued target the same register, + // make sure only the second writes to rf_v and rf_source. 
+ // first is allowed to update rf_v and rf_source only if the + // second has no target + // + if (fetchbuf0_rfw) begin + rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 }; + rf_v [ Rt0s] <= `INV; + end + if (fetchbuf1_rfw) begin + rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 }; + rf_v [ Rt1s ] <= `INV; + end + end + // If there was a vector instruction in fetchbuf0, we really + // want to queue the next vector element, not the next + // instruction waiting in fetchbuf1. + else if (IsVector(fetchbuf0_instr) && SUP_VECTOR && vqe0 < vl-1) begin + vqe0 <= vqe0 + 4'd2; + if (IsVCmprss(fetchbuf0_instr)) begin + if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1]) + vqet0 <= vqet0 + 4'd2; + end + else + vqet0 <= vqet0 + 4'd2; + if (vqe0 >= vl-3) + nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010; + if (vqe0 < vl-1) begin + enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0] + 4'd2, vqe0 + 6'd1); + iq_ctr = iq_ctr + 4'd2; + + // SOURCE 1 ... + iqentry_a1_v [tail1] <= regIsValid[Ra0s]; + iqentry_a1_s [tail1] <= rf_source [Ra0s]; + + // SOURCE 2 ... + iqentry_a2_v [tail1] <= regIsValid[Rb0s]; + iqentry_a2_s [tail1] <= rf_source[ Rb0s ]; + + // SOURCE 3 ... + iqentry_a3_v [tail1] <= regIsValid[Rc0s]; + iqentry_a3_s [tail1] <= rf_source[ Rc0s ]; + + + // if the two instructions enqueued target the same register, + // make sure only the second writes to rf_v and rf_source. + // first is allowed to update rf_v and rf_source only if the + // second has no target (BEQ or SW) + // + if (fetchbuf0_rfw) begin + rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 }; + rf_v [ Rt0s ] <= `INV; + end + end + end + else if (IsVector(fetchbuf1_instr) && SUP_VECTOR) begin + vqe1 <= 6'd1; + if (IsVCmprss(fetchbuf1_instr)) begin + if (vm[fetchbuf1_instr[25:23]][IsVector(fetchbuf0_instr)? 6'd0:vqe1+6'd1]) + vqet1 <= 6'd1; + else + vqet1 <= 6'd0; + end + else + vqet1 <= 6'd1; + if (IsVector(fetchbuf0_instr) && SUP_VECTOR) + nop_fetchbuf <= fetchbuf ? 
4'b1000 : 4'b0010; + enque1(tail1, + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 : + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 : + fetchbuf1_thrd ? maxsn[1] + 4'd2: maxsn[0] + 4'd2, 6'd0); + iq_ctr = iq_ctr + 4'd2; + + // SOURCE 1 ... + a1_vs(); + + // SOURCE 2 .. + a2_vs(); + + // SOURCE 3 ... + a3_vs(); + + // if the two instructions enqueued target the same register, + // make sure only the second writes to rf_v and rf_source. + // first is allowed to update rf_v and rf_source only if the + // second has no target + // + if (fetchbuf0_rfw) begin + rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 }; + rf_v [ Rt0s] <= `INV; + end + if (fetchbuf1_rfw) begin + rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 }; + rf_v [ Rt1s ] <= `INV; + end + end + else begin +// enque1(tail1, seq_num + 5'd1, 6'd0); + enque1(tail1, + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b1 ? maxsn[1] + 4'd2 : + fetchbuf1_thrd==fetchbuf0_thrd && fetchbuf1_thrd==1'b0 ? maxsn[0] + 4'd2 : + fetchbuf1_thrd ? maxsn[1] + 4'd1: maxsn[0]+4'd1, 6'd0); + iq_ctr = iq_ctr + 4'd2; + + // SOURCE 1 ... + a1_vs(); + + // SOURCE 2 ... + a2_vs(); + + // SOURCE 3 ... + a3_vs(); + + + // if the two instructions enqueued target the same register, + // make sure only the second writes to regIsValid and rf_source. 
+ // first is allowed to update regIsValid and rf_source only if the + // second has no target (BEQ or SW) + // + if (fetchbuf0_rfw) begin + rf_source[ Rt0s ] <= { 1'b0,fetchbuf0_mem, tail0 }; + rf_v [ Rt0s] <= `INV; + $display("r%dx (%d) Invalidated", Rt0s, Rt0s[4:0]); + end + else + $display("No rfw"); + if (fetchbuf1_rfw) begin + rf_source[ Rt1s ] <= { 1'b0,fetchbuf1_mem, tail1 }; + $display("r%dx (%d) Invalidated", Rt1s, Rt1s[4:0]); + rf_v [ Rt1s ] <= `INV; + end + else + $display("No rfw"); + end + + end // ends the "if IQ[tail1] is available" clause + else begin // only first instruction was enqueued + if (fetchbuf0_rfw) begin + $display("r%dx (%d) Invalidated 1", Rt0s, Rt0s[4:0]); + rf_source[ Rt0s ] <= {1'b0,fetchbuf0_mem, tail0}; + rf_v [ Rt0s ] <= `INV; + end + end + end + + end // ends the "else fetchbuf0 doesn't have a backwards branch" clause + end + endcase + if (pebm) begin + bm_ctr <= bm_ctr + 40'd1; + end + +// +// DATAINCOMING +// +// wait for operand/s to appear on alu busses and puts them into +// the iqentry_a1 and iqentry_a2 slots (if appropriate) +// as well as the appropriate iqentry_res slots (and setting valid bits) +// +// put results into the appropriate instruction entries +// +// This chunk of code has to be before the enqueue stage so that the agen bit +// can be reset to zero by enqueue. 
+// put results into the appropriate instruction entries +// +if (IsMul(alu0_instr)|IsDivmod(alu0_instr)|alu0_shft|alu0_tlb) begin + if (alu0_done_pe) begin + alu0_dataready <= TRUE; + end +end +if (alu1_shft) begin + if (alu1_done_pe) begin + alu1_dataready <= TRUE; + end +end + +if (alu0_v) begin + iqentry_tgt [ alu0_id[`QBITS] ] <= alu0_tgt; + iqentry_res [ alu0_id[`QBITS] ] <= ralu0_bus; + iqentry_exc [ alu0_id[`QBITS] ] <= alu0_exc; + if (!iqentry_mem[ alu0_id[`QBITS] ] && alu0_done) begin +// iqentry_done[ alu0_id[`QBITS] ] <= `TRUE; + iqentry_state[alu0_id[`QBITS]] <= IQS_CMT; + end +// if (alu0_done) +// iqentry_cmt [ alu0_id[`QBITS] ] <= `TRUE; +// iqentry_out [ alu0_id[`QBITS] ] <= `INV; +// iqentry_agen[ alu0_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET + if (iqentry_mem[alu0_id[`QBITS]]) + iqentry_state[alu0_id[`QBITS]] <= IQS_AGEN; + if (iqentry_mem[ alu0_id[`QBITS] ] && !iqentry_agen[ alu0_id[`QBITS] ]) begin + iqentry_ma[ alu0_id[`QBITS] ] <= alu0_bus; + end + if (|alu0_exc) begin +// iqentry_done[alu0_id[`QBITS]] <= `VAL; + iqentry_store[alu0_id[`QBITS]] <= `INV; + iqentry_state[alu0_id[`QBITS]] <= IQS_CMT; + end + alu0_dataready <= FALSE; +end + +if (alu1_v && `NUM_ALU > 1) begin + iqentry_tgt [ alu1_id[`QBITS] ] <= alu1_tgt; + iqentry_res [ alu1_id[`QBITS] ] <= ralu1_bus; + iqentry_exc [ alu1_id[`QBITS] ] <= alu1_exc; + if (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done) begin +// iqentry_done[ alu1_id[`QBITS] ] <= `TRUE; + iqentry_state[alu1_id[`QBITS]] <= IQS_CMT; + end +// iqentry_done[ alu1_id[`QBITS] ] <= (!iqentry_mem[ alu1_id[`QBITS] ] && alu1_done); +// if (alu1_done) +// iqentry_cmt [ alu1_id[`QBITS] ] <= `TRUE; +// iqentry_out [ alu1_id[`QBITS] ] <= `INV; + if (iqentry_mem[alu1_id[`QBITS]]) + iqentry_state[alu1_id[`QBITS]] <= IQS_AGEN; +// iqentry_agen[ alu1_id[`QBITS] ] <= `VAL;//!iqentry_fc[alu0_id[`QBITS]]; // RET + if (iqentry_mem[ alu1_id[`QBITS] ] && !iqentry_agen[ alu1_id[`QBITS] ]) begin + iqentry_ma[ alu1_id[`QBITS] 
] <= alu1_bus; + end + if (|alu1_exc) begin +// iqentry_done[alu1_id[`QBITS]] <= `VAL; + iqentry_store[alu1_id[`QBITS]] <= `INV; + iqentry_state[alu1_id[`QBITS]] <= IQS_CMT; + end + alu1_dataready <= FALSE; +end + +if (fpu1_v && `NUM_FPU > 0) begin + iqentry_res [ fpu1_id[`QBITS] ] <= rfpu1_bus; + iqentry_ares[ fpu1_id[`QBITS] ] <= fpu1_status; + iqentry_exc [ fpu1_id[`QBITS] ] <= fpu1_exc; +// iqentry_done[ fpu1_id[`QBITS] ] <= fpu1_done; +// iqentry_out [ fpu1_id[`QBITS] ] <= `INV; + iqentry_state[fpu1_id[`QBITS]] <= IQS_CMT; + fpu1_dataready <= FALSE; +end + +if (fpu2_v && `NUM_FPU > 1) begin + iqentry_res [ fpu2_id[`QBITS] ] <= rfpu2_bus; + iqentry_ares[ fpu2_id[`QBITS] ] <= fpu2_status; + iqentry_exc [ fpu2_id[`QBITS] ] <= fpu2_exc; +// iqentry_done[ fpu2_id[`QBITS] ] <= fpu2_done; +// iqentry_out [ fpu2_id[`QBITS] ] <= `INV; + iqentry_state[fpu2_id[`QBITS]] <= IQS_CMT; + //iqentry_agen[ fpu_id[`QBITS] ] <= `VAL; // RET + fpu2_dataready <= FALSE; +end + +if (IsWait(fcu_instr)) begin + if (pe_wait) + fcu_dataready <= `TRUE; +end + +if (fcu_v) begin + fcu_done <= `TRUE; + iqentry_ma [ fcu_id[`QBITS] ] <= fcu_misspc; + iqentry_res [ fcu_id[`QBITS] ] <= rfcu_bus; + iqentry_exc [ fcu_id[`QBITS] ] <= fcu_exc; +// iqentry_done[ fcu_id[`QBITS] ] <= `TRUE; +// iqentry_out [ fcu_id[`QBITS] ] <= `INV; + iqentry_state[fcu_id[`QBITS] ] <= IQS_CMT; + // takb is looked at only for branches to update the predictor. Here it is + // unconditionally set, the value will be ignored if it's not a branch. 
+ iqentry_takb[ fcu_id[`QBITS] ] <= fcu_takb; + fcu_dataready <= `INV; +end + +// dramX_v only set on a load +if (mem1_available && dramA_v && iqentry_v[ dramA_id[`QBITS] ]) begin + iqentry_res [ dramA_id[`QBITS] ] <= rdramA_bus; + iqentry_exc [ dramA_id[`QBITS] ] <= dramA_exc; +// iqentry_done[ dramA_id[`QBITS] ] <= `VAL; +// iqentry_out [ dramA_id[`QBITS] ] <= `INV; + iqentry_state[dramA_id[`QBITS] ] <= IQS_CMT; + iqentry_aq [ dramA_id[`QBITS] ] <= `INV; +end +if (mem2_available && `NUM_MEM > 1 && dramB_v && iqentry_v[ dramB_id[`QBITS] ]) begin + iqentry_res [ dramB_id[`QBITS] ] <= rdramB_bus; + iqentry_exc [ dramB_id[`QBITS] ] <= dramB_exc; +// iqentry_done[ dramB_id[`QBITS] ] <= `VAL; + iqentry_state[dramB_id[`QBITS] ] <= IQS_CMT; +// iqentry_out [ dramB_id[`QBITS] ] <= `INV; + iqentry_aq [ dramB_id[`QBITS] ] <= `INV; +end +if (mem3_available && `NUM_MEM > 2 && dramC_v && iqentry_v[ dramC_id[`QBITS] ]) begin + iqentry_res [ dramC_id[`QBITS] ] <= rdramC_bus; + iqentry_exc [ dramC_id[`QBITS] ] <= dramC_exc; +// iqentry_done[ dramC_id[`QBITS] ] <= `VAL; + iqentry_state[dramC_id[`QBITS] ] <= IQS_CMT; +// iqentry_out [ dramC_id[`QBITS] ] <= `INV; + iqentry_aq [ dramC_id[`QBITS] ] <= `INV; +// if (iqentry_lptr[dram2_id[`QBITS]]) +// wbrcd[pcr[5:0]] <= 1'b1; +end + +// +// see if anybody else wants the results ... look at lots of buses: +// - fpu_bus +// - alu0_bus +// - alu1_bus +// - fcu_bus +// - dram_bus +// - commit0_bus +// - commit1_bus +// + +for (n = 0; n < QENTRIES; n = n + 1) +begin + if (`NUM_FPU > 0) + setargs(n,{1'b0,fpu1_id},fpu1_v,rfpu1_bus); + if (`NUM_FPU > 1) + setargs(n,{1'b0,fpu2_id},fpu2_v,rfpu2_bus); + + // The memory address generated by the ALU should not be posted to be + // recieved into waiting argument registers. The arguments will be waiting + // for the result of the memory load, picked up from the dram busses. The + // only mem operation requiring the alu result bus is the push operation. 
+ setargs(n,{1'b0,alu0_id},alu0_v & (~alu0_mem | alu0_push),ralu0_bus); + if (`NUM_ALU > 1) + setargs(n,{1'b0,alu1_id},alu1_v & (~alu1_mem | alu1_push),ralu1_bus); + + setargs(n,{1'b0,fcu_id},fcu_v,rfcu_bus); + + setargs(n,{1'b0,dramA_id},dramA_v,rdramA_bus); + if (`NUM_MEM > 1) + setargs(n,{1'b0,dramB_id},dramB_v,rdramB_bus); + if (`NUM_MEM > 2) + setargs(n,{1'b0,dramC_id},dramC_v,rdramC_bus); + + setargs(n,commit0_id,commit0_v,commit0_bus); + if (`NUM_CMT > 1) + setargs(n,commit1_id,commit1_v,commit1_bus); + if (`NUM_CMT > 2) + setargs(n,commit2_id,commit2_v,commit2_bus); +`ifndef INLINE_DECODE + setinsn(n[`QBITS],id1_ido,id1_available&id1_vo,id1_bus); + if (`NUM_IDU > 1) + setinsn(n[`QBITS],id2_ido,id2_available&id2_vo,id2_bus); + if (`NUM_IDU > 2) + setinsn(n[`QBITS],id3_ido,id3_available&id3_vo,id3_bus); +`endif +end + + +// +// ISSUE +// +// determines what instructions are ready to go, then places them +// in the various ALU queues. +// also invalidates instructions following a branch-miss BEQ or any JALR (STOMP logic) +// +`ifndef INLINE_DECODE +for (n = 0; n < QENTRIES; n = n + 1) +if (id1_available) begin +if (iqentry_id1issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + id1_vi <= `VAL; + id1_id <= n[4:0]; + id1_instr <= iqentry_rtop[n] ? ( + iqentry_a3_v[n] ? iqentry_a3[n] +`ifdef FU_BYPASS + : (iqentry_a3_s[n] == alu0_id) ? alu0_bus + : (iqentry_a3_s[n] == alu1_id) ? alu1_bus +`endif + : `NOP_INSN) + : iqentry_instr[n]; + id1_ven <= iqentry_ven[n]; + id1_vl <= iqentry_vl[n]; + id1_thrd <= iqentry_thrd[n]; + id1_Rt <= iqentry_tgt[n][4:0]; + id1_pt <= iqentry_pt[n]; + end +end +if (`NUM_IDU > 1) begin +for (n = 0; n < QENTRIES; n = n + 1) + if (id2_available) begin + if (iqentry_id2issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + id2_vi <= `VAL; + id2_id <= n[4:0]; + id2_instr <= iqentry_rtop[n] ? ( + iqentry_a3_v[n] ? iqentry_a3[n] +`ifdef FU_BYPASS + : (iqentry_a3_s[n] == alu0_id) ? 
alu0_bus + : (iqentry_a3_s[n] == alu1_id) ? alu1_bus +`endif + : `NOP_INSN) + : iqentry_instr[n]; + id2_ven <= iqentry_ven[n]; + id2_vl <= iqentry_vl[n]; + id2_thrd <= iqentry_thrd[n]; + id2_Rt <= iqentry_tgt[n][4:0]; + id2_pt <= iqentry_pt[n]; + end + end +end +if (`NUM_IDU > 2) begin +for (n = 0; n < QENTRIES; n = n + 1) + if (id3_available) begin + if (iqentry_id3issue[n] && !iqentry_iv[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + id3_vi <= `VAL; + id3_id <= n[4:0]; + id3_instr <= iqentry_rtop[n] ? ( + iqentry_a3_v[n] ? iqentry_a3[n] +`ifdef FU_BYPASS + : (iqentry_a3_s[n] == alu0_id) ? alu0_bus + : (iqentry_a3_s[n] == alu1_id) ? alu1_bus +`endif + : `NOP_INSN) + : iqentry_instr[n]; + id3_ven <= iqentry_ven[n]; + id3_vl <= iqentry_vl[n]; + id3_thrd <= iqentry_thrd[n]; + id3_Rt <= iqentry_tgt[n][4:0]; + id3_pt <= iqentry_pt[n]; + end + end +end +`endif // not INLINE_DECODE + +// X's on unused busses cause problems in SIM. + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_alu0_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + if (alu0_available & alu0_done) begin + alu0_sourceid <= {iqentry_push[n],n[`QBITS]}; + alu0_instr <= iqentry_rtop[n] ? ( +`ifdef FU_BYPASS + iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a3_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a3_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : `NOP_INSN) +`else + iqentry_a3[n]) +`endif + : iqentry_instr[n]; + alu0_sz <= iqentry_sz[n]; + alu0_tlb <= iqentry_tlb[n]; + alu0_mem <= iqentry_mem[n]; + alu0_load <= iqentry_load[n]; + alu0_store <= iqentry_store[n]; + alu0_push <= iqentry_push[n]; + alu0_shft <= iqentry_shft[n]; + alu0_pc <= iqentry_pc[n]; + alu0_argA <= +`ifdef FU_BYPASS + iqentry_a1_v[n] ? iqentry_a1[n] + : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? 
rfpu1_bus + : 64'hDEADDEADDEADDEAD; +`else + iqentry_a1[n]; +`endif + alu0_argB <= iqentry_imm[n] + ? iqentry_a0[n] +`ifdef FU_BYPASS + : (iqentry_a2_v[n] ? iqentry_a2[n] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD); +`else + : iqentry_a2[n]; +`endif + alu0_argC <= +`ifdef FU_BYPASS + iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_a3[n]; +`endif + alu0_argI <= iqentry_a0[n]; + alu0_tgt <= IsVeins(iqentry_instr[n]) ? + {6'h0,1'b1,iqentry_tgt[n][4:0]} | (( + iqentry_a2_v[n] ? iqentry_a2[n][5:0] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0] + : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0] + : {4{16'h0000}})) << 6 : + iqentry_tgt[n]; + alu0_ven <= iqentry_ven[n]; + alu0_thrd <= iqentry_thrd[n]; + alu0_dataready <= IsSingleCycle(iqentry_instr[n]); + alu0_ld <= TRUE; + iqentry_state[n] <= IQS_OUT; + end + end + if (`NUM_ALU > 1) begin + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_alu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + if (alu1_available && alu1_done) begin + if (iqentry_alu0[n]) + panic <= `PANIC_ALU0ONLY; + alu1_sourceid <= {iqentry_push[n],n[`QBITS]}; + alu1_instr <= iqentry_instr[n]; + alu1_sz <= iqentry_sz[n]; + alu1_mem <= iqentry_mem[n]; + alu1_load <= iqentry_load[n]; + alu1_store <= iqentry_store[n]; + alu1_push <= iqentry_push[n]; + alu1_shft <= iqentry_shft[n]; + alu1_pc <= iqentry_pc[n]; + alu1_argA <= +`ifdef FU_BYPASS + iqentry_a1_v[n] ? iqentry_a1[n] + : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD; +`else + iqentry_a1[n]; +`endif + alu1_argB <= iqentry_imm[n] + ? iqentry_a0[n] +`ifdef FU_BYPASS + : (iqentry_a2_v[n] ? iqentry_a2[n] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a2_s[n] == alu1_id) ? 
ralu1_bus + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD); +`else + : iqentry_a2[n]; +`endif + alu1_argC <= +`ifdef FU_BYPASS + iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_a3[n]; +`endif + alu1_argI <= iqentry_a0[n]; + alu1_tgt <= IsVeins(iqentry_instr[n]) ? + {6'h0,1'b1,iqentry_tgt[n][4:0]} | ((iqentry_a2_v[n] ? iqentry_a2[n][5:0] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[5:0] + : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus[5:0] + : {4{16'h0000}})) << 6 : + iqentry_tgt[n]; + alu1_ven <= iqentry_ven[n]; + alu1_dataready <= IsSingleCycle(iqentry_instr[n]); + alu1_ld <= TRUE; + iqentry_state[n] <= IQS_OUT; + end + end + end + + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_fpu1_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + if (fpu1_available & fpu1_done) begin + fpu1_sourceid <= n[`QBITS]; + fpu1_instr <= iqentry_instr[n]; + fpu1_pc <= iqentry_pc[n]; + fpu1_argA <= +`ifdef FU_BYPASS + iqentry_a1_v[n] ? iqentry_a1[n] + : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD; +`else + iqentry_a1[n]; +`endif + fpu1_argB <= +`ifdef FU_BYPASS + (iqentry_a2_v[n] ? iqentry_a2[n] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD); +`else + iqentry_a2[n]; +`endif + fpu1_argC <= +`ifdef FU_BYPASS + iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_a3[n]; +`endif +`ifdef SUPPORT_PREDICATION + fpu1_pred <= iqentry_p_v[n] ? iqentry_pred[n] : +`ifdef FU_BYPASS + (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] : + (iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] : +`endif + 4'h0; + fpu1_argT <= +`ifdef FU_BYPASS + iqentry_aT_v[n] ? 
iqentry_aT[n] + : (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_aT[n]; +`endif +`endif + fpu1_argI <= iqentry_a0[n]; + fpu1_dataready <= `VAL; + fpu1_ld <= TRUE; + iqentry_state[n] <= IQS_OUT; + end + end + + for (n = 0; n < QENTRIES; n = n + 1) + if (`NUM_FPU > 1 && iqentry_fpu2_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + if (fpu2_available & fpu2_done) begin + fpu2_sourceid <= n[`QBITS]; + fpu2_instr <= iqentry_instr[n]; + fpu2_pc <= iqentry_pc[n]; + fpu2_argA <= +`ifdef FU_BYPASS + iqentry_a1_v[n] ? iqentry_a1[n] + : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a1_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD; +`else + iqentry_a1[n]; +`endif + fpu2_argB <= +`ifdef FU_BYPASS + (iqentry_a2_v[n] ? iqentry_a2[n] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a2_s[n] == alu1_id) ? ralu1_bus + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : 64'hDEADDEADDEADDEAD); +`else + iqentry_a2[n]; +`endif + fpu2_argC <= +`ifdef FU_BYPASS + iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_a3[n]; +`endif +`ifdef SUPPORT_PREDICATION + fpu2_pred <= iqentry_p_v[n] ? iqentry_pred[n] : +`ifdef FU_BYPASS + (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] : + (iqentry_p_s[n] == alu1_id) ? alu1nyb[iqentry_preg[n]] : +`endif + 4'h0; + fpu2_argT <= +`ifdef FU_BYPASS + iqentry_aT_v[n] ? iqentry_aT[n] + : (iqentry_aT_s[n] == alu0_id) ? 
ralu0_bus : ralu1_bus; +`else + iqentry_aT[n]; +`endif +`endif + fpu2_argI <= iqentry_a0[n]; + fpu2_dataready <= `VAL; + fpu2_ld <= TRUE; + iqentry_state[n] <= IQS_OUT; + end + end + + for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_fcu_issue[n] && !(iqentry_v[n] && iqentry_stomp[n])) begin + if (fcu_done) begin + fcu_sourceid <= n[`QBITS]; + fcu_prevInstr <= fcu_instr; + fcu_instr <= iqentry_instr[n]; + fcu_insln <= iqentry_insln[n]; + fcu_pc <= iqentry_pc[n]; + fcu_nextpc <= iqentry_pc[n] + iqentry_insln[n]; + fcu_pt <= iqentry_pt[n]; + fcu_brdisp <= iqentry_instr[n][6] ? {{37{iqentry_instr[n][47]}},iqentry_instr[n][47:23],iqentry_instr[n][17:16]} + : {{53{iqentry_instr[n][31]}},iqentry_instr[n][31:23],iqentry_instr[n][17:16]}; + fcu_branch <= iqentry_br[n]; + fcu_call <= IsCall(iqentry_instr[n])|iqentry_jal[n]; + fcu_jal <= iqentry_jal[n]; + fcu_ret <= iqentry_ret[n]; + fcu_brk <= iqentry_brk[n]; + fcu_rti <= iqentry_rti[n]; + fcu_pc <= iqentry_pc[n]; + fcu_argA <= iqentry_a1_v[n] ? iqentry_a1[n] + : (iqentry_a1_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a1_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : ralu1_bus; +`ifdef SUPPORT_SMT + fcu_argB <= iqentry_rti[n] ? epc0[iqentry_thrd[n]] +`else + fcu_argB <= iqentry_rti[n] ? epc0 +`endif + : (iqentry_a2_v[n] ? iqentry_a2[n] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus + : ralu1_bus); + // argB + waitctr <= (iqentry_a2_v[n] ? iqentry_a2[n][47:0] + : (iqentry_a2_s[n] == alu0_id) ? ralu0_bus[47:0] + : (iqentry_a2_s[n] == fpu1_id && `NUM_FPU > 0) ? rfpu1_bus[47:0] + : ralu1_bus[47:0]); + fcu_argC <= iqentry_a3_v[n] ? iqentry_a3[n] + : (iqentry_a3_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`ifdef SUPPORT_PREDICATION + fcu_pred <= iqentry_p_v[n] ? iqentry_pred[n] : +`ifdef FU_BYPASS + (iqentry_p_s[n] == alu0_id) ? alu0nyb[iqentry_preg[n]] : + (iqentry_p_s[n] == alu1_id) ? 
alu1nyb[iqentry_preg[n]] : +`endif + 4'h0; + fcu_argT <= +`ifdef FU_BYPASS + iqentry_aT_v[n] ? iqentry_aT[n] + : (iqentry_aT_s[n] == alu0_id) ? ralu0_bus : ralu1_bus; +`else + iqentry_aT[n]; +`endif +`endif + fcu_argI <= iqentry_a0[n]; + fcu_thrd <= iqentry_thrd[n]; + fcu_dataready <= !IsWait(iqentry_instr[n]); + fcu_clearbm <= `FALSE; + fcu_ld <= TRUE; + fcu_timeout <= 8'h00; + iqentry_state[n] <= IQS_OUT; + fcu_done <= `FALSE; + end + end +// +// MEMORY +// +// update the memory queues and put data out on bus if appropriate +// + +// +// dram0, dram1, dram2 are the "state machines" that keep track +// of three pipelined DRAM requests. if any has the value "000", +// then it can accept a request (which bumps it up to the value "001" +// at the end of the cycle). once it hits the value "111" the request +// is finished and the dram_bus takes the value. if it is a store, the +// dram_bus value is not used, but the dram_v value along with the +// dram_id value signals the waiting memq entry that the store is +// completed and the instruction can commit. +// + +// if (dram0 != `DRAMSLOT_AVAIL) dram0 <= dram0 + 2'd1; +// if (dram1 != `DRAMSLOT_AVAIL) dram1 <= dram1 + 2'd1; +// if (dram2 != `DRAMSLOT_AVAIL) dram2 <= dram2 + 2'd1; + +// Flip the ready status to available. Used for loads or stores. 
+ +if (dram0 == `DRAMREQ_READY) + dram0 <= `DRAMSLOT_AVAIL; +if (dram1 == `DRAMREQ_READY && `NUM_MEM > 1) + dram1 <= `DRAMSLOT_AVAIL; +if (dram2 == `DRAMREQ_READY && `NUM_MEM > 2) + dram2 <= `DRAMSLOT_AVAIL; + +// Grab load requests that have reached DRAMREQ_READY and post them on the +// dramA/dramB/dramC result busses: valid flag, queue id, exception code and +// the read data (rdat0..rdat2) as returned by fnDatiAlign(). + +if (dram0 == `DRAMREQ_READY && dram0_load) begin + dramA_v <= `VAL;//!iqentry_stomp[dram0_id[`QBITS]]; + dramA_id <= dram0_id; + dramA_exc <= dram0_exc; + dramA_bus <= fnDatiAlign(dram0_instr,dram0_addr,rdat0); +end +if (dram1 == `DRAMREQ_READY && dram1_load && `NUM_MEM > 1) begin + dramB_v <= `VAL;//!iqentry_stomp[dram1_id[`QBITS]]; + dramB_id <= dram1_id; + dramB_exc <= dram1_exc; + dramB_bus <= fnDatiAlign(dram1_instr,dram1_addr,rdat1); +end +if (dram2 == `DRAMREQ_READY && dram2_load && `NUM_MEM > 2) begin + dramC_v <= `VAL;//!iqentry_stomp[dram2_id[`QBITS]]; + dramC_id <= dram2_id; + dramC_exc <= dram2_exc; + dramC_bus <= fnDatiAlign(dram2_instr,dram2_addr,rdat2); +end + +// Simulation trace: print the address and data of every store request that reaches +// DRAMREQ_READY. +if (dram0 == `DRAMREQ_READY && dram0_store) + $display("m[%h] <- %h", dram0_addr, dram0_data); +if (dram1 == `DRAMREQ_READY && dram1_store && `NUM_MEM > 1) + $display("m[%h] <- %h", dram1_addr, dram1_data); +if (dram2 == `DRAMREQ_READY && dram2_store && `NUM_MEM > 2) + $display("m[%h] <- %h", dram2_addr, dram2_data); + +// +// determine if the instructions ready to issue can, in fact, issue. 
+// "ready" means that the instruction has valid operands but has not gone yet +iqentry_memissue <= memissue; +missue_count <= issue_count; + +// An idle memory channel carries no pending exception code. +if (dram0 == `DRAMSLOT_AVAIL) dram0_exc <= `FLT_NONE; +if (dram1 == `DRAMSLOT_AVAIL) dram1_exc <= `FLT_NONE; +if (dram2 == `DRAMSLOT_AVAIL) dram2_exc <= `FLT_NONE; + +// Squash stomped queue entries: clear iqentry_iv/mem/load/store and mark the entry +// IQS_INVALID. Any memory channel whose id matches the entry is cancelled and freed; +// wb_nack() is called first if that channel is in the DRAMSLOT_HASBUS state. +for (n = 0; n < QENTRIES; n = n + 1) + if (iqentry_v[n] && iqentry_stomp[n]) begin + iqentry_iv[n] <= `INV; + iqentry_mem[n] <= `INV; + iqentry_load[n] <= `INV; + iqentry_store[n] <= `INV; + iqentry_state[n] <= IQS_INVALID; +// iqentry_agen[n] <= `INV; +// iqentry_out[n] <= `INV; +// iqentry_done[n] <= `INV; +// iqentry_cmt[n] <= `INV; + if (dram0_id[`QBITS] == n[`QBITS]) begin + if (dram0==`DRAMSLOT_HASBUS) + wb_nack(); + dram0_load <= `FALSE; + dram0_store <= `FALSE; + dram0_rmw <= `FALSE; + dram0 <= `DRAMSLOT_AVAIL; + end + if (dram1_id[`QBITS] == n[`QBITS]) begin + if (dram1==`DRAMSLOT_HASBUS) + wb_nack(); + dram1_load <= `FALSE; + dram1_store <= `FALSE; + dram1_rmw <= `FALSE; + dram1 <= `DRAMSLOT_AVAIL; + end + if (dram2_id[`QBITS] == n[`QBITS]) begin + if (dram2==`DRAMSLOT_HASBUS) + wb_nack(); + dram2_load <= `FALSE; + dram2_store <= `FALSE; + dram2_rmw <= `FALSE; + dram2 <= `DRAMSLOT_AVAIL; + end + end + +// Hand the queue entries selected for memory issue to the per-channel issue tasks. +// NOTE(review): last_issueN >= QENTRIES presumably means nothing was selected for +// that channel - confirm where last_issueN is computed. 
+if (last_issue0 < QENTRIES) + tDram0Issue(last_issue0); +if (last_issue1 < QENTRIES) + tDram1Issue(last_issue1); +if (last_issue2 < QENTRIES) + tDram2Issue(last_issue2); + + +//for (n = 0; n < QENTRIES; n = n + 1) +//begin +// if (!iqentry_v[n]) +// iqentry_done[n] <= FALSE; +//end + +// Commit watchdog: ohead[0] is the previous cycle's heads[0] (assigned in the commit +// phase below), so cmt_timer counts consecutive cycles without head movement. At a +// count of 1000 the head entry is forced into IQS_CMT with exception FLT_CMT. +if (ohead[0]==heads[0]) + cmt_timer <= cmt_timer + 12'd1; +else + cmt_timer <= 12'd0; + +if (cmt_timer==12'd1000) begin + iqentry_state[heads[0]] <= IQS_CMT; + iqentry_exc[heads[0]] <= `FLT_CMT; + cmt_timer <= 12'd0; +end + +// +// COMMIT PHASE (dequeue only ... 
not register-file update) +// +// look at heads[0], heads[1] and heads[2] and dequeue the ones that are ready +// +// always @(posedge clk) begin: commit_phase +ohead[0] <= heads[0]; +ohead[1] <= heads[1]; +ohead[2] <= heads[2]; +ocommit0_v <= commit0_v; +ocommit1_v <= commit1_v; +ocommit2_v <= commit2_v; + +oddball_commit(commit0_v, heads[0], 2'd0); +if (`NUM_CMT > 1) + oddball_commit(commit1_v, heads[1], 2'd1); +if (`NUM_CMT > 2) + oddball_commit(commit2_v, heads[2], 2'd2); + +// Fetch and queue are limited to two instructions per cycle, so we might as +// well limit retiring to two instructions max to conserve logic. +// NOTE(review): the casez below calls head_inc(3), i.e. up to three entries are +// dequeued per cycle, so the "two instructions max" statement looks stale. +// +// Selector, two bits per head entry in the order heads[0], heads[1], heads[2]: +// {iqentry_v, iqentry_state == IQS_CMT}. 0? = entry not valid, 10 = valid but not +// in the commit state, 11 = valid and in the commit state. +// Nothing is dequeued while panic is non-zero. +if (~|panic) + casez ({ iqentry_v[heads[0]], + iqentry_state[heads[0]] == IQS_CMT, + iqentry_v[heads[1]], + iqentry_state[heads[1]] == IQS_CMT, + iqentry_v[heads[2]], + iqentry_state[heads[2]] == IQS_CMT}) + + // retire 3 + 6'b0?_0?_0?: + if (heads[0] != tail0 && heads[1] != tail0 && heads[2] != tail0) + head_inc(3); + else if (heads[0] != tail0 && heads[1] != tail0) + head_inc(2); + else if (heads[0] != tail0) + head_inc(1); + 6'b0?_0?_10: + if (heads[0] != tail0 && heads[1] != tail0) + head_inc(2); + else if (heads[0] != tail0) + head_inc(1); + 6'b0?_0?_11: + if (`NUM_CMT > 2 || cmt_head2) // and it's not an oddball? 
+ head_inc(3); + else + head_inc(2); + + // retire 1 (wait for regfile for heads[1]) + 6'b0?_10_??: + head_inc(1); + + // retire 2 + 6'b0?_11_0?, + 6'b0?_11_10: + if (`NUM_CMT > 1 || cmt_head1) + head_inc(2); + else + head_inc(1); + 6'b0?_11_11: + if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) + head_inc(3); + else if (`NUM_CMT > 1 || cmt_head1) + head_inc(2); + else + head_inc(1); + 6'b10_??_??: ; // heads[0] is valid but not in the commit state: nothing is dequeued + 6'b11_0?_0?: + if (heads[1] != tail0 && heads[2] != tail0) + head_inc(3); + else if (heads[1] != tail0) + head_inc(2); + else + head_inc(1); + 6'b11_0?_10: + if (heads[1] != tail0) + head_inc(2); + else + head_inc(1); + 6'b11_0?_11: + if (heads[1] != tail0) begin + if (`NUM_CMT > 2 || cmt_head2) + head_inc(3); + else + head_inc(2); + end + else + head_inc(1); + 6'b11_10_??: + head_inc(1); + 6'b11_11_0?: + if (`NUM_CMT > 1 && heads[2] != tail0) + head_inc(3); + else if (cmt_head1 && heads[2] != tail0) + head_inc(3); + else if (`NUM_CMT > 1 || cmt_head1) + head_inc(2); + else + head_inc(1); + 6'b11_11_10: + if (`NUM_CMT > 1 || cmt_head1) + head_inc(2); + else + head_inc(1); + 6'b11_11_11: + if (`NUM_CMT > 2 || (`NUM_CMT > 1 && cmt_head2)) + head_inc(3); + else if (`NUM_CMT > 1 || cmt_head1) + head_inc(2); + else + head_inc(1); + default: + begin + $display("head_inc: Uncoded case %h",{ iqentry_v[heads[0]], + iqentry_state[heads[0]], + iqentry_v[heads[1]], + iqentry_state[heads[1]], + iqentry_v[heads[2]], + iqentry_state[heads[2]]}); + $stop; + end + endcase + + +// Per-cycle defaults; later non-blocking assignments to the same signal in this block +// (for example L1_wr0..2, L1_invline and icnxt in the instruction cache state machine) +// override them. +rf_source[0] <= 0; +L1_wr0 <= FALSE; +L1_wr1 <= FALSE; +L1_wr2 <= FALSE; +L1_invline <= FALSE; +icnxt <= FALSE; +L2_nxt <= FALSE; +// Instruction cache state machine. +// On a miss first see if the instruction is in the L2 cache. 
No need to go to +// the BIU on an L1 miss. +// If not the machine will wait until the BIU loads the L2 cache. + +// Capture the previous ic state, used to determine how long to wait in +// icstate #4. NOTE(review): presumably this means IC_WaitL2, the state that tests +// picstate - confirm. 
+picstate <= icstate; +case(icstate) +IDLE: + // If the bus unit is busy doing an update involving L1_adr or L2_adr + // we have to wait. + // Otherwise service the first missing fetch slot, in the order ihit0, ihit1, ihit2: + // latch the line address (low five bits zeroed) into L1_adr and L2_adr, pulse + // L1_invline, record the slot number in icwhich and move on to IC2. + if (bstate != B_ICacheAck && bstate != B_ICacheNack && bstate != B_ICacheNack2) begin + if (!ihit0) begin + L1_adr <= {pcr[7:0],pc0[AMSB:5],5'h0}; + L2_adr <= {pcr[7:0],pc0[AMSB:5],5'h0}; + L1_invline <= TRUE; + icwhich <= 2'b00; + iccnt <= 3'b00; + icstate <= IC2; + end + else if (!ihit1 && `WAYS > 1) begin + if (thread_en) begin + L1_adr <= {pcr[7:0],pc1[AMSB:5],5'h0}; + L2_adr <= {pcr[7:0],pc1[AMSB:5],5'h0}; + end + else begin + L1_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0}; + L2_adr <= {pcr[7:0],pc0plus6[AMSB:5],5'h0}; + end + L1_invline <= TRUE; + icwhich <= 2'b01; + iccnt <= 3'b00; + icstate <= IC2; + end + else if (!ihit2 && `WAYS > 2) begin + if (thread_en) begin + L1_adr <= {pcr[7:0],pc2[AMSB:5],5'h0}; + L2_adr <= {pcr[7:0],pc2[AMSB:5],5'h0}; + end + else begin + L1_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0}; + L2_adr <= {pcr[7:0],pc0plus12[AMSB:5],5'h0}; + end + L1_invline <= TRUE; + icwhich <= 2'b10; + iccnt <= 3'b00; + icstate <= IC2; + end + end +// IC2, IC3 and IC3a are unconditional one-cycle steps leading to IC_WaitL2. +IC2: icstate <= IC3; +IC3: icstate <= IC3a; +IC3a: icstate <= IC_WaitL2; +// If data was in the L2 cache already there's no need to wait on the +// BIU to retrieve data. It can be determined if the hit signal was +// already active when this state was entered in which case waiting +// will do no good. +// The IC machine will stall in this state until the BIU has loaded the +// L2 cache. +// picstate holds the previous cycle's icstate, so picstate==IC3a marks the first +// cycle in IC_WaitL2. 
On an immediate L2 hit L1_en and L1_wr0..2 are asserted and +// L2_rdat is loaded from L2_dato; otherwise the machine idles until bstate reaches +// B_ICacheNack and then asserts the same signals without loading L2_rdat here. 
+IC_WaitL2: + if (ihitL2 && picstate==IC3a) begin + L1_en <= 9'h1FF; + L1_wr0 <= TRUE; + L1_wr1 <= TRUE && `WAYS > 1; + L1_wr2 <= TRUE && `WAYS > 2; + L1_adr <= L2_adr; + L2_rdat <= L2_dato; + icstate <= IC5; + end + else if (bstate!=B_ICacheNack) + ; + else begin + L1_en <= 9'h1FF; + L1_wr0 <= TRUE; + L1_wr1 <= TRUE && `WAYS > 1; + L1_wr2 <= TRUE && `WAYS > 2; + L1_adr <= L2_adr; + // L2_rdat set below while loading cache line + //L2_rdat <= L2_dato; + icstate <= IC5; + end +// IC5 drops L1_en and the L1 write signals; IC6 and IC7 are one-cycle steps; IC_Next +// pulses icnxt and returns to IDLE. +IC5: + begin + L1_en <= 9'h000; + L1_wr0 <= FALSE; + L1_wr1 <= FALSE; + L1_wr2 <= FALSE; + icstate <= IC6; + end +IC6: icstate <= IC7; +IC7: icstate <= IC_Next; +IC_Next: + begin + icstate <= IDLE; + icnxt <= TRUE; + end +default: icstate <= IDLE; +endcase + +// Load sequencing for memory channel 0. Channels 1 and 2 below follow the same +// pattern and are additionally gated by `NUM_MEM > 1 and `NUM_MEM > 2. +// BUSY : advance by one unless the access is uncached (increment is !dram0_unc). +// 2, 3 : advance by one each cycle. +// 4 : if the queue entry is still valid and not stomped, go to DRAMREQ_READY +// on dhit0 and to DRAMSLOT_REQBUS otherwise; else drop the load and free +// the channel. +// REQBUS, HASBUS : left unchanged here. +// READY : return to DRAMSLOT_AVAIL. 
+if (mem1_available && dram0_load) +case(dram0) +`DRAMSLOT_AVAIL: ; +`DRAMSLOT_BUSY: +// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) + dram0 <= dram0 + !dram0_unc; +// else begin +// dram0 <= `DRAMSLOT_AVAIL; +// dram0_load <= `FALSE; +// end +3'd2: +// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) + dram0 <= dram0 + 3'd1; +// else begin +// dram0 <= `DRAMSLOT_AVAIL; +// dram0_load <= `FALSE; +// end +3'd3: +// if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) + dram0 <= dram0 + 3'd1; +// else begin +// dram0 <= `DRAMSLOT_AVAIL; +// dram0_load <= `FALSE; +// end +3'd4: + if (iqentry_v[dram0_id[`QBITS]] && !iqentry_stomp[dram0_id[`QBITS]]) begin + if (dhit0) + dram0 <= `DRAMREQ_READY; + else + dram0 <= `DRAMSLOT_REQBUS; + end + else begin + dram0 <= `DRAMSLOT_AVAIL; + dram0_load <= `FALSE; + end +`DRAMSLOT_REQBUS: ; +`DRAMSLOT_HASBUS: ; +`DRAMREQ_READY: dram0 <= `DRAMSLOT_AVAIL; +endcase + +if (mem2_available && dram1_load && `NUM_MEM > 1) +case(dram1) +`DRAMSLOT_AVAIL: ; +`DRAMSLOT_BUSY: + dram1 <= dram1 + !dram1_unc; +3'd2: + dram1 <= dram1 + 3'd1; +3'd3: + dram1 <= dram1 + 3'd1; +3'd4: + if (iqentry_v[dram1_id[`QBITS]] && !iqentry_stomp[dram1_id[`QBITS]]) begin + if 
(dhit1) + dram1 <= `DRAMREQ_READY; + else + dram1 <= `DRAMSLOT_REQBUS; + end + else begin + dram1 <= `DRAMSLOT_AVAIL; + dram1_load <= `FALSE; + end +`DRAMSLOT_REQBUS: ; +`DRAMSLOT_HASBUS: ; +`DRAMREQ_READY: dram1 <= `DRAMSLOT_AVAIL; +endcase + +if (mem3_available && dram2_load && `NUM_MEM > 2) +case(dram2) +`DRAMSLOT_AVAIL: ; +`DRAMSLOT_BUSY: + dram2 <= dram2 + !dram2_unc; +3'd2: + dram2 <= dram2 + 3'd1; +3'd3: + dram2 <= dram2 + 3'd1; +3'd4: + if (iqentry_v[dram2_id[`QBITS]] && !iqentry_stomp[dram2_id[`QBITS]]) begin + if (dhit2) + dram2 <= `DRAMREQ_READY; + else + dram2 <= `DRAMSLOT_REQBUS; + end + else begin + dram2 <= `DRAMSLOT_AVAIL; + dram2_load <= `FALSE; + end +`DRAMSLOT_REQBUS: ; +`DRAMSLOT_HASBUS: ; +`DRAMREQ_READY: dram2 <= `DRAMSLOT_AVAIL; +endcase + + +// Bus Interface Unit (BIU) +// Interfaces to the external bus which is WISHBONE compatible. +// Stores take precedence over other operations. +// Next, data cache read misses are serviced. +// Then uncached data reads are serviced. 
+// Finally L2 instruction cache misses are serviced.// +// set the IQ entry == DONE as soon as the SW is let loose to the memory system +// +`ifndef HAS_WB +if (mem1_available && dram0 == `DRAMSLOT_BUSY && dram0_store) begin + if ((alu0_v && (dram0_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram0_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; +// iqentry_done[ dram0_id[`QBITS] ] <= `VAL; +// iqentry_out[ dram0_id[`QBITS] ] <= `INV; + iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE; +end +if (mem2_available && `NUM_MEM > 1 && dram1 == `DRAMSLOT_BUSY && dram1_store) begin + if ((alu0_v && (dram1_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram1_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; +// iqentry_done[ dram1_id[`QBITS] ] <= `VAL; +// iqentry_out[ dram1_id[`QBITS] ] <= `INV; + iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE; +end +if (mem3_available && `NUM_MEM > 2 && dram2 == `DRAMSLOT_BUSY && dram2_store) begin + if ((alu0_v && (dram2_id[`QBITS] == alu0_id[`QBITS])) || (alu1_v && (dram2_id[`QBITS] == alu1_id[`QBITS]))) panic <= `PANIC_MEMORYRACE; +// iqentry_done[ dram2_id[`QBITS] ] <= `VAL; +// iqentry_out[ dram2_id[`QBITS] ] <= `INV; + iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE; +end +`endif + +`ifdef HAS_WB + if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store && !iqentry_stomp[dram0_id[`QBITS]]) begin + if (wbptr<`WB_DEPTH-1) begin + dram0 <= `DRAMSLOT_AVAIL; + dram0_instr[`INSTRUCTION_OP] <= `NOP; + wb_update( + dram0_id, + `FALSE, + fnSelect(dram0_instr,dram0_addr), + dram0_ol, + dram0_addr, + fnDato(dram0_instr,dram0_data) + ); +// iqentry_done[ dram0_id[`QBITS] ] <= `VAL; +// iqentry_out[ dram0_id[`QBITS] ] <= `INV; + iqentry_state[ dram0_id[`QBITS] ] <= IQS_DONE; + end + end + else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && !iqentry_stomp[dram1_id[`QBITS]] && `NUM_MEM > 1) begin + if (wbptr<`WB_DEPTH-1) begin + dram1 <= `DRAMSLOT_AVAIL; + dram1_instr[`INSTRUCTION_OP] <= `NOP; + wb_update( + 
dram1_id, + `FALSE, + fnSelect(dram1_instr,dram1_addr), + dram1_ol, + dram1_addr, + fnDato(dram1_instr,dram1_data) + ); + iqentry_state[ dram1_id[`QBITS] ] <= IQS_DONE; + end + end + else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && !iqentry_stomp[dram2_id[`QBITS]] && `NUM_MEM > 2) begin + if (wbptr<`WB_DEPTH-1) begin + dram2 <= `DRAMSLOT_AVAIL; + dram2_instr[`INSTRUCTION_OP] <= `NOP; + wb_update( + dram2_id, + `FALSE, + fnSelect(dram2_instr,dram2_addr), + dram2_ol, + dram2_addr, + fnDato(dram2_instr,dram2_data) + ); + iqentry_state[ dram2_id[`QBITS] ] <= IQS_DONE; + end + end +`endif + +case(bstate) +BIDLE: + begin + isCAS <= FALSE; + isAMO <= FALSE; + isInc <= FALSE; + isSpt <= FALSE; + isRMW <= FALSE; + rdvq <= 1'b0; + errq <= 1'b0; + exvq <= 1'b0; + bwhich <= 2'b00; + preload <= FALSE; +`ifdef HAS_WB + if (wb_v[0] & wb_en & ~acki & ~cyc) begin + cyc <= `HIGH; + stb_o <= `HIGH; + we <= `HIGH; + sel_o <= wb_sel[0]; + vadr <= wb_addr[0]; + dat_o <= wb_data[0]; + ol_o <= wb_ol[0]; + wbo_id <= wb_id[0]; + isStore <= TRUE; + bstate <= wb_rmw[0] ? 
B12 : B_StoreAck; + wb_v[0] <= `INV; + end + if (wb_v[0]==`INV && !writing_wb) begin + for (j = 1; j < `WB_DEPTH; j = j + 1) begin + wb_v[j-1] <= wb_v[j]; + wb_id[j-1] <= wb_id[j]; + wb_rmw[j-1] <= wb_rmw[j]; + wb_sel[j-1] <= wb_sel[j]; + wb_addr[j-1] <= wb_addr[j]; + wb_data[j-1] <= wb_data[j]; + wb_ol[j-1] <= wb_ol[j]; + if (wbptr > 2'd0) + wbptr <= wbptr - 2'd1; + end + wb_v[`WB_DEPTH-1] <= `INV; + wb_rmw[`WB_DEPTH-1] <= `FALSE; + end + +`endif + if (~|wb_v && mem1_available && dram0==`DRAMSLOT_BUSY && dram0_rmw) begin +`ifdef SUPPORT_DBG + if (dbg_smatch0|dbg_lmatch0) begin + dramA_v <= `TRUE; + dramA_id <= dram0_id; + dramA_exc <= `FLT_DBG; + dramA_bus <= 64'h0; + dram0 <= `DRAMSLOT_AVAIL; + end + else +`endif + if (!acki) begin + isRMW <= dram0_rmw; + isCAS <= IsCAS(dram0_instr); + isAMO <= IsAMO(dram0_instr); + isInc <= IsInc(dram0_instr); + casid <= dram0_id; + bwhich <= 2'b00; + dram0 <= `DRAMSLOT_HASBUS; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram0_instr,dram0_addr); + vadr <= dram0_addr; + dat_o <= fnDato(dram0_instr,dram0_data); + ol_o <= dram0_ol; + bstate <= B12; + end + end + else if (~|wb_v && mem2_available && dram1==`DRAMSLOT_BUSY && dram1_rmw && `NUM_MEM > 1) begin +`ifdef SUPPORT_DBG + if (dbg_smatch1|dbg_lmatch1) begin + dramB_v <= `TRUE; + dramB_id <= dram1_id; + dramB_exc <= `FLT_DBG; + dramB_bus <= 64'h0; + dram1 <= `DRAMSLOT_AVAIL; + end + else +`endif + if (!acki) begin + isRMW <= dram1_rmw; + isCAS <= IsCAS(dram1_instr); + isAMO <= IsAMO(dram1_instr); + isInc <= IsInc(dram1_instr); + casid <= dram1_id; + bwhich <= 2'b01; + dram1 <= `DRAMSLOT_HASBUS; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram1_instr,dram1_addr); + vadr <= dram1_addr; + dat_o <= fnDato(dram1_instr,dram1_data); + ol_o <= dram1_ol; + bstate <= B12; + end + end + else if (~|wb_v && mem3_available && dram2==`DRAMSLOT_BUSY && dram2_rmw && `NUM_MEM > 2) begin +`ifdef SUPPORT_DBG + if (dbg_smatch2|dbg_lmatch2) begin + dramC_v <= `TRUE; + dramC_id <= 
dram2_id; + dramC_exc <= `FLT_DBG; + dramC_bus <= 64'h0; + dram2 <= `DRAMSLOT_AVAIL; + end + else +`endif + if (!acki) begin + isRMW <= dram2_rmw; + isCAS <= IsCAS(dram2_instr); + isAMO <= IsAMO(dram2_instr); + isInc <= IsInc(dram2_instr); + casid <= dram2_id; + bwhich <= 2'b10; + dram2 <= `DRAMSLOT_HASBUS; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram2_instr,dram2_addr); + vadr <= dram2_addr; + dat_o <= fnDato(dram2_instr,dram2_data); + ol_o <= dram2_ol; + bstate <= B12; + end + end +`ifndef HAS_WB + // Check write buffer enable ? + else if (mem1_available && dram0==`DRAMSLOT_BUSY && dram0_store) begin +`ifdef SUPPORT_DBG + if (dbg_smatch0) begin + dramA_v <= `TRUE; + dramA_id <= dram0_id; + dramA_exc <= `FLT_DBG; + dramA_bus <= 64'h0; + dram0 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + bwhich <= 2'b00; + if (!acki) begin + dram0 <= `DRAMSLOT_HASBUS; + dram0_instr[`INSTRUCTION_OP] <= `NOP; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram0_instr,dram0_addr); + vadr <= dram0_addr; + dat_o <= fnDato(dram0_instr,dram0_data); + ol_o <= dram0_ol; + isStore <= TRUE; + bstate <= B_StoreAck; + end +// cr_o <= IsSWC(dram0_instr); + end + end + else if (mem2_available && dram1==`DRAMSLOT_BUSY && dram1_store && `NUM_MEM > 1) begin +`ifdef SUPPORT_DBG + if (dbg_smatch1) begin + dramB_v <= `TRUE; + dramB_id <= dram1_id; + dramB_exc <= `FLT_DBG; + dramB_bus <= 64'h0; + dram1 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + bwhich <= 2'b01; + if (!acki) begin + dram1 <= `DRAMSLOT_HASBUS; + dram1_instr[`INSTRUCTION_OP] <= `NOP; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram1_instr,dram1_addr); + vadr <= dram1_addr; + dat_o <= fnDato(dram1_instr,dram1_data); + ol_o <= dram1_ol; + isStore <= TRUE; + bstate <= B_StoreAck; + end +// cr_o <= IsSWC(dram0_instr); + end + end + else if (mem3_available && dram2==`DRAMSLOT_BUSY && dram2_store && `NUM_MEM > 2) begin +`ifdef SUPPORT_DBG + if (dbg_smatch2) begin + dramC_v <= `TRUE; + dramC_id <= 
dram2_id; + dramC_exc <= `FLT_DBG; + dramC_bus <= 64'h0; + dram2 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + bwhich <= 2'b10; + if (!acki) begin + dram2 <= `DRAMSLOT_HASBUS; + dram2_instr[`INSTRUCTION_OP] <= `NOP; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram2_instr,dram2_addr); + vadr <= dram2_addr; + dat_o <= fnDato(dram2_instr,dram2_data); + ol_o <= dram2_ol; + isStore <= TRUE; + bstate <= B_StoreAck; + end +// cr_o <= IsSWC(dram0_instr); + end + end +`endif + // Check for read misses on the data cache + else if (~|wb_v && mem1_available && !dram0_unc && dram0==`DRAMSLOT_REQBUS && dram0_load) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch0) begin + dramA_v <= `TRUE; + dramA_id <= dram0_id; + dramA_exc <= `FLT_DBG; + dramA_bus <= 64'h0; + dram0 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + dram0 <= `DRAMSLOT_HASBUS; + bwhich <= 2'b00; + preload <= dram0_preload; + bstate <= B_DCacheLoadStart; + end + end + else if (~|wb_v && mem2_available && !dram1_unc && dram1==`DRAMSLOT_REQBUS && dram1_load && `NUM_MEM > 1) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch1) begin + dramB_v <= `TRUE; + dramB_id <= dram1_id; + dramB_exc <= `FLT_DBG; + dramB_bus <= 64'h0; + dram1 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + dram1 <= `DRAMSLOT_HASBUS; + bwhich <= 2'b01; + preload <= dram1_preload; + bstate <= B_DCacheLoadStart; + end + end + else if (~|wb_v && mem3_available && !dram2_unc && dram2==`DRAMSLOT_REQBUS && dram2_load && `NUM_MEM > 2) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch2) begin + dramC_v <= `TRUE; + dramC_id <= dram2_id; + dramC_exc <= `FLT_DBG; + dramC_bus <= 64'h0; + dram2 <= `DRAMSLOT_AVAIL; + end + else +`endif + begin + dram2 <= `DRAMSLOT_HASBUS; + preload <= dram2_preload; + bwhich <= 2'b10; + bstate <= B_DCacheLoadStart; + end + end + else if (~|wb_v && mem1_available && dram0_unc && dram0==`DRAMSLOT_BUSY && dram0_load) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch0) begin + dramA_v <= `TRUE; + dramA_id <= dram0_id; + dramA_exc <= 
`FLT_DBG; + dramA_bus <= 64'h0; + dram0 <= `DRAMSLOT_AVAIL; + end + else +`endif + if (!acki) begin + bwhich <= 2'b00; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram0_instr,dram0_addr); + vadr <= {dram0_addr[AMSB:3],3'b0}; + sr_o <= IsLWR(dram0_instr); + ol_o <= dram0_ol; + bstate <= B_DLoadAck; + end + end + else if (~|wb_v && mem2_available && dram1_unc && dram1==`DRAMSLOT_BUSY && dram1_load && `NUM_MEM > 1) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch1) begin + dramB_v <= `TRUE; + dramB_id <= dram1_id; + dramB_exc <= `FLT_DBG; + dramB_bus <= 64'h0; + dram1 <= `DRAMSLOT_AVAIL; + end + else +`endif + if (!acki) begin + bwhich <= 2'b01; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram1_instr,dram1_addr); + vadr <= {dram1_addr[AMSB:3],3'b0}; + sr_o <= IsLWR(dram1_instr); + ol_o <= dram1_ol; + bstate <= B_DLoadAck; + end + end + else if (~|wb_v && mem3_available && dram2_unc && dram2==`DRAMSLOT_BUSY && dram2_load && `NUM_MEM > 2) begin +`ifdef SUPPORT_DBG + if (dbg_lmatch2) begin + dramC_v <= `TRUE; + dramC_id <= dram2_id; + dramC_exc <= `FLT_DBG; + dramC_bus <= 64'h0; + dram2 <= 2'd0; + end + else +`endif + if (!acki) begin + bwhich <= 2'b10; + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= fnSelect(dram2_instr,dram2_addr); + vadr <= {dram2_addr[AMSB:3],3'b0}; + sr_o <= IsLWR(dram2_instr); + ol_o <= dram2_ol; + bstate <= B_DLoadAck; + end + end + // Check for L2 cache miss + else if (~|wb_v && !ihitL2 && !acki) begin + cti_o <= 3'b001; + bte_o <= 2'b00;//2'b01; // 4 beat burst wrap + cyc <= `HIGH; + stb_o <= `HIGH; + sel_o <= 8'hFF; + icl_o <= `HIGH; + iccnt <= 3'd0; +// adr_o <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0}; +// L2_adr <= icwhich ? {pc0[31:5],5'b0} : {pc1[31:5],5'b0}; + vadr <= {pcr[7:0],L1_adr[AMSB:5],5'h0}; + ol_o <= ol[0]; + L2_adr <= {pcr[7:0],L1_adr[AMSB:5],5'h0}; + L2_xsel <= 1'b0; + bstate <= B_ICacheAck; + end + end + +// Terminal state for a store operation. 
+// Note that if only a single memory channel is selected, bwhich will be a
+// constant 0. This should cause the extra code to be removed.
+//
+// Once the store is acknowledged or reports a fault, wb_nack() is called,
+// the outcome is recorded in the issue queue and the next state is B19.
+B_StoreAck:
+	begin
+		StoreAck1 <= `TRUE;
+		isStore <= `TRUE;
+		if (acki|err_i|tlb_miss|wrv_i) begin
+			wb_nack();
+			cr_o <= 1'b0;
+			// This isn't a good way of doing things; the state should be propagated
+			// to the commit stage, however since this is a store we know there will
+			// be no change of program flow. So the reservation status bit is set
+			// here. The author wanted to avoid the complexity of propagating the
+			// input signal to the commit stage. It does mean that the SWC
+			// instruction should be surrounded by SYNC's.
+			// cr_o is read here before the nonblocking clear above takes effect.
+			if (cr_o)
+				sema[0] <= rbi_i;
+`ifdef HAS_WB
+			// Write buffer present: every queue entry flagged in wbo_id is
+			// moved to the commit state with the fault code for this cycle.
+			// NOTE(review): err_i maps to `FLT_IBE here, while the path without
+			// a write buffer uses `FLT_DWF and does not look at tlb_miss -
+			// confirm which is intended.
+			for (n = 0; n < QENTRIES; n = n + 1) begin
+				if (wbo_id[n]) begin
+					iqentry_exc[n] <= tlb_miss ? `FLT_TLB : wrv_i ? `FLT_DWF : err_i ? `FLT_IBE : `FLT_NONE;
+					if (err_i|wrv_i) begin
+						wb_v <= 1'b0;			// Invalidate write buffer if there is a problem with the store
+						wb_en <= `FALSE;	// and disable write buffer
+					end
+					iqentry_state[n] <= IQS_CMT;
+					iqentry_aq[n] <= `INV;
+				end
+			end
+`else
+			// No write buffer: free the memory channel selected by bwhich and
+			// mark its queue entry as ready to commit.
+			case(bwhich)
+			2'd0:	begin
+					dram0 <= `DRAMSLOT_AVAIL;
+					iqentry_exc[dram0_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+					iqentry_state[dram0_id[`QBITS]] <= IQS_CMT;
+					iqentry_aq[ dram0_id[`QBITS] ] <= `INV;
+					//iqentry_out[ dram0_id[`QBITS] ] <= `INV;
+				end
+			2'd1:	if (`NUM_MEM > 1) begin
+					dram1 <= `DRAMSLOT_AVAIL;
+					iqentry_exc[dram1_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+					iqentry_state[dram1_id[`QBITS]] <= IQS_CMT;
+					iqentry_aq[ dram1_id[`QBITS] ] <= `INV;
+					//iqentry_out[ dram1_id[`QBITS] ] <= `INV;
+				end
+			2'd2:	if (`NUM_MEM > 2) begin
+					dram2 <= `DRAMSLOT_AVAIL;
+					iqentry_exc[dram2_id[`QBITS]] <= (wrv_i|err_i) ? `FLT_DWF : `FLT_NONE;
+					iqentry_state[dram2_id[`QBITS]] <= IQS_CMT;
+					iqentry_aq[ dram2_id[`QBITS] ] <= `INV;
+					//iqentry_out[ dram2_id[`QBITS] ] <= `INV;
+				end
+			default:	;
+			endcase
+`endif
+			bstate <= B19;
+		end
+	end
+
+// Start a data cache line fill for the channel selected by bwhich. The
+// address is aligned by forcing its low five bits to zero.
+B_DCacheLoadStart:
+	if (~acki & ~cyc) begin		// check for idle bus - it should be
+		dccnt <= 2'd0;
+		bstate <= B_DCacheLoadAck;
+		cti_o <= 3'b001;	// constant address burst
+		bte_o <= 2'b00;	// linear burst, non-wrapping
+		cyc <= `HIGH;
+		stb_o <= `HIGH;
+		// Select should be selecting all byte lanes for a cache load
+		sel_o <= 8'hFF;
+		// bwhich should always be one of the three channels.
+		case(bwhich)
+		2'd0:	begin
+					vadr <= {dram0_addr[AMSB:5],5'b0};
+					ol_o <= dram0_ol;
+				end
+		2'd1:	if (`NUM_MEM > 1) begin
+					vadr <= {dram1_addr[AMSB:5],5'b0};
+					ol_o <= dram1_ol;
+				end
+		2'd2:	if (`NUM_MEM > 2) begin
+					vadr <= {dram2_addr[AMSB:5],5'b0};
+					ol_o <= dram2_ol;
+				end
+		default:
+			begin
+				$display("Invalid memory channel selection");
+				$stop;
+				wb_nack();
+				bstate <= BIDLE;
+			end
+		endcase
+	end
+
+// Data cache load terminal state
+// One beat is counted per acknowledge or fault; dccnt counts 0..3 and the
+// cycle is ended after the fourth beat.
+B_DCacheLoadAck:
+	if (ack_i|err_i|tlb_miss|rdv_i) begin
+		if (!bok_i) begin
+			stb_o <= `LOW;
+			bstate <= B_DCacheLoadStb;
+		end
+		errq <= errq | err_i;
+		rdvq <= rdvq | rdv_i;
+		if (!preload)	// A preload instruction ignores any error
+		case(bwhich)
+		2'd0:	if (err_i|rdv_i|tlb_miss) begin
+					iqentry_exc[dram0_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+				end
+		2'd1:	if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 1) begin
+					iqentry_exc[dram1_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+				end
+		2'd2:	if ((err_i|rdv_i|tlb_miss) && `NUM_MEM > 2) begin
+					iqentry_exc[dram2_id[`QBITS]] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DBE : `FLT_DRF;
+				end
+		default:	;
+		endcase
+		dccnt <= dccnt + 2'd1;
+		vadr[4:3] <= vadr[4:3] + 2'd1;
+		// NOTE(review): this assignment is scheduled after the !bok_i branch
+		// above and therefore replaces its transition to B_DCacheLoadStb -
+		// confirm that this is intended.
+		bstate <= B_DCacheLoadAck;
+		if (dccnt==2'd2)
+			cti_o <= 3'b111;
+		if (dccnt==2'd3) begin
+			wb_nack();
+			bstate <= B_DCacheLoadWait1;
+		end
+	end
+
+// Raise the strobe again and return to the acknowledge state.
+B_DCacheLoadStb:
+	begin
+		stb_o <= `HIGH;
+		bstate <= B_DCacheLoadAck;
+	end
+// Two pass-through states between the end of the line fill and the retest.
+B_DCacheLoadWait1: bstate <= B_DCacheLoadWait2;
+B_DCacheLoadWait2: bstate <= B_DCacheLoadResetBusy;
+//B_DCacheLoadWait3: bstate <= B_DCacheLoadResetBusy;
+B_DCacheLoadResetBusy: begin
+		// There could be more than one memory cycle active. We reset the state
+		// of all the machines to retest for a hit because otherwise sequential
+		// loading of memory will cause successive machines to miss resulting in
+		// multiple dcache loads that aren't needed.
+		if (dram0 != `DRAMSLOT_AVAIL && dram0_addr[AMSB:5]==vadr[AMSB:5]) dram0 <= `DRAMSLOT_BUSY;  // causes retest of dhit
+		if (dram1 != `DRAMSLOT_AVAIL && dram1_addr[AMSB:5]==vadr[AMSB:5]) dram1 <= `DRAMSLOT_BUSY;
+		if (dram2 != `DRAMSLOT_AVAIL && dram2_addr[AMSB:5]==vadr[AMSB:5]) dram2 <= `DRAMSLOT_BUSY;
+		if (~ack_i)  bstate <= BIDLE;
+	end
+
+// Ack state for instruction cache load
+// A fault replaces the whole line with a fault instruction pattern and ends
+// the cycle; otherwise each beat fills one slice of L2_rdat (five beats,
+// iccnt 0..4).
+B_ICacheAck:
+	if (ack_i|err_i|tlb_miss|exv_i) begin
+		if (!bok_i) begin
+			stb_o <= `LOW;
+			bstate <= B_ICacheNack2;
+		end
+		errq <= errq | err_i;
+		exvq <= exvq | exv_i;
+//		L1_en <= 9'h3 << {L2_xsel,L2_adr[4:3],1'b0};
+//		L1_wr0 <= `TRUE;
+//		L1_wr1 <= `TRUE;
+//		L1_adr <= L2_adr;
+		if (tlb_miss) begin
+			L2_rdat <= {18{`INSN_FLT_TLB}};
+			wb_nack();
+			icl_o <= `LOW;
+			bstate <= B_ICacheNack;
+		end
+		else if (exv_i) begin
+			L2_rdat <= {18{`INSN_FLT_EXF}};
+			wb_nack();
+			icl_o <= `LOW;
+			bstate <= B_ICacheNack;
+		end
+		else if (err_i) begin
+			L2_rdat <= {18{`INSN_FLT_IBE}};
+			wb_nack();
+			icl_o <= `LOW;
+			bstate <= B_ICacheNack;
+		end
+		else
+			case(iccnt)
+			3'd0:	L2_rdat[63:0] <= dat_i;
+			3'd1:	L2_rdat[127:64] <= dat_i;
+			3'd2:	L2_rdat[191:128] <= dat_i;
+			3'd3:	L2_rdat[255:192] <= dat_i;
+			3'd4:	L2_rdat[297:256] <= {2'b00,dat_i[39:0]};
+			default:	;
+			endcase
+			//L2_rdat <= {dat_i[31:0],{4{dat_i}}};
+		iccnt <= iccnt + 3'd1;
+		//stb_o <= `LOW;
+		if (iccnt==3'd3)
+			cti_o <= 3'b111;
+		if (iccnt==3'd4) begin
+			wb_nack();
+			icl_o <= `LOW;
+			bstate <= B_ICacheNack;
+		end
+		else begin
+			L2_adr[4:3] <= L2_adr[4:3] + 2'd1;
+			if (L2_adr[4:3]==2'b11)
+				L2_xsel <= 1'b1;
+		end
+	end
+// Entered from B_ICacheAck when bok_i is low: wait for acki to drop, then
+// raise the strobe again for the next word.
+B_ICacheNack2:
+	if (~acki) begin
+		stb_o <= `HIGH;
+		vadr[AMSB:3] <= vadr[AMSB:3] + 2'd1;
+		bstate <= B_ICacheAck;
+	end
+// End of the instruction line load: clear the L1 write strobes and return
+// to idle once ack_i is low.
+B_ICacheNack:
+	begin
+		L1_wr0 <= `FALSE;
+		L1_wr1 <= `FALSE;
+		L1_wr2 <= `FALSE;
+		L1_en <= 9'h1FF;
+		L2_xsel <= 1'b0;
+		if (~ack_i) begin
+			icl_ctr <= icl_ctr + 40'd1;
+			bstate <= BIDLE;
+			L2_nxt <= TRUE;
+		end
+	end
+// Read phase of a compare-and-swap or read-modify-write operation.
+// NOTE(review): err_i is reported as `FLT_DRF in this state and in
+// B_DLoadAck, whereas B_DCacheLoadAck reports `FLT_DBE - confirm.
+B12:
+	if (ack_i|err_i|tlb_miss|rdv_i) begin
+		if (isCAS) begin
+			iqentry_res [ casid[`QBITS] ] <= (dat_i == cas);
+			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+//			iqentry_done[ casid[`QBITS] ] <= `VAL;
+//			iqentry_out [ casid[`QBITS] ] <= `INV;
+			iqentry_state [ casid[`QBITS] ] <= IQS_DONE;
+			iqentry_instr[ casid[`QBITS]] <= `NOP_INSN;
+			if (err_i | rdv_i) iqentry_ma[casid[`QBITS]] <= vadr;
+			if (dat_i == cas) begin
+				// Memory matched the compare value: continue with the write.
+				stb_o <= `LOW;
+				we <= `TRUE;
+				bstate <= B15;
+			end
+			else begin
+				// Mismatch: capture the value read and drop the bus cycle.
+				cas <= dat_i;
+				cyc <= `LOW;
+				stb_o <= `LOW;
+				case(bwhich)
+				2'b00:	dram0 <= `DRAMREQ_READY;
+				2'b01:	dram1 <= `DRAMREQ_READY;
+				2'b10:	dram2 <= `DRAMREQ_READY;
+				default:	;
+				endcase
+				bstate <= B19;
+			end
+		end
+		else if (isRMW) begin
+			rmw_instr <= iqentry_instr[casid[`QBITS]];
+			rmw_argA <= dat_i;
+			if (isSpt) begin
+				rmw_argB <= 64'd1 << iqentry_a1[casid[`QBITS]][63:58];
+				rmw_argC <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ?
+					iqentry_a3[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58] :
+					iqentry_a2[casid[`QBITS]][64] << iqentry_a1[casid[`QBITS]][63:58];
+			end
+			else if (isInc) begin
+				// Five bit immediate, sign extended to 64 bits.
+				rmw_argB <= iqentry_instr[casid[`QBITS]][5:0]==`R2 ? {{59{iqentry_instr[casid[`QBITS]][22]}},iqentry_instr[casid[`QBITS]][22:18]} :
+					{{59{iqentry_instr[casid[`QBITS]][17]}},iqentry_instr[casid[`QBITS]][17:13]};
+			end
+			else begin // isAMO
+				iqentry_res [ casid[`QBITS] ] <= dat_i;
+				// Instruction bit 31 selects the five bit immediate in bits 20:16
+				// over the register operand. Only the sign bit (bit 20) is
+				// replicated: replicating the whole field would build a repeating
+				// five bit pattern instead of a sign extension.
+				rmw_argB <= iqentry_instr[casid[`QBITS]][31] ? {{59{iqentry_instr[casid[`QBITS]][20]}},iqentry_instr[casid[`QBITS]][20:16]} : iqentry_a2[casid[`QBITS]];
+			end
+			iqentry_exc [ casid[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+			stb_o <= `LOW;
+			bstate <= B20;
+		end
+	end
+
+// Regular load
+// The data is captured in xdati and the channel selected by bwhich is
+// flagged `DRAMREQ_READY.
+B_DLoadAck:
+	if (ack_i|err_i|tlb_miss|rdv_i) begin
+		wb_nack();
+		sr_o <= `LOW;
+		xdati <= dat_i;
+		case(bwhich)
+		2'b00:	begin
+					dram0 <= `DRAMREQ_READY;
+					iqentry_exc [ dram0_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+				end
+		2'b01:	if (`NUM_MEM > 1) begin
+					dram1 <= `DRAMREQ_READY;
+					iqentry_exc [ dram1_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+				end
+		2'b10:	if (`NUM_MEM > 2) begin
+					dram2 <= `DRAMREQ_READY;
+					iqentry_exc [ dram2_id[`QBITS] ] <= tlb_miss ? `FLT_TLB : err_i ? `FLT_DRF : rdv_i ? `FLT_DRF : `FLT_NONE;
+				end
+		default:	;
+		endcase
+		bstate <= B19;
+	end
+
+// Three cycles to determine if there's a cache hit during a store.
+B16: begin + case(bwhich) + 2'd0: if (dhit0) begin dram0 <= `DRAMREQ_READY; bstate <= B17; end + 2'd1: if (dhit1) begin dram1 <= `DRAMREQ_READY; bstate <= B17; end + 2'd2: if (dhit2) begin dram2 <= `DRAMREQ_READY; bstate <= B17; end + default: bstate <= BIDLE; + endcase + end +B17: bstate <= B18; +B18: bstate <= B19; +B19: if (~acki) begin + sel_o <= 8'h00; + bstate <= BIDLE; + StoreAck1 <= `FALSE; + isStore <= `FALSE; + end +B20: + if (~ack_i) begin + stb_o <= `HIGH; + we <= `HIGH; + dat_o <= fnDato(rmw_instr,rmw_res); + bstate <= B_StoreAck; + end +B21: + if (~ack_i) begin + stb_o <= `HIGH; + bstate <= B12; + end +default: bstate <= BIDLE; +endcase + +if (!branchmiss) begin + case({fetchbuf0_v, fetchbuf1_v}) + 2'b00: ; + 2'b01: + if (canq1) begin + tail0 <= (tail0+2'd1) % QENTRIES; + tail1 <= (tail1+2'd1) % QENTRIES; + end + 2'b10: + if (canq1) begin + tail0 <= (tail0+2'd1) % QENTRIES; + tail1 <= (tail1+2'd1) % QENTRIES; + end + 2'b11: + if (canq1) begin + if (IsBranch(fetchbuf0_instr) && predict_taken0 && fetchbuf0_thrd==fetchbuf1_thrd) begin + tail0 <= (tail0+2'd1) % QENTRIES; + tail1 <= (tail1+2'd1) % QENTRIES; + end + else begin + if (vqe0 < vl || !IsVector(fetchbuf0_instr)) begin + if (canq2) begin + tail0 <= (tail0 + 3'd2) % QENTRIES; + tail1 <= (tail1 + 3'd2) % QENTRIES; + end + else begin // queued1 will be true + tail0 <= (tail0+2'd1) % QENTRIES; + tail1 <= (tail1+2'd1) % QENTRIES; + end + end + end + end + endcase +end +else if (!thread_en) begin // if branchmiss + for (n = QENTRIES-1; n >= 0; n = n - 1) + // (QENTRIES-1) is needed to ensure that n increments forwards so that the modulus is + // a positive number. + if (iqentry_stomp[n] & ~iqentry_stomp[(n+(QENTRIES-1))%QENTRIES]) begin + tail0 <= n; + tail1 <= (n + 1) % QENTRIES; + end + // otherwise, it is the last instruction in the queue that has been mispredicted ... 
do nothing +end + +// #5 rf[0] = 0; rf_v[0] = 1; rf_source[0] = 0; +`ifdef SIM + $display("\n\n\n\n\n\n\n\n"); + $display("TIME %0d", $time); + $display("%h #", pc0); +`ifdef SUPPORT_SMT + $display ("Regfile: %d", rgs[0]); + for (n=0; n < 32; n=n+4) begin + $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", + n[4:0]+0, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], + n[4:0]+1, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], + n[4:0]+2, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], + n[4:0]+3, urf1.urf10.mem[{rgs[0],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] + ); + end + $display ("Regfile: %d", rgs[1]); + for (n=128; n < 160; n=n+4) begin + $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", + n[4:0]+0, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], + n[4:0]+1, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], + n[4:0]+2, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], + n[4:0]+3, urf1.urf10.mem[{rgs[1],1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] + ); + end +`else + $display ("Regfile: %d", rgs); + for (n=0; n < 32; n=n+4) begin + $display("%d: %h %d %o %d: %h %d %o %d: %h %d %o %d: %h %d %o#", + n[4:0]+0, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b00}], regIsValid[n+0], rf_source[n+0], + n[4:0]+1, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b01}], regIsValid[n+1], rf_source[n+1], + n[4:0]+2, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b10}], regIsValid[n+2], rf_source[n+2], + n[4:0]+3, gRegfileInst.gb1.urf1.urf10.mem[{rgs,1'b0,n[4:2],2'b11}], regIsValid[n+3], rf_source[n+3] + ); + end +`endif +`ifdef FCU_ENH + $display("Call Stack:"); + for (n = 0; n < 16; n = n + 4) + $display("%c%d: %h %c%d: %h %c%d: %h %c%d: %h", + gFetchbufInst.gb1.ufb1.ursb1.rasp==n+0 ?">" : " ", n[4:0]+0, gFetchbufInst.gb1.ufb1.ursb1.ras[n+0], + 
gFetchbufInst.gb1.ufb1.ursb1.rasp==n+1 ?">" : " ", n[4:0]+1, gFetchbufInst.gb1.ufb1.ursb1.ras[n+1], + gFetchbufInst.gb1.ufb1.ursb1.rasp==n+2 ?">" : " ", n[4:0]+2, gFetchbufInst.gb1.ufb1.ursb1.ras[n+2], + gFetchbufInst.gb1.ufb1.ursb1.rasp==n+3 ?">" : " ", n[4:0]+3, gFetchbufInst.gb1.ufb1.ursb1.ras[n+3] + ); + $display("\n"); +`endif +// $display("Return address stack:"); +// for (n = 0; n < 16; n = n + 1) +// $display("%d %h", rasp+n[3:0], ras[rasp+n[3:0]]); + $display("TakeBr:%d #", take_branch);//, backpc); + $display("Insn%d: %h", 0, insn0); + if (`WAYS==1) begin + $display("%c%c A: %d %h %h #", + 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc); + $display("%c%c B: %d %h %h #", + 45, fetchbuf?62:45, fetchbufB_v, fetchbufB_instr, fetchbufB_pc); + end + else if (`WAYS > 1) begin + $display("Insn%d: %h", 1, insn1); + $display("%c%c A: %d %h %h #", + 45, fetchbuf?45:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc); + $display("%c%c B: %d %h %h #", + 45, fetchbuf?45:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc); + end + else if (`WAYS > 2) begin + $display("%c%c C: %d %h %h #", + 45, fetchbuf?62:45, fetchbufC_v, fetchbufC_instr, fetchbufC_pc); + $display("%c%c D: %d %h %h #", + 45, fetchbuf?62:45, fetchbufD_v, fetchbufD_instr, fetchbufD_pc); + end + for (i=0; i 1) + $display("%d %h %h %c%h %o #", + dram1, dram1_addr, dram1_data, (IsFlowCtrl(dram1_instr) ? 98 : (IsMem(dram1_instr)) ? 109 : 97), + dram1_instr, dram1_id); + if (`NUM_MEM > 2) + $display("%d %h %h %c%h %o #", + dram2, dram2_addr, dram2_data, (IsFlowCtrl(dram2_instr) ? 98 : (IsMem(dram2_instr)) ? 
109 : 97), + dram2_instr, dram2_id); + $display("%d %h %o %h #", dramA_v, dramA_bus, dramA_id, dramA_exc); + if (`NUM_MEM > 1) + $display("%d %h %o %h #", dramB_v, dramB_bus, dramB_id, dramB_exc); + if (`NUM_MEM > 2) + $display("%d %h %o %h #", dramC_v, dramC_bus, dramC_id, dramC_exc); + $display("ALU"); + $display("%d %h %h %h %c%h %o %h #", + alu0_dataready, alu0_argI, alu0_argA, alu0_argB, + (IsFlowCtrl(alu0_instr) ? 98 : IsMem(alu0_instr) ? 109 : 97), + alu0_instr, alu0_sourceid, alu0_pc); + $display("%d %h %o 0 #", alu0_v, alu0_bus, alu0_id); + if (`NUM_ALU > 1) begin + $display("%d %h %h %h %c%h %o %h #", + alu1_dataready, alu1_argI, alu1_argA, alu1_argB, + (IsFlowCtrl(alu1_instr) ? 98 : IsMem(alu1_instr) ? 109 : 97), + alu1_instr, alu1_sourceid, alu1_pc); + $display("%d %h %o 0 #", alu1_v, alu1_bus, alu1_id); + end + $display("FCU"); + $display("%d %h %h %h %h %c%c #", fcu_v, fcu_bus, fcu_argI, fcu_argA, fcu_argB, fcu_takb?"T":"-", fcu_pt?"T":"-"); + $display("%c %h %h %h %h #", fcu_branchmiss?"m":" ", fcu_sourceid, fcu_misspc, fcu_nextpc, fcu_brdisp); + $display("Commit"); + $display("0: %c %h %o %d #", commit0_v?"v":" ", commit0_bus, commit0_id, commit0_tgt[4:0]); + $display("1: %c %h %o %d #", commit1_v?"v":" ", commit1_bus, commit1_id, commit1_tgt[4:0]); + $display("instructions committed: %d valid committed: %d ticks: %d ", CC, I, tick); + $display("Write Buffer:"); + for (n = `WB_DEPTH-1; n >= 0; n = n - 1) + $display("%c adr: %h dat: %h", wb_v[n]?" 
":"*", wb_addr[n], wb_data[n]); + $display("Write merges: %d", wb_merges); +`endif // SIM + +// +// $display("\n\n\n\n\n\n\n\n"); +// $display("TIME %0d", $time); +// $display(" pc0=%h", pc0); +// $display(" pc1=%h", pc1); +// $display(" reg0=%h, v=%d, src=%o", rf[0], rf_v[0], rf_source[0]); +// $display(" reg1=%h, v=%d, src=%o", rf[1], rf_v[1], rf_source[1]); +// $display(" reg2=%h, v=%d, src=%o", rf[2], rf_v[2], rf_source[2]); +// $display(" reg3=%h, v=%d, src=%o", rf[3], rf_v[3], rf_source[3]); +// $display(" reg4=%h, v=%d, src=%o", rf[4], rf_v[4], rf_source[4]); +// $display(" reg5=%h, v=%d, src=%o", rf[5], rf_v[5], rf_source[5]); +// $display(" reg6=%h, v=%d, src=%o", rf[6], rf_v[6], rf_source[6]); +// $display(" reg7=%h, v=%d, src=%o", rf[7], rf_v[7], rf_source[7]); + +// $display("Fetch Buffers:"); +// $display(" %c%c fbA: v=%d instr=%h pc=%h %c%c fbC: v=%d instr=%h pc=%h", +// fetchbuf?32:45, fetchbuf?32:62, fetchbufA_v, fetchbufA_instr, fetchbufA_pc, +// fetchbuf?45:32, fetchbuf?62:32, fetchbufC_v, fetchbufC_instr, fetchbufC_pc); +// $display(" %c%c fbB: v=%d instr=%h pc=%h %c%c fbD: v=%d instr=%h pc=%h", +// fetchbuf?32:45, fetchbuf?32:62, fetchbufB_v, fetchbufB_instr, fetchbufB_pc, +// fetchbuf?45:32, fetchbuf?62:32, fetchbufD_v, fetchbufD_instr, fetchbufD_pc); +// $display(" branchback=%d backpc=%h", branchback, backpc); + +// $display("Instruction Queue:"); +// for (i=0; i<8; i=i+1) +// $display(" %c%c%d: v=%d done=%d out=%d agen=%d res=%h op=%d bt=%d tgt=%d a1=%h (v=%d/s=%o) a2=%h (v=%d/s=%o) im=%h pc=%h exc=%h", +// (i[`QBITS]==heads[0])?72:32, (i[`QBITS]==tail0)?84:32, i, +// iqentry_v[i], iqentry_done[i], iqentry_out[i], iqentry_agen[i], iqentry_res[i], iqentry_op[i], +// iqentry_bt[i], iqentry_tgt[i], iqentry_a1[i], iqentry_a1_v[i], iqentry_a1_s[i], iqentry_a2[i], iqentry_a2_v[i], +// iqentry_a2_s[i], iqentry_a0[i], iqentry_pc[i], iqentry_exc[i]); + +// $display("Scheduling Status:"); +// $display(" iqentry0 issue=%d islot=%d stomp=%d source=%d - 
memready=%d memissue=%b", +// iqentry_0_issue, iqentry_0_islot, iqentry_stomp[0], iqentry_source[0], iqentry_memready[0], iqentry_memissue[0]); +// $display(" iqentry1 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_1_issue, iqentry_1_islot, iqentry_stomp[1], iqentry_source[1], iqentry_memready[1], iqentry_memissue[1]); +// $display(" iqentry2 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_2_issue, iqentry_2_islot, iqentry_stomp[2], iqentry_source[2], iqentry_memready[2], iqentry_memissue[2]); +// $display(" iqentry3 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_3_issue, iqentry_3_islot, iqentry_stomp[3], iqentry_source[3], iqentry_memready[3], iqentry_memissue[3]); +// $display(" iqentry4 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_4_issue, iqentry_4_islot, iqentry_stomp[4], iqentry_source[4], iqentry_memready[4], iqentry_memissue[4]); +// $display(" iqentry5 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_5_issue, iqentry_5_islot, iqentry_stomp[5], iqentry_source[5], iqentry_memready[5], iqentry_memissue[5]); +// $display(" iqentry6 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_6_issue, iqentry_6_islot, iqentry_stomp[6], iqentry_source[6], iqentry_memready[6], iqentry_memissue[6]); +// $display(" iqentry7 issue=%d islot=%d stomp=%d source=%d - memready=%d memissue=%b", +// iqentry_7_issue, iqentry_7_islot, iqentry_stomp[7], iqentry_source[7], iqentry_memready[7], iqentry_memissue[7]); + +// $display("ALU Inputs:"); +// $display(" 0: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d", +// alu0_available, alu0_dataready, alu0_sourceid, alu0_op, alu0_argA, +// alu0_argB, alu0_argI, alu0_bt); +// $display(" 1: avail=%d data=%d id=%o op=%d a1=%h a2=%h im=%h bt=%d", +// alu1_available, alu1_dataready, alu1_sourceid, alu1_op, alu1_argA, +// alu1_argB, alu1_argI, alu1_bt); + +// 
$display("ALU Outputs:"); +// $display(" 0: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o", +// alu0_v, alu0_bus, alu0_id, alu0_branchmiss, alu0_misspc, alu0_sourceid); +// $display(" 1: v=%d bus=%h id=%o bmiss=%d misspc=%h missid=%o", +// alu1_v, alu1_bus, alu1_id, alu1_branchmiss, alu1_misspc, alu1_sourceid); + +// $display("DRAM Status:"); +// $display(" OUT: v=%d data=%h tgt=%d id=%o", dram_v, dram_bus, dram_tgt, dram_id); +// $display(" dram0: status=%h addr=%h data=%h op=%d tgt=%d id=%o", +// dram0, dram0_addr, dram0_data, dram0_op, dram0_tgt, dram0_id); +// $display(" dram1: status=%h addr=%h data=%h op=%d tgt=%d id=%o", +// dram1, dram1_addr, dram1_data, dram1_op, dram1_tgt, dram1_id); +// $display(" dram2: status=%h addr=%h data=%h op=%d tgt=%d id=%o", +// dram2, dram2_addr, dram2_data, dram2_op, dram2_tgt, dram2_id); + +// $display("Commit Buses:"); +// $display(" 0: v=%d id=%o data=%h", commit0_v, commit0_id, commit0_bus); +// $display(" 1: v=%d id=%o data=%h", commit1_v, commit1_id, commit1_bus); + +// +// $display("Memory Contents:"); +// for (j=0; j<64; j=j+16) +// $display(" %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", +// m[j+0], m[j+1], m[j+2], m[j+3], m[j+4], m[j+5], m[j+6], m[j+7], +// m[j+8], m[j+9], m[j+10], m[j+11], m[j+12], m[j+13], m[j+14], m[j+15]); + + $display(""); + + if (|panic) begin + $display(""); + $display("-----------------------------------------------------------------"); + $display("-----------------------------------------------------------------"); + $display("--------------- PANIC:%s -----------------", message[panic]); + $display("-----------------------------------------------------------------"); + $display("-----------------------------------------------------------------"); + $display(""); + $display("instructions committed: %d", I); + $display("total execution cycles: %d", $time / 10); + $display(""); + end + if (|panic && ~outstanding_stores) begin + $finish; + end +/* + for (n = 0; n < QENTRIES; n = n + 1) + if 
(branchmiss) begin
+			if (!setpred[n]) begin
+				iqentry_instr[n][`INSTRUCTION_OP] <= `NOP;
+				iqentry_done[n] <= iqentry_v[n];
+				iqentry_cmt[n] <= iqentry_v[n];
+			end
+		end
+*/
+	// Entry 0 of every group of 32 registers has its source tag forced to all
+	// ones on every clock (presumably register r0 of each register set -
+	// confirm).
+	rf_source[ 0] <= {`QBIT{1'b1}};
+	rf_source[32] <= {`QBIT{1'b1}};
+	rf_source[64] <= {`QBIT{1'b1}};
+	rf_source[96] <= {`QBIT{1'b1}};
+	// The upper four groups are guarded by `SUPPORT_SMT, the same macro that
+	// guards the simulation dump reading rf_source[128] and up earlier in
+	// this always block. The guard was previously spelled SUPPORTSMT, which
+	// left these resets compiled out.
+`ifdef SUPPORT_SMT
+	rf_source[128] <= {`QBIT{1'b1}};
+	rf_source[160] <= {`QBIT{1'b1}};
+	rf_source[192] <= {`QBIT{1'b1}};
+	rf_source[224] <= {`QBIT{1'b1}};
+`endif
+
+end // clock domain
+/*
+always @(posedge clk)
+if (rst) begin
+	tail0 <= 3'd0;
+	tail1 <= 3'd1;
+end
+else begin
+if (!branchmiss) begin
+	case({fetchbuf0_v, fetchbuf1_v})
+	2'b00:	;
+	2'b01:
+		if (canq1) begin
+			tail0 <= idp1(tail0);
+			tail1 <= idp1(tail1);
+		end
+	2'b10:
+		if (canq1) begin
+			tail0 <= idp1(tail0);
+			tail1 <= idp1(tail1);
+		end
+	2'b11:
+		if (canq1) begin
+			if (IsBranch(fetchbuf0_instr) && predict_taken0) begin
+				tail0 <= idp1(tail0);
+				tail1 <= idp1(tail1);
+			end
+			else begin
+				if (vqe < vl || !IsVector(fetchbuf0_instr)) begin
+					if (canq2) begin
+						tail0 <= idp2(tail0);
+						tail1 <= idp2(tail1);
+					end
+					else begin	// queued1 will be true
+						tail0 <= idp1(tail0);
+						tail1 <= idp1(tail1);
+					end
+				end
+			end
+		end
+	endcase
+end
+else begin	// if branchmiss
+	if (iqentry_stomp[0] & ~iqentry_stomp[7]) begin
+		tail0 <= 3'd0;
+		tail1 <= 3'd1;
+	end
+	else if (iqentry_stomp[1] & ~iqentry_stomp[0]) begin
+		tail0 <= 3'd1;
+		tail1 <= 3'd2;
+	end
+	else if (iqentry_stomp[2] & ~iqentry_stomp[1]) begin
+		tail0 <= 3'd2;
+		tail1 <= 3'd3;
+	end
+	else if (iqentry_stomp[3] & ~iqentry_stomp[2]) begin
+		tail0 <= 3'd3;
+		tail1 <= 3'd4;
+	end
+	else if (iqentry_stomp[4] & ~iqentry_stomp[3]) begin
+		tail0 <= 3'd4;
+		tail1 <= 3'd5;
+	end
+	else if (iqentry_stomp[5] & ~iqentry_stomp[4]) begin
+		tail0 <= 3'd5;
+		tail1 <= 3'd6;
+	end
+	else if (iqentry_stomp[6] & ~iqentry_stomp[5]) begin
+		tail0 <= 3'd6;
+		tail1 <= 3'd7;
+	end
+	else if (iqentry_stomp[7] & ~iqentry_stomp[6]) begin
+		tail0 <= 3'd7;
+		tail1
<= 3'd0;
+	end
+	// otherwise, it is the last instruction in the queue that has been mispredicted ... do nothing
+end
+end
+*/
+
+// Update the write buffer.
+//
+// A store is either folded into the newest write buffer entry or appended
+// as a new entry at wbptr.
+//   id   - issue queue index of the store; recorded as a one-hot bit in wb_id
+//   rmw  - stored in wb_rmw; a merge requires it to match the newest entry
+//   sel  - byte lane select for the store
+//   ol   - stored in wb_ol; a merge requires it to match the newest entry
+//   addr - store address; entries are kept aligned (low three bits zero)
+//   data - store data
+task wb_update;
+input [`QBITS] id;
+input rmw;
+input [7:0] sel;
+input [1:0] ol;
+input [`ABITS] addr;
+input [63:0] data;
+reg fold;		// set when the store can be merged into the newest entry
+begin
+	// Merging needs wbm set, more than one entry queued and a valid newest
+	// entry with the same aligned address, ol and rmw attributes.
+	fold = wbm && wbptr > 1 && wb_addr[wbptr-1][AMSB:3]==addr[AMSB:3]
+		&& wb_ol[wbptr-1]==ol && wb_rmw[wbptr-1]==rmw && wb_v[wbptr-1];
+	if (fold) begin
+		wb_merges <= wb_merges + 32'd1;
+		wb_id[wbptr-1] <= wb_id[wbptr-1] | (16'd1 << id);
+		wb_sel[wbptr-1] <= wb_sel[wbptr-1] | sel;
+		wb_rmw[wbptr-1] <= rmw;
+		wb_ol[wbptr-1] <= ol;
+		// The write buffer is always shifted during the bus IDLE state. That means
+		// the data is out of place by a slot. The slot the data is moved from is
+		// invalidated.
+		wb_v[wbptr-2] <= `INV;
+		wb_v[wbptr-1] <= wb_en;
+		// Address and data are re-assigned to themselves before the selected
+		// byte lanes are replaced; the statement order below must be kept.
+		wb_addr[wbptr-1] <= wb_addr[wbptr-1];
+		wb_data[wbptr-1] <= wb_data[wbptr-1];
+		if (sel[0]) wb_data[wbptr-1][ 7: 0] <= data[ 7: 0];
+		if (sel[1]) wb_data[wbptr-1][15: 8] <= data[15: 8];
+		if (sel[2]) wb_data[wbptr-1][23:16] <= data[23:16];
+		if (sel[3]) wb_data[wbptr-1][31:24] <= data[31:24];
+		if (sel[4]) wb_data[wbptr-1][39:32] <= data[39:32];
+		if (sel[5]) wb_data[wbptr-1][47:40] <= data[47:40];
+		if (sel[6]) wb_data[wbptr-1][55:48] <= data[55:48];
+		if (sel[7]) wb_data[wbptr-1][63:56] <= data[63:56];
+	end
+	else begin
+		// Append a new entry and advance the write pointer.
+		wbptr <= wbptr + 2'd1;
+		wb_addr[wbptr] <= {addr[AMSB:3],3'b0};
+		wb_data[wbptr] <= data;
+		wb_sel[wbptr] <= sel;
+		wb_ol[wbptr] <= ol;
+		wb_rmw[wbptr] <= rmw;
+		wb_id[wbptr] <= (16'd1 << id);
+		wb_v[wbptr] <= wb_en;
+	end
+end
+endtask
+
+// Increment the head pointers
+// Also increments the instruction counter
+// Used when instructions are committed.
+// Also clear any outstanding state bits that foul things up.
+//
+// amt is the number of queue entries being retired; 1, 2 and 3 are handled.
+// Every head pointer advances by amt modulo QENTRIES, the retired entries
+// are invalidated and the sequence number of the last valid retired entry is
+// subtracted from every valid entry.
+// NOTE(review): the loops use the module level variable n, so a caller that
+// is itself iterating with n would see it modified - confirm the call sites.
+task head_inc;
+input [`QBITS] amt;
+begin
+	for (n = 0; n < QENTRIES; n = n + 1)
+		heads[n] <= (heads[n] + amt) % QENTRIES;
+	CC <= CC + amt;
+	if (amt==3'd3) begin
+		I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]] + iqentry_v[heads[2]];
+		iqentry_state[heads[0]] <= IQS_INVALID;
+		iqentry_state[heads[1]] <= IQS_INVALID;
+		iqentry_state[heads[2]] <= IQS_INVALID;
+		iqentry_mem[heads[0]] <= `FALSE;
+		iqentry_mem[heads[1]] <= `FALSE;
+		iqentry_mem[heads[2]] <= `FALSE;
+		iqentry_iv[heads[0]] <= `INV;
+		iqentry_iv[heads[1]] <= `INV;
+		iqentry_iv[heads[2]] <= `INV;
+		iqentry_alu[heads[0]] <= `FALSE;
+		iqentry_alu[heads[1]] <= `FALSE;
+		iqentry_alu[heads[2]] <= `FALSE;
+		for (n = 0; n < QENTRIES; n = n + 1)
+			if (iqentry_v[n])
+				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[2]] ? iqentry_sn[heads[2]]
+											 : iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+											 : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+											 : 4'b0);
+	end
+	else if (amt==3'd2) begin
+		I = I + iqentry_v[heads[0]] + iqentry_v[heads[1]];
+		iqentry_state[heads[0]] <= IQS_INVALID;
+		iqentry_state[heads[1]] <= IQS_INVALID;
+		iqentry_mem[heads[0]] <= `FALSE;
+		iqentry_mem[heads[1]] <= `FALSE;
+		iqentry_iv[heads[0]] <= `INV;
+		iqentry_iv[heads[1]] <= `INV;
+		iqentry_alu[heads[0]] <= `FALSE;
+		iqentry_alu[heads[1]] <= `FALSE;
+		for (n = 0; n < QENTRIES; n = n + 1)
+			if (iqentry_v[n])
+				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[1]] ? iqentry_sn[heads[1]]
+											 : iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+											 : 4'b0);
+	end else if (amt==3'd1) begin
+		I = I + iqentry_v[heads[0]];
+		iqentry_state[heads[0]] <= IQS_INVALID;
+		iqentry_mem[heads[0]] <= `FALSE;
+		iqentry_iv[heads[0]] <= `INV;
+		iqentry_alu[heads[0]] <= `FALSE;
+		for (n = 0; n < QENTRIES; n = n + 1)
+			if (iqentry_v[n])
+				iqentry_sn[n] <= iqentry_sn[n] - (iqentry_v[heads[0]] ? iqentry_sn[heads[0]]
+											 : 4'b0);
+	end
+end
+endtask
+
+// Capture a result bus value into any operand of queue entry nn that is
+// still invalid and whose source tag equals id. Nothing happens unless both
+// the entry and the bus (v) are valid.
+task setargs;
+input [`QBITS] nn;
+input [`QBITSP1] id;
+input v;
+input [63:0] bus;
+begin
+	if (iqentry_a1_v[nn] == `INV && iqentry_a1_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
+		iqentry_a1[nn] <= bus;
+		iqentry_a1_v[nn] <= `VAL;
+	end
+	if (iqentry_a2_v[nn] == `INV && iqentry_a2_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
+		iqentry_a2[nn] <= bus;
+		iqentry_a2_v[nn] <= `VAL;
+	end
+	if (iqentry_a3_v[nn] == `INV && iqentry_a3_s[nn] == id && iqentry_v[nn] == `VAL && v == `VAL) begin
+		iqentry_a3[nn] <= bus;
+		iqentry_a3_v[nn] <= `VAL;
+	end
+end
+endtask
+
+// Unconditionally copy the decoded instruction fields carried on bus (sliced
+// with the `IB_ macros) into queue entry nn and mark its decode as valid.
+task setinsn1;
+input [`QBITS] nn;
+input [143:0] bus;
+begin
+	iqentry_iv   [nn]  <= `VAL;
+//	iqentry_Rt   [nn]  <= bus[`IB_RT];
+//	iqentry_Rc   [nn]  <= bus[`IB_RC];
+//	iqentry_Ra   [nn]  <= bus[`IB_RA];
+	iqentry_a0	 [nn]  <= bus[`IB_CONST];
+	iqentry_imm  [nn]  <= bus[`IB_IMM];
+//	iqentry_insln[nn]  <= bus[`IB_LN];
+`ifndef INLINE_DECODE
+	// Simulation check: the length recorded at enqueue time must agree with
+	// the length produced by the decoder.
+	if (iqentry_insln[nn] != bus[`IB_LN]) begin
+		$display("Insn length mismatch.");
+		$stop;
+	end
+`endif
+	iqentry_cmp	 [nn]  <= bus[`IB_CMP];
+	iqentry_tlb  [nn]  <= bus[`IB_TLB];
+	iqentry_sz   [nn]  <= bus[`IB_SZ];
+	iqentry_jal	 [nn]  <= bus[`IB_JAL];
+	iqentry_ret  [nn]  <= bus[`IB_RET];
+	iqentry_irq  [nn]  <= bus[`IB_IRQ];
+	iqentry_brk	 [nn]  <= bus[`IB_BRK];
+	iqentry_rti  [nn]  <= bus[`IB_RTI];
+	iqentry_bt   [nn]  <= bus[`IB_BT];
+	iqentry_alu  [nn]  <= bus[`IB_ALU];
+	iqentry_alu0 [nn]  <= bus[`IB_ALU0];
+	iqentry_fpu  [nn]  <= bus[`IB_FPU];
+	iqentry_fc   [nn]  <= bus[`IB_FC];
+	iqentry_canex[nn]  <= bus[`IB_CANEX];
+	iqentry_loadv[nn]  <= bus[`IB_LOADV];
+	iqentry_load [nn]  <= bus[`IB_LOAD];
+	iqentry_preload[nn]<= bus[`IB_PRELOAD];
+	iqentry_store[nn]  <= bus[`IB_STORE];
+	iqentry_push [nn]  <= bus[`IB_PUSH];
+	iqentry_oddball[nn] <= bus[`IB_ODDBALL];
+	iqentry_memsz[nn]  <= bus[`IB_MEMSZ];
+	iqentry_mem  [nn]  <= bus[`IB_MEM];
+	iqentry_memndx[nn] <= bus[`IB_MEMNDX];
+	iqentry_rmw  [nn]  <= bus[`IB_RMW];
+	iqentry_memdb[nn]  <= bus[`IB_MEMDB];
+	iqentry_memsb[nn]  <= bus[`IB_MEMSB];
+	iqentry_shft [nn]  <= bus[`IB_SHFT];	// 48 bit shift instructions
+	iqentry_sei	 [nn]  <= bus[`IB_SEI];
+	iqentry_aq   [nn]  <= bus[`IB_AQ];
+	iqentry_rl   [nn]  <= bus[`IB_RL];
+	iqentry_jmp  [nn]  <= bus[`IB_JMP];
+	iqentry_br   [nn]  <= bus[`IB_BR];
+	iqentry_sync [nn]  <= bus[`IB_SYNC];
+	iqentry_fsync[nn]  <= bus[`IB_FSYNC];
+	iqentry_rfw  [nn]  <= bus[`IB_RFW];
+`ifdef SUPPORT_PREDICATION
+	iqentry_prfw [nn]  <= bus[`IB_PRFW];
+`endif
+	iqentry_we   [nn]  <= bus[`IB_WE];
+end
+endtask
+
+// Guarded form of setinsn1: the decoded fields are loaded only when entry
+// nn is valid, has no valid decode yet, is tagged with id and the decode
+// bus is valid (v).
+task setinsn;
+input [`QBITS] nn;
+input [4:0] id;
+input v;
+input [143:0] bus;
+begin
+  if (iqentry_iv[nn] == `INV && iqentry_is[nn] == id && iqentry_v[nn] == `VAL && v == `VAL)
+  	setinsn1(nn,bus);
+end
+endtask
+
+// Operand 1 valid/source for the entry at tail1. When the instruction in
+// fetchbuf0 writes the same register, the operand is marked invalid and
+// tagged with tail0 so it waits for that result.
+task a1_vs;
+begin
+	// if there is not an overlapping write to the register file.
+	if (Ra1s != Rt0s || !fetchbuf0_rfw) begin
+		iqentry_a1_v [tail1] <= regIsValid[Ra1s];
+		iqentry_a1_s [tail1] <= rf_source [Ra1s];
+	end
+	else begin
+		iqentry_a1_v [tail1] <= `INV;
+		iqentry_a1_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+	end
+end
+endtask
+
+// Operand 2 valid/source for the entry at tail1; same rule as a1_vs.
+task a2_vs;
+begin
+	// if there is not an overlapping write to the register file.
+	if (Rb1s != Rt0s || !fetchbuf0_rfw) begin
+		iqentry_a2_v [tail1] <= regIsValid[Rb1s];
+		iqentry_a2_s [tail1] <= rf_source [Rb1s];
+	end
+	else begin
+		iqentry_a2_v [tail1] <= `INV;
+		iqentry_a2_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+	end
+end
+endtask
+
+// Operand 3 valid/source for the entry at tail1; same rule as a1_vs.
+task a3_vs;
+begin
+	// if there is not an overlapping write to the register file.
+	if (Rc1s != Rt0s || !fetchbuf0_rfw) begin
+		iqentry_a3_v [tail1] <= regIsValid[Rc1s];
+		iqentry_a3_s [tail1] <= rf_source [Rc1s];
+	end
+	else begin
+		iqentry_a3_v [tail1] <= `INV;
+		iqentry_a3_s [tail1] <= { 1'b0, fetchbuf0_mem, tail0 };
+	end
+end
+endtask
+
+// Queue the instruction in fetchbuf0. A vector instruction (when SUP_VECTOR
+// is set) is queued once per element, up to two elements per call; any
+// other instruction is queued once at tail0.
+// NOTE(review): iq_ctr uses blocking assignments, so when two elements are
+// queued it advances by 1 and then by 2 (3 in total), unlike the nonblocking
+// vqe0/vqet0 updates where the later assignment replaces the earlier one -
+// confirm the intended count.
+task enque0x;
+begin
+	if (IsVector(fetchbuf0_instr) && SUP_VECTOR) begin
+		vqe0 <= vqe0 + 4'd1;
+		if (IsVCmprss(fetchbuf0_instr)) begin
+			if (vm[fetchbuf0_instr[25:23]][vqe0])
+				vqet0 <= vqet0 + 4'd1;
+		end
+		else
+			vqet0 <= vqet0 + 4'd1;
+		if (vqe0 >= vl-2)
+			nop_fetchbuf <= fetchbuf ? 4'b1000 : 4'b0010;
+		enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, vqe0);
+		iq_ctr = iq_ctr + 4'd1;
+		if (fetchbuf0_rfw) begin
+			rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 };    // top bit indicates ALU/MEM bus
+			rf_v[Rt0s] <= `INV;
+		end
+		if (canq2) begin
+			if (vqe0 < vl-2) begin
+				vqe0 <= vqe0 + 4'd2;
+				if (IsVCmprss(fetchbuf0_instr)) begin
+					if (vm[fetchbuf0_instr[25:23]][vqe0+6'd1])
+						vqet0 <= vqet0 + 4'd2;
+				end
+				else
+					vqet0 <= vqet0 + 4'd2;
+				enque0(tail1, fetchbuf0_thrd ? maxsn[1] + 4'd2 : maxsn[0]+4'd2, vqe0 + 6'd1);
+				iq_ctr = iq_ctr + 4'd2;
+				if (fetchbuf0_rfw) begin
+					rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail1 };    // top bit indicates ALU/MEM bus
+					rf_v[Rt0s] <= `INV;
+				end
+			end
+		end
+	end
+	else begin
+		enque0(tail0, fetchbuf0_thrd ? maxsn[1]+4'd1 : maxsn[0]+4'd1, 6'd0);
+		iq_ctr = iq_ctr + 4'd1;
+		if (fetchbuf0_rfw) begin
+			rf_source[ Rt0s ] <= { 1'b0, fetchbuf0_mem, tail0 };    // top bit indicates ALU/MEM bus
+			rf_v[Rt0s] <= `INV;
+		end
+	end
+end
+endtask
+
+// Enqueue fetchbuf0 onto the tail of the instruction queue
+//   tail   - queue slot to fill
+//   seqnum - sequence number stored in iqentry_sn
+//   venno  - vector element number stored in iqentry_ven
+task enque0;
+input [`QBITS] tail;
+input [`SNBITS] seqnum;
+input [5:0] venno;
+begin
+	// Default exception code first, debug override second. The last
+	// nonblocking assignment to iqentry_exc[tail] is the one that takes
+	// effect, so nothing later in this task may assign it again or the
+	// debug / single step fault would be lost.
+	iqentry_exc[tail] <= `EXC_NONE;
+`ifdef SUPPORT_DBG
+	if (dbg_imatchA)
+		iqentry_exc[tail] <= `FLT_DBG;
+	else if (dbg_ctrl[63])
+		iqentry_exc[tail] <= `FLT_SSM;
+`endif
+	iqentry_state[tail] <= IQS_QUEUED;
+	iqentry_sn   [tail]    <=  seqnum;
+	iqentry_iv	 [tail]    <=   `INV;
+	iqentry_is   [tail]    <= tail;
+	iqentry_thrd [tail]    <=   fetchbuf0_thrd;
+	iqentry_res  [tail]    <=    `ZERO;
+	// A vector load/store whose mask bit is clear is queued as a NOP.
+	iqentry_instr[tail]    <=    IsVLS(fetchbuf0_instr) ? (vm[fnM2(fetchbuf0_instr)] ? fetchbuf0_instr : `NOP_INSN) : fetchbuf0_instr;
+	iqentry_insln[tail]		 <=  fetchbuf0_insln;
+	iqentry_fc   [tail]    <=		`INV;
+	iqentry_mem	 [tail]		 <=		`INV;
+	iqentry_alu	 [tail]		 <=		`INV;
+	iqentry_fpu	 [tail]		 <=		`INV;
+	iqentry_load [tail]		 <=		`INV;
+	iqentry_pt   [tail]    <=  predict_taken0;
+// If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt
+// inherit the previous pc.
+//if (IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15] &&
+//   (IsBrk(iqentry_instr[idm1(tail)]) && !iqentry_instr[idm1(tail1)][15] && iqentry_v[idm1(tail)]))
+//   iqentry_pc   [tail]    <= iqentry_pc[idm1(tail)];
+//else
+	 iqentry_pc   [tail] <= fetchbuf0_pc;
+	iqentry_rtop [tail]    <=   IsRtop(fetchbuf0_instr);
+	iqentry_tgt  [tail]    <=	Rt0;
+	iqentry_Ra   [tail]    <= Ra0;
+	iqentry_Rb   [tail]    <= Rb0;
+	iqentry_Rc   [tail]    <= Rc0;
+	iqentry_vl   [tail]    <=  vl;
+	iqentry_ven  [tail]    <=   venno;
+	// Each operand is valid at once when the instruction does not use it
+	// (SourceNValid) or the register file copy is valid; otherwise the source
+	// tag records where the value will come from.
+	iqentry_a1   [tail]    <=    rfoa0;
+	iqentry_a1_v [tail]    <=    Source1Valid(fetchbuf0_instr) | regIsValid[Ra0s];
+	iqentry_a1_s [tail]    <=    rf_source[Ra0s];
+	iqentry_a2   [tail]    <=    rfob0;
+	iqentry_a2_v [tail]    <=    Source2Valid(fetchbuf0_instr) | regIsValid[Rb0s];
+	iqentry_a2_s [tail]    <=    rf_source[Rb0s];
+	iqentry_a3   [tail]    <=    rfoc0;
+	iqentry_a3_v [tail]    <=    Source3Valid(fetchbuf0_instr) | regIsValid[Rc0s];
+	iqentry_a3_s [tail]    <=    rf_source[Rc0s];
+`ifdef INLINE_DECODE
+/* This decoding cannot be done here because it'll introduce a 1 cycle delay
+	id1_Rt <= Rt0[4:0];
+	id1_vl <= vl;
+	id1_ven <= venno;
+	id1_id <= tail;
+	id1_pt <= predict_taken0;
+	id1_thrd <= fetchbuf0_thrd;
+*/
+	setinsn1(tail,id1_bus);
+`endif
+end
+endtask
+
+// Enque fetchbuf1. Fetchbuf1 might be the second instruction to queue so some
+// of this code checks to see which tail it is being queued on.
+task enque1;
+input [`QBITS] tail;
+input [`SNBITS] seqnum;
+input [5:0] venno;
+begin
+	iqentry_exc[tail] <= `FLT_NONE;
+`ifdef SUPPORT_DBG
+	if (dbg_imatchB)
+		iqentry_exc[tail] <= `FLT_DBG;
+	else if (dbg_ctrl[63])
+		iqentry_exc[tail] <= `FLT_SSM;
+`endif
+	iqentry_state[tail] <= IQS_QUEUED;
+	iqentry_sn   [tail]    <=   seqnum;
+	iqentry_iv	 [tail]    <=   `INV;
+	iqentry_is   [tail]    <= tail;
+	iqentry_thrd [tail]    <=   fetchbuf1_thrd;
+	iqentry_res  [tail]    <=   `ZERO;
+	iqentry_instr[tail]    <=   IsVLS(fetchbuf1_instr) ? (vm[fnM2(fetchbuf1_instr)] ?
fetchbuf1_instr : `NOP_INSN) : fetchbuf1_instr; + iqentry_insln[tail] <= fetchbuf1_insln; + iqentry_fc [tail] <= `INV; + iqentry_mem [tail] <= `INV; + iqentry_alu [tail] <= `INV; + iqentry_fpu [tail] <= `INV; + iqentry_load [tail] <= `INV; + iqentry_pt [tail] <= predict_taken1; +// If queing 2nd instruction must read from first +if (tail==tail1) begin + // If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt + // inherit the previous pc. +// if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] && +// IsBrk(fetchbuf0_instr) && !fetchbuf0_instr[15]) +// iqentry_pc [tail] <= fetchbuf0_pc; +// else + iqentry_pc [tail] <= fetchbuf1_pc; +end +else begin + // If the previous instruction was a hardware interrupt and this instruction is a hardware interrupt + // inherit the previous pc. +// if (IsBrk(fetchbuf1_instr) && !fetchbuf1_instr[15] && +// (IsBrk(iqentry_instr[idp7(tail)]) && !iqentry_instr[idm1(tail)][15] && iqentry_v[idm1(tail)])) +// iqentry_pc [tail] <= iqentry_pc[idm1(tail)]; +// else + iqentry_pc [tail] <= fetchbuf1_pc; +end + iqentry_rtop [tail] <= IsRtop(fetchbuf1_instr); + iqentry_tgt [tail] <= Rt1; + iqentry_Ra [tail] <= Ra1; + iqentry_Rb [tail] <= Rb1; + iqentry_Rc [tail] <= Rc1; + iqentry_vl [tail] <= vl; + iqentry_ven [tail] <= venno; + iqentry_exc [tail] <= `EXC_NONE; + iqentry_a1 [tail] <= rfoa1; + iqentry_a1_v [tail] <= Source1Valid(fetchbuf1_instr) | regIsValid[Ra1s]; + iqentry_a1_s [tail] <= rf_source[Ra1s]; + iqentry_a2 [tail] <= rfob1; + iqentry_a2_v [tail] <= Source2Valid(fetchbuf1_instr) | regIsValid[Rb1s]; + iqentry_a2_s [tail] <= rf_source[Rb1s]; + iqentry_a3 [tail] <= rfoc1; + iqentry_a3_v [tail] <= Source3Valid(fetchbuf1_instr) | regIsValid[Rc1s]; + iqentry_a3_s [tail] <= rf_source[Rc1s]; +`ifdef INLINE_DECODE +/* This decoding cannot be done here because it'll introduce a 1 cycle delay + id2_Rt <= Rt1[4:0]; + id2_vl <= vl; + id2_ven <= venno; + id2_id <= tail; + id2_pt <= predict_taken1; + 
id2_thrd <= fetchbuf1_thrd; +*/ + setinsn1(tail,id2_bus); +`endif +end +endtask + +// This task takes care of commits for things other than the register file. +task oddball_commit; +input v; +input [`QBITS] head; +input [1:0] which; +reg thread; +begin + thread = iqentry_thrd[head]; + if (v) begin + if (|iqentry_exc[head]) begin + excmiss <= TRUE; +`ifdef SUPPORT_SMT + excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00}; + excthrd <= iqentry_thrd[head]; + badaddr[{thread,2'd0}] <= iqentry_ma[head]; + bad_instr[{thread,2'd0}] <= iqentry_instr[head]; + epc0[thread] <= iqentry_pc[head]; + epc1[thread] <= epc0[thread]; + epc2[thread] <= epc1[thread]; + epc3[thread] <= epc2[thread]; + epc4[thread] <= epc3[thread]; + epc5[thread] <= epc4[thread]; + epc6[thread] <= epc5[thread]; + epc7[thread] <= epc6[thread]; + epc8[thread] <= epc7[thread]; + im_stack[thread] <= {im_stack[thread][27:0],im}; + ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]}; + dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]}; + pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]}; + rs_stack[thread] <= {rs_stack[thread][59:0],`EXC_RGS}; + brs_stack[thread] <= {brs_stack[thread][59:0],`EXC_RGS}; + cause[{thread,2'd0}] <= {8'd0,iqentry_exc[head]}; + mstatus[thread][5:4] <= 2'd0; + mstatus[thread][13:6] <= 8'h00; + mstatus[thread][19:14] <= `EXC_RGS; +`else + excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00}; + excthrd <= 1'b0; + badaddr[{1'b0,2'd0}] <= iqentry_ma[head]; + bad_instr[3'd0] <= iqentry_instr[head]; + epc0 <= iqentry_pc[head]; + epc1 <= epc0; + epc2 <= epc1; + epc3 <= epc2; + epc4 <= epc3; + epc5 <= epc4; + epc6 <= epc5; + epc7 <= epc6; + epc8 <= epc7; + im_stack <= {im_stack[27:0],im}; + ol_stack <= {ol_stack[13:0],ol}; + dl_stack <= {dl_stack[13:0],dl}; + pl_stack <= {pl_stack[55:0],cpl}; + rs_stack <= {rs_stack[59:0],`EXC_RGS}; + brs_stack <= {rs_stack[59:0],`EXC_RGS}; + cause[3'd0] <= {8'd0,iqentry_exc[head]}; + mstatus[5:4] <= 2'd0; + mstatus[13:6] <= 8'h00; + 
mstatus[19:14] <= `EXC_RGS; +`endif + wb_en <= `TRUE; + sema[0] <= 1'b0; + ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0}; +`ifdef SUPPORT_DBG + dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]}; + dbg_ctrl[63] <= FALSE; +`endif + end + else + case(iqentry_instr[head][`INSTRUCTION_OP]) +`ifdef SUPPORT_PREDICATION + `CMPI: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0]; +`endif + + `BRK: + // BRK is treated as a nop unless it's a software interrupt or a + // hardware interrupt at a higher priority than the current priority. + if ((|iqentry_instr[head][25:21]) || iqentry_instr[head][20:17] > im) begin + excmiss <= TRUE; +`ifdef SUPPORT_SMT + excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol[thread],5'h00}; + excthrd <= iqentry_thrd[head]; + epc0[thread] <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0}; + epc1[thread] <= epc0[thread]; + epc2[thread] <= epc1[thread]; + epc3[thread] <= epc2[thread]; + epc4[thread] <= epc3[thread]; + epc5[thread] <= epc4[thread]; + epc6[thread] <= epc5[thread]; + epc7[thread] <= epc6[thread]; + epc8[thread] <= epc7[thread]; + im_stack[thread] <= {im_stack[thread][27:0],im}; + ol_stack[thread] <= {ol_stack[thread][13:0],ol[thread]}; + dl_stack[thread] <= {dl_stack[thread][13:0],dl[thread]}; + pl_stack[thread] <= {pl_stack[thread][55:0],cpl[thread]}; + rs_stack[thread] <= {rs_stack[thread][59:0],`BRK_RGS}; + brs_stack[thread] <= {brs_stack[thread][59:0],`BRK_RGS}; + cause[{thread,2'd0}] <= iqentry_res[head][7:0]; + mstatus[thread][5:4] <= 2'd0; + mstatus[thread][13:6] <= 8'h00; + // For hardware interrupts only, set a new mask level. Setting a + // new mask level will effectively prevent subsequent brks that + // are streaming from an interrupt from being processed. 
+ // Select register set according to interrupt level + if (iqentry_instr[head][25:21]==5'd0) begin + mstatus[thread][ 3: 0] <= iqentry_instr[head][20:17]; + mstatus[thread][31:28] <= iqentry_instr[head][20:17]; + mstatus[thread][19:14] <= {2'b0,iqentry_instr[head][20:17]}; + rs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]}; + brs_stack[thread][5:0] <= {2'b0,iqentry_instr[head][20:17]}; + end + else begin + mstatus[thread][19:14] <= `BRK_RGS; + rs_stack[thread][5:0] <= `BRK_RGS; + brs_stack[thread][5:0] <= `BRK_RGS; + end +`else + excmisspc <= {tvec[3'd0][AMSB:8],1'b0,ol,5'h00}; + excthrd <= 1'b0; + epc0 <= iqentry_pc[head] + {iqentry_instr[head][25:21],1'b0}; + epc1 <= epc0; + epc2 <= epc1; + epc3 <= epc2; + epc4 <= epc3; + epc5 <= epc4; + epc6 <= epc5; + epc7 <= epc6; + epc8 <= epc7; + im_stack <= {im_stack[27:0],im}; + ol_stack <= {ol_stack[13:0],ol}; + dl_stack <= {dl_stack[13:0],dl}; + pl_stack <= {pl_stack[55:0],cpl}; + rs_stack <= {rs_stack[59:0],`BRK_RGS}; + brs_stack <= {brs_stack[59:0],`BRK_RGS}; + cause[3'd0] <= iqentry_res[head][7:0]; + mstatus[5:4] <= 2'd0; + mstatus[13:6] <= 8'h00; + // For hardware interrupts only, set a new mask level. Setting a + // new mask level will effectively prevent subsequent brks that + // are streaming from an interrupt from being processed. 
+ // Select register set according to interrupt level + if (iqentry_instr[head][25:21]==5'd0) begin + mstatus[ 3: 0] <= iqentry_instr[head][20:17]; + mstatus[31:28] <= iqentry_instr[head][20:17]; + mstatus[19:14] <= {2'b0,iqentry_instr[head][20:17]}; + rs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]}; + brs_stack[5:0] <= {2'b0,iqentry_instr[head][20:17]}; + end + else begin + mstatus[19:14] <= `BRK_RGS; + rs_stack[5:0] <= `BRK_RGS; + brs_stack[5:0] <= `BRK_RGS; + end +`endif + sema[0] <= 1'b0; + ve_hold <= {vqet1,10'd0,vqe1,10'd0,vqet0,10'd0,vqe0}; +`ifdef SUPPORT_DBG + dbg_ctrl[62:55] <= {dbg_ctrl[61:55],dbg_ctrl[63]}; + dbg_ctrl[63] <= FALSE; +`endif + end + `IVECTOR: + casez(iqentry_tgt[head]) + 8'b00100???: vm[iqentry_tgt[head][2:0]] <= iqentry_res[head]; + 8'b00101111: vl <= iqentry_res[head]; + default: ; + endcase + `R2: + case(iqentry_instr[head][`INSTRUCTION_S2]) +`ifdef SUPPORT_PREDICATION + `CMP: pregs[{rgs,iqentry_tgt[head][3:0]}] <= which==2'd1 ? cmt1nyb[iqentry_tgt[head][3:0]] : cmt0nyb[iqentry_tgt[head][3:0]];//commit_bus[3:0]; +`endif + `R1: case(iqentry_instr[head][20:16]) + `CHAIN_OFF: cr0[18] <= 1'b0; + `CHAIN_ON: cr0[18] <= 1'b1; + //`SETWB: wbrcd[pcr[5:0]] <= 1'b1; + default: ; + endcase + `VMOV: casez(iqentry_tgt[head]) + 12'b1111111_00???: vm[iqentry_tgt[head][2:0]] <= iqentry_res[head]; + 12'b1111111_01111: vl <= iqentry_res[head]; + default: ; + endcase +`ifdef SUPPORT_SMT + `SEI: mstatus[thread][3:0] <= iqentry_res[head][3:0]; // S1 +`else + `SEI: mstatus[3:0] <= iqentry_res[head][3:0]; // S1 +`endif + `RTI: begin + excmiss <= TRUE; +`ifdef SUPPORT_SMT + excmisspc <= epc0[thread]; + excthrd <= thread; + mstatus[thread][3:0] <= im_stack[thread][3:0]; + mstatus[thread][5:4] <= ol_stack[thread][1:0]; + mstatus[thread][21:20] <= dl_stack[thread][1:0]; + mstatus[thread][13:6] <= pl_stack[thread][7:0]; + mstatus[thread][19:14] <= rs_stack[thread][5:0]; + im_stack[thread] <= {4'd15,im_stack[thread][31:4]}; + ol_stack[thread] <= 
{2'd0,ol_stack[thread][15:2]}; + dl_stack[thread] <= {2'd0,dl_stack[thread][15:2]}; + pl_stack[thread] <= {8'h00,pl_stack[thread][63:8]}; + rs_stack[thread] <= {6'h00,rs_stack[thread][59:6]}; + brs_stack[thread] <= {6'h00,brs_stack[thread][59:6]}; + epc0[thread] <= epc1[thread]; + epc1[thread] <= epc2[thread]; + epc2[thread] <= epc3[thread]; + epc3[thread] <= epc4[thread]; + epc4[thread] <= epc5[thread]; + epc5[thread] <= epc6[thread]; + epc6[thread] <= epc7[thread]; + epc7[thread] <= epc8[thread]; + epc8[thread] <= {tvec[0][AMSB:8], 1'b0, ol[thread], 5'h0}; +`else + excmisspc <= epc0; + excthrd <= thread; + mstatus[3:0] <= im_stack[3:0]; + mstatus[5:4] <= ol_stack[1:0]; + mstatus[21:20] <= dl_stack[1:0]; + mstatus[13:6] <= pl_stack[7:0]; + mstatus[19:14] <= rs_stack[5:0]; + im_stack <= {4'd15,im_stack[31:4]}; + ol_stack <= {2'd0,ol_stack[15:2]}; + dl_stack <= {2'd0,dl_stack[15:2]}; + pl_stack <= {8'h00,pl_stack[63:8]}; + rs_stack <= {6'h00,rs_stack[59:6]}; + brs_stack <= {6'h00,brs_stack[59:6]}; + epc0 <= epc1; + epc1 <= epc2; + epc2 <= epc3; + epc3 <= epc4; + epc4 <= epc5; + epc5 <= epc6; + epc6 <= epc7; + epc7 <= epc8; + epc8 <= {tvec[0][AMSB:8], 1'b0, ol, 5'h0}; +`endif + sema[0] <= 1'b0; + sema[iqentry_res[head][5:0]] <= 1'b0; + vqe0 <= ve_hold[ 5: 0]; + vqet0 <= ve_hold[21:16]; + vqe1 <= ve_hold[37:32]; + vqet1 <= ve_hold[53:48]; +`ifdef SUPPORT_DBG + dbg_ctrl[62:55] <= {FALSE,dbg_ctrl[62:56]}; + dbg_ctrl[63] <= dbg_ctrl[55]; +`endif + end + default: ; + endcase + `MEMNDX: + case(iqentry_instr[head][`INSTRUCTION_S2]) + `CACHEX: + case(iqentry_instr[head][22:18]) + 5'h03: invic <= TRUE; + 5'h10: cr0[30] <= FALSE; + 5'h11: cr0[30] <= TRUE; + default: ; + endcase + default: ; + endcase + `CSRRW: + begin + write_csr(iqentry_instr[head][31:18],iqentry_a1[head],thread); + end + `REX: +`ifdef SUPPORT_SMT + // Can only redirect to a lower level + if (ol[thread] < iqentry_instr[head][14:13]) begin + mstatus[thread][5:4] <= iqentry_instr[head][14:13]; + 
badaddr[{thread,iqentry_instr[head][14:13]}] <= badaddr[{thread,ol[thread]}]; + bad_instr[{thread,iqentry_instr[head][14:13]}] <= bad_instr[{thread,ol[thread]}]; + cause[{thread,iqentry_instr[head][14:13]}] <= cause[{thread,ol[thread]}]; + mstatus[thread][13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0]; + end +`else + if (ol < iqentry_instr[head][14:13]) begin + mstatus[5:4] <= iqentry_instr[head][14:13]; + badaddr[{1'b0,iqentry_instr[head][14:13]}] <= badaddr[{1'b0,ol}]; + bad_instr[{1'b0,iqentry_instr[head][14:13]}] <= bad_instr[{1'b0,ol}]; + cause[{1'b0,iqentry_instr[head][14:13]}] <= cause[{1'b0,ol}]; + mstatus[13:6] <= iqentry_instr[head][25:18] | iqentry_a1[head][7:0]; + end +`endif + `CACHE: + case(iqentry_instr[head][17:13]) + 5'h03: invic <= TRUE; + 5'h10: cr0[30] <= FALSE; + 5'h11: cr0[30] <= TRUE; + default: ; + endcase + `FLOAT: + case(iqentry_instr[head][`INSTRUCTION_S2]) + `FRM: begin + fp_rm <= iqentry_res[head][2:0]; + end + `FCX: + begin + fp_sx <= fp_sx & ~iqentry_res[head][5]; + fp_inex <= fp_inex & ~iqentry_res[head][4]; + fp_dbzx <= fp_dbzx & ~(iqentry_res[head][3]|iqentry_res[head][0]); + fp_underx <= fp_underx & ~iqentry_res[head][2]; + fp_overx <= fp_overx & ~iqentry_res[head][1]; + fp_giopx <= fp_giopx & ~iqentry_res[head][0]; + fp_infdivx <= fp_infdivx & ~iqentry_res[head][0]; + fp_zerozerox <= fp_zerozerox & ~iqentry_res[head][0]; + fp_subinfx <= fp_subinfx & ~iqentry_res[head][0]; + fp_infzerox <= fp_infzerox & ~iqentry_res[head][0]; + fp_NaNCmpx <= fp_NaNCmpx & ~iqentry_res[head][0]; + fp_swtx <= 1'b0; + end + `FDX: + begin + fp_inexe <= fp_inexe & ~iqentry_res[head][4]; + fp_dbzxe <= fp_dbzxe & ~iqentry_res[head][3]; + fp_underxe <= fp_underxe & ~iqentry_res[head][2]; + fp_overxe <= fp_overxe & ~iqentry_res[head][1]; + fp_invopxe <= fp_invopxe & ~iqentry_res[head][0]; + end + `FEX: + begin + fp_inexe <= fp_inexe | iqentry_res[head][4]; + fp_dbzxe <= fp_dbzxe | iqentry_res[head][3]; + fp_underxe <= fp_underxe | 
iqentry_res[head][2]; + fp_overxe <= fp_overxe | iqentry_res[head][1]; + fp_invopxe <= fp_invopxe | iqentry_res[head][0]; + end + default: + begin + // 31 to 29 is rounding mode + // 28 to 24 are exception enables + // 23 is nsfp + // 22 is a fractie + fp_fractie <= iqentry_ares[head][22]; + fp_raz <= iqentry_ares[head][21]; + // 20 is a 0 + fp_neg <= iqentry_ares[head][19]; + fp_pos <= iqentry_ares[head][18]; + fp_zero <= iqentry_ares[head][17]; + fp_inf <= iqentry_ares[head][16]; + // 15 swtx + // 14 + fp_inex <= fp_inex | (fp_inexe & iqentry_ares[head][14]); + fp_dbzx <= fp_dbzx | (fp_dbzxe & iqentry_ares[head][13]); + fp_underx <= fp_underx | (fp_underxe & iqentry_ares[head][12]); + fp_overx <= fp_overx | (fp_overxe & iqentry_ares[head][11]); + //fp_giopx <= fp_giopx | (fp_giopxe & iqentry_res2[head][10]); + //fp_invopx <= fp_invopx | (fp_invopxe & iqentry_res2[head][24]); + // + fp_cvtx <= fp_cvtx | (fp_giopxe & iqentry_ares[head][7]); + fp_sqrtx <= fp_sqrtx | (fp_giopxe & iqentry_ares[head][6]); + fp_NaNCmpx <= fp_NaNCmpx | (fp_giopxe & iqentry_ares[head][5]); + fp_infzerox <= fp_infzerox | (fp_giopxe & iqentry_ares[head][4]); + fp_zerozerox <= fp_zerozerox | (fp_giopxe & iqentry_ares[head][3]); + fp_infdivx <= fp_infdivx | (fp_giopxe & iqentry_ares[head][2]); + fp_subinfx <= fp_subinfx | (fp_giopxe & iqentry_ares[head][1]); + fp_snanx <= fp_snanx | (fp_giopxe & iqentry_ares[head][0]); + + end + endcase + default: ; + endcase + // Once the flow control instruction commits, NOP it out to allow + // pending stores to be issued. + iqentry_instr[head][5:0] <= `NOP; + end +end +endtask + +// CSR access tasks +// This task does not work. Possibly because the always block @* doesn't +// evaluate into the task to see which signals are changing. The following +// code is simply included as an always block above. 
+task read_csr; +input [11:0] csrno; +output [63:0] dat; +input thread; +begin +`ifdef SUPPORT_SMT + if (csrno[11:10] >= ol[thread]) +`else + if (csrno[11:10] >= ol) +`endif + casez(csrno[9:0]) + `CSR_CR0: dat <= cr0; + `CSR_HARTID: dat <= hartid; + `CSR_TICK: dat <= tick; + `CSR_PCR: dat <= pcr; + `CSR_PCR2: dat <= pcr2; + `CSR_PMR: dat <= pmr; + `CSR_WBRCD: dat <= wbrcd; + `CSR_SEMA: dat <= sema; + `CSR_KEYS: dat <= keys; + `CSR_TCB: dat <= tcb; + `CSR_FSTAT: dat <= {fp_rgs,fp_status}; +`ifdef SUPPORT_DBG + `CSR_DBAD0: dat <= dbg_adr0; + `CSR_DBAD1: dat <= dbg_adr1; + `CSR_DBAD2: dat <= dbg_adr2; + `CSR_DBAD3: dat <= dbg_adr3; + `CSR_DBCTRL: dat <= dbg_ctrl; + `CSR_DBSTAT: dat <= dbg_stat; +`endif + `CSR_CAS: dat <= cas; + `CSR_TVEC: dat <= tvec[csrno[2:0]]; + `CSR_BADADR: dat <= badaddr[{thread,csrno[11:10]}]; + `CSR_BADINSTR: dat <= bad_instr[{thread,csrno[11:10]}]; + `CSR_CAUSE: dat <= {48'd0,cause[{thread,csrno[11:10]}]}; +`ifdef SUPPORT_SMT + `CSR_IM_STACK: dat <= im_stack[thread]; + `CSR_OL_STACK: dat <= {dl_stack[thread],ol_stack[thread]}; + `CSR_PL_STACK: dat <= pl_stack[thread]; + `CSR_RS_STACK: dat <= rs_stack[thread]; + `CSR_STATUS: dat <= mstatus[thread][63:0]; + `CSR_EPC0: dat <= epc0[thread]; + `CSR_EPC1: dat <= epc1[thread]; + `CSR_EPC2: dat <= epc2[thread]; + `CSR_EPC3: dat <= epc3[thread]; + `CSR_EPC4: dat <= epc4[thread]; + `CSR_EPC5: dat <= epc5[thread]; + `CSR_EPC6: dat <= epc6[thread]; + `CSR_EPC7: dat <= epc7[thread]; +`else + `CSR_IM_STACK: dat <= im_stack; + `CSR_OL_STACK: dat <= {dl_stack,ol_stack}; + `CSR_PL_STACK: dat <= pl_stack; + `CSR_RS_STACK: dat <= rs_stack; + `CSR_STATUS: dat <= mstatus[63:0]; + `CSR_EPC0: dat <= epc0; + `CSR_EPC1: dat <= epc1; + `CSR_EPC2: dat <= epc2; + `CSR_EPC3: dat <= epc3; + `CSR_EPC4: dat <= epc4; + `CSR_EPC5: dat <= epc5; + `CSR_EPC6: dat <= epc6; + `CSR_EPC7: dat <= epc7; +`endif + `CSR_CODEBUF: dat <= codebuf[csrno[5:0]]; +`ifdef SUPPORT_BBMS + `CSR_TB: dat <= tb; + `CSR_CBL: dat <= cbl; + `CSR_CBU: dat 
<= cbu; + `CSR_RO: dat <= ro; + `CSR_DBL: dat <= dbl; + `CSR_DBU: dat <= dbu; + `CSR_SBL: dat <= sbl; + `CSR_SBU: dat <= sbu; + `CSR_ENU: dat <= en; +`endif +`ifdef SUPPORT_PREDICATION + `CSR_PREGS: read_pregs(dat); +`endif + `CSR_Q_CTR: dat <= iq_ctr; + `CSR_BM_CTR: dat <= bm_ctr; + `CSR_ICL_CTR: dat <= icl_ctr; + `CSR_IRQ_CTR: dat <= irq_ctr; + `CSR_TIME: dat <= wc_times; + `CSR_INFO: + case(csrno[3:0]) + 4'd0: dat <= "Finitron"; // manufacturer + 4'd1: dat <= " "; + 4'd2: dat <= "64 bit "; // CPU class + 4'd3: dat <= " "; + 4'd4: dat <= "FT64 "; // Name + 4'd5: dat <= " "; + 4'd6: dat <= 64'd1; // model # + 4'd7: dat <= 64'd1; // serial number + 4'd8: dat <= {32'd16384,32'd16384}; // cache sizes instruction,data + 4'd9: dat <= 64'd0; + default: dat <= 64'd0; + endcase + default: begin + $display("Unsupported CSR:%h",csrno[10:0]); + dat <= 64'hEEEEEEEEEEEEEEEE; + end + endcase + else + dat <= 64'h0; +end +endtask + +task write_csr; +input [13:0] csrno; +input [63:0] dat; +input thread; +begin +`ifdef SUPPORT_SMT + if (csrno[11:10] >= ol[thread]) +`else + if (csrno[11:10] >= ol) +`endif + case(csrno[13:12]) + 2'd1: // CSRRW + casez(csrno[9:0]) + `CSR_CR0: cr0 <= dat; + `CSR_PCR: pcr <= dat[31:0]; + `CSR_PCR2: pcr2 <= dat; + `CSR_PMR: case(`NUM_IDU) + 0,1: pmr[0] <= 1'b1; + 2: + begin + if (dat[1:0]==2'b00) + pmr[1:0] <= 2'b01; + else + pmr[1:0] <= dat[1:0]; + pmr[63:2] <= dat[63:2]; + end + 3: + begin + if (dat[2:0]==3'b000) + pmr[2:0] <= 3'b001; + else + pmr[2:0] <= dat[2:0]; + pmr[63:3] <= dat[63:3]; + end + default: pmr[0] <= 1'b1; + endcase + `CSR_WBRCD: wbrcd <= dat; + `CSR_SEMA: sema <= dat; + `CSR_KEYS: keys <= dat; + `CSR_TCB: tcb <= dat; + `CSR_FSTAT: fpu_csr[37:32] <= dat[37:32]; + `CSR_BADADR: badaddr[{thread,csrno[11:10]}] <= dat; + `CSR_BADINSTR: bad_instr[{thread,csrno[11:10]}] <= dat; + `CSR_CAUSE: cause[{thread,csrno[11:10]}] <= dat[15:0]; +`ifdef SUPPORT_DBG + `CSR_DBAD0: dbg_adr0 <= dat[AMSB:0]; + `CSR_DBAD1: dbg_adr1 <= dat[AMSB:0]; + 
`CSR_DBAD2: dbg_adr2 <= dat[AMSB:0]; + `CSR_DBAD3: dbg_adr3 <= dat[AMSB:0]; + `CSR_DBCTRL: dbg_ctrl <= dat; +`endif + `CSR_CAS: cas <= dat; + `CSR_TVEC: tvec[csrno[2:0]] <= dat[31:0]; +`ifdef SUPPORT_SMT + `CSR_IM_STACK: im_stack[thread] <= dat[31:0]; + `CSR_OL_STACK: begin + ol_stack[thread] <= dat[15:0]; + dl_stack[thread] <= dat[31:16]; + end + `CSR_PL_STACK: pl_stack[thread] <= dat; + `CSR_RS_STACK: rs_stack[thread] <= dat; + `CSR_STATUS: mstatus[thread][63:0] <= dat; + `CSR_EPC0: epc0[thread] <= dat; + `CSR_EPC1: epc1[thread] <= dat; + `CSR_EPC2: epc2[thread] <= dat; + `CSR_EPC3: epc3[thread] <= dat; + `CSR_EPC4: epc4[thread] <= dat; + `CSR_EPC5: epc5[thread] <= dat; + `CSR_EPC6: epc6[thread] <= dat; + `CSR_EPC7: epc7[thread] <= dat; +`else + `CSR_IM_STACK: im_stack <= dat[31:0]; + `CSR_OL_STACK: begin + ol_stack <= dat[15:0]; + dl_stack <= dat[31:16]; + end + `CSR_PL_STACK: pl_stack <= dat; + `CSR_RS_STACK: rs_stack <= dat; + `CSR_STATUS: mstatus[63:0] <= dat; + `CSR_EPC0: epc0 <= dat; + `CSR_EPC1: epc1 <= dat; + `CSR_EPC2: epc2 <= dat; + `CSR_EPC3: epc3 <= dat; + `CSR_EPC4: epc4 <= dat; + `CSR_EPC5: epc5 <= dat; + `CSR_EPC6: epc6 <= dat; + `CSR_EPC7: epc7 <= dat; +`endif +`ifdef SUPPORT_BBMS + `CSR_TB: prg_base[brgs] <= dat; + `CSR_CBL: cl_barrier[brgs] <= dat; + `CSR_CBU: cu_barrier[brgs] <= dat; + `CSR_RO: ro_barrier[brgs] <= dat; + `CSR_DBL: dl_barrier[brgs] <= dat; + `CSR_DBU: du_barrier[brgs] <= dat; + `CSR_SBL: sl_barrier[brgs] <= dat; + `CSR_SBU: su_barrier[brgs] <= dat; + `CSR_ENU: en_barrier[brgs] <= dat; +`endif +`ifdef SUPPORT_PREDICATION + `CSR_PREGS: write_pregs(dat); +`endif + `CSR_TIME: begin + ld_time <= 6'h3f; + wc_time_dat <= dat; + end + `CSR_CODEBUF: codebuf[csrno[5:0]] <= dat; + default: ; + endcase + 2'd2: // CSRRS + case(csrno[9:0]) + `CSR_CR0: cr0 <= cr0 | dat; + `CSR_PCR: pcr[31:0] <= pcr[31:0] | dat[31:0]; + `CSR_PCR2: pcr2 <= pcr2 | dat; + `CSR_PMR: pmr <= pmr | dat; + `CSR_WBRCD: wbrcd <= wbrcd | dat; +`ifdef SUPPORT_DBG + 
`CSR_DBCTRL: dbg_ctrl <= dbg_ctrl | dat; +`endif + `CSR_SEMA: sema <= sema | dat; +`ifdef SUPPORT_SMT + `CSR_STATUS: mstatus[thread][63:0] <= mstatus[thread][63:0] | dat; +`else + `CSR_STATUS: mstatus[63:0] <= mstatus[63:0] | dat; +`endif + default: ; + endcase + 2'd3: // CSRRC + case(csrno[9:0]) + `CSR_CR0: cr0 <= cr0 & ~dat; + `CSR_PCR: pcr <= pcr & ~dat; + `CSR_PCR2: pcr2 <= pcr2 & ~dat; + `CSR_PMR: begin + if (dat[1:0]==2'b11) + pmr[1:0] <= 2'b01; + else + pmr[1:0] <= pmr[1:0] & ~dat[1:0]; + pmr[63:2] <= pmr[63:2] & ~dat[63:2]; + end + `CSR_WBRCD: wbrcd <= wbrcd & ~dat; +`ifdef SUPPORT_DBG + `CSR_DBCTRL: dbg_ctrl <= dbg_ctrl & ~dat; +`endif + `CSR_SEMA: sema <= sema & ~dat; +`ifdef SUPPORT_SMT + `CSR_STATUS: mstatus[thread][63:0] <= mstatus[thread][63:0] & ~dat; +`else + `CSR_STATUS: mstatus[63:0] <= mstatus[63:0] & ~dat; +`endif + default: ; + endcase + default: ; + endcase +end +endtask + +task tDram0Issue; +input [`QBITSP1] n; +begin +// dramA_v <= `INV; + dram0 <= `DRAMSLOT_BUSY; + dram0_id <= { 1'b1, n[`QBITS] }; + dram0_instr <= iqentry_instr[n]; + dram0_rmw <= iqentry_rmw[n]; + dram0_preload <= iqentry_preload[n]; + dram0_tgt <= iqentry_tgt[n]; + dram0_data <= iqentry_a2[n]; + dram0_addr <= iqentry_ma[n]; + // if (ol[iqentry_thrd[n]]==`OL_USER) + // dram0_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0}; + // else + dram0_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n]; + dram0_memsize <= iqentry_memsz[n]; + dram0_load <= iqentry_load[n]; + dram0_store <= iqentry_store[n]; +`ifdef SUPPORT_SMT + dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]]; +`else + dram0_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl; +`endif + // Once the memory op is issued reset the a1_v flag. 
+ // This will cause the a1 bus to look for new data from memory (a1_s is pointed to a memory bus) + // This is used for the load and compare instructions. + // must reset the a1 source too. + //iqentry_a1_v[n] <= `INV; + iqentry_state[n] <= IQS_MEM; +end +endtask + +task tDram1Issue; +input [`QBITSP1] n; +begin + dramB_v <= `INV; + dram1 <= `DRAMSLOT_BUSY; + dram1_id <= { 1'b1, n[`QBITS] }; + dram1_instr <= iqentry_instr[n]; + dram1_rmw <= iqentry_rmw[n]; + dram1_preload <= iqentry_preload[n]; + dram1_tgt <= iqentry_tgt[n]; + dram1_data <= iqentry_a2[n]; + dram1_addr <= iqentry_ma[n]; + // if (ol[iqentry_thrd[n]]==`OL_USER) + // dram1_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? {ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0}; + // else + dram1_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n]; + dram1_memsize <= iqentry_memsz[n]; + dram1_load <= iqentry_load[n]; + dram1_store <= iqentry_store[n]; +`ifdef SUPPORT_SMT + dram1_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]]; +`else + dram1_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl; +`endif + //iqentry_a1_v[n] <= `INV; + iqentry_state[n] <= IQS_MEM; +end +endtask + +task tDram2Issue; +input [`QBITSP1] n; +begin + dramC_v <= `INV; + dram2 <= `DRAMSLOT_BUSY; + dram2_id <= { 1'b1, n[`QBITS] }; + dram2_instr <= iqentry_instr[n]; + dram2_rmw <= iqentry_rmw[n]; + dram2_preload <= iqentry_preload[n]; + dram2_tgt <= iqentry_tgt[n]; + dram2_data <= iqentry_a2[n]; + dram2_addr <= iqentry_ma[n]; + // if (ol[iqentry_thrd[n]]==`OL_USER) + // dram2_seg <= (iqentry_Ra[n]==5'd30 || iqentry_Ra[n]==5'd31) ? 
{ss[iqentry_thrd[n]],13'd0} : {ds[iqentry_thrd[n]],13'd0}; + // else + dram2_unc <= iqentry_ma[n][31:20]==12'hFFD || !dce || iqentry_loadv[n]; + dram2_memsize <= iqentry_memsz[n]; + dram2_load <= iqentry_load[n]; + dram2_store <= iqentry_store[n]; +`ifdef SUPPORT_SMT + dram2_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol[iqentry_thrd[n]] : dl[iqentry_thrd[n]]; +`else + dram2_ol <= (iqentry_Ra[n][4:0]==5'd31 || iqentry_Ra[n][4:0]==5'd30) ? ol : dl; +`endif + //iqentry_a1_v[n] <= `INV; + iqentry_state[n] <= IQS_MEM; +end +endtask + +task wb_nack; +begin + cti_o <= 3'b000; + bte_o <= 2'b00; + cyc <= `LOW; + stb_o <= `LOW; + we <= `LOW; + sel_o <= 8'h00; +end +endtask + +endmodule + + +module decoder5 (num, out); +input [4:0] num; +output [31:1] out; +reg [31:1] out; + +always @(num) +case (num) + 5'd0 : out <= 31'b0000000000000000000000000000000; + 5'd1 : out <= 31'b0000000000000000000000000000001; + 5'd2 : out <= 31'b0000000000000000000000000000010; + 5'd3 : out <= 31'b0000000000000000000000000000100; + 5'd4 : out <= 31'b0000000000000000000000000001000; + 5'd5 : out <= 31'b0000000000000000000000000010000; + 5'd6 : out <= 31'b0000000000000000000000000100000; + 5'd7 : out <= 31'b0000000000000000000000001000000; + 5'd8 : out <= 31'b0000000000000000000000010000000; + 5'd9 : out <= 31'b0000000000000000000000100000000; + 5'd10: out <= 31'b0000000000000000000001000000000; + 5'd11: out <= 31'b0000000000000000000010000000000; + 5'd12: out <= 31'b0000000000000000000100000000000; + 5'd13: out <= 31'b0000000000000000001000000000000; + 5'd14: out <= 31'b0000000000000000010000000000000; + 5'd15: out <= 31'b0000000000000000100000000000000; + 5'd16: out <= 31'b0000000000000001000000000000000; + 5'd17: out <= 31'b0000000000000010000000000000000; + 5'd18: out <= 31'b0000000000000100000000000000000; + 5'd19: out <= 31'b0000000000001000000000000000000; + 5'd20: out <= 31'b0000000000010000000000000000000; + 5'd21: out <= 31'b0000000000100000000000000000000; + 5'd22: 
out <= 31'b0000000001000000000000000000000; + 5'd23: out <= 31'b0000000010000000000000000000000; + 5'd24: out <= 31'b0000000100000000000000000000000; + 5'd25: out <= 31'b0000001000000000000000000000000; + 5'd26: out <= 31'b0000010000000000000000000000000; + 5'd27: out <= 31'b0000100000000000000000000000000; + 5'd28: out <= 31'b0001000000000000000000000000000; + 5'd29: out <= 31'b0010000000000000000000000000000; + 5'd30: out <= 31'b0100000000000000000000000000000; + 5'd31: out <= 31'b1000000000000000000000000000000; +endcase + +endmodule + +module decoder6 (num, out); +input [5:0] num; +output [63:1] out; + +wire [63:0] out1; + +assign out1 = 64'd1 << num; +assign out = out1[63:1]; + +endmodule + +module decoder7 (num, out); +input [6:0] num; +output [127:1] out; + +wire [127:0] out1; + +assign out1 = 128'd1 << num; +assign out = out1[127:1]; + +endmodule + +module decoder8 (num, out); +input [7:0] num; +output [255:1] out; + +wire [255:0] out1; + +assign out1 = 256'd1 << num; +assign out = out1[255:1]; + +endmodule + Index: thor/trunk/FT64v7/rtl/twoway/FT64_BTB.v =================================================================== --- thor/trunk/FT64v7/rtl/twoway/FT64_BTB.v (nonexistent) +++ thor/trunk/FT64v7/rtl/twoway/FT64_BTB.v (revision 60) @@ -0,0 +1,199 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_BTB.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ============================================================================ +//

// ----------------------------------------------------------------------------
// FT64_BTB - branch target buffer
//
// Write side (wclk):
//   Up to three updates per clock (wr0/wr1/wr2, each with an address, a
//   target and a valid flag) are pushed into a 32-entry FIFO (pcs/wdats).
//   One FIFO entry per clock is popped and written into the 1024-entry
//   table `mem`.
//
// Read side (rclk):
//   Six lookup ports (A..F). The table index pcX[11:2] is registered on
//   rclk. A port hits when the stored tag equals pcX and the stored valid
//   bit is set. On a hit btgtX is the stored target, otherwise npcX.
//
// Table entry layout, (AMSB+1)*2+2 bits:
//   [(AMSB+1)*2+1]       valid/taken flag
//   [(AMSB+1)*2:AMSB+1]  tag
//   [AMSB:0]             branch target
//
// NOTE(review): entries are queued as {wadr[31:1],valid}. They are then
// written at index {wadr[9:1],valid} with that same value as the tag,
// whereas lookups index with pc[11:2] and compare the tag against the full
// pc. Confirm that this indexing/tagging is what is intended.
// ----------------------------------------------------------------------------
module FT64_BTB(rst, wclk,
	wr0, wadr0, wdat0, valid0,
	wr1, wadr1, wdat1, valid1,
	wr2, wadr2, wdat2, valid2,
	rclk, pcA, btgtA, pcB, btgtB,
	pcC, btgtC, pcD, btgtD, pcE, btgtE, pcF, btgtF,
	hitA, hitB, hitC, hitD, hitE, hitF,
	npcA, npcB, npcC, npcD, npcE, npcF);
parameter AMSB = 63;
parameter RSTPC = 64'hFFFFFFFFFFFC0100;
input rst;
input wclk;
input wr0;
input [AMSB:0] wadr0;
input [AMSB:0] wdat0;
input valid0;
input wr1;
input [AMSB:0] wadr1;
input [AMSB:0] wdat1;
input valid1;
input wr2;
input [AMSB:0] wadr2;
input [AMSB:0] wdat2;
input valid2;
input rclk;
input [AMSB:0] pcA;
output [AMSB:0] btgtA;
input [AMSB:0] pcB;
output [AMSB:0] btgtB;
input [AMSB:0] pcC;
output [AMSB:0] btgtC;
input [AMSB:0] pcD;
output [AMSB:0] btgtD;
input [AMSB:0] pcE;
output [AMSB:0] btgtE;
input [AMSB:0] pcF;
output [AMSB:0] btgtF;
output hitA;
output hitB;
output hitC;
output hitD;
output hitE;
output hitF;
input [AMSB:0] npcA;
input [AMSB:0] npcB;
input [AMSB:0] npcC;
input [AMSB:0] npcD;
input [AMSB:0] npcE;
input [AMSB:0] npcF;

integer n;
reg [AMSB:0] pcs [0:31];		// update FIFO: {address[31:1], valid}
reg [AMSB:0] wdats [0:31];		// update FIFO: branch targets
reg [AMSB:0] wdat;
reg [4:0] pcstail,pcshead;
reg [AMSB:0] pc;
reg takb;
reg wrhist;

reg [(AMSB+1)*2+1:0] mem [0:1023];
reg [9:0] radrA, radrB, radrC, radrD, radrE, radrF;

// Simulation start-up: every entry gets RSTPC as its target. The tag and
// valid fields are zero because RSTPC is zero-extended to the entry width.
initial begin
	for (n = 0; n < 1024; n = n + 1)
		mem[n] <= RSTPC;
end

// FIFO slots following the tail, explicitly wrapped to five bits.
// `pcstail+1` with an unsized literal is evaluated at integer width, so at
// pcstail==31 it selects element 32 (out of range) and the update is lost
// instead of wrapping to slot 0.
wire [4:0] pcstail_p1 = pcstail + 5'd1;
wire [4:0] pcstail_p2 = pcstail + 5'd2;

// Enqueue the updates requested this cycle, port 0 first.
always @(posedge wclk)
if (rst)
	pcstail <= 5'd0;
else begin
	case({wr0,wr1,wr2})
	3'b000:	;
	3'b001:
		begin
		pcs[pcstail] <= {wadr2[31:1],valid2};
		wdats[pcstail] <= wdat2;
		pcstail <= pcstail + 5'd1;
		end
	3'b010:
		begin
		pcs[pcstail] <= {wadr1[31:1],valid1};
		wdats[pcstail] <= wdat1;
		pcstail <= pcstail + 5'd1;
		end
	3'b011:
		begin
		pcs[pcstail] <= {wadr1[31:1],valid1};
		pcs[pcstail_p1] <= {wadr2[31:1],valid2};
		wdats[pcstail] <= wdat1;
		wdats[pcstail_p1] <= wdat2;
		pcstail <= pcstail + 5'd2;
		end
	3'b100:
		begin
		pcs[pcstail] <= {wadr0[31:1],valid0};
		wdats[pcstail] <= wdat0;
		pcstail <= pcstail + 5'd1;
		end
	3'b101:
		begin
		pcs[pcstail] <= {wadr0[31:1],valid0};
		pcs[pcstail_p1] <= {wadr2[31:1],valid2};
		wdats[pcstail] <= wdat0;
		wdats[pcstail_p1] <= wdat2;
		pcstail <= pcstail + 5'd2;
		end
	3'b110:
		begin
		pcs[pcstail] <= {wadr0[31:1],valid0};
		pcs[pcstail_p1] <= {wadr1[31:1],valid1};
		wdats[pcstail] <= wdat0;
		wdats[pcstail_p1] <= wdat1;
		pcstail <= pcstail + 5'd2;
		end
	3'b111:
		begin
		pcs[pcstail] <= {wadr0[31:1],valid0};
		pcs[pcstail_p1] <= {wadr1[31:1],valid1};
		pcs[pcstail_p2] <= {wadr2[31:1],valid2};
		wdats[pcstail] <= wdat0;
		wdats[pcstail_p1] <= wdat1;
		wdats[pcstail_p2] <= wdat2;
		pcstail <= pcstail + 5'd3;
		end
	endcase
end

// Dequeue one pending update per clock; wrhist pulses for the table write.
always @(posedge wclk)
if (rst)
	pcshead <= 5'd0;
else begin
	wrhist <= 1'b0;
	if (pcshead != pcstail) begin
		pc <= pcs[pcshead];
		takb <= pcs[pcshead][0];
		wdat <= wdats[pcshead];
		wrhist <= 1'b1;
		pcshead <= pcshead + 5'd1;
	end
end

// Table write: target, tag and valid flag of the dequeued update.
always @(posedge wclk)
begin
	if (wrhist) #1 mem[pc[9:0]][AMSB:0] <= wdat;
	if (wrhist) #1 mem[pc[9:0]][(AMSB+1)*2:AMSB+1] <= pc;
	if (wrhist) #1 mem[pc[9:0]][(AMSB+1)*2+1] <= takb;
end

// Registered read addresses, one per lookup port.
always @(posedge rclk)
	#1 radrA <= pcA[11:2];
always @(posedge rclk)
	#1 radrB <= pcB[11:2];
always @(posedge rclk)
	#1 radrC <= pcC[11:2];
always @(posedge rclk)
	#1 radrD <= pcD[11:2];
always @(posedge rclk)
	#1 radrE <= pcE[11:2];
always @(posedge rclk)
	#1 radrF <= pcF[11:2];

// Hit: tag matches the looked-up pc and the entry's valid flag is set.
assign hitA = mem[radrA][(AMSB+1)*2:AMSB+1]==pcA && mem[radrA][(AMSB+1)*2+1];
assign hitB = mem[radrB][(AMSB+1)*2:AMSB+1]==pcB && mem[radrB][(AMSB+1)*2+1];
assign hitC = mem[radrC][(AMSB+1)*2:AMSB+1]==pcC && mem[radrC][(AMSB+1)*2+1];
assign hitD = mem[radrD][(AMSB+1)*2:AMSB+1]==pcD && mem[radrD][(AMSB+1)*2+1];
assign hitE = mem[radrE][(AMSB+1)*2:AMSB+1]==pcE && mem[radrE][(AMSB+1)*2+1];
assign hitF = mem[radrF][(AMSB+1)*2:AMSB+1]==pcF && mem[radrF][(AMSB+1)*2+1];

// Predicted target on a hit, otherwise the supplied next pc.
assign btgtA = hitA ? mem[radrA][AMSB:0] : npcA;
assign btgtB = hitB ? mem[radrB][AMSB:0] : npcB;
assign btgtC = hitC ? mem[radrC][AMSB:0] : npcC;
assign btgtD = hitD ? mem[radrD][AMSB:0] : npcD;
assign btgtE = hitE ? mem[radrE][AMSB:0] : npcE;
assign btgtF = hitF ? mem[radrF][AMSB:0] : npcF;

endmodule

Index: thor/trunk/FT64v7/rtl/twoway/FT64_BranchPredictor.v =================================================================== --- thor/trunk/FT64v7/rtl/twoway/FT64_BranchPredictor.v (nonexistent) +++ thor/trunk/FT64v7/rtl/twoway/FT64_BranchPredictor.v (revision 60) @@ -0,0 +1,193 @@ +//============================================================================= +// __ +// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_BranchPredictor.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see .
+// +// +//============================================================================= +//

// ----------------------------------------------------------------------------
// FT64_BranchPredictor
//
// Branch predictor using a 512-entry table of two-bit counters, indexed by
// {pc[7:1], gbl_branch_hist[2:1]}.
//
// Update side:
//   Up to three resolved branches per clock (xisBranch0..2 with xpc0..2 and
//   takb0..2) are pushed into a 32-entry FIFO. One entry per clock is
//   popped and, when `en` is set, used to update the addressed counter and
//   the 3-bit global history.
//
// Predict side:
//   Six ports (A..F). A port predicts taken when its counter reads 0 or 1
//   and `en` is set.
//
// Counter encoding: a taken branch increments the counter and stops at 1;
// a not-taken branch decrements it and stops at 2. Values 0 and 1 therefore
// mean "taken", and 2 and 3 mean "not taken".
// ----------------------------------------------------------------------------
module FT64_BranchPredictor(rst, clk, en,
	xisBranch0, xisBranch1, xisBranch2,
	pcA, pcB, pcC, pcD, pcE, pcF, xpc0, xpc1, xpc2, takb0, takb1, takb2,
	predict_takenA, predict_takenB, predict_takenC, predict_takenD,
	predict_takenE, predict_takenF);
parameter AMSB=63;
parameter DBW=32;
input rst;
input clk;
input en;
input xisBranch0;
input xisBranch1;
input xisBranch2;
input [AMSB:0] pcA;
input [AMSB:0] pcB;
input [AMSB:0] pcC;
input [AMSB:0] pcD;
input [AMSB:0] pcE;
input [AMSB:0] pcF;
input [AMSB:0] xpc0;
input [AMSB:0] xpc1;
input [AMSB:0] xpc2;
input takb0;
input takb1;
input takb2;
output predict_takenA;
output predict_takenB;
output predict_takenC;
output predict_takenD;
output predict_takenE;
output predict_takenF;

integer n;
reg [AMSB:0] pcs [0:31];		// update FIFO: {xpc[31:1], taken}
reg [AMSB:0] pc;
reg takb;
reg [4:0] pcshead,pcstail;
reg wrhist;
reg [2:0] gbl_branch_hist;
reg [1:0] branch_history_table [511:0];

// For simulation only: clear the global history and preset every counter
// to 3, which reads as "not taken". In the real world we don't care.
initial begin
	gbl_branch_hist = 3'b000;
	for (n = 0; n < 512; n = n + 1)
		branch_history_table[n] = 3;
end

wire [8:0] bht_wa = {pc[7:1],gbl_branch_hist[2:1]};		// write address
wire [8:0] bht_raA = {pcA[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [8:0] bht_raB = {pcB[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [8:0] bht_raC = {pcC[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [8:0] bht_raD = {pcD[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [8:0] bht_raE = {pcE[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [8:0] bht_raF = {pcF[7:1],gbl_branch_hist[2:1]};	// read address (IF stage)
wire [1:0] bht_xbits = branch_history_table[bht_wa];
wire [1:0] bht_ibitsA = branch_history_table[bht_raA];
wire [1:0] bht_ibitsB = branch_history_table[bht_raB];
wire [1:0] bht_ibitsC = branch_history_table[bht_raC];
wire [1:0] bht_ibitsD = branch_history_table[bht_raD];
wire [1:0] bht_ibitsE = branch_history_table[bht_raE];
wire [1:0] bht_ibitsF = branch_history_table[bht_raF];
assign predict_takenA = (bht_ibitsA==2'd0 || bht_ibitsA==2'd1) && en;
assign predict_takenB = (bht_ibitsB==2'd0 || bht_ibitsB==2'd1) && en;
assign predict_takenC = (bht_ibitsC==2'd0 || bht_ibitsC==2'd1) && en;
assign predict_takenD = (bht_ibitsD==2'd0 || bht_ibitsD==2'd1) && en;
assign predict_takenE = (bht_ibitsE==2'd0 || bht_ibitsE==2'd1) && en;
assign predict_takenF = (bht_ibitsF==2'd0 || bht_ibitsF==2'd1) && en;

// FIFO slots following the tail, explicitly wrapped to five bits.
// `pcstail+1` with an unsized literal is evaluated at integer width, so at
// pcstail==31 it selects element 32 (out of range) and the update is lost
// instead of wrapping to slot 0.
wire [4:0] pcstail_p1 = pcstail + 5'd1;
wire [4:0] pcstail_p2 = pcstail + 5'd2;

// Enqueue the branches resolved this cycle, slot 0 first.
always @(posedge clk)
if (rst)
	pcstail <= 5'd0;
else begin
	case({xisBranch0,xisBranch1,xisBranch2})
	3'b000:	;
	3'b001:
		begin
		pcs[pcstail] <= {xpc2[31:1],takb2};
		pcstail <= pcstail + 5'd1;
		end
	3'b010:
		begin
		pcs[pcstail] <= {xpc1[31:1],takb1};
		pcstail <= pcstail + 5'd1;
		end
	3'b011:
		begin
		pcs[pcstail] <= {xpc1[31:1],takb1};
		pcs[pcstail_p1] <= {xpc2[31:1],takb2};
		pcstail <= pcstail + 5'd2;
		end
	3'b100:
		begin
		pcs[pcstail] <= {xpc0[31:1],takb0};
		pcstail <= pcstail + 5'd1;
		end
	3'b101:
		begin
		pcs[pcstail] <= {xpc0[31:1],takb0};
		pcs[pcstail_p1] <= {xpc2[31:1],takb2};
		pcstail <= pcstail + 5'd2;
		end
	3'b110:
		begin
		pcs[pcstail] <= {xpc0[31:1],takb0};
		pcs[pcstail_p1] <= {xpc1[31:1],takb1};
		pcstail <= pcstail + 5'd2;
		end
	3'b111:
		begin
		pcs[pcstail] <= {xpc0[31:1],takb0};
		pcs[pcstail_p1] <= {xpc1[31:1],takb1};
		pcs[pcstail_p2] <= {xpc2[31:1],takb2};
		pcstail <= pcstail + 5'd3;
		end
	endcase
end

// Dequeue one pending branch per clock; wrhist pulses for the table update.
always @(posedge clk)
if (rst)
	pcshead <= 5'd0;
else begin
	wrhist <= 1'b0;
	if (pcshead != pcstail) begin
		pc <= pcs[pcshead];
		takb <= pcs[pcshead][0];
		wrhist <= 1'b1;
		pcshead <= pcshead + 5'd1;
	end
end

// Two bit saturating counter
// If taking a branch in commit0 then a following branch
// in commit1 is never encountered. So only update for
// commit1 if commit0 is not taken.
// Combinational logic, so blocking assignments are used.
reg [1:0] xbits_new;
always @*
begin
	xbits_new = bht_xbits;			// default: hold the current value
	if (wrhist) begin
		if (takb) begin
			if (bht_xbits != 2'd1)	// stop counting up at 1
				xbits_new = bht_xbits + 2'd1;
		end
		else begin
			if (bht_xbits != 2'd2)	// stop counting down at 2
				xbits_new = bht_xbits - 2'd1;
		end
	end
end

// Commit the new counter value and shift the outcome into the global
// history.
always @(posedge clk)
if (rst)
	gbl_branch_hist <= 3'b000;
else begin
	if (en) begin
		if (wrhist) begin
			gbl_branch_hist <= {gbl_branch_hist[1:0],takb};
			branch_history_table[bht_wa] <= xbits_new;
		end
	end
end

endmodule

Index: thor/trunk/FT64v7/rtl/twoway/FT64_TLB.v =================================================================== --- thor/trunk/FT64v7/rtl/twoway/FT64_TLB.v (nonexistent) +++ thor/trunk/FT64v7/rtl/twoway/FT64_TLB.v (revision 60) @@ -0,0 +1,622 @@ +`include "FT64_defines.vh" +`include "FT64_config.vh" +//============================================================================= +// __ +// \\__/ o\ (C) 2011-2018 Robert Finch, Waterloo +// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca +// || +// +// FT64_TLB.v +// +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// +// TLB +// The TLB contains 256 entries, that are 16 way set associative. +// The TLB is shared between the instruction and data streams. +// +//============================================================================= +//

`define TLBMissPage		{DBW-13{1'b1}}

// ----------------------------------------------------------------------------
// FT64_TLB
//
// Command interface: `ld` starts the command `op` on register `regno`. The
// command state machine runs IDLE -> ONE -> TWO -> IDLE; age commands wait
// in ONE until the age RAM access has finished. `done` and `idle` report
// the state to the requester. Register data moves through dati/dato.
//
// Translation: vadr_i is compared against all 16 ways of the set selected
// by the low virtual page bits. `q` is the lowest matching way, and q[4]
// set means that no way matched. padr_o, cyc_o, we_o and the violation
// flags wrv_o/rdv_o/exv_o are registered on clk.
//
// Ageing: a second state machine (ar_state) keeps a 32-bit word per entry
// in TLBAgeRam. It is incremented on matching accesses and halved on the
// periodic age tick.
// ----------------------------------------------------------------------------
module FT64_TLB(rst, clk, ld, done, idle, ol,
	ASID, op, regno, dati, dato,
	uncached,
	icl_i, cyc_i, we_i, vadr_i, cyc_o, we_o, padr_o,
	wrv_o, rdv_o, exv_o,
	TLBMiss, HTLBVirtPageo);
parameter DBW=64;
parameter ABW=32;
parameter ENTRIES=256;
// Command state machine states
parameter IDLE = 4'd0;
parameter ONE = 4'd1;
parameter TWO = 4'd2;
// Age state machine states (IDLE is shared)
parameter READ = 4'd1;
parameter INC1 = 4'd2;
parameter INC2 = 4'd3;
parameter INC3 = 4'd4;
parameter AGE1 = 4'd5;
parameter AGE2 = 4'd6;
input rst;
input clk;
input ld;
output done;
output idle;
input [1:0] ol;					// operating level
input [ABW-1:0] vadr_i;
output reg [ABW-1:0] padr_o;
output uncached;

input icl_i;
input cyc_i;
input we_i;
output reg cyc_o;
output reg we_o;
output reg exv_o;
output reg wrv_o;
output reg rdv_o;
input [7:0] ASID;
input [3:0] op;
input [3:0] regno;
input [DBW-1:0] dati;
output reg [DBW-1:0] dato;
output TLBMiss;
output [DBW-1:0] HTLBVirtPageo;

integer n;

reg [3:0] state;
// `==` binds tighter than `&&`, so the IDLE term needs its own parentheses.
// The previous form `state==(IDLE && !ld)` compared state against a one-bit
// value that is always zero and so ignored `ld`.
assign done = (state==IDLE && !ld) || state==TWO;
assign idle = state==IDLE && !ld;

// Holding registers
// These allow the TLB to updated in a single cycle as a unit
reg [DBW-1:0] HTLBVirtPage;
assign HTLBVirtPageo = {HTLBVirtPage,13'b0};
reg [DBW-1:0] HTLBPhysPage;
reg [7:0] HTLBASID;
reg HTLBG;
reg HTLBD;
reg HTLBR, HTLBW, HTLBX, HTLBA, HTLBU, HTLBS;
reg [2:0] HTLBC;
reg [7:0] HTLBPL;
reg [2:0] HTLBPageSize;
reg HTLBValid;
reg [ABW-1:0] miss_addr;

reg TLBenabled;
reg [7:0] i;
reg [DBW-1:0] Index;
reg [3:0] Random;
reg [3:0] Wired;
reg [2:0] PageSize;
reg [15:0] Match;

reg [4:0] q;
wire doddpage;
reg [DBW-1:0] TLBVirtPage [ENTRIES-1:0];
reg [ENTRIES-1:0] TLBG;
reg [ENTRIES-1:0] TLBD;
reg [ENTRIES-1:0] TLBU;
reg [ENTRIES-1:0] TLBS;
reg [ENTRIES-1:0] TLBA;
reg [2:0] TLBC [ENTRIES-1:0];
reg [7:0] TLBASID [ENTRIES-1:0];
reg [7:0] TLBPL [ENTRIES-1:0];
reg [2:0] TLBPageSize [255:0];
reg [ENTRIES-1:0] TLBValid;
reg [DBW-1:0] imiss_addr;
reg [DBW-1:0] dmiss_addr;
reg [DBW-1:0] PageTblAddr;
reg [DBW-1:0] PageTblCtrl;

reg [23:0] age_lmt;
reg [23:0] age_ctr;
wire age_tick = age_ctr < 24'd5;
reg cyc_en, age_en;
reg [3:0] ar_state;
reg ar_wr;
reg [7:0] age_adr, ar_adr;
reg [32:0] count;
reg [31:0] ar_dati;
wire [31:0] ar_dato;
reg [31:0] ar_cdato;
reg getset_age;
reg doLoad;

/*
initial begin
	for (n = 0; n < ENTRIES; n = n + 1)
	begin
		TLBVirtPage[n] = 0;
		TLBG[n] = 0;
		TLBASID[n] = 0;
		TLBD[n] = 0;
		TLBC[n] = 0;
		TLBA[n] = 0;
		TLBR[n] = 0;
		TLBW[n] = 0;
		TLBX[n] = 0;
		TLBS[n] = 0;
		TLBU[n] = 0;
		TLBValid[n] = 0;
	end
end
*/

// Assume the instruction doesn't overlap between a mapped and unmapped area.
wire unmappedArea = vadr_i[ABW-1:ABW-8]==8'hFF || !TLBenabled;
wire m1UnmappedArea = padr_o[ABW-1:ABW-8]==8'hFF || !TLBenabled;
wire hitIOPage = vadr_i[ABW-1:ABW-12]==12'hFFD;

// NOTE(review): the page size is looked up with ASID as the index into
// TLBPageSize - confirm that this is intended.
always @(posedge clk)
	PageSize <= TLBPageSize[ASID];

wire [ABW-1:0] vadrs = vadr_i[ABW-1:13] >> {PageSize,1'b0};
wire [DBW-1:0] TLBPhysPage_rdo;
wire [ABW-1:0] PFN;

// Toolset didn't like the simpler distributed code where the RAM was inferred.
// Resulted in combinatorial loop error message. Even though there weren't any
// combinatorial loops.

TLBPhysPageRam #(DBW) upgrm1
(
	.clk(clk),
	.we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
	.wa(i),
	.i(HTLBPhysPage),
	.ra0(i),
	.ra1({q[3:0],vadrs[3:0]}),
	.o0(TLBPhysPage_rdo),
	.o1(PFN)
);

wire tlbRo0,tlbRo1;
TLBRam #(1) uR
(
	.clk(clk),
	.we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
	.wa(i),
	.i(HTLBR),
	.ra0(i),
	.ra1({q[3:0],vadrs[3:0]}),
	.o0(tlbRo0),
	.o1(tlbRo1)
);

wire tlbWo0,tlbWo1;
TLBRam #(1) uW
(
	.clk(clk),
	.we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
	.wa(i),
	.i(HTLBW),
	.ra0(i),
	.ra1({q[3:0],vadrs[3:0]}),
	.o0(tlbWo0),
	.o1(tlbWo1)
);

wire tlbXo0,tlbXo1;
TLBRam #(1) uX
(
	.clk(clk),
	.we(state==TWO && (op==`TLB_WR || op==`TLB_WI)),
	.wa(i),
	.i(HTLBX),
	.ra0(i),
	.ra1({q[3:0],vadrs[3:0]}),
	.o0(tlbXo0),
	.o1(tlbXo1)
);

// Free-running down counter that reloads from age_lmt; drives age_tick.
always @(posedge clk)
if (rst) begin
	age_ctr <= 24'd0;
end
else begin
	if (age_ctr==24'd0)
		age_ctr <= age_lmt;
	else
		age_ctr <= age_ctr - 4'd1;
end

// Handle Random register
always @(posedge clk)
if (rst) begin
	Random <= 4'hF;
end
else begin
	if (Random==Wired)
		Random <= 4'hF;
	else
		Random <= Random - 4'd1;
	// Why would we want to update since random changes on the next clock
	// anyways ?
	if (state==ONE) begin
		if (op==`TLB_WRREG && regno==`TLBRandom)
			Random <= dati[3:0];
	end
end

// Command state machine
always @(posedge clk)
if (rst) begin
	state <= IDLE;
end
else begin
case(state)
IDLE:
	if (ld)
		state <= ONE;
ONE:
	if (op==`TLB_RDAGE || op==`TLB_WRAGE) begin
		if (getset_age)
			state <= TWO;
	end
	else
		state <= TWO;
TWO:
	state <= IDLE;
default:
	state <= IDLE;
endcase
end

// Set index to page table
always @(posedge clk)
if (rst) begin
	i <= 8'd0;
end
else begin
	if (state==ONE) begin
		case(op)
		`TLB_RD,`TLB_WI:
			i <= {Index[7:4],(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
		`TLB_WR:
			i <= {Random,(HTLBVirtPage >> {HTLBPageSize,1'b0}) & 4'hF};
		endcase
	end
end

// Register writes (state ONE) and entry read/write (state TWO)
always @(posedge clk)
if (rst) begin
	TLBenabled <= 1'b0;
	Wired <= 4'd0;
	PageTblAddr <= {DBW{1'b0}};
	PageTblCtrl <= {DBW{1'b0}};
	age_lmt <= 24'd20000;
end
else begin
	// Capture the first missing address; software clears it by writing zero.
	if (miss_addr == {DBW{1'b0}} && TLBMiss)
		miss_addr <= vadr_i;

	if (state==ONE) begin
		case(op)
		`TLB_WRREG:
			begin
				case(regno)
				`TLBWired:		Wired <= dati[2:0];
				`TLBIndex:		Index <= dati[5:0];
				//`TLBPageSize:	PageSize <= dati[2:0];
				`TLBVirtPage:	HTLBVirtPage <= dati;
				`TLBPhysPage:	HTLBPhysPage <= dati;
				`TLBASID:	begin
							HTLBValid <= |dati[2:0];
							HTLBX <= dati[0];
							HTLBW <= dati[1];
							HTLBR <= dati[2];
							HTLBC <= dati[5:3];
							HTLBA <= dati[6];
							HTLBS <= dati[7];
							HTLBU <= dati[8];
							HTLBD <= dati[9];
							HTLBG <= dati[10];
							HTLBPageSize <= dati[13:11];
							HTLBASID <= dati[23:16];
							HTLBPL <= dati[31:24];
							end
				`TLBMissAdr:	miss_addr <= dati;
				`TLBPageTblAddr:	PageTblAddr <= dati;
				`TLBPageTblCtrl:	PageTblCtrl <= dati;
				`TLBAFC:	age_lmt <= dati[23:0];
				default: ;
				endcase
			end
		`TLB_EN:
			TLBenabled <= 1'b1;
		`TLB_DIS:
			TLBenabled <= 1'b0;
		`TLB_INVALL:
			TLBValid <= 256'd0;
		default: ;
		endcase
	end
	else if (state==TWO) begin
		case(op)
		`TLB_P:
			begin
				Index[DBW-1] <= ~|Match;
			end
		`TLB_RD:
			begin
				HTLBVirtPage <= TLBVirtPage[i];
				HTLBPhysPage <= TLBPhysPage_rdo;
				HTLBASID <= TLBASID[i];
				HTLBPL <= TLBPL[i];
				HTLBPageSize <= TLBPageSize[i];
				HTLBG <= TLBG[i];
				HTLBD <= TLBD[i];
				HTLBC <= TLBC[i];
				HTLBR <= tlbRo0;
				HTLBW <= tlbWo0;
				HTLBX <= tlbXo0;
				HTLBU <= TLBU[i];
				HTLBS <= TLBS[i];
				HTLBA <= TLBA[i];
				HTLBValid <= TLBValid[i];
			end
		`TLB_WR,`TLB_WI:
			begin
				TLBVirtPage[i] <= HTLBVirtPage;
				TLBASID[i] <= HTLBASID;
				TLBPL[i] <= HTLBPL;
				TLBPageSize[i] <= HTLBPageSize;
				TLBG[i] <= HTLBG;
				TLBD[i] <= HTLBD;
				TLBC[i] <= HTLBC;
				TLBA[i] <= HTLBA;
				TLBU[i] <= HTLBU;
				TLBS[i] <= HTLBS;
				TLBValid[i] <= HTLBValid;
			end
		default: ;
		endcase
	end

	// Set the dirty bit on a store
	if (we_i)
		if (!m1UnmappedArea & !q[4]) begin
			TLBD[{q[3:0],vadrs[3:0]}] <= 1'b1;
		end
end

// Register read-back multiplexer
always @(posedge clk)
	case(regno)
	`TLBWired:		dato <= Wired;
	`TLBIndex:		dato <= Index;
	`TLBRandom:		dato <= Random;
	`TLBPhysPage:	dato <= HTLBPhysPage;
	`TLBVirtPage:	dato <= HTLBVirtPage;
	`TLBPageSize:	dato <= PageSize;
	`TLBASID:	begin
				dato <= {DBW{1'b0}};
				dato[0] <= HTLBX;
				dato[1] <= HTLBW;
				dato[2] <= HTLBR;
				dato[5:3] <= HTLBC;
				dato[6] <= HTLBA;
				dato[7] <= HTLBS;
				dato[8] <= HTLBU;
				dato[9] <= HTLBD;
				dato[10] <= HTLBG;
				dato[13:11] <= HTLBPageSize;
				dato[23:16] <= HTLBASID;
				dato[31:24] <= HTLBPL;
				end
	`TLBMissAdr:	dato <= miss_addr;
	`TLBPageTblAddr:	dato <= PageTblAddr;
	`TLBPageTblCtrl:	dato <= PageTblCtrl;
	`TLBPageCount:	dato <= {16'd0,ar_cdato};
	default:	dato <= {DBW{1'b0}};
	endcase

TLBAgeRam uar1(clk,ar_wr,ar_adr,ar_dati,ar_dato);

// Age state machine: services age read/write commands, counts matching
// accesses, and halves one entry's word per age tick.
always @(posedge clk)
if (rst) begin
	age_adr <= 4'd0;
	ar_wr <= 1'b0;
	ar_adr <= 4'd0;
	ar_state <= IDLE;
	cyc_en <= 1'b1;
	age_en <= 1'b1;
	doLoad <= 1'b0;
end
else begin
ar_wr <= 1'b0;
getset_age <= 1'b0;
if (ld)
	doLoad <= 1'b1;
case(ar_state)
IDLE:
	begin
		if (~cyc_i)
			cyc_en <= 1'b1;
		if (~age_tick)
			age_en <= 1'b1;
		if ((ld|doLoad) && (op==`TLB_RDAGE || op==`TLB_WRAGE)) begin
			doLoad <= 1'b0;
			ar_wr <= op==`TLB_WRAGE;
			ar_adr <= i;
			ar_dati <= dati[31:0];
			ar_state <= READ;
		end
		else if (cyc_i & |Match & cyc_en) begin
			cyc_en <= 1'b0;
			ar_adr <= {q[3:0],vadrs[3:0]};
			ar_state <= INC1;
		end
		else if (age_tick & age_en) begin
			age_en <= 1'b0;
			ar_adr <= age_adr;
			age_adr <= age_adr + 4'd1;
			ar_state <= AGE1;
		end
	end
READ:
	begin
		getset_age <= 1'b1;
		ar_cdato <= ar_dato;
		ar_state <= IDLE;
	end
INC1:
	begin
		count <= ar_dato;
		ar_state <= INC2;
	end
INC2:
	begin
		// Add at 25 bits so the carry out of the 24-bit field lands in
		// count[32]. An operand inside a concatenation is self-determined,
		// so a 24-bit add drops the carry and INC3 could never saturate.
		count <= {{1'b0,count[31:8]} + 25'd1,count[7:0]};
		ar_state <= INC3;
	end
INC3:
	begin
		ar_wr <= 1'b1;
		ar_dati <= {count[32] ? 24'hFFFFFF :count[31:8],count[7:0]};
		ar_state <= IDLE;
	end
AGE1:
	begin
		count <= ar_dato;
		ar_state <= AGE2;
	end
AGE2:
	begin
		ar_wr <= 1'b1;
		ar_dati <= count >> 1;
		ar_state <= IDLE;
	end
endcase
end

// Per-way match. Every term is indexed by the way under test (n).
// Indexing TLBValid by q would feed the priority encoder's own output back
// into its input and would test the wrong way's valid bit.
always @*
for (n = 0; n < 16; n = n + 1)
	Match[n[3:0]] = (vadrs[ABW-1:4]==TLBVirtPage[{n[3:0],vadrs[3:0]}]) &&
				((TLBASID[{n[3:0],vadrs[3:0]}]==ASID) || TLBG[{n[3:0],vadrs[3:0]}]) &&
				TLBValid[{n[3:0],vadrs[3:0]}];

// Priority encoder: lowest matching way wins; q[4]=1 (31) means no match.
always @*
begin
	q = 5'd31;
	for (n = 15; n >= 0; n = n - 1)
		if (Match[n]) q = n;
end

assign uncached = TLBC[{q[3:0],vadrs[3:0]}]==3'd1;// || unmappedDataArea;

assign TLBMiss = TLBenabled & (!unmappedArea & (q[4] | ~TLBValid[{q[3:0],vadrs[3:0]}]) ||
					(ol!=2'b00 && hitIOPage));

always @(posedge clk)
	cyc_o <= cyc_i & (~TLBMiss | ~TLBenabled);

always @(posedge clk)
	we_o <= we_i & ((~TLBMiss & tlbWo1) | ~TLBenabled);

always @(posedge clk)
	wrv_o <= we_i & ~TLBMiss & ~tlbWo1 & TLBenabled;

always @(posedge clk)
	rdv_o <= ~we_i & ~TLBMiss & ~tlbRo1 & TLBenabled;

always @(posedge clk)
	exv_o <= icl_i & ~TLBMiss & ~tlbXo1 & TLBenabled;

// Physical address output. The low 13 bits pass through unchanged; the
// page-number bits are selected by page size.
always @(posedge clk)
if (rst)
	padr_o <= 32'hFFFC0100;
else begin
if (TLBenabled && ol != 2'b00) begin
	case(PageSize)
	3'd0:	padr_o[ABW-1:13] <= unmappedArea ? vadr_i[ABW-1:13] : TLBMiss ? `TLBMissPage: PFN;
	3'd1:	padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:15] : TLBMiss ? `TLBMissPage: PFN,vadr_i[14:13]};
	3'd2:	padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:17] : TLBMiss ? `TLBMissPage: PFN,vadr_i[16:13]};
	3'd3:	padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:19] : TLBMiss ? `TLBMissPage: PFN,vadr_i[18:13]};
	3'd4:	padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:21] : TLBMiss ? `TLBMissPage: PFN,vadr_i[20:13]};
	3'd5:	padr_o[ABW-1:13] <= {unmappedArea ? vadr_i[ABW-1:23] : TLBMiss ? `TLBMissPage: PFN,vadr_i[22:13]};
	default:	padr_o[ABW-1:13] <= vadr_i[ABW-1:13];
	endcase
	padr_o[12:0] <= vadr_i[12:0];
end
else
	padr_o <= vadr_i;
end

endmodule

// ----------------------------------------------------------------------------
// TLBRam
// 256 x DBW memory with one clocked write port (we/wa/i) and two
// asynchronous read ports (ra0->o0, ra1->o1).
// ----------------------------------------------------------------------------
module TLBRam(clk,we,wa,i,ra0,ra1,o0,o1);
parameter DBW=1;
input clk;
input we;
input [7:0] wa;
input [DBW-1:0] i;
input [7:0] ra0;
input [7:0] ra1;
output [DBW-1:0] o0;
output [DBW-1:0] o1;

reg [DBW-1:0] mem [0:255];

always @(posedge clk)
	if (we)
		mem[wa] <= i;

assign o0 = mem[ra0];
assign o1 = mem[ra1];

endmodule

// ----------------------------------------------------------------------------
// TLBPhysPageRam
// 256 x DBW memory with one clocked write port (we/wa/i) and two
// asynchronous read ports (ra0->o0, ra1->o1).
// ----------------------------------------------------------------------------
module TLBPhysPageRam(clk,we,wa,i,ra0,ra1,o0,o1);
parameter DBW=64;
input clk;
input we;
input [7:0] wa;
input [DBW-1:0] i;
input [7:0] ra0;
input [7:0] ra1;
output [DBW-1:0] o0;
output [DBW-1:0] o1;

reg [DBW-1:0] mem [0:255];

always @(posedge clk)
	if (we)
		mem[wa] <= i;

assign o0 = mem[ra0];
assign o1 = mem[ra1];

endmodule

// ----------------------------------------------------------------------------
// TLBAgeRam
// 256 x DBW single-port memory: clocked write and asynchronous read, both
// at address `a`.
// ----------------------------------------------------------------------------
module TLBAgeRam(clk,we,a,i,o);
parameter DBW=32;
input clk;
input we;
input [7:0] a;
input [DBW-1:0] i;
output [DBW-1:0] o;

reg [DBW-1:0] mem [0:255];

always @(posedge clk)
	if (we)
		mem[a] <= i;

assign o = mem[a];

endmodule

Index: thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf.v =================================================================== --- thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf.v (nonexistent) +++ thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf.v (revision 60) @@ -0,0 +1,1200 @@ +// ============================================================================ +// __
+// \\__/ o\ (C) 2017-2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_fetchbuf.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ============================================================================ +// +`include "FT64_config.vh" +`include "FT64_defines.vh" + +// FETCH +// +// fetch exactly two instructions from memory into the fetch buffer +// unless either one of the buffers is still full, in which case we +// do nothing (kinda like alpha approach) +// Like to turn this into an independent module at some point. 
+// +module FT64_fetchbuf(rst, clk4x, clk, fcu_clk, + cs_i, cyc_i, stb_i, ack_o, we_i, adr_i, dat_i, + cmpgrp, + freezePC, thread_en, + regLR, + insn0, insn1, phit, + threadx, + branchmiss, misspc, branchmiss_thrd, predict_taken0, predict_taken1, + predict_takenA, predict_takenB, predict_takenC, predict_takenD, + queued1, queued2, queuedNop, + pc0, pc1, fetchbuf, fetchbufA_v, fetchbufB_v, fetchbufC_v, fetchbufD_v, + fetchbufA_instr, fetchbufA_pc, + fetchbufB_instr, fetchbufB_pc, + fetchbufC_instr, fetchbufC_pc, + fetchbufD_instr, fetchbufD_pc, + fetchbuf0_instr, fetchbuf1_instr, fetchbuf0_insln, fetchbuf1_insln, + fetchbuf0_thrd, fetchbuf1_thrd, + fetchbuf0_pc, fetchbuf1_pc, + fetchbuf0_v, fetchbuf1_v, + codebuf0, codebuf1, + btgtA, btgtB, btgtC, btgtD, + nop_fetchbuf, + take_branch0, take_branch1, + stompedRets, + panic +); +parameter AMSB = `AMSB; +parameter RSTPC = 64'hFFFC0100; +parameter TRUE = 1'b1; +parameter FALSE = 1'b0; +input rst; +input clk4x; +input clk; +input fcu_clk; +input cs_i; +input cyc_i; +input stb_i; +output ack_o; +input we_i; +input [15:0] adr_i; +input [47:0] dat_i; +input [2:0] cmpgrp; +input freezePC; +input thread_en; +input [4:0] regLR; +input [47:0] insn0; +input [47:0] insn1; +input phit; +output threadx; +input branchmiss; +input [AMSB:0] misspc; +input branchmiss_thrd; +output predict_taken0; +output predict_taken1; +input predict_takenA; +input predict_takenB; +input predict_takenC; +input predict_takenD; +input queued1; +input queued2; +input queuedNop; +output reg [AMSB:0] pc0; +output reg [AMSB:0] pc1; +output reg fetchbuf; +output reg fetchbufA_v; +output reg fetchbufB_v; +output reg fetchbufC_v; +output reg fetchbufD_v; +output fetchbuf0_thrd; +output fetchbuf1_thrd; +output reg [47:0] fetchbufA_instr; +output reg [47:0] fetchbufB_instr; +output reg [47:0] fetchbufC_instr; +output reg [47:0] fetchbufD_instr; +output reg [AMSB:0] fetchbufA_pc; +output reg [AMSB:0] fetchbufB_pc; +output reg [AMSB:0] fetchbufC_pc; +output reg 
[AMSB:0] fetchbufD_pc; +output [47:0] fetchbuf0_instr; +output [47:0] fetchbuf1_instr; +output [AMSB:0] fetchbuf0_pc; +output [AMSB:0] fetchbuf1_pc; +output [2:0] fetchbuf0_insln; +output [2:0] fetchbuf1_insln; +output fetchbuf0_v; +output fetchbuf1_v; +input [47:0] codebuf0; +input [47:0] codebuf1; +input [AMSB:0] btgtA; +input [AMSB:0] btgtB; +input [AMSB:0] btgtC; +input [AMSB:0] btgtD; +input [3:0] nop_fetchbuf; +output take_branch0; +output take_branch1; +input [3:0] stompedRets; +output reg [3:0] panic; +integer n; + +//`include "FT64_decode.vh" + +function IsBranch; +input [47:0] isn; +casex(isn[`INSTRUCTION_OP]) +`Bcc: IsBranch = TRUE; +`BBc: IsBranch = TRUE; +`BEQI: IsBranch = TRUE; +`BCHK: IsBranch = TRUE; +default: IsBranch = FALSE; +endcase +endfunction + +function IsJAL; +input [47:0] isn; +IsJAL = isn[`INSTRUCTION_OP]==`JAL; +endfunction + +function IsJmp; +input [47:0] isn; +IsJmp = isn[`INSTRUCTION_OP]==`JMP; +endfunction + +function IsCall; +input [47:0] isn; +IsCall = isn[`INSTRUCTION_OP]==`CALL; +endfunction + +function IsRet; +input [47:0] isn; +IsRet = isn[`INSTRUCTION_OP]==`RET; +endfunction + +function IsBrk; +input [47:0] isn; +IsBrk = isn[`INSTRUCTION_OP]==`BRK; +endfunction + +function IsRTI; +input [47:0] isn; +IsRTI = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_S2]==`RTI; +endfunction + + +function [2:0] fnInsLength; +input [47:0] ins; +`ifdef SUPPORT_DCI +if (ins[`INSTRUCTION_OP]==`CMPRSSD) + fnInsLength = 3'd2; +else +`endif + case(ins[7:6]) + 2'd0: fnInsLength = 3'd4; + 2'd1: fnInsLength = 3'd6; + default: fnInsLength = 3'd2; + endcase +endfunction + +wire [2:0] fetchbufA_inslen; +wire [2:0] fetchbufB_inslen; +wire [2:0] fetchbufC_inslen; +wire [2:0] fetchbufD_inslen; +FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen); +FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen); +FT64_InsLength uilC (fetchbufC_instr, fetchbufC_inslen); +FT64_InsLength uilD (fetchbufD_instr, fetchbufD_inslen); + +wire [47:0] xinsn0; +wire [47:0] 
xinsn1; + +FT64_iexpander ux1 +( + .cinstr(insn0[15:0]), + .expand(xinsn0) +); +FT64_iexpander ux2 +( + .cinstr(insn1[15:0]), + .expand(xinsn1) +); + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// Table of decompressed instructions. +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +assign ack_o = cs_i & cyc_i & stb_i; +`ifdef SUPPORT_DCI +reg [47:0] DecompressTable [0:2047]; +always @(posedge clk) + if (cs_i & cyc_i & stb_i & we_i) + DecompressTable[adr_i[12:3]] <= dat_i[47:0]; +wire [47:0] expand0 = DecompressTable[{cmpgrp,insn0[15:8]}]; +wire [47:0] expand1 = DecompressTable[{cmpgrp,insn1[15:8]}]; +`endif + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +reg thread; +reg stompedRet; +reg ret0Counted, ret1Counted; +wire [AMSB:0] retpc0, retpc1; + +reg did_branchback0; +reg did_branchback1; + +assign predict_taken0 = (fetchbuf==1'b0) ? predict_takenA : predict_takenC; +assign predict_taken1 = (fetchbuf==1'b0) ? predict_takenB : predict_takenD; + +reg [AMSB:0] branch_pcA; +reg [AMSB:0] branch_pcB; +reg [AMSB:0] branch_pcC; +reg [AMSB:0] branch_pcD; + +always @* +case(fetchbufA_instr[`INSTRUCTION_OP]) +`RET: branch_pcA = retpc0; +`JMP,`CALL: branch_pcA = fetchbufA_instr[6] ? {fetchbufA_instr[39:8],1'b0} : {fetchbufA_pc[31:25],fetchbufA_instr[31:8],1'b0}; +`R2: branch_pcA = btgtA; // RTI +`BRK,`JAL: branch_pcA = btgtA; +default: + begin + branch_pcA[31:8] = fetchbufA_pc[31:8] + + (fetchbufA_instr[7:6]==2'b01 ? {{4{fetchbufA_instr[47]}},fetchbufA_instr[47:28]} : {{20{fetchbufA_instr[31]}},fetchbufA_instr[31:28]}); + branch_pcA[7:0] = {fetchbufA_instr[27:23],fetchbufA_instr[17:16],1'b0}; + branch_pcA[63:32] = fetchbufA_pc[63:32]; + end +endcase + +always @* +case(fetchbufB_instr[`INSTRUCTION_OP]) +`RET: branch_pcB = retpc1; +`JMP,`CALL: branch_pcB = fetchbufB_instr[6] ? 
{fetchbufB_instr[39:8],1'b0} : {fetchbufB_pc[31:25],fetchbufB_instr[31:8],1'b0}; +`R2: branch_pcB = btgtB; // RTI +`BRK,`JAL: branch_pcB = btgtB; +default: + begin + branch_pcB[31:8] = fetchbufB_pc[31:8] + + (fetchbufB_instr[7:6]==2'b01 ? {{4{fetchbufB_instr[47]}},fetchbufB_instr[47:28]} : {{20{fetchbufB_instr[31]}},fetchbufB_instr[31:28]}); + branch_pcB[7:0] = {fetchbufB_instr[27:23],fetchbufB_instr[17:16],1'b0}; + branch_pcB[63:32] = fetchbufB_pc[63:32]; + end +endcase + +always @* +case(fetchbufC_instr[`INSTRUCTION_OP]) +`RET: branch_pcC = retpc0; +`JMP,`CALL: branch_pcC = fetchbufC_instr[6] ? {fetchbufC_instr[39:8],1'b0} : {fetchbufC_pc[31:25],fetchbufC_instr[31:8],1'b0}; +`R2: branch_pcC = btgtC; // RTI +`BRK,`JAL: branch_pcC = btgtC; +default: + begin + branch_pcC[31:8] = fetchbufC_pc[31:8] + + (fetchbufC_instr[7:6]==2'b01 ? {{4{fetchbufC_instr[47]}},fetchbufC_instr[47:28]} : {{20{fetchbufC_instr[31]}},fetchbufC_instr[31:28]}); + branch_pcC[7:0] = {fetchbufC_instr[27:23],fetchbufC_instr[17:16],1'b0}; + branch_pcC[63:32] = fetchbufC_pc[63:32]; + end +endcase + +always @* +case(fetchbufD_instr[`INSTRUCTION_OP]) +`RET: branch_pcD = retpc1; +`JMP,`CALL: branch_pcD = fetchbufD_instr[6] ? {fetchbufD_instr[39:8],1'b0} : {fetchbufD_pc[31:25],fetchbufD_instr[31:8],1'b0}; +`R2: branch_pcD = btgtD; // RTI +`BRK,`JAL: branch_pcD = btgtD; +default: + begin + branch_pcD[31:8] = fetchbufD_pc[31:8] + + (fetchbufD_instr[7:6]==2'b01 ? 
{{4{fetchbufD_instr[47]}},fetchbufD_instr[47:28]} : {{20{fetchbufD_instr[31]}},fetchbufD_instr[31:28]}); + branch_pcD[7:0] = {fetchbufD_instr[27:23],fetchbufD_instr[17:16],1'b0}; + branch_pcD[63:32] = fetchbufD_pc[63:32]; + end +endcase + +wire take_branchA = ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE}) || + ((IsRet(fetchbufA_instr)||IsJmp(fetchbufA_instr)||IsCall(fetchbufA_instr)|| + IsRTI(fetchbufA_instr)|| IsBrk(fetchbufA_instr) || IsJAL(fetchbufA_instr)) && + fetchbufA_v); +wire take_branchB = ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE}) || + ((IsRet(fetchbufB_instr)|IsJmp(fetchbufB_instr)|IsCall(fetchbufB_instr) || + IsRTI(fetchbufB_instr)|| IsBrk(fetchbufB_instr) || IsJAL(fetchbufB_instr)) && + fetchbufB_v); +wire take_branchC = ({fetchbufC_v, IsBranch(fetchbufC_instr), predict_takenC} == {`VAL, `TRUE, `TRUE}) || + ((IsRet(fetchbufC_instr)|IsJmp(fetchbufC_instr)|IsCall(fetchbufC_instr) || + IsRTI(fetchbufC_instr)|| IsBrk(fetchbufC_instr) || IsJAL(fetchbufC_instr)) && + fetchbufC_v); +wire take_branchD = ({fetchbufD_v, IsBranch(fetchbufD_instr), predict_takenD} == {`VAL, `TRUE, `TRUE}) || + ((IsRet(fetchbufD_instr)|IsJmp(fetchbufD_instr)|IsCall(fetchbufD_instr) || + IsRTI(fetchbufD_instr)|| IsBrk(fetchbufD_instr) || IsJAL(fetchbufD_instr)) && + fetchbufD_v); + +assign take_branch0 = fetchbuf==1'b0 ? take_branchA : take_branchC; +assign take_branch1 = fetchbuf==1'b0 ? take_branchB : take_branchD; +wire take_branch = take_branch0 || take_branch1; +/* +always @* +begin + pc0 <= thread_en ? (fetchbuf ? pc0b : pc0a) : pc0a; + pc1 <= thread_en ? (fetchbuf ? 
pc1b : pc1a) : pc1a; +end +*/ +assign threadx = fetchbuf; + +`ifdef FCU_ENH +FT64_RSB #(AMSB) ursb1 +( + .rst(rst), + .clk(fcu_clk), + .regLR(regLR), + .queued1(queued1), + .queued2(queued2), + .fetchbuf0_v(fetchbuf0_v), + .fetchbuf0_pc(fetchbuf0_pc), + .fetchbuf0_instr(fetchbuf0_instr), + .fetchbuf1_v(fetchbuf1_v), + .fetchbuf1_pc(fetchbuf1_pc), + .fetchbuf1_instr(fetchbuf1_instr), + .stompedRets(stompedRets), + .stompedRet(stompedRet), + .pc(retpc0) +); + +FT64_RSB #(AMSB) ursb2 +( + .rst(rst), + .clk(fcu_clk), + .regLR(regLR), + .queued1(queued1), + .queued2(1'b0), + .fetchbuf0_v(fetchbuf1_v), + .fetchbuf0_pc(fetchbuf1_pc), + .fetchbuf0_instr(fetchbuf1_instr), + .fetchbuf1_v(1'b0), + .fetchbuf1_pc(32'h00000000), + .fetchbuf1_instr(`NOP_INSN), + .stompedRets(stompedRets[3:1]), + .stompedRet(stompedRet), + .pc(retpc1) +); +`else +assign retpc0 = RSTPC; +assign retpc1 = RSTPC; +`endif + +wire peclk, neclk; +edge_det ued1 (.rst(rst), .clk(clk4x), .ce(1'b1), .i(clk), .pe(peclk), .ne(neclk), .ee()); + +always @(posedge clk) +if (rst) begin + pc0 <= RSTPC; + pc1 <= RSTPC; + fetchbufA_v <= 0; + fetchbufB_v <= 0; + fetchbufC_v <= 0; + fetchbufD_v <= 0; + fetchbuf <= 0; + panic <= `PANIC_NONE; +end +else begin + + did_branchback0 <= take_branch0; + did_branchback1 <= take_branch1; + + stompedRet = FALSE; + + begin + + // On a branch miss with threading enabled all fectch buffers are + // invalidated even though the data in the fetch buffer would be valid + // for the thread that isn't in a branchmiss state. This is done to + // keep things simple. For the thread that doesn't miss the current + // data for the fetch buffer needs to be retrieved again, so the pc + // for that thread is assigned the current fetchbuf pc. + // For the thread that misses the pc is simply assigned the misspc. 
+ if (branchmiss) begin + $display("***********"); + $display("Branch miss"); + $display("***********"); + if (branchmiss_thrd) begin + pc1 <= misspc; + fetchbufB_v <= `INV; + fetchbufD_v <= `INV; + end + else begin + pc0 <= misspc; + if (thread_en) begin + fetchbufA_v <= `INV; + fetchbufC_v <= `INV; + end + else begin + fetchbufA_v <= `INV; + fetchbufB_v <= `INV; + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + fetchbuf <= 1'b0; + end + end + $display("********************"); + $display("********************"); + $display("********************"); + $display("Branch miss"); + $display("misspc=%h", misspc); + $display("********************"); + $display("********************"); + $display("********************"); + end + else if (take_branch) begin + + // update the fetchbuf valid bits as well as fetchbuf itself + // ... this must be based on which things are backwards branches, how many things + // will get enqueued (0, 1, or 2), and how old the instructions are + if (fetchbuf == 1'b0) case ({fetchbufA_v, fetchbufB_v, fetchbufC_v, fetchbufD_v}) + + 4'b0000: ; // do nothing + 4'b0001: if (thread_en) FetchC(); + 4'b0010: if (thread_en) FetchD(); + 4'b0011: ; + 4'b0100 : + begin + if (thread_en) begin + FetchC(); + pc1 <= branch_pcB; + end + else + pc0 <= branch_pcB; + fetchbufB_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b0101: + begin + if (thread_en) begin + pc1 <= branch_pcB; + FetchC(); + end + else + pc0 <= branch_pcB; + fetchbufD_v <= `INV; + fetchbufB_v <= !(queued1|queuedNop); + end + 4'b0110: + begin + if (thread_en) + pc1 <= branch_pcB; + else begin + pc0 <= branch_pcB; + fetchbufC_v <= `INV; + end + fetchbufB_v <= !(queued1|queuedNop); + end + 4'b0111: + begin + if (thread_en) begin + pc1 <= branch_pcB; + fetchbufD_v <= `INV; + end + else begin + pc0 <= branch_pcB; + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + end + fetchbufB_v <= !(queued1|queuedNop); // if it can be queued, it will + 
fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1000 : + begin + if (thread_en) FetchD(); + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1001: + begin + pc0 <= branch_pcA; + if (!thread_en) + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1010: + begin + pc0 <= branch_pcA; + fetchbufC_v <= `INV; + if (thread_en) FetchD(); + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1011: + begin + pc0 <= branch_pcA; + fetchbufC_v <= `INV; + if (!thread_en) + fetchbufD_v <= `INV; + fetchbufA_v <=!(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1100: + if (thread_en) begin + if (take_branchA && take_branchB) begin + pc0 <= branch_pcA; + pc1 <= branch_pcB; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchA) begin + FetchD(); + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + FetchC(); + pc1 <= branch_pcB; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + end + else begin + if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= `INV; + if ((queued1|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + pc0 <= branch_pcB; + fetchbufA_v <= 
!(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + // else hardware error + end + 4'b1101: + if (thread_en) begin + if (take_branchA && take_branchB) begin + pc0 <= branch_pcA; + pc1 <= branch_pcB; + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + FetchC(); + pc1 <= branch_pcB; + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + end + else begin + fetchbufD_v <= `INV; + if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= `INV; + if ((queued1|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + pc0 <= branch_pcB; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + // else hardware error + end + 4'b1110: + if (thread_en) begin + if (take_branchA && take_branchB) begin + pc0 <= branch_pcA; + pc1 <= branch_pcB; + fetchbufC_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchA) begin + FetchD(); + pc0 <= branch_pcA; + fetchbufC_v <= `INV; + 
fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + pc1 <= branch_pcB; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + end + else begin + fetchbufC_v <= `INV; + if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= `INV; + if ((queued1|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchB) begin + pc0 <= branch_pcB; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + // else hardware error + end + 4'b1111: + begin + if (thread_en) begin + if (take_branchA & take_branchB) begin + pc0 <= branch_pcA; + pc1 <= branch_pcB; + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued2|queuedNop); + end + else if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufC_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued2|queuedNop); + end + else if (take_branchB) begin + pc1 <= branch_pcB; + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued2|queuedNop); + end + end + else begin + if (take_branchA) begin + pc0 <= branch_pcA; + fetchbufB_v <= `INV; + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + fetchbufA_v 
<= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + else if (take_branchB) begin + pc0 <= branch_pcB; + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + fetchbufA_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufB_v <= !(queued2|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued2|queuedNop); + end + end + end + default: ; + endcase + else case ({fetchbufC_v, fetchbufD_v, fetchbufA_v, fetchbufB_v}) + + 4'b0000: ; // do nothing + 4'b0001: if (thread_en) FetchA(); + 4'b0010: if (thread_en) FetchB(); + 4'b0011: ; + 4'b0100 : + begin + if (thread_en) begin + FetchA(); + pc1 <= branch_pcD; + end + else + pc0 <= branch_pcD; + fetchbufD_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b0101: + begin + if (thread_en) begin + pc1 <= branch_pcD; + FetchA(); + end + else + pc0 <= branch_pcD; + fetchbufB_v <= `INV; + fetchbufD_v <= !(queued1|queuedNop); + end + 4'b0110: + begin + if (thread_en) + pc1 <= branch_pcD; + else begin + pc0 <= branch_pcD; + fetchbufA_v <= `INV; + end + fetchbufD_v <= !(queued1|queuedNop); + end + 4'b0111: + begin + if (thread_en) begin + pc1 <= branch_pcD; + fetchbufB_v <= `INV; + end + else begin + pc0 <= branch_pcD; + fetchbufA_v <= `INV; + fetchbufB_v <= `INV; + end + fetchbufD_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1000 : + begin + if (thread_en) FetchB(); + pc0 <= branch_pcC; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1001: + begin + pc0 <= branch_pcC; + if (!thread_en) + fetchbufB_v <= `INV; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1010: + begin + pc0 <= branch_pcC; + fetchbufA_v <= `INV; + if (thread_en) FetchB(); + fetchbufC_v <= 
!(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1011: + begin + pc0 <= branch_pcC; + fetchbufA_v <= `INV; + if (!thread_en) + fetchbufB_v <= `INV; + fetchbufC_v <=!(queued1|queuedNop); // if it can be queued, it will + fetchbuf <= fetchbuf + (queued1|queuedNop); + end + 4'b1100: + if (thread_en) begin + if (take_branchC && take_branchD) begin + pc0 <= branch_pcC; + pc1 <= branch_pcD; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchC) begin + FetchB(); + pc0 <= branch_pcC; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchD) begin + FetchA(); + pc1 <= branch_pcD; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + end + else begin + if (take_branchC) begin + pc0 <= branch_pcC; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= `INV; + if ((queued1|queuedNop)) fetchbuf <= 1'b1; + end + else if (take_branchD) begin + pc0 <= branch_pcD; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + // else hardware error + end + 4'b1101: + if (thread_en) begin + if (take_branchC && take_branchD) begin + pc0 <= branch_pcC; + pc1 <= branch_pcD; + fetchbufB_v <= `INV; + fetchbufC_v <= !(queued1|queuedNop); // if it can be queued, it will + fetchbufD_v <= !(queued2|queuedNop); // if it can be queued, it will + if ((queued2|queuedNop)) fetchbuf <= 1'b1; + end + else if 
(take_branchC) begin
+					// Reached when C branches but C and D do not both branch:
+					// redirect pc0 to buffer C's target.
+					pc0 <= branch_pcC;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+				// Fix: this arm used to re-test take_branchC, which made it
+				// unreachable, so a branch taken only from buffer D never
+				// redirected pc1. The matching arm in the fetchbuf==0 half of
+				// this case statement tests take_branchB (the slot-1 buffer).
+				else if (take_branchD) begin
+					FetchA();
+					pc1 <= branch_pcD;
+					fetchbufB_v <= `INV;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+			end
+			else begin
+				// Threading disabled: both buffers feed the single pc0 stream,
+				// so the stale buffer B is always dropped on a taken branch.
+				fetchbufB_v <= `INV;
+				if (take_branchC) begin
+					pc0 <= branch_pcC;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= `INV;
+					if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+				end
+				else if (take_branchD) begin
+					pc0 <= branch_pcD;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+				// else hardware error
+			end
+		// {fetchbufC_v, fetchbufD_v, fetchbufA_v, fetchbufB_v} == 1110:
+		// C, D and A are valid, B is empty.
+		4'b1110:
+			if (thread_en) begin
+				if (take_branchC && take_branchD) begin
+					pc0 <= branch_pcC;
+					pc1 <= branch_pcD;
+					fetchbufA_v <= `INV;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+				else if (take_branchC) begin
+					FetchB();
+					pc0 <= branch_pcC;
+					fetchbufA_v <= `INV;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+				else if (take_branchD) begin
+					pc1 <= branch_pcD;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+			end
+			else begin
+				fetchbufA_v <= `INV;
+				if (take_branchC) begin
+					pc0 <= branch_pcC;
+					fetchbufC_v <=
!(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= `INV;
+					if ((queued1|queuedNop)) fetchbuf <= 1'b1;
+				end
+				else if (take_branchD) begin
+					pc0 <= branch_pcD;
+					fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+					fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+					if ((queued2|queuedNop)) fetchbuf <= 1'b1;
+				end
+				// else hardware error
+			end
+		// {fetchbufC_v, fetchbufD_v, fetchbufA_v, fetchbufB_v} == 1111:
+		// all four buffers hold valid data.
+		4'b1111:
+			begin
+				if (thread_en) begin
+					if (take_branchC & take_branchD) begin
+						pc0 <= branch_pcC;
+						pc1 <= branch_pcD;
+						fetchbufA_v <= `INV;
+						fetchbufB_v <= `INV;
+						fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+						fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+						fetchbuf <= fetchbuf + (queued2|queuedNop);
+					end
+					else if (take_branchC) begin
+						// Fix: redirect pc0 to buffer C's own target. This arm
+						// used to load branch_pcD, sending the pc0 stream to the
+						// other buffer's target. The matching arm in the
+						// fetchbuf==0 half pairs take_branchA with branch_pcA.
+						pc0 <= branch_pcC;
+						fetchbufA_v <= `INV;
+						fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+						fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+						fetchbuf <= fetchbuf + (queued2|queuedNop);
+					end
+					else if (take_branchD) begin
+						pc1 <= branch_pcD;
+						fetchbufB_v <= `INV;
+						fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+						fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+						fetchbuf <= fetchbuf + (queued2|queuedNop);
+					end
+				end
+				else begin
+					// Threading disabled: a taken branch in C also discards D,
+					// while a taken branch in D keeps C eligible for queuing.
+					if (take_branchC) begin
+						pc0 <= branch_pcC;
+						fetchbufD_v <= `INV;
+						fetchbufA_v <= `INV;
+						fetchbufB_v <= `INV;
+						fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+						fetchbuf <= fetchbuf + (queued1|queuedNop);
+					end
+					else if (take_branchD) begin
+						pc0 <= branch_pcD;
+						fetchbufA_v <= `INV;
+						fetchbufB_v <= `INV;
+						fetchbufC_v <= !(queued1|queuedNop);	// if it can be queued, it will
+						fetchbufD_v <= !(queued2|queuedNop);	// if it can be queued, it will
+						fetchbuf <= fetchbuf + (queued2|queuedNop);
+					end
+				end
+			end
+		default: ;
+		endcase
+	end // if branchback
+
+	else begin	// there is no branchback in the system
+		//
+		// update fetchbufX_v
and fetchbuf ... relatively simple, as + // there are no backwards branches in the mix + if (fetchbuf == 1'b0) case ({fetchbufA_v, fetchbufB_v, (queued1|queuedNop), (queued2|queuedNop)}) + 4'b00_00 : ; // do nothing + 4'b00_01: ; + 4'b00_10: ; + 4'b00_11: ; + 4'b01_00: ; // do nothing + 4'b01_01: ; + 4'b01_10, + 4'b01_11: + begin // enqueue fbB and flip fetchbuf + fetchbufB_v <= `INV; + fetchbuf <= ~fetchbuf; + end + 4'b10_00: ; // do nothing + 4'b10_01: ; + 4'b10_10, + 4'b10_11: + begin // enqueue fbA and flip fetchbuf + fetchbufA_v <= `INV; + fetchbuf <= ~fetchbuf; + end + 4'b11_00: ; // do nothing + 4'b11_01: ; + 4'b11_10: + begin // enqueue fbA but leave fetchbuf + fetchbufA_v <= `INV; + end + 4'b11_11: + begin // enqueue both and flip fetchbuf + fetchbufA_v <= `INV; + fetchbufB_v <= `INV; + fetchbuf <= ~fetchbuf; + end + default: panic <= `PANIC_INVALIDIQSTATE; + endcase + else case ({fetchbufC_v, fetchbufD_v, (queued1|queuedNop), (queued2|queuedNop)}) + 4'b00_00 : ; // do nothing + 4'b00_01: ; + 4'b00_10 : ; // do nothing + 4'b00_11 : ; // do nothing + 4'b01_00 : ; // do nothing + 4'b01_01 : ; + 4'b01_10, + 4'b01_11 : + begin // enqueue fbD and flip fetchbuf + fetchbufD_v <= `INV; + fetchbuf <= ~fetchbuf; + end + 4'b10_00 : ; // do nothing + 4'b10_01: ; + 4'b10_10, + 4'b10_11: + begin // enqueue fbC and flip fetchbuf + fetchbufC_v <= `INV; + fetchbuf <= ~fetchbuf; + end + 4'b11_00 : ; // do nothing + 4'b11_01: ; + 4'b11_10: + begin // enqueue fbC but leave fetchbuf + fetchbufC_v <= `INV; + end + 4'b11_11: + begin // enqueue both and flip fetchbuf + fetchbufC_v <= `INV; + fetchbufD_v <= `INV; + fetchbuf <= ~fetchbuf; + end + default: panic <= `PANIC_INVALIDIQSTATE; + endcase + // + // get data iff the fetch buffers are empty + // + if (fetchbufA_v == `INV && fetchbufB_v == `INV) begin + FetchAB(); + // fetchbuf steering logic correction + if (fetchbufC_v==`INV && fetchbufD_v==`INV && phit) + fetchbuf <= 1'b0; + end + else if (fetchbufC_v == `INV && fetchbufD_v 
== `INV) + FetchCD(); + end + // + // get data iff the fetch buffers are empty + // + if (fetchbufA_v == `INV && fetchbufB_v == `INV && fetchbufC_v==`INV && fetchbufD_v==`INV) begin + FetchAB(); + fetchbuf <= 1'b0; + end + end + + // The fetchbuffer is invalidated at the end of a vector instruction + // queue. + if (nop_fetchbuf[0]) fetchbufA_v <= `INV; + if (nop_fetchbuf[1]) fetchbufB_v <= `INV; + if (nop_fetchbuf[2]) fetchbufC_v <= `INV; + if (nop_fetchbuf[3]) fetchbufD_v <= `INV; +end + +assign fetchbuf0_instr = (fetchbuf == 1'b0) ? fetchbufA_instr : fetchbufC_instr; +assign fetchbuf0_insln = (fetchbuf == 1'b0) ? fetchbufA_inslen: fetchbufC_inslen; +assign fetchbuf0_v = (fetchbuf == 1'b0) ? fetchbufA_v : fetchbufC_v ; +assign fetchbuf0_pc = (fetchbuf == 1'b0) ? fetchbufA_pc : fetchbufC_pc ; +assign fetchbuf1_instr = (fetchbuf == 1'b0) ? fetchbufB_instr : fetchbufD_instr; +assign fetchbuf1_insln = (fetchbuf == 1'b0) ? fetchbufB_inslen: fetchbufD_inslen; +assign fetchbuf1_v = (fetchbuf == 1'b0) ? fetchbufB_v : fetchbufD_v ; +assign fetchbuf1_pc = (fetchbuf == 1'b0) ? 
fetchbufB_pc : fetchbufD_pc ; +assign fetchbuf0_thrd = 1'b0; +assign fetchbuf1_thrd = thread_en; + +reg [2:0] insln0, insln1; +always @* +begin +`ifdef SUPPORT_DCI + if (insn0[5:0]==`CMPRSSD) + insln0 <= 3'd2; + else +`endif + if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC) + insln0 <= fnInsLength(codebuf0); + else + insln0 <= fnInsLength(insn0); +end + +always @* +begin +`ifdef SUPPORT_DCI + if (insn1[5:0]==`CMPRSSD) + insln1 <= 3'd2; + else +`endif + if (insn1[7:6]==2'b00 && insn1[`INSTRUCTION_OP]==`EXEC) + insln1 <= fnInsLength(codebuf1); + else + insln1 <= fnInsLength(insn1); +end + +reg [47:0] cinsn0, cinsn1; + +always @* +begin +`ifdef SUPPORT_DCI + if (insn0[5:0]==`CMPRSSD) + cinsn0 <= expand0; + else +`endif + if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC) + cinsn0 <= codebuf0; + else if (insn0[7]) + cinsn0 <= xinsn0; + else + cinsn0 <= insn0; +end + +always @* +begin +`ifdef SUPPORT_DCI + if (insn1[5:0]==`CMPRSSD) + cinsn1 <= expand1; + else +`endif + if (insn1[7:6]==2'b00 && insn1[`INSTRUCTION_OP]==`EXEC) + cinsn1 <= codebuf1; + else if (insn1[7]) + cinsn1 <= xinsn1; + else + cinsn1 <= insn1; +end + +task FetchA; +begin + fetchbufA_instr <= cinsn0; + fetchbufA_v <= `VAL; + fetchbufA_pc <= pc0; + if (phit && ~freezePC) begin + if (thread_en) + pc0 <= pc0 + insln0; + else if (`WAYS > 1) + pc0 <= pc0 + insln0 + insln1; + else + pc0 <= pc0 + insln0; + end +end +endtask + +task FetchB; +begin + fetchbufB_instr <= cinsn1; + fetchbufB_v <= `WAYS > 1; + if (thread_en) + fetchbufB_pc <= pc1; + else + fetchbufB_pc <= pc0 + insln0; + if (phit & thread_en) + pc1 <= pc1 + insln1; +end +endtask + + +task FetchAB; +begin + FetchA(); + FetchB(); +end +endtask + +task FetchC; +begin + fetchbufC_instr <= cinsn0; + fetchbufC_v <= `VAL; + fetchbufC_pc <= pc0; + if (phit && ~freezePC) begin + if (thread_en) + pc0 <= pc0 + insln0; + else if (`WAYS > 1) + pc0 <= pc0 + insln0 + insln1; + else + pc0 <= pc0 + insln0; + end +end +endtask + +task FetchD; +begin + 
fetchbufD_instr <= cinsn1; + fetchbufD_v <= `WAYS > 1; + if (thread_en) + fetchbufD_pc <= pc1; + else + fetchbufD_pc <= pc0 + insln0; + if (phit & thread_en) + pc1 <= pc1 + insln1; +end +endtask + +task FetchCD; +begin + FetchC(); + FetchD(); +end +endtask + +endmodule + Index: thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf_x1.v =================================================================== --- thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf_x1.v (nonexistent) +++ thor/trunk/FT64v7/rtl/twoway/FT64_fetchbuf_x1.v (revision 60) @@ -0,0 +1,527 @@ +// ============================================================================ +// __ +// \\__/ o\ (C) 2018 Robert Finch, Waterloo +// \ __ / All rights reserved. +// \/_// robfinch@finitron.ca +// || +// +// FT64_fetchbuf_x1.v +// +// This source file is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This source file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+// +// ============================================================================ +// +`include "FT64_config.vh" +`include "FT64_defines.vh" + +// FETCH +// +// fetch exactly one instructions from memory into the fetch buffer +// unless either one of the buffers is still full, in which case we +// do nothing (kinda like alpha approach) +// +module FT64_fetchbuf_x1(rst, clk4x, clk, fcu_clk, + cs_i, cyc_i, stb_i, ack_o, we_i, adr_i, dat_i, + cmpgrp, + freezePC, thread_en, pred_on, + regLR, + insn0, phit, + threadx, + branchmiss, misspc, branchmiss_thrd, predict_taken0, + predict_takenA, predict_takenB, + queued1, queuedNop, + pc0, fetchbuf, fetchbufA_v, fetchbufB_v, + fetchbufA_instr, fetchbufA_pc, fetchbufA_pbyte, + fetchbufB_instr, fetchbufB_pc, fetchbufB_pbyte, + fetchbuf0_instr, fetchbuf0_insln, + fetchbuf0_thrd, + fetchbuf0_pc, + fetchbuf0_v, + fetchbuf0_pbyte, + codebuf0, + btgtA, btgtB, + nop_fetchbuf, + take_branch0, + stompedRets, + panic +); +parameter AMSB = `AMSB; +parameter RSTPC = 64'hFFFFFFFFFFFC0100; +parameter TRUE = 1'b1; +parameter FALSE = 1'b0; +input rst; +input clk4x; +input clk; +input fcu_clk; +input cs_i; +input cyc_i; +input stb_i; +output ack_o; +input we_i; +input [15:0] adr_i; +input [55:0] dat_i; +input [2:0] cmpgrp; +input freezePC; +input thread_en; +input pred_on; +input [4:0] regLR; +input [55:0] insn0; +input phit; +output threadx; +input branchmiss; +input [AMSB:0] misspc; +input branchmiss_thrd; +output predict_taken0; +input predict_takenA; +input predict_takenB; +input queued1; +input queuedNop; +output reg [AMSB:0] pc0; +output reg fetchbuf; +output reg fetchbufA_v; +output reg fetchbufB_v; +output fetchbuf0_thrd; +output reg [47:0] fetchbufA_instr; +output reg [7:0] fetchbufA_pbyte; +output reg [47:0] fetchbufB_instr; +output reg [7:0] fetchbufB_pbyte; +output reg [AMSB:0] fetchbufA_pc; +output reg [AMSB:0] fetchbufB_pc; +output [47:0] fetchbuf0_instr; +output [AMSB:0] fetchbuf0_pc; +output [2:0] fetchbuf0_insln; +output 
fetchbuf0_v; +output [7:0] fetchbuf0_pbyte; +input [55:0] codebuf0; +input [AMSB:0] btgtA; +input [AMSB:0] btgtB; +input [3:0] nop_fetchbuf; +output take_branch0; +input [3:0] stompedRets; +output reg [3:0] panic; +integer n; + +reg [55:0] cinsn0; + +//`include "FT64_decode.vh" + +function IsBranch; +input [47:0] isn; +casex(isn[`INSTRUCTION_OP]) +`Bcc: IsBranch = TRUE; +`BBc: IsBranch = TRUE; +`BEQI: IsBranch = TRUE; +`BCHK: IsBranch = TRUE; +default: IsBranch = FALSE; +endcase +endfunction + +function IsJAL; +input [47:0] isn; +IsJAL = isn[`INSTRUCTION_OP]==`JAL; +endfunction + +function IsJmp; +input [47:0] isn; +IsJmp = isn[`INSTRUCTION_OP]==`JMP; +endfunction + +function IsCall; +input [47:0] isn; +IsCall = isn[`INSTRUCTION_OP]==`CALL; +endfunction + +function IsRet; +input [47:0] isn; +IsRet = isn[`INSTRUCTION_OP]==`RET; +endfunction + +function IsBrk; +input [47:0] isn; +IsBrk = isn[`INSTRUCTION_OP]==`BRK; +endfunction + +function IsRTI; +input [47:0] isn; +IsRTI = isn[`INSTRUCTION_OP]==`R2 && isn[`INSTRUCTION_S2]==`RTI; +endfunction + +function [2:0] fnInsLength; +input [47:0] ins; +`ifdef SUPPORT_DCI +if (ins[`INSTRUCTION_OP]==`CMPRSSD) + fnInsLength = 3'd2 | pred_on; +else +`endif + case(ins[7:6]) + 2'd0: fnInsLength = 3'd4 | pred_on; + 2'd1: fnInsLength = 3'd6 | pred_on; + default: fnInsLength = 3'd2 | pred_on; + endcase +endfunction + +wire [2:0] fetchbufA_inslen; +wire [2:0] fetchbufB_inslen; +FT64_InsLength uilA (fetchbufA_instr, fetchbufA_inslen, pred_on); +FT64_InsLength uilB (fetchbufB_instr, fetchbufB_inslen, pred_on); + +wire [47:0] xinsn0; + +FT64_iexpander ux1 +( + .cinstr(pred_on ? insn0[23:8] : insn0[15:0]), + .expand(xinsn0) +); + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// Table of decompressed instructions. 
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +assign ack_o = cs_i & cyc_i & stb_i; +`ifdef SUPPORT_DCI +reg [47:0] DecompressTable [0:2047]; +always @(posedge clk) + if (cs_i & cyc_i & stb_i & we_i) + DecompressTable[adr_i[12:3]] <= dat_i[47:0]; +wire [47:0] expand0 = DecompressTable[{cmpgrp,pred_on ? insn0[23:16]:insn0[15:8]}]; +`endif + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +reg thread; +reg stompedRet; +reg ret0Counted; +wire [AMSB:0] retpc0; + +assign predict_taken0 = (fetchbuf==1'b0) ? ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE}) + : ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE}); + +reg [AMSB:0] branch_pcA; +reg [AMSB:0] branch_pcB; + +always @* +begin +case(fetchbufA_instr[`INSTRUCTION_OP]) +`RET: branch_pcA = retpc0; +`JMP,`CALL: + begin + branch_pcA[39:0] = fetchbufA_instr[6] ? {fetchbufA_instr[47:8]} : {fetchbufA_pc[39:24],fetchbufA_instr[31:8]}; + branch_pcA[63:40] = fetchbufA_pc[63:40]; + end +`R2: branch_pcA = btgtA; // RTI +`BRK,`JAL: branch_pcA = btgtA; +default: + begin + branch_pcA[31:8] = fetchbufA_pc[31:8] + + ((fetchbufA_instr[7:6]==2'b01) ? {{5{fetchbufA_instr[47]}},fetchbufA_instr[47:29]} : {{21{fetchbufA_instr[31]}},fetchbufA_instr[31:29]}); + branch_pcA[7:0] = {fetchbufA_instr[28:23],fetchbufA_instr[17:16]}; + branch_pcA[63:32] = fetchbufA_pc[63:32]; + end +endcase +end + +always @* +begin +case(fetchbufB_instr[`INSTRUCTION_OP]) +`RET: branch_pcB = retpc0; +`JMP,`CALL: + begin + branch_pcB[39:0] = fetchbufB_instr[6] ? {fetchbufB_instr[47:8]} : {fetchbufB_pc[39:24],fetchbufB_instr[31:8]}; + branch_pcB[63:40] = fetchbufB_pc[63:40]; + end +`R2: branch_pcB = btgtB; // RTI +`BRK,`JAL: branch_pcB = btgtB; +default: + begin + branch_pcB[31:8] = fetchbufB_pc[31:8] + + ((fetchbufB_instr[7:6]==2'b01) ? 
{{5{fetchbufB_instr[47]}},fetchbufB_instr[47:29]} : {{21{fetchbufB_instr[31]}},fetchbufB_instr[31:29]});
+        branch_pcB[7:0] = {fetchbufB_instr[28:23],fetchbufB_instr[17:16]};
+        branch_pcB[63:32] = fetchbufB_pc[63:32];
+    end
+endcase
+end
+
+// take_branchA / take_branchB: the instruction held in fetch buffer A / B
+// redirects fetch. Each is true when the buffer is valid and holds either a
+// branch that is predicted taken, or a jump or call. When FCU_ENH is defined
+// return, RTI, BRK and JAL instructions are included as well.
+wire take_branchA = ({fetchbufA_v, IsBranch(fetchbufA_instr), predict_takenA} == {`VAL, `TRUE, `TRUE}) || ((
+`ifdef FCU_ENH
+    IsRet(fetchbufA_instr)
+    || IsRTI(fetchbufA_instr)|| IsBrk(fetchbufA_instr) || IsJAL(fetchbufA_instr) ||
+`endif
+    IsJmp(fetchbufA_instr)||IsCall(fetchbufA_instr)) &&
+    fetchbufA_v);
+wire take_branchB = ({fetchbufB_v, IsBranch(fetchbufB_instr), predict_takenB} == {`VAL, `TRUE, `TRUE}) || ((
+`ifdef FCU_ENH
+    IsRet(fetchbufB_instr)
+    || IsRTI(fetchbufB_instr)|| IsBrk(fetchbufB_instr) || IsJAL(fetchbufB_instr) ||
+`endif
+    IsJmp(fetchbufB_instr)||IsCall(fetchbufB_instr)) &&
+    fetchbufB_v);
+
+// Branch decision for the buffer currently selected by fetchbuf
+// (0 selects buffer A, 1 selects buffer B).
+wire take_branch = (fetchbuf==1'b0) ? take_branchA : take_branchB;
+assign take_branch0 = take_branch;
+
+/*
+always @*
+begin
+    pc0 <= thread_en ? (fetchbuf ? pc0b : pc0a) : pc0a;
+    pc1 <= thread_en ? (fetchbuf ? pc1b : pc1a) : pc1a;
+end
+*/
+// threadx mirrors the buffer-select bit.
+assign threadx = fetchbuf;
+
+// With FCU_ENH defined an FT64_RSB instance drives retpc0 (presumably a
+// return stack buffer -- TODO confirm against FT64_RSB). Its second fetch
+// slot is tied off (invalid, RSTPC, NOP). Without FCU_ENH both return PCs
+// are tied to RSTPC.
+// NOTE(review): retpc1 is not driven in the visible part of the FCU_ENH
+// branch -- confirm it is driven elsewhere.
+`ifdef FCU_ENH
+FT64_RSB #(AMSB) ursb1
+(
+    .rst(rst),
+    .clk(fcu_clk),
+    .regLR(regLR),
+    .queued1(queued1),
+    .queued2(1'b0),
+    .fetchbuf0_v(fetchbuf0_v),
+    .fetchbuf0_pc(fetchbuf0_pc),
+    .fetchbuf0_instr(fetchbuf0_instr),
+    .fetchbuf1_v(1'b0),
+    .fetchbuf1_pc(RSTPC),
+    .fetchbuf1_instr(`NOP_INSN),
+    .stompedRets(stompedRets),
+    .stompedRet(stompedRet),
+    .pc(retpc0)
+);
+
+`else
+assign retpc0 = RSTPC;
+assign retpc1 = RSTPC;
+`endif
+
+// Edge detector on clk, clocked by clk4x. peclk / neclk are not referenced
+// in the part of the module visible below.
+wire peclk, neclk;
+edge_det ued1 (.rst(rst), .clk(clk4x), .ce(1'b1), .i(clk), .pe(peclk), .ne(neclk), .ee());
+
+// Registered copy of (take_branch & ~branchmiss): set during the clock after
+// one in which a branch was taken without a branch miss.
+reg did_branch;
+
+// Fetch-buffer state machine. Priority within a clock: reset, then branch
+// miss, then taken branch, then sequential fetch. Because non-blocking
+// assignments are used, a later assignment to the same register in this block
+// overrides an earlier one made in the same clock.
+always @(posedge clk)
+if (rst) begin
+    // Reset: start fetching at RSTPC with both buffers empty and A selected.
+    pc0 <= RSTPC;
+    fetchbufA_v <= 1'b0;
+    fetchbufB_v <= 1'b0;
+    fetchbuf <= 1'b0;
+    panic <= `PANIC_NONE;
+    did_branch <= 1'b0;
+end
+else begin
+
+    did_branch <= take_branch & ~branchmiss;
+
+    begin
+
+    // On a branch miss with threading enabled all fetch buffers are
+    // invalidated even though the data in the fetch buffer would be valid
+    // for the thread that isn't in a branchmiss state. This is done to
+    // keep things simple. For the thread that doesn't miss the current
+    // data for the fetch buffer needs to be retrieved again, so the pc
+    // for that thread is assigned the current fetchbuf pc.
+    // For the thread that misses the pc is simply assigned the misspc.
+    if (branchmiss) begin
+        pc0 <= misspc;
+        fetchbufA_v <= `INV;
+        fetchbufB_v <= `INV;
+        fetchbuf <= 1'b0;
+        $display("********************");
+        $display("********************");
+        $display("********************");
+        $display("Branch miss");
+        $display("misspc=%h", misspc);
+        $display("********************");
+        $display("********************");
+        $display("********************");
+    end
+//    else if (cinsn0[`INSTRUCTION_OP]==`CALL || cinsn0[`INSTRUCTION_OP]==`JMP) begin
+//        pc0[31:0] = cinsn0[6] ? {cinsn0[47:8]} : {pc0[31:24],cinsn0[31:8]};
+//        fetchbufA_v <= `INV;
+//        fetchbufB_v <= `INV;
+//        fetchbuf <= 1'b0;
+//    end
+    else if (take_branch) begin
+        if (fetchbuf == 1'b0) begin
+            // In this case fetchbufA must be valid, or take_branch wouldn't be.
+            case(fetchbufB_v)
+            // Buffer B is empty: redirect pc0 to A's branch target. A stays
+            // valid unless its instruction is queued this clock, in which
+            // case the select also moves to B.
+            1'b0:
+                begin
+                    pc0 <= branch_pcA;
+                    fetchbufA_v <= !(queued1|queuedNop);    // if it can be queued, it will
+                    fetchbuf <= (queued1|queuedNop);
+                end
+            // Buffer B is valid. If a branch was already taken in the
+            // previous clock, pc0 is not redirected again and B is reloaded
+            // through FetchB(); otherwise pc0 is redirected and B is
+            // discarded.
+            1'b1:
+                if (did_branch) begin
+                    fetchbufA_v <= !(queued1|queuedNop);    // if it can be queued, it will
+                    fetchbuf <= (queued1|queuedNop);
+                    FetchB();
+                end
+                else
+                begin
+                    pc0 <= branch_pcA;
+                    fetchbufA_v <= !(queued1|queuedNop);    // if it can be queued, it will
+                    fetchbufB_v <= `INV;
+                    fetchbuf <= (queued1|queuedNop);
+                end
+            endcase
+        end
+        else begin
+            // Mirror image of the case above with buffer B selected: when
+            // the instruction is queued the select returns to A (0),
+            // otherwise it stays on B (1).
+            case(fetchbufA_v)
+            1'b0:
+                begin
+                    pc0 <= branch_pcB;
+                    fetchbufB_v <= !(queued1|queuedNop);
+                    fetchbuf <= !(queued1|queuedNop);
+                end
+            1'b1:
+                if (did_branch) begin
+                    fetchbufB_v <= !(queued1|queuedNop);
+                    fetchbuf <= ~(queued1|queuedNop);
+                    FetchA();
+                end
+                else
+                begin
+                    pc0 <= branch_pcB;
+                    fetchbufB_v <= !(queued1|queuedNop);
+                    fetchbufA_v <= `INV;
+                    fetchbuf <= !(queued1|queuedNop);
+                end
+            endcase
+        end
+    end // if branch
+
+    else begin    // there is no branchback in the system
+        // update fetchbufX_v and fetchbuf ... relatively simple, as
+        // there are no backwards branches in the mix
+        // Selected buffer valid and queued (2'b11): release it and toggle
+        // the select. 2'b01 (queued while the selected buffer is invalid)
+        // falls to the default arm and raises a panic.
+        if (fetchbuf == 1'b0) case ({fetchbufA_v, (queued1|queuedNop)})
+            2'b00: ;    // do nothing
+            2'b10: ;
+            2'b11: begin fetchbufA_v <= `INV; fetchbuf <= ~fetchbuf; end
+            default: panic <= `PANIC_INVALIDIQSTATE;
+        endcase
+        else case ({fetchbufB_v, (queued1|queuedNop)})
+            2'b00: ;    // do nothing
+            2'b10: ;
+            2'b11: begin fetchbufB_v <= `INV; fetchbuf <= ~fetchbuf; end
+            default: panic <= `PANIC_INVALIDIQSTATE;
+        endcase
+        //
+        // get data iff the fetch buffers are empty
+        //
+        // Buffer A is refilled in preference to buffer B; only one buffer is
+        // filled per clock.
+        if (fetchbufA_v == `INV) begin
+            FetchA();
+            // fetchbuf steering logic correction
+            if (fetchbufB_v==`INV && phit)
+                fetchbuf <= 1'b0;
+        end
+        else if (fetchbufB_v == `INV) begin
+            FetchB();
+        end
+    end
+    //
+    // get data iff the fetch buffers are empty
+    //
+    // This check runs regardless of which arm above executed. Being later in
+    // the block, its assignments (including those made inside FetchA) take
+    // precedence over earlier assignments to the same registers.
+    if (fetchbufA_v == `INV && fetchbufB_v == `INV) begin
+        FetchA();
+        fetchbuf <= 1'b0;
+    end
+//    // Steer fetchbuf to the valid buffer.
+//    else if (fetchbufB_v == `INV)
+//        fetchbuf <= 1'b0;
+//    else if (fetchbufA_v == `INV)
+//        fetchbuf <= 1'b1;
+//    else if (fetchbufA_v == `INV) begin
+//        FetchA();
+//    end
+//    else if (fetchbufB_v == `INV) begin
+//        FetchB();
+//    end
+end
+
+    // The fetchbuffer is invalidated at the end of a vector instruction
+    // queue.
+    if (nop_fetchbuf[0]) fetchbufA_v <= `INV;
+    if (nop_fetchbuf[1]) fetchbufB_v <= `INV;
+end
+
+// Present the selected buffer (A when fetchbuf is 0, B otherwise) on the
+// fetchbuf0_* signals. fetchbuf0_thrd is fixed at 0.
+assign fetchbuf0_instr = (fetchbuf == 1'b0) ? fetchbufA_instr : fetchbufB_instr;
+assign fetchbuf0_insln = (fetchbuf == 1'b0) ? fetchbufA_inslen: fetchbufB_inslen;
+assign fetchbuf0_v = (fetchbuf == 1'b0) ? fetchbufA_v : fetchbufB_v ;
+assign fetchbuf0_pc = (fetchbuf == 1'b0) ? fetchbufA_pc : fetchbufB_pc ;
+assign fetchbuf0_thrd = 1'b0;
+assign fetchbuf0_pbyte = (fetchbuf == 1'b0) ?
fetchbufA_pbyte : fetchbufB_pbyte;
+
+// insln0: length of the instruction at the fetch point.
+// An EXEC instruction whose bits [7:6] are 00 takes its length from the code
+// buffer entry (codebuf0); everything else is measured on insn0 itself. With
+// SUPPORT_DCI a CMPRSSD instruction has length 2, or 3 when pred_on is set.
+// Combinational logic, so blocking assignments are used.
+reg [2:0] insln0;
+always @*
+begin
+`ifdef SUPPORT_DCI
+    if (insn0[5:0]==`CMPRSSD)
+        insln0 = 3'd2 | pred_on;
+    else
+`endif
+    if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC)
+        insln0 = fnInsLength(codebuf0);
+    else
+        insln0 = fnInsLength(insn0);
+end
+
+
+// cinsn0: the instruction that is actually loaded into a fetch buffer.
+// Selection order (first match wins):
+//   1. SUPPORT_DCI only: a CMPRSSD opcode (found at [13:8] when pred_on, at
+//      [5:0] otherwise) is replaced by expand0.
+//   2. An EXEC instruction with a 00 selector field ([15:14] when pred_on,
+//      [7:6] otherwise) is replaced by codebuf0.
+//   3. insn0[15] with pred_on: xinsn0 with the original low byte re-attached.
+//   4. insn0[7] without pred_on: xinsn0.
+//   5. Otherwise insn0 unchanged.
+// Combinational logic, so blocking assignments are used.
+always @*
+begin
+`ifdef SUPPORT_DCI
+    if (insn0[13:8]==`CMPRSSD && pred_on)
+        cinsn0 = expand0;
+    else if (insn0[5:0]==`CMPRSSD && !pred_on)
+        cinsn0 = expand0;
+    else
+`endif
+    if (insn0[7:6]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && !pred_on)
+        cinsn0 = codebuf0;
+    else if (insn0[15:14]==2'b00 && insn0[`INSTRUCTION_OP]==`EXEC && pred_on)
+        cinsn0 = codebuf0;
+    else if (insn0[15] & pred_on)
+        cinsn0 = {xinsn0,insn0[7:0]};
+    else if (insn0[7] & ~pred_on)
+        cinsn0 = xinsn0;
+    else
+        cinsn0 = insn0;
+end
+
+// FetchA: load fetch buffer A from cinsn0 at the current pc0 and mark it
+// valid. With pred_on the instruction is cinsn0[55:8], otherwise
+// cinsn0[47:0]; cinsn0[7:0] is always captured as the pbyte. pc0 advances by
+// insln0 only when phit is set and freezePC is clear; otherwise it is
+// explicitly held, which also overrides any pc0 assignment made earlier in
+// the same clock by the calling block.
+// The task is invoked from clocked logic, so every register it writes uses a
+// non-blocking assignment (fetchbufA_pbyte was previously written with a
+// blocking assignment, which races with readers of the register).
+// NOTE(review): fetchbufA_inslen, read by the fetchbuf0_insln mux, is not
+// written here -- confirm it is driven elsewhere in the module.
+task FetchA;
+begin
+    fetchbufA_instr <= pred_on ? cinsn0[55:8] : cinsn0[47:0];
+    fetchbufA_pbyte <= cinsn0[7:0];
+    fetchbufA_v <= `VAL;
+    fetchbufA_pc <= pc0;
+    if (phit && ~freezePC)
+        pc0 <= pc0 + insln0;
+    else
+        pc0 <= pc0;
+end
+endtask
+
+// FetchB: same as FetchA, for fetch buffer B.
+// NOTE(review): fetchbufB_inslen is likewise not written here.
+task FetchB;
+begin
+    fetchbufB_instr <= pred_on ? cinsn0[55:8] : cinsn0[47:0];
+    fetchbufB_pbyte <= cinsn0[7:0];
+    fetchbufB_v <= `VAL;
+    fetchbufB_pc <= pc0;
+    if (phit && ~freezePC)
+        pc0 <= pc0 + insln0;
+    else
+        pc0 <= pc0;
+end
+endtask
+
+endmodule
Index: thor/trunk/FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v
===================================================================
--- thor/trunk/FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v (nonexistent)
+++ thor/trunk/FT64v7/rtl/twoway/FT64_regfile1w4r_oc.v (revision 60)
@@ -0,0 +1,282 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Register file with one write port and four read ports.
+// ============================================================================
+//
+`include "FT64_config.vh"
+
+// Simulation model of the register-file RAM: one byte-writable write port
+// (clka side) and one read port with a registered address (clkb side).
+//   ena & wea[n] : write byte lane n of dina to mem[addra] on posedge clka
+//   addrb        : registered on posedge clkb; doutb is mem[registered addrb]
+//   enb          : accepted for port compatibility but not used by this model
+// The eight byte lanes assume WID == 64. The depth follows the address width
+// (2**(RBIT+1) words; 4096 with the default RBIT of 11).
+module FT64_regfileRam_sim_oc(clka, ena, wea, addra, dina, clkb, enb, addrb, doutb);
+parameter WID=64;
+parameter RBIT = 11;
+localparam DEPTH = 1 << (RBIT+1);
+input clka;
+input ena;
+input [7:0] wea;
+input [RBIT:0] addra;
+input [WID-1:0] dina;
+input clkb;
+input enb;
+input [RBIT:0] addrb;
+output [WID-1:0] doutb;
+
+integer n;
+integer lane;
+(* RAM_STYLE="BLOCK" *)
+reg [WID-1:0] mem [0:DEPTH-1];
+reg [RBIT:0] raddrb;
+
+// Start with every register reading as zero.
+initial begin
+    for (n = 0; n < DEPTH; n = n + 1)
+        mem[n] = {WID{1'b0}};
+end
+
+// Byte-lane writes.
+always @(posedge clka)
+    for (lane = 0; lane < 8; lane = lane + 1)
+        if (ena & wea[lane])
+            mem[addra][lane*8 +: 8] <= dina[lane*8 +: 8];
+
+// Registered read address, asynchronous read of the array.
+always @(posedge clkb)
+    raddrb <= addrb;
+assign doutb = mem[raddrb];
+
+endmodule
+
+// Register file with one write port and four read ports, built from four
+// copies of the RAM that are all written with the same data.
+//   clk, wr0, we0, wa0, i0 : write port (enable, byte enables, address, data)
+//   rclk, ra0..ra3         : read clock and read addresses
+//   o0..o3                 : read data
+// Read data is forced to zero whenever the low five bits of the read address
+// are zero. A write in progress to the address being read is forwarded to the
+// output byte lane by byte lane.
+module FT64_regfile1w4r_oc(clk, wr0, we0, wa0, i0,
+    rclk, ra0, ra1, ra2, ra3, o0, o1, o2, o3);
+parameter WID=64;
+parameter RBIT = 11;
+input clk;
+input wr0;
+input [7:0] we0;
+input [RBIT:0] wa0;
+input [WID-1:0] i0;
+input rclk;
+input [RBIT:0] ra0;
+input [RBIT:0] ra1;
+input [RBIT:0] ra2;
+input [RBIT:0] ra3;
+output [WID-1:0] o0;
+output [WID-1:0] o1;
+output [WID-1:0] o2;
+output [WID-1:0] o3;
+
+reg wr;
+reg [RBIT:0] wa;
+reg [WID-1:0] i;
+reg [7:0] we;
+wire [WID-1:0] o00, o01, o02, o03;
+
+`ifdef SIM
+FT64_regfileRam_sim_oc urf10 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra0),
+    .doutb(o00)
+);
+
+FT64_regfileRam_sim_oc urf11 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra1),
+    .doutb(o01)
+);
+
+FT64_regfileRam_sim_oc urf12 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra2),
+    .doutb(o02)
+);
+
+FT64_regfileRam_sim_oc urf13 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra3),
+    .doutb(o03)
+);
+
+`else
+FT64_regfileRam urf10 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra0),
+    .dinb(64'h00),
+    .doutb(o00)
+);
+
+FT64_regfileRam urf11 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra1),
+    .dinb(64'h00),
+    .doutb(o01)
+);
+
+FT64_regfileRam urf12 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra2),
+    .dinb(64'h00),
+    .doutb(o02)
+);
+
+FT64_regfileRam urf13 (
+    .clka(clk),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra3),
+    .dinb(64'h00),
+    .doutb(o03)
+);
+
+`endif
+
+// The write port is passed straight through to the RAMs. This is
+// combinational logic, so blocking assignments are used.
+always @*
+begin
+    wr = wr0;
+    we = we0;
+    wa = wa0;
+    i = i0;
+end
+
+// Read-port output selection for one port, evaluated per byte lane:
+//   1. low five bits of the read address are zero -> 0
+//   2. the lane is being written at the same address -> the incoming data
+//   3. otherwise -> the RAM output
+// Every signal is passed as an argument so that the continuous assignments
+// below are sensitive to all of them.
+function [63:0] fnBypass;
+input [RBIT:0] f_ra;
+input f_wr;
+input [7:0] f_we;
+input [RBIT:0] f_wa;
+input [63:0] f_din;
+input [63:0] f_ram;
+integer lane;
+begin
+    for (lane = 0; lane < 8; lane = lane + 1)
+        if (f_ra[4:0]==5'd0)
+            fnBypass[lane*8 +: 8] = {8{1'b0}};
+        else if (f_wr && f_we[lane] && (f_ra==f_wa))
+            fnBypass[lane*8 +: 8] = f_din[lane*8 +: 8];
+        else
+            fnBypass[lane*8 +: 8] = f_ram[lane*8 +: 8];
+end
+endfunction
+
+assign o0 = fnBypass(ra0, wr0, we0, wa0, i0, o00);
+assign o1 = fnBypass(ra1, wr0, we0, wa0, i0, o01);
+assign o2 = fnBypass(ra2, wr0, we0, wa0, i0, o02);
+assign o3 = fnBypass(ra3, wr0, we0, wa0, i0, o03);
+
+endmodule
+
Index: thor/trunk/FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v
===================================================================
--- thor/trunk/FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v (nonexistent)
+++ thor/trunk/FT64v7/rtl/twoway/FT64_regfile2w6r_oc.v (revision 60)
@@ -0,0 +1,516 @@
+`timescale 1ns / 1ps
+// ============================================================================
+// __
+// \\__/ o\ (C) 2013-2018 Robert Finch, Waterloo
+// \ __ / All rights reserved.
+// \/_// robfinch@finitron.ca
+// ||
+//
+// This source file is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This source file is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//
+// Register file with two write ports and six read ports.
+// ============================================================================
+//
+`include "FT64_config.vh"
+
+// Simulation model of the register-file RAM: one byte-writable write port
+// (clka side) and one read port with a registered address (clkb side).
+//   ena & wea[n] : write byte lane n of dina to mem[addra] on posedge clka
+//   addrb        : registered on posedge clkb; doutb is mem[registered addrb]
+//   enb          : accepted for port compatibility but not used by this model
+// The eight byte lanes assume WID == 64. The depth follows the address width
+// (2**(RBIT+1) words; 4096 with the default RBIT of 11).
+module FT64_regfileRam_sim(clka, ena, wea, addra, dina, clkb, enb, addrb, doutb);
+parameter WID=64;
+parameter RBIT = 11;
+localparam DEPTH = 1 << (RBIT+1);
+input clka;
+input ena;
+input [7:0] wea;
+input [RBIT:0] addra;
+input [WID-1:0] dina;
+input clkb;
+input enb;
+input [RBIT:0] addrb;
+output [WID-1:0] doutb;
+
+integer n;
+integer lane;
+(* RAM_STYLE="BLOCK" *)
+reg [WID-1:0] mem [0:DEPTH-1];
+reg [RBIT:0] raddrb;
+
+// Start with every register reading as zero.
+initial begin
+    for (n = 0; n < DEPTH; n = n + 1)
+        mem[n] = {WID{1'b0}};
+end
+
+// Byte-lane writes.
+always @(posedge clka)
+    for (lane = 0; lane < 8; lane = lane + 1)
+        if (ena & wea[lane])
+            mem[addra][lane*8 +: 8] <= dina[lane*8 +: 8];
+
+// Registered read address, asynchronous read of the array.
+always @(posedge clkb)
+    raddrb <= addrb;
+assign doutb = mem[raddrb];
+
+endmodule
+
+// Register file with two write ports and six read ports.
+// Six copies of a one-write-port RAM are written on clk4x; the two logical
+// write ports are presented to the RAMs one after the other while clk is high.
+//   clk4x                  : RAM write clock
+//   clk                    : core clock
+//   wr0/we0/wa0/i0         : write port 0 (enable, byte enables, address, data)
+//   wr1/we1/wa1/i1         : write port 1
+//   rclk, ra0..ra5         : read clock and read addresses
+//   o0..o5                 : read data, with write forwarding (see fwdmux)
+// NOTE(review): the byte enables we0/we1 are not applied -- the RAM write
+// enables are always 8'hFF and we1x is captured but never read. Confirm that
+// full-width writes are intended.
+module FT64_regfile2w6r_oc(clk4x, clk, wr0, wr1, we0, we1, wa0, wa1, i0, i1,
+    rclk, ra0, ra1, ra2, ra3, ra4, ra5,
+    o0, o1, o2, o3, o4, o5);
+parameter WID=64;
+parameter RBIT = 11;
+input clk4x;
+input clk;
+input wr0;
+input wr1;
+input [7:0] we0;
+input [7:0] we1;
+input [RBIT:0] wa0;
+input [RBIT:0] wa1;
+input [WID-1:0] i0;
+input [WID-1:0] i1;
+input rclk;
+input [RBIT:0] ra0;
+input [RBIT:0] ra1;
+input [RBIT:0] ra2;
+input [RBIT:0] ra3;
+input [RBIT:0] ra4;
+input [RBIT:0] ra5;
+output [WID-1:0] o0;
+output [WID-1:0] o1;
+output [WID-1:0] o2;
+output [WID-1:0] o3;
+output [WID-1:0] o4;
+output [WID-1:0] o5;
+
+// Write port actually presented to the RAMs.
+reg wr;
+reg [RBIT:0] wa;
+reg [WID-1:0] i;
+reg [7:0] we;
+wire [WID-1:0] o00, o01, o02, o03, o04, o05;
+// Port 1 captured at the clk edge so it can be written on a later clk4x edge.
+reg wr1x;
+reg [RBIT:0] wa1x;
+reg [WID-1:0] i1x;
+reg [7:0] we1x;
+// Writes issued in the previous clk cycle, kept for read forwarding.
+reg holdwr0,holdwr1;
+reg [WID-1:0] holdi0, holdi1;
+reg [RBIT:0] holdwa0,holdwa1;
+
+`ifdef SIM
+FT64_regfileRam_sim urf10 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra0),
+    .doutb(o00)
+);
+
+FT64_regfileRam_sim urf11 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra1),
+    .doutb(o01)
+);
+
+FT64_regfileRam_sim urf12 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra2),
+    .doutb(o02)
+);
+
+FT64_regfileRam_sim urf13 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra3),
+    .doutb(o03)
+);
+
+FT64_regfileRam_sim urf14 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra4),
+    .doutb(o04)
+);
+
+FT64_regfileRam_sim urf15 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .clkb(rclk),
+    .enb(1'b1),
+    .addrb(ra5),
+    .doutb(o05)
+);
+`else
+// The unused B-side write inputs are tied off at their full widths (8-bit
+// web, 64-bit dinb), matching the instantiation in FT64_regfile1w4r_oc.
+FT64_regfileRam urf10 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra0),
+    .dinb(64'h00),
+    .doutb(o00)
+);
+
+FT64_regfileRam urf11 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra1),
+    .dinb(64'h00),
+    .doutb(o01)
+);
+
+FT64_regfileRam urf12 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra2),
+    .dinb(64'h00),
+    .doutb(o02)
+);
+
+FT64_regfileRam urf13 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra3),
+    .dinb(64'h00),
+    .doutb(o03)
+);
+
+FT64_regfileRam urf14 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra4),
+    .dinb(64'h00),
+    .doutb(o04)
+);
+
+FT64_regfileRam urf15 (
+    .clka(clk4x),
+    .ena(wr),
+    .wea(we),
+    .addra(wa),
+    .dina(i),
+    .douta(),
+    .clkb(rclk),
+    .enb(1'b1),
+    .web(8'b0),
+    .addrb(ra5),
+    .dinb(64'h00),
+    .doutb(o05)
+);
+`endif
+
+// Record what was written in the previous clock cycle so that read
+// forwarding logic may use it.
+always @(posedge clk)
+begin
+    holdwr0 <= wr0;
+    holdwr1 <= wr1;
+    holdwa0 <= wa0;
+    holdwa1 <= wa1;
+    holdi0 <= i0;
+    holdi1 <= i1;
+end
+
+// The same clock edge that would normally update the register file is the
+// clock edge that causes the data to disappear for the next cycle. The
+// data needs to be held onto so that it can update the register file on
+// the next 4x clock.
+always @(posedge clk)
+begin
+    wr1x <= wr1;
+    we1x <= we1;
+    wa1x <= wa1;
+    i1x <= i1;
+end
+
+// Write sequencing on clk4x. wclk2 is the previous clk4x sample of clk.
+//   clk high, wclk2 low  : first clk4x edge of the high phase -> write port 0
+//                          (taken directly from the inputs)
+//   clk high, wclk2 high : later edges of the high phase -> write port 1
+//                          (from the copy registered on clk)
+//   clk low              : no write
+reg wclk2;
+always @(posedge clk4x)
+begin
+    wclk2 <= clk;
+    if (clk & ~wclk2) begin
+        wr <= wr0;
+        we <= 8'hFF;
+        wa <= wa0;
+        i <= i0;
+    end
+    else if (clk & wclk2) begin
+        wr <= wr1x;
+        we <= 8'hFF;
+        wa <= wa1x;
+        i <= i1x;
+    end
+    else begin
+        wr <= 1'b0;
+        we <= 8'hFF;
+        wa <= 'd0;
+        i <= 'd0;
+    end
+end
+
+
+// Read-port output selection for one port. First match wins:
+//   1. low five bits of the read address are zero -> 0
+//   2. port 1 is writing this address now          -> i1
+//   3. port 0 is writing this address now          -> i0
+//   4. port 1 wrote this address last clk cycle    -> held i1
+//   5. port 0 wrote this address last clk cycle    -> held i0
+//   6. otherwise                                   -> RAM output
+// Whole words are forwarded; byte enables are not considered.
+function [WID-1:0] fwdmux;
+input [RBIT:0] ra;
+input wr0;
+input wr1;
+input hwr0;
+input hwr1;
+input [RBIT:0] wa0;
+input [RBIT:0] wa1;
+input [RBIT:0] hwa0;
+input [RBIT:0] hwa1;
+input [WID-1:0] i0;
+input [WID-1:0] i1;
+input [WID-1:0] hi0;
+input [WID-1:0] hi1;
+input [WID-1:0] oo;
+begin
+    if (ra[4:0]==5'd0)
+        fwdmux = {WID{1'b0}};
+    else if (wr1 && ra==wa1)
+        fwdmux = i1;
+    else if (wr0 && ra==wa0)
+        fwdmux = i0;
+    else if (hwr1 && ra==hwa1)
+        fwdmux = hi1;
+    else if (hwr0 && ra==hwa0)
+        fwdmux = hi0;
+    else
+        fwdmux = oo;
+end
+endfunction
+
+assign o0 = fwdmux(ra0,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o00);
+assign o1 = fwdmux(ra1,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o01);
+assign o2 = fwdmux(ra2,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o02);
+assign o3 = fwdmux(ra3,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o03);
+assign o4 = fwdmux(ra4,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o04);
+assign o5 = fwdmux(ra5,wr0,wr1,holdwr0,holdwr1,wa0,wa1,holdwa0,holdwa1,i0,i1,holdi0,holdi1,o05);
+
+// An earlier forwarding network that was kept here commented out (one assign
+// per output byte lane qualified by we0/we1, plus a word-wide variant for o5)
+// forwarded only writes in the current cycle. It is superseded by fwdmux
+// above and has been removed.
+endmodule
+
Index: thor/trunk/FT64v7/software/AS64/bin/AS64.exe
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: thor/trunk/FT64v7/software/AS64/bin/AS64.exe
===================================================================
--- thor/trunk/FT64v7/software/AS64/bin/AS64.exe (nonexistent)
+++ thor/trunk/FT64v7/software/AS64/bin/AS64.exe (revision 60)
thor/trunk/FT64v7/software/AS64/bin/AS64.exe Property changes : Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: thor/trunk/FT64v7/software/CC64/bin/CC64.exe =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: thor/trunk/FT64v7/software/CC64/bin/CC64.exe =================================================================== --- thor/trunk/FT64v7/software/CC64/bin/CC64.exe (nonexistent) +++ thor/trunk/FT64v7/software/CC64/bin/CC64.exe (revision 60)
thor/trunk/FT64v7/software/CC64/bin/CC64.exe Property changes : Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.