1 |
2 |
ndumitrach |
//////////////////////////////////////////////////////////////////////////////////
|
2 |
|
|
//
|
3 |
|
|
// This file is part of the Next186 project
|
4 |
|
|
// http://opencores.org/project,next186
|
5 |
|
|
//
|
6 |
|
|
// Filename: Next186_BIU_2T_delayread.v
|
7 |
|
|
// Description: Part of the Next186 CPU project, bus interface unit
|
8 |
|
|
// Version 1.0
|
9 |
|
|
// Creation date: 20Jan2012 - 10Mar2012
|
10 |
|
|
//
|
11 |
|
|
// Author: Nicolae Dumitrache
|
12 |
|
|
// e-mail: ndumitrache@opencores.org
|
13 |
|
|
//
|
14 |
|
|
/////////////////////////////////////////////////////////////////////////////////
|
15 |
|
|
//
|
16 |
3 |
ndumitrach |
// Copyright (C) 2012 Nicolae Dumitrache
|
17 |
2 |
ndumitrach |
//
|
18 |
|
|
// This source file may be used and distributed without
|
19 |
|
|
// restriction provided that this copyright statement is not
|
20 |
|
|
// removed from the file and that any derivative work contains
|
21 |
|
|
// the original copyright notice and the associated disclaimer.
|
22 |
|
|
//
|
23 |
|
|
// This source file is free software; you can redistribute it
|
24 |
|
|
// and/or modify it under the terms of the GNU Lesser General
|
25 |
|
|
// Public License as published by the Free Software Foundation;
|
26 |
|
|
// either version 2.1 of the License, or (at your option) any
|
27 |
|
|
// later version.
|
28 |
|
|
//
|
29 |
|
|
// This source is distributed in the hope that it will be
|
30 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied
|
31 |
|
|
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
32 |
|
|
// PURPOSE. See the GNU Lesser General Public License for more
|
33 |
|
|
// details.
|
34 |
|
|
//
|
35 |
|
|
// You should have received a copy of the GNU Lesser General
|
36 |
|
|
// Public License along with this source; if not, download it
|
37 |
|
|
// from http://www.opencores.org/lgpl.shtml
|
38 |
|
|
//
|
39 |
|
|
///////////////////////////////////////////////////////////////////////////////////
|
40 |
|
|
// Additional Comments:
|
41 |
|
|
//
|
42 |
3 |
ndumitrach |
// - Links the CPU with a 32bit static synchronous RAM (or cache)
|
43 |
|
|
// - Able to address up to 1MB
|
44 |
|
|
// - 16byte instruction queue
|
45 |
|
|
// - Works at 2 X CPU frequency (80Mhz on Spartan3AN), requiring minimum 2T for an instruction.
|
46 |
|
|
// - The 32bit data bus and the double CPU clock allows the instruction queue to be almost always full, avoiding the CPU starving.
|
47 |
|
|
// Data misalignment penalties are incurred only when a data word crosses a 4-byte boundary.
|
48 |
2 |
ndumitrach |
//
|
49 |
|
|
//////////////////////////////////////////////////////////////////////////////////
|
50 |
|
|
//
|
51 |
3 |
ndumitrach |
// How to compute each instruction duration, in clock cycles (for this particular BIU implementation!):
|
52 |
|
|
//
|
53 |
|
|
// 1 - From the Next186_features.doc see for each instruction how many T states are required (you will notice they are always
|
54 |
2 |
ndumitrach |
// less than or equal to those of the 486, and much less than those of the original 80186)
|
55 |
3 |
ndumitrach |
// 2 - Multiply this number by 2 - the BIU works at double ALU frequency because it needs to multiplex the data and instructions,
|
56 |
2 |
ndumitrach |
// in order to keep the ALU permanently fed with instructions. The 16-byte queue acts like a flexible instruction buffer.
|
57 |
3 |
ndumitrach |
// 3 - Add penalties, as follows:
|
58 |
2 |
ndumitrach |
// +1T for each memory read - because the synchronous SRAM needs this extra cycle to deliver the data
|
59 |
|
|
// +2T for each jump - required to flush and re-fill the instruction queue
|
60 |
|
|
// +1T for each 16bit(word) read/write which overlaps the 4byte boundary - specific to 32bit bus width
|
61 |
|
|
// +1T if the jump is made to an address whose lowest 2 bits are 11 - specific to 32bit bus width
|
62 |
3 |
ndumitrach |
// +1T when the instruction queue empties - this case is very rare; it occurs when many 5-6 byte memory write instructions are executed in direct sequence
|
63 |
|
|
//
|
64 |
2 |
ndumitrach |
// Some examples:
|
65 |
3 |
ndumitrach |
// - "lea ax,[bx+si+1234]" requires 2T
|
66 |
|
|
// - "add ax, 2345" requires 2T
|
67 |
|
|
// - "xchg ax, bx" requires 4T
|
68 |
|
|
// - "inc word ptr [1]" requires 5T (2x2T inc M + 1T read)
|
69 |
|
|
// - "inc word ptr [3]" requires 7T (2x2T inc M + 1T read + 1T unaligned read + 1T unaligned write)
|
70 |
|
|
// - "imul ax,bx,234" requires 4T (2x2T imul)
|
71 |
|
|
// - "loop address != 3(mod 4)" requires 4T (2x1T loop + 2T flush)
|
72 |
|
|
// - "loop address == 3(mod 4)" requires 5T (2x1T loop + 2T flush + 1T unaligned jump)
|
73 |
|
|
// - "call address 0" requires 4T (2x1T call near + 2T flush)
|
74 |
4 |
ndumitrach |
// - "ret address 0" requires 7T (2x2T ret + 1T read penalty + 2T flush)
|
75 |
3 |
ndumitrach |
//
|
76 |
2 |
ndumitrach |
//////////////////////////////////////////////////////////////////////////////////
|
77 |
|
|
|
78 |
|
|
`timescale 1ns / 1ps
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
// Bus interface unit: multiplexes CPU data accesses and instruction prefetch
// onto a single 32-bit synchronous RAM port.
//
// - Prefetched dwords are stored in a 4 x 32-bit circular queue (16 bytes).
//   INSTR presents a 48-bit window into that queue starting at byte nqpos.
// - RAM read data is consumed one CE-enabled clock after the request
//   (rdi is iread delayed by one cycle), and a dword that is arriving right
//   now is bypassed straight into the INSTR window.
// - A word access at ADDR[1:0] == 2'b11 straddles two dwords ("split");
//   the FSM then performs a second RAM cycle with data_bound = 1, which
//   addresses the next dword.
// - CE186 is asserted in the cycle in which the CPU is allowed to advance.
module BIU186_32bSync_2T_DelayRead(
    input              CLK,
    output      [47:0] INSTR,
    input       [2:0]  ISIZE,
    input              IFETCH,
    input              FLUSH,
    input              MREQ,
    input              WR,
    input              WORD,
    input       [20:0] ADDR,
    input       [20:0] IADDR,
    output reg         CE186,       // CPU clock enable
    input       [31:0] RAM_DIN,
    output      [31:0] RAM_DOUT,
    output      [18:0] RAM_ADDR,
    output             RAM_MREQ,
    output wire [3:0]  RAM_WMASK,
    output reg  [15:0] DOUT,
    input       [15:0] DIN,
    input              CE,          // BIU clock enable
    output reg         data_bound,
    input       [1:0]  WSEL,        // normally {~ADDR[0], ADDR[0]}
    output reg         RAM_RD,
    output reg         RAM_WR,
    input              IORQ,
    input              FASTIO
);

    // ------------------------------------------------------------------
    // FSM state encoding
    // ------------------------------------------------------------------
    localparam [1:0] ST_PREFETCH = 2'd0;   // no CPU activity, prefetch only
    localparam [1:0] ST_EXECUTE  = 2'd1;   // CPU request is decoded here
    localparam [1:0] ST_SPLIT_RD = 2'd2;   // second dword of a split read
    localparam [1:0] ST_FINISH   = 2'd3;   // completes a read / split write / slow I/O read

    // ------------------------------------------------------------------
    // State-holding elements
    // ------------------------------------------------------------------
    reg [31:0] queue[3:0];      // prefetch queue, 4 dwords
    reg [1:0]  STATE    = 0;
    reg        OLDSTATE = 1;    // STATE[0] delayed by one enabled clock
    reg [3:0]  qpos     = 0;    // byte offset of the current instruction in the queue
    reg [4:0]  qsize    = 0;    // byte count tracked for the queue
    reg [1:0]  rpos     = 0;    // queue slot the next arriving dword is written to
    reg [18:0] piaddr   = 0;    // dword address of the next prefetch
    reg [7:0]  exdata   = 0;    // RAM_DIN[31:24] captured while data_bound is high
    reg        rdi      = 0;    // iread delayed by one cycle: RAM_DIN holds a prefetched dword

    // Combinational FSM outputs
    reg [1:0]  NEXTSTATE;
    reg        sflush;          // queue restart at IADDR is happening this cycle
    reg        iread;           // this RAM cycle is an instruction prefetch

    // ------------------------------------------------------------------
    // Queue bookkeeping
    // ------------------------------------------------------------------
    wire        jump_fetch  = FLUSH && IFETCH;
    wire        consume     = CE186 && IFETCH && ~FLUSH;
    wire [4:0]  qsize_kept  = consume ? qsize - ISIZE : qsize;
    // On a flush the size restarts at -(IADDR mod 4): the leading bytes of
    // the first fetched dword lie before the jump target.
    wire [4:0]  newqsize    = sflush ? -{3'b000, IADDR[1:0]} : qsize_kept;
    wire        qnofull     = qsize < 5'd13;
    wire        queue_ready = (jump_fetch && (qsize > 5'd5)) || (qsize > 5'd11);
    wire [3:0]  nqpos       = jump_fetch ? {2'b00, IADDR[1:0]} : (qpos + ISIZE);
    wire [18:0] MIADDR      = sflush ? IADDR[20:2] : piaddr;

    // ------------------------------------------------------------------
    // Data path helpers
    // ------------------------------------------------------------------
    wire        split     = (&ADDR[1:0]) && WORD;   // word spans two dwords
    wire        mem_write = WR && MREQ;
    wire [7:0]  wr_hi     = WSEL[1] ? DIN[15:8] : DIN[7:0];
    wire [7:0]  wr_lo     = WSEL[0] ? DIN[15:8] : DIN[7:0];
    wire [15:0] DSWAP     = {wr_hi, wr_lo};
    wire [7:0]  dout3_lo  = WORD ? exdata : RAM_DIN[31:24];

    // ------------------------------------------------------------------
    // Instruction window: three consecutive queue slots, with a bypass for
    // the dword that is arriving from RAM in this very cycle.
    // ------------------------------------------------------------------
    wire [1:0]  slot0   = nqpos[3:2];
    wire [1:0]  slot1   = slot0 + 2'd1;
    wire [1:0]  slot2   = slot0 + 2'd2;
    wire [31:0] dword0  = queue[slot0];
    wire [31:0] dword1  = (rdi && (slot1 == rpos)) ? RAM_DIN      : queue[slot1];
    wire [7:0]  byte2   = (rdi && (slot2 == rpos)) ? RAM_DIN[7:0] : queue[slot2][7:0];

    assign INSTR     = {byte2, dword1, dword0} >> {nqpos[1:0], 3'b000};

    // ------------------------------------------------------------------
    // RAM port
    // ------------------------------------------------------------------
    assign RAM_DOUT  = {DSWAP, DSWAP};
    assign RAM_MREQ  = iread || RAM_RD || RAM_WR;
    assign RAM_ADDR  = iread ? MIADDR : ADDR[20:2] + data_bound;
    assign RAM_WMASK = data_bound ? {3'b000, RAM_WR}
                                  : {2'b00, WORD & RAM_WR, RAM_WR} << ADDR[1:0];

    // ------------------------------------------------------------------
    // Read data alignment towards the CPU
    // ------------------------------------------------------------------
    always @(*) begin
        case (ADDR[1:0])
            2'b00: DOUT = RAM_DIN[15:0];
            2'b01: DOUT = RAM_DIN[23:8];
            2'b10: DOUT = RAM_DIN[31:16];
            2'b11: DOUT = {RAM_DIN[7:0], dout3_lo};
        endcase
    end

    // ------------------------------------------------------------------
    // FSM: next state and control outputs
    // ------------------------------------------------------------------
    always @(*) begin
        RAM_RD     = 1'b0;
        RAM_WR     = 1'b0;
        CE186      = 1'b0;
        sflush     = 1'b0;
        data_bound = 1'b0;
        iread      = 1'b0;
        NEXTSTATE  = ST_EXECUTE;    // default; overridden below where needed

        case (STATE)
            ST_PREFETCH: begin
                // no cpu activity on first state
                iread = qnofull;
            end

            ST_EXECUTE: begin
                if (jump_fetch && !OLDSTATE) begin
                    // jump taken: restart the queue at IADDR
                    sflush = 1'b1;
                    iread  = 1'b1;
                end else if (!queue_ready) begin
                    // not enough bytes queued yet: keep prefetching, stay here
                    iread = 1'b1;
                end else begin
                    NEXTSTATE = ST_PREFETCH;
                    if (!MREQ) begin
                        iread = qnofull;
                        if (IORQ && !WR && !FASTIO) NEXTSTATE = ST_FINISH;
                        else                        CE186     = 1'b1;
                    end else if (WR) begin
                        // memory write
                        RAM_WR = 1'b1;
                        if (split) NEXTSTATE = ST_FINISH;
                        else       CE186     = 1'b1;
                    end else begin
                        // memory read
                        RAM_RD = 1'b1;
                        if (split) NEXTSTATE = ST_SPLIT_RD;
                        else       NEXTSTATE = ST_FINISH;
                    end
                end
            end

            ST_SPLIT_RD: begin
                RAM_RD     = 1'b1;
                data_bound = 1'b1;  // split memory access
                NEXTSTATE  = ST_FINISH;
            end

            ST_FINISH: begin
                RAM_WR     = mem_write;
                iread      = !mem_write && qnofull;
                data_bound = split;
                CE186      = 1'b1;
                NEXTSTATE  = ST_PREFETCH;
            end
        endcase
    end

    // ------------------------------------------------------------------
    // Registers (all gated by the BIU clock enable)
    // ------------------------------------------------------------------
    always @(posedge CLK) begin
        if (CE) begin
            STATE    <= NEXTSTATE;
            OLDSTATE <= STATE[0];

            // prefetched dword arrives one cycle after it was requested
            rdi <= iread;
            if (rdi) queue[rpos] <= RAM_DIN;

            if (sflush)   rpos <= 2'd0;
            else if (rdi) rpos <= rpos + 2'd1;

            // every prefetch request accounts for 4 more bytes and advances
            // the prefetch address by one dword
            qsize  <= iread ? newqsize + 5'd4  : newqsize;
            piaddr <= iread ? MIADDR   + 19'd1 : MIADDR;

            if (CE186 && IFETCH) qpos   <= nqpos;
            if (data_bound)      exdata <= RAM_DIN[31:24];
        end
    end

endmodule
|
217 |
|
|
|