| 1 |
2 |
dgisselq |
///////////////////////////////////////////////////////////////////////////////
|
| 2 |
|
|
//
|
| 3 |
|
|
// Filename: cpudefs.v
|
| 4 |
|
|
//
|
| 5 |
|
|
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
|
| 6 |
|
|
//
|
| 7 |
|
|
// Purpose: Some architectures have some needs, others have other needs.
|
| 8 |
|
|
// Some of my projects need a Zip CPU with pipelining, others
|
| 9 |
|
|
// can't handle the timing required to get the answer from the ALU
|
| 10 |
|
|
// back into the input for the ALU. As each different project has
|
| 11 |
|
|
// different needs, I can either 1) reconfigure my entire baseline prior
|
| 12 |
|
|
// to building each project, or 2) host a configuration file which contains
|
| 13 |
|
|
// the information regarding each baseline. This file is that
|
| 14 |
|
|
// configuration file. It controls how the CPU (not the system,
|
| 15 |
|
|
// peripherals, or other) is defined and implemented. Several options
|
| 16 |
|
|
// are available within here, making the Zip CPU pipelined or not,
|
| 17 |
|
|
// able to handle a faster clock with more stalls or a slower clock with
|
| 18 |
|
|
// no stalls, etc.
|
| 19 |
|
|
//
|
| 20 |
|
|
// This file encapsulates those control options.
|
| 21 |
|
|
//
|
| 22 |
|
|
// The number of LUTs the Zip CPU uses varies dramatically with the
|
| 23 |
|
|
// options defined in this file.
|
| 24 |
|
|
//
|
| 25 |
|
|
//
|
| 26 |
|
|
// Creator: Dan Gisselquist, Ph.D.
|
| 27 |
|
|
// Gisselquist Technology, LLC
|
| 28 |
|
|
//
|
| 29 |
|
|
///////////////////////////////////////////////////////////////////////////////
|
| 30 |
|
|
//
|
| 31 |
|
|
// Copyright (C) 2015, Gisselquist Technology, LLC
|
| 32 |
|
|
//
|
| 33 |
|
|
// This program is free software (firmware): you can redistribute it and/or
|
| 34 |
|
|
// modify it under the terms of the GNU General Public License as published
|
| 35 |
|
|
// by the Free Software Foundation, either version 3 of the License, or (at
|
| 36 |
|
|
// your option) any later version.
|
| 37 |
|
|
//
|
| 38 |
|
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
| 39 |
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
| 40 |
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
| 41 |
|
|
// for more details.
|
| 42 |
|
|
//
|
| 43 |
|
|
// License: GPL, v3, as defined and found on www.gnu.org,
|
| 44 |
|
|
// http://www.gnu.org/licenses/gpl.html
|
| 45 |
|
|
//
|
| 46 |
|
|
//
|
| 47 |
|
|
///////////////////////////////////////////////////////////////////////////////
|
| 48 |
|
|
`ifndef CPUDEFS_H
|
| 49 |
|
|
`define CPUDEFS_H
|
| 50 |
|
|
//
|
| 51 |
|
|
//
|
| 52 |
|
|
// The first couple options control the Zip CPU instruction set, and how
|
| 53 |
|
|
// it handles various instructions within the set:
|
| 54 |
|
|
//
|
| 55 |
|
|
//
|
| 56 |
|
|
// OPT_ILLEGAL_INSTRUCTION is part of a new section of code that is supposed
|
| 57 |
|
|
// to recognize illegal instructions and interrupt the CPU whenever one such
|
| 58 |
|
|
// instruction is encountered. The goal is to create a soft floating point
|
| 59 |
|
|
// unit via this approach, that can then be replaced with a true floating point
|
| 60 |
|
|
// unit. As I'm not there yet, it just catches illegal instructions and
|
| 61 |
|
|
// interrupts the CPU on any such instruction--when defined. Otherwise,
|
| 62 |
|
|
// illegal instructions are quietly ignored and their behaviour is ...
|
| 63 |
|
|
// undefined. (Many get treated like NOOPs ...)
|
| 64 |
|
|
//
|
| 65 |
|
|
// I recommend setting this flag, although it can be taken out if area is
|
| 66 |
|
|
// critical ...
|
| 67 |
|
|
//
|
| 68 |
|
|
`define OPT_ILLEGAL_INSTRUCTION
|
| 69 |
|
|
//
|
| 70 |
|
|
//
|
| 71 |
|
|
//
|
| 72 |
|
|
// OPT_MULTIPLY controls whether or not the multiply is built and included
|
| 73 |
|
|
// in the ALU by default. Set this option and a parameter will be set that
|
| 74 |
|
|
// includes the multiply. (This parameter may still be overridden, as with
|
| 75 |
|
|
// any parameter ...) If the multiply is not included and
|
| 76 |
|
|
// OPT_ILLEGAL_INSTRUCTION is set, then the multiply will create an illegal
|
| 77 |
|
|
// instruction that will then trip the illegal instruction trap.
|
| 78 |
|
|
//
|
| 79 |
|
|
//
|
| 80 |
|
|
`define OPT_MULTIPLY
|
| 81 |
|
|
//
|
| 82 |
|
|
//
|
| 83 |
|
|
//
|
| 84 |
|
|
// OPT_DIVIDE controls whether or not the divide instruction is built and
|
| 85 |
|
|
// included into the ZipCPU by default. Set this option and a parameter will
|
| 86 |
|
|
// be set that causes the divide unit to be included. (This parameter may
|
| 87 |
|
|
// still be overridden, as with any parameter ...) If the divide is not
|
| 88 |
|
|
// included and OPT_ILLEGAL_INSTRUCTION is set, then the divide will create
|
| 89 |
|
|
// an illegal instruction exception that will send the CPU into supervisor
|
| 90 |
|
|
// mode.
|
| 91 |
|
|
//
|
| 92 |
|
|
//
|
| 93 |
|
|
// `define OPT_DIVIDE
|
| 94 |
|
|
//
|
| 95 |
|
|
//
|
| 96 |
|
|
//
|
| 97 |
|
|
// OPT_IMPLEMENT_FPU will (one day) control whether or not the floating point
|
| 98 |
|
|
// unit (once I have one) is built and included into the ZipCPU by default.
|
| 99 |
|
|
// At that time, if this option is set then a parameter will be set that
|
| 100 |
|
|
// causes the floating point unit to be included. (This parameter may
|
| 101 |
|
|
// still be overridden, as with any parameter ...) If the floating point unit
|
| 102 |
|
|
// is not included and OPT_ILLEGAL_INSTRUCTION is set, then as with the
|
| 103 |
|
|
// multiply and divide any floating point instruction will result in an illegal
|
| 104 |
|
|
// instruction exception that will send the CPU into supervisor mode.
|
| 105 |
|
|
//
|
| 106 |
|
|
//
|
| 107 |
|
|
// `define OPT_IMPLEMENT_FPU
|
| 108 |
|
|
//
|
| 109 |
|
|
//
|
| 110 |
|
|
//
|
| 111 |
|
|
// OPT_NEW_INSTRUCTION_SET controls whether or not the new instruction set
|
| 112 |
|
|
// is in use. The new instruction set contains space for floating point
|
| 113 |
|
|
// operations, signed and unsigned divide instructions, as well as bit reversal
|
| 114 |
|
|
// and ... at least two other operations yet to be defined. The decoder alone
|
| 115 |
|
|
// uses about 70 fewer LUTs, although in practice this works out to 12 fewer
|
| 116 |
|
|
// when all works out in the wash. Further, floating point and divide
|
| 117 |
|
|
// instructions will cause an illegal instruction exception if they are not
|
| 118 |
|
|
// implemented--so software capability can be built to use these instructions
|
| 119 |
|
|
// immediately, even if the hardware is not yet ready.
|
| 120 |
|
|
//
|
| 121 |
|
|
// This option is likely to go away in the future, obsoleting the previous
|
| 122 |
|
|
// instruction set, so I recommend setting this option and switching to the
|
| 123 |
|
|
// new instruction set as soon as possible.
|
| 124 |
|
|
//
|
| 125 |
|
|
`define OPT_NEW_INSTRUCTION_SET
|
| 126 |
|
|
//
|
| 127 |
|
|
//
|
| 128 |
|
|
//
|
| 129 |
|
|
//
|
| 130 |
|
|
//
|
| 131 |
|
|
//
|
| 132 |
|
|
// OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and
|
| 133 |
|
|
// whether or not it can issue one instruction per clock. When set, the
|
| 134 |
|
|
// prefetch has no cache, and only one instruction is fetched at a time.
|
| 135 |
|
|
// This effectively sets the CPU so that only one instruction is ever
|
| 136 |
|
|
// in the pipeline at once, and hence you may think of this as a "kill
|
| 137 |
|
|
// pipeline" option. However, since the pipelined fetch component uses so
|
| 138 |
|
|
// much area on the FPGA, this is an important option to use in trimming down
|
| 139 |
|
|
// used area if necessary. Hence, it needs to be maintained for that purpose.
|
| 140 |
|
|
// Be aware, though, it will drop your performance by a factor between 2x and
|
| 141 |
|
|
// 3x.
|
| 142 |
|
|
//
|
| 143 |
|
|
// We can either pipeline our fetches, or issue one fetch at a time. Pipelined
|
| 144 |
|
|
// fetches are more complicated and therefore use more FPGA resources, while
|
| 145 |
|
|
// single fetches will cause the CPU to stall for about 5 clocks each
|
| 146 |
|
|
// instruction cycle, effectively reducing the instruction count per clock to
|
| 147 |
|
|
// about 0.2. However, the area cost may be worth it. Consider:
|
| 148 |
|
|
//
|
| 149 |
|
|
// Slice LUTs ZipSystem ZipCPU
|
| 150 |
|
|
// Single Fetching 2521 1734
|
| 151 |
|
|
// Pipelined fetching 2796 2046
|
| 152 |
|
|
// (These numbers may be dated, but should still be representative ...)
|
| 153 |
|
|
//
|
| 154 |
|
|
// I recommend only defining this if you "need" to, if area is tight and
|
| 155 |
|
|
// speed isn't as important. Otherwise, just leave this undefined.
|
| 156 |
|
|
//
|
| 157 |
|
|
`define OPT_SINGLE_FETCH
|
| 158 |
|
|
//
|
| 159 |
|
|
//
|
| 160 |
|
|
//
|
| 161 |
|
|
// The next several options are pipeline optimization options. They make no
|
| 162 |
|
|
// sense in a single instruction fetch mode, hence we `ifndef them so they
|
| 163 |
|
|
// are only defined if we are in a full pipelined mode (i.e. OPT_SINGLE_FETCH
|
| 164 |
|
|
// is not defined).
|
| 165 |
|
|
//
|
| 166 |
|
|
`ifndef OPT_SINGLE_FETCH
|
| 167 |
|
|
//
|
| 168 |
|
|
//
|
| 169 |
|
|
//
|
| 170 |
|
|
// OPT_PIPELINED is the natural result and opposite of using the single
|
| 171 |
|
|
// instruction fetch unit. If you are not using that unit, the ZipCPU will
|
| 172 |
|
|
// be pipelined. The option is defined here more for readability than
|
| 173 |
|
|
// anything else, since OPT_PIPELINED makes more sense than OPT_SINGLE_FETCH,
|
| 174 |
|
|
// well ... that and it does a better job of explaining what is going on.
|
| 175 |
|
|
//
|
| 176 |
|
|
// In other words, leave this define alone--lest you break the ZipCPU.
|
| 177 |
|
|
//
|
| 178 |
|
|
`define OPT_PIPELINED
|
| 179 |
|
|
//
|
| 180 |
|
|
//
|
| 181 |
|
|
//
|
| 182 |
|
|
// OPT_TRADITIONAL_PFCACHE allows you to switch between one of two prefetch
|
| 183 |
|
|
// caches. If enabled, a more traditional cache is implemented. This more
|
| 184 |
|
|
// traditional cache (currently) uses many more LUTs, but it also reduces
|
| 185 |
|
|
// the stall count tremendously over the alternative hacked pipeline cache.
|
| 186 |
|
|
// (The traditional pfcache is also pipelined, whereas the pipeline cache
|
| 187 |
|
|
// implements a windowed approach to caching.)
|
| 188 |
|
|
//
|
| 189 |
|
|
// If you have the fabric to support this option, I recommend including it.
|
| 190 |
|
|
//
|
| 191 |
|
|
// `define OPT_TRADITIONAL_PFCACHE
|
| 192 |
|
|
//
|
| 193 |
|
|
//
|
| 194 |
|
|
//
|
| 195 |
|
|
// OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
|
| 196 |
|
|
// as possible, to avoid as many pipeline stalls on a branch as possible.
|
| 197 |
|
|
// It's not tremendously successful yet--BRA's still suffer stalls,
|
| 198 |
|
|
// but I intend to keep working on this approach until the number of stalls
|
| 199 |
|
|
// gets down to one or (ideally) zero. (With the OPT_TRADITIONAL_PFCACHE, this
|
| 200 |
|
|
// gets down to a single stall cycle ...) That way a "BRA" can be used as the
|
| 201 |
|
|
// compiler's branch prediction optimizer: BRA's barely stall, while branches
|
| 202 |
|
|
// on conditions will always suffer about 4 stall cycles or so.
|
| 203 |
|
|
//
|
| 204 |
|
|
// I recommend setting this flag, so as to turn early branching on.
|
| 205 |
|
|
//
|
| 206 |
|
|
`define OPT_EARLY_BRANCHING
|
| 207 |
|
|
//
|
| 208 |
|
|
//
|
| 209 |
|
|
//
|
| 210 |
|
|
// OPT_PIPELINED_BUS_ACCESS controls whether or not LOD/STO instructions
|
| 211 |
|
|
// can take advantage of pipelined bus instructions. To be eligible, the
|
| 212 |
|
|
// operations must be identical (cannot pipeline loads and stores, just loads
|
| 213 |
|
|
// only or stores only), and the addresses must either be identical or one up
|
| 214 |
|
|
// from the previous address. Further, the load/store string must all have
|
| 215 |
|
|
// the same conditional. This approach gains the most use, in my humble
|
| 216 |
|
|
// opinion, when saving registers to or restoring registers from the stack
|
| 217 |
|
|
// at the beginning/end of a procedure, or when doing a context swap.
|
| 218 |
|
|
//
|
| 219 |
|
|
// I recommend setting this flag, for performance reasons, especially if your
|
| 220 |
|
|
// wishbone bus can handle pipelined bus accesses.
|
| 221 |
|
|
//
|
| 222 |
|
|
`define OPT_PIPELINED_BUS_ACCESS
|
| 223 |
|
|
//
|
| 224 |
|
|
//
|
| 225 |
|
|
//
|
| 226 |
|
|
`ifdef OPT_NEW_INSTRUCTION_SET
|
| 227 |
|
|
//
|
| 228 |
|
|
//
|
| 229 |
|
|
//
|
| 230 |
|
|
// The new instruction set also defines a set of very long instruction words.
|
| 231 |
|
|
// Well, calling them "very long" instruction words is probably a misnomer,
|
| 232 |
|
|
// although we're going to do it. They're really 2x16-bit instructions---
|
| 233 |
|
|
// instruction words that pack two instructions into one word. (2x14 bit
|
| 234 |
|
|
// really--'cause you need a bit to note the instruction is a 2x instruction,
|
| 235 |
|
|
// and then 3-bits for the condition codes ...) Set OPT_VLIW to include these
|
| 236 |
|
|
// double instructions as part of the new instruction set. These allow a single
|
| 237 |
|
|
// instruction to contain two instructions within. These instructions are
|
| 238 |
|
|
// designed to get more code density from the instruction set, and to hopefully
|
| 239 |
|
|
// take some pain off of the performance of the pre-fetch and instruction cache.
|
| 240 |
|
|
//
|
| 241 |
|
|
// These new instructions, however, also necessitate a change in the Zip
|
| 242 |
|
|
// CPU--the Zip CPU can no longer execute instructions atomically. It must
|
| 243 |
|
|
// now execute non-VLIW instructions, or VLIW instruction pairs, atomically.
|
| 244 |
|
|
// This logic has been added into the ZipCPU, but it has not (yet) been
|
| 245 |
|
|
// tested thoroughly.
|
| 246 |
|
|
//
|
| 247 |
|
|
// Oh, and the assembler, the debugger, and the object file dumper, and the
|
| 248 |
|
|
// simulator all need to be updated as well ....
|
| 249 |
|
|
//
|
| 250 |
|
|
`define OPT_VLIW
|
| 251 |
|
|
//
|
| 252 |
|
|
//
|
| 253 |
|
|
`endif // OPT_NEW_INSTRUCTION_SET
|
| 254 |
|
|
//
|
| 255 |
|
|
//
|
| 256 |
|
|
`endif // OPT_SINGLE_FETCH
|
| 257 |
|
|
//
|
| 258 |
|
|
//
|
| 259 |
|
|
//
|
| 260 |
|
|
// Now let's talk about peripherals for a moment. These next two defines
|
| 261 |
|
|
// control whether the DMA controller is included in the Zip System, and
|
| 262 |
|
|
// whether or not the 8 accounting counters are also included. Set these to
|
| 263 |
|
|
// include the respective peripherals, comment them out not to.
|
| 264 |
|
|
//
|
| 265 |
|
|
// `define INCLUDE_DMA_CONTROLLER
|
| 266 |
|
|
// `define INCLUDE_ACCOUNTING_COUNTERS
|
| 267 |
|
|
//
|
| 268 |
|
|
//
|
| 269 |
|
|
// `define DEBUG_SCOPE
|
| 270 |
|
|
//
|
| 271 |
|
|
`endif // CPUDEFS_H
|