| 1 | 50 | dgisselq | ////////////////////////////////////////////////////////////////////////////////
 | 
      
         | 2 | 3 | dgisselq | //
 | 
      
         | 3 |  |  | // Filename:    cpudefs.v
 | 
      
         | 4 |  |  | //
 | 
      
         | 5 |  |  | // Project:     OpenArty, an entirely open SoC based upon the Arty platform
 | 
      
         | 6 |  |  | //
 | 
      
         | 7 |  |  | // Purpose:     Some architectures have some needs, others have other needs.
 | 
      
         | 8 |  |  | //              Some of my projects need a Zip CPU with pipelining, others
 | 
      
         | 9 |  |  | //      can't handle the timing required to get the answer from the ALU
 | 
      
         | 10 |  |  | //      back into the input for the ALU.  As each different project has
 | 
      
         | 11 |  |  | //      different needs, I can either 1) reconfigure my entire baseline prior
 | 
      
         | 12 |  |  | //      to building each project, or 2) host a configuration file which contains
 | 
      
         | 13 |  |  | //      the information regarding each baseline.  This file is that
 | 
      
         | 14 |  |  | //      configuration file.  It controls how the CPU (not the system,
 | 
      
         | 15 |  |  | //      peripherals, or other) is defined and implemented.  Several options
 | 
      
         | 16 |  |  | //      are available within here, making the Zip CPU pipelined or not,
 | 
      
         | 17 |  |  | //      able to handle a faster clock with more stalls or a slower clock with
 | 
      
         | 18 |  |  | //      no stalls, etc.
 | 
      
         | 19 |  |  | //
 | 
      
         | 20 |  |  | //      This file encapsulates those control options.
 | 
      
         | 21 |  |  | //
 | 
      
         | 22 |  |  | //      The number of LUTs the Zip CPU uses varies dramatically with the
 | 
      
         | 23 |  |  | //      options defined in this file.
 | 
      
         | 24 |  |  | //
 | 
      
         | 25 |  |  | //
 | 
      
         | 26 |  |  | // OpenArty comments:
 | 
      
         | 27 |  |  | //      My goal on the OpenArty is going to be using the CPU to its fullest
 | 
      
         | 28 |  |  | //      extent.  All features shall be turned on if they exist, full pipelines,
 | 
      
         | 29 | 42 | dgisselq | //      multiplies, divides, and hopefully even the 162MHz clock.  This file
 | 
      
         | 30 | 3 | dgisselq | //      reflects that purpose.
 | 
      
         | 31 |  |  | //
 | 
      
         | 32 |  |  | //
 | 
      
         | 33 |  |  | // Creator:     Dan Gisselquist, Ph.D.
 | 
      
         | 34 |  |  | //              Gisselquist Technology, LLC
 | 
      
         | 35 |  |  | //
 | 
      
         | 36 | 50 | dgisselq | ////////////////////////////////////////////////////////////////////////////////
 | 
      
         | 37 | 3 | dgisselq | //
 | 
      
         | 38 |  |  | // Copyright (C) 2015-2016, Gisselquist Technology, LLC
 | 
      
         | 39 |  |  | //
 | 
      
         | 40 |  |  | // This program is free software (firmware): you can redistribute it and/or
 | 
      
         | 41 |  |  | // modify it under the terms of  the GNU General Public License as published
 | 
      
         | 42 |  |  | // by the Free Software Foundation, either version 3 of the License, or (at
 | 
      
         | 43 |  |  | // your option) any later version.
 | 
      
         | 44 |  |  | //
 | 
      
         | 45 |  |  | // This program is distributed in the hope that it will be useful, but WITHOUT
 | 
      
         | 46 |  |  | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | 
      
         | 47 |  |  | // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 | 
      
         | 48 |  |  | // for more details.
 | 
      
         | 49 |  |  | //
 | 
      
         | 50 | 50 | dgisselq | // You should have received a copy of the GNU General Public License along
 | 
      
         | 51 |  |  | // with this program.  (It's in the $(ROOT)/doc directory.  Run make with no
 | 
      
         | 52 |  |  | // target there if the PDF file isn't present.)  If not, see
 | 
      
         | 53 |  |  | // <http://www.gnu.org/licenses/> for a copy.
 | 
      
         | 54 |  |  | //
 | 
      
         | 55 | 3 | dgisselq | // License:     GPL, v3, as defined and found on www.gnu.org,
 | 
      
         | 56 |  |  | //              http://www.gnu.org/licenses/gpl.html
 | 
      
         | 57 |  |  | //
 | 
      
         | 58 |  |  | //
 | 
      
         | 59 | 50 | dgisselq | ////////////////////////////////////////////////////////////////////////////////
 | 
      
         | 60 |  |  | //
 | 
      
         | 61 |  |  | //
 | 
      
         | 62 | 3 | dgisselq | `ifndef CPUDEFS_H
 | 
      
         | 63 |  |  | `define CPUDEFS_H
 | 
      
         | 64 |  |  | //
 | 
      
         | 65 |  |  | //
 | 
      
         | 66 |  |  | // The first couple options control the Zip CPU instruction set, and how
 | 
      
         | 67 |  |  | // it handles various instructions within the set:
 | 
      
         | 68 |  |  | //
 | 
      
         | 69 |  |  | //
 | 
      
         | 70 |  |  | // OPT_ILLEGAL_INSTRUCTION is part of a new section of code that is supposed
 | 
      
         | 71 |  |  | // to recognize illegal instructions and interrupt the CPU whenever one such
 | 
      
         | 72 |  |  | // instruction is encountered.  The goal is to create a soft floating point
 | 
      
         | 73 |  |  | // unit via this approach, that can then be replaced with a true floating point
 | 
      
         | 74 |  |  | // unit.  As I'm not there yet, it just catches illegal instructions and
 | 
      
         | 75 |  |  | // interrupts the CPU on any such instruction--when defined.  Otherwise,
 | 
      
         | 76 |  |  | // illegal instructions are quietly ignored and their behaviour is ...
 | 
      
         | 77 |  |  | // undefined. (Many get treated like NOOPs ...)
 | 
      
         | 78 |  |  | //
 | 
      
         | 79 | 32 | dgisselq | // I recommend setting this flag so highly, that I'm likely going to remove
 | 
      
         | 80 |  |  | // the option to turn this off in future versions of this CPU.
 | 
      
         | 81 | 3 | dgisselq | //
 | 
      
         | 82 |  |  | `define OPT_ILLEGAL_INSTRUCTION
 | 
      
         | 83 |  |  | //
 | 
      
         | 84 |  |  | //
 | 
      
         | 85 |  |  | //
 | 
      
         | 86 |  |  | // OPT_MULTIPLY controls whether or not the multiply is built and included
 | 
      
         | 87 |  |  | // in the ALU by default.  Set this option and a parameter will be set that
 | 
      
         | 88 |  |  | // includes the multiply.  (This parameter may still be overridden, as with
 | 
      
         | 89 |  |  | // any parameter ...)  If the multiply is not included and
 | 
      
         | 90 |  |  | // OPT_ILLEGAL_INSTRUCTION is set, then the multiply will create an illegal
 | 
      
         | 91 |  |  | // instruction that will then trip the illegal instruction trap.
 | 
      
         | 92 |  |  | //
 | 
      
         | 93 | 42 | dgisselq | // Either not defining this value, or defining it to zero will disable the
 | 
      
         | 94 |  |  | // hardware multiply.  A value of '1' will cause the multiply to occur in one
 | 
      
         | 95 |  |  | // clock cycle only--often at the expense of the rest of the CPU's speed.
 | 
      
         | 96 |  |  | // A value of 2 will cause the multiply to have a single delay cycle, 3 will
 | 
      
         | 97 |  |  | // have two delay cycles, and 4 (or more) will have 3 delay cycles.
 | 
      
         | 98 | 3 | dgisselq | //
 | 
      
         | 99 |  |  | //
 | 
      
         | 100 | 42 | dgisselq | `define OPT_MULTIPLY    3
 | 
      
         | 101 | 3 | dgisselq | //
 | 
      
         | 102 |  |  | //
 | 
      
         | 103 | 42 | dgisselq | //
 | 
      
         | 104 | 3 | dgisselq | // OPT_DIVIDE controls whether or not the divide instruction is built and
 | 
      
         | 105 |  |  | // included into the ZipCPU by default.  Set this option and a parameter will
 | 
      
         | 106 |  |  | // be set that causes the divide unit to be included.  (This parameter may
 | 
      
         | 107 |  |  | // still be overridden, as with any parameter ...)  If the divide is not
 | 
      
         | 108 |  |  | // included and OPT_ILLEGAL_INSTRUCTION is set, then the divide will create
 | 
      
         | 109 |  |  | // an illegal instruction exception that will send the CPU into supervisor
 | 
      
         | 110 |  |  | // mode.
 | 
      
         | 111 |  |  | //
 | 
      
         | 112 |  |  | //
 | 
      
         | 113 | 30 | dgisselq | `define OPT_DIVIDE
 | 
      
         | 114 | 3 | dgisselq | //
 | 
      
         | 115 |  |  | //
 | 
      
         | 116 |  |  | //
 | 
      
         | 117 |  |  | // OPT_IMPLEMENT_FPU will (one day) control whether or not the floating point
 | 
      
         | 118 | 50 | dgisselq | // unit (once I have one) is built and included into the ZipCPU by default.
 | 
      
         | 119 | 3 | dgisselq | // At that time, if this option is set then a parameter will be set that
 | 
      
         | 120 |  |  | // causes the floating point unit to be included.  (This parameter may
 | 
      
         | 121 |  |  | // still be overridden, as with any parameter ...)  If the floating point unit
 | 
      
         | 122 |  |  | // is not included and OPT_ILLEGAL_INSTRUCTION is set, then as with the
 | 
      
         | 123 |  |  | // multiply and divide any floating point instruction will result in an illegal
 | 
      
         | 124 |  |  | // instruction exception that will send the CPU into supervisor mode.
 | 
      
         | 125 |  |  | //
 | 
      
         | 126 |  |  | //
 | 
      
         | 127 |  |  | // `define      OPT_IMPLEMENT_FPU
 | 
      
         | 128 |  |  | //
 | 
      
         | 129 |  |  | //
 | 
      
         | 130 |  |  | //
 | 
      
         | 131 |  |  | //
 | 
      
         | 132 | 50 | dgisselq | // OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and
 | 
      
         | 133 | 3 | dgisselq | // whether or not it can issue one instruction per clock.  When set, the
 | 
      
         | 134 |  |  | // prefetch has no cache, and only one instruction is fetched at a time.
 | 
      
         | 135 | 50 | dgisselq | // This effectively sets the CPU so that only one instruction is ever
 | 
      
         | 136 |  |  | // in the pipeline at once, and hence you may think of this as a "kill
 | 
      
         | 137 | 3 | dgisselq | // pipeline" option.  However, since the pipelined fetch component uses so
 | 
      
         | 138 |  |  | // much area on the FPGA, this is an important option to use in trimming down
 | 
      
         | 139 |  |  | // used area if necessary.  Hence, it needs to be maintained for that purpose.
 | 
      
         | 140 |  |  | // Be aware, though, it will drop your performance by a factor between 2x and
 | 
      
         | 141 |  |  | // 3x.
 | 
      
         | 142 |  |  | //
 | 
      
         | 143 |  |  | // We can either pipeline our fetches, or issue one fetch at a time.  Pipelined
 | 
      
         | 144 |  |  | // fetches are more complicated and therefore use more FPGA resources, while
 | 
      
         | 145 | 50 | dgisselq | // single fetches will cause the CPU to stall for about 5 clocks each
 | 
      
         | 146 | 3 | dgisselq | // instruction cycle, effectively reducing the instruction count per clock to
 | 
      
         | 147 |  |  | // about 0.2.  However, the area cost may be worth it.  Consider:
 | 
      
         | 148 |  |  | //
 | 
      
         | 149 |  |  | //      Slice LUTs              ZipSystem       ZipCPU
 | 
      
         | 150 |  |  | //      Single Fetching         2521            1734
 | 
      
         | 151 |  |  | //      Pipelined fetching      2796            2046
 | 
      
         | 152 |  |  | //      (These numbers may be dated, but should still be representative ...)
 | 
      
         | 153 |  |  | //
 | 
      
         | 154 |  |  | // I recommend only defining this if you "need" to, if area is tight and
 | 
      
         | 155 |  |  | // speed isn't as important.  Otherwise, just leave this undefined.
 | 
      
         | 156 |  |  | //
 | 
      
         | 157 |  |  | // `define      OPT_SINGLE_FETCH
 | 
      
         | 158 |  |  | //
 | 
      
         | 159 |  |  | //
 | 
      
         | 160 |  |  | //
 | 
      
         | 161 |  |  | // The next several options are pipeline optimization options.  They make no
 | 
      
         | 162 |  |  | // sense in a single instruction fetch mode, hence we `ifndef them so they
 | 
      
         | 163 |  |  | // are only defined if we are in a full pipelined mode (i.e. OPT_SINGLE_FETCH
 | 
      
         | 164 |  |  | // is not defined).
 | 
      
         | 165 |  |  | //
 | 
      
         | 166 |  |  | `ifndef OPT_SINGLE_FETCH
 | 
      
         | 167 |  |  | //
 | 
      
         | 168 |  |  | //
 | 
      
         | 169 |  |  | //
 | 
      
         | 170 | 50 | dgisselq | // OPT_PIPELINED is the natural result and opposite of using the single
 | 
      
         | 171 | 3 | dgisselq | // instruction fetch unit.  If you are not using that unit, the ZipCPU will
 | 
      
         | 172 | 50 | dgisselq | // be pipelined.  The option is defined here more for readability than
 | 
      
         | 173 | 3 | dgisselq | // anything else, since OPT_PIPELINED makes more sense than OPT_SINGLE_FETCH,
 | 
      
         | 174 |  |  | // well ... that and it does a better job of explaining what is going on.
 | 
      
         | 175 |  |  | //
 | 
      
         | 176 |  |  | // In other words, leave this define alone--lest you break the ZipCPU.
 | 
      
         | 177 |  |  | //
 | 
      
         | 178 |  |  | `define OPT_PIPELINED
 | 
      
         | 179 |  |  | //
 | 
      
         | 180 |  |  | //
 | 
      
         | 181 |  |  | //
 | 
      
         | 182 |  |  | // OPT_TRADITIONAL_PFCACHE allows you to switch between one of two prefetch
 | 
      
         | 183 |  |  | // caches.  If enabled, a more traditional cache is implemented.  This more
 | 
      
         | 184 |  |  | // traditional cache (currently) uses many more LUTs, but it also reduces
 | 
      
         | 185 |  |  | // the stall count tremendously over the alternative hacked pipeline cache.
 | 
      
         | 186 |  |  | // (The traditional pfcache is also pipelined, whereas the pipeline cache
 | 
      
         | 187 |  |  | // implements a windowed approach to caching.)
 | 
      
         | 188 |  |  | //
 | 
      
         | 189 |  |  | // If you have the fabric to support this option, I recommend including it.
 | 
      
         | 190 |  |  | //
 | 
      
         | 191 |  |  | `define OPT_TRADITIONAL_PFCACHE
 | 
      
         | 192 |  |  | //
 | 
      
         | 193 |  |  | //
 | 
      
         | 194 |  |  | //
 | 
      
         | 195 |  |  | // OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
 | 
      
         | 196 |  |  | // as possible, to avoid as many pipeline stalls on a branch as possible.
 | 
      
         | 197 |  |  | // It's not tremendously successful yet--BRA's still suffer stalls,
 | 
      
         | 198 |  |  | // but I intend to keep working on this approach until the number of stalls
 | 
      
         | 199 |  |  | // gets down to one or (ideally) zero.  (With the OPT_TRADITIONAL_PFCACHE, this
 | 
      
         | 200 |  |  | // gets down to a single stall cycle ...)  That way a "BRA" can be used as the
 | 
      
         | 201 |  |  | // compiler's branch prediction optimizer: BRA's barely stall, while branches
 | 
      
         | 202 |  |  | // on conditions will always suffer about 4 stall cycles or so.
 | 
      
         | 203 |  |  | //
 | 
      
         | 204 |  |  | // I recommend setting this flag, so as to turn early branching on.
 | 
      
         | 205 |  |  | //
 | 
      
         | 206 |  |  | `define OPT_EARLY_BRANCHING
 | 
      
         | 207 |  |  | //
 | 
      
         | 208 |  |  | //
 | 
      
         | 209 |  |  | //
 | 
      
         | 210 |  |  | // OPT_PIPELINED_BUS_ACCESS controls whether or not LOD/STO instructions
 | 
      
         | 211 |  |  | // can take advantage of pipelined bus instructions.  To be eligible, the
 | 
      
         | 212 |  |  | // operations must be identical (cannot pipeline loads and stores, just loads
 | 
      
         | 213 |  |  | // only or stores only), and the addresses must either be identical or one up
 | 
      
         | 214 |  |  | // from the previous address.  Further, the load/store string must all have
 | 
      
         | 215 |  |  | // the same conditional.  This approach gains the most use, in my humble
 | 
      
         | 216 |  |  | // opinion, when saving registers to or restoring registers from the stack
 | 
      
         | 217 |  |  | // at the beginning/end of a procedure, or when doing a context swap.
 | 
      
         | 218 |  |  | //
 | 
      
         | 219 |  |  | // I recommend setting this flag, for performance reasons, especially if your
 | 
      
         | 220 |  |  | // wishbone bus can handle pipelined bus accesses.
 | 
      
         | 221 |  |  | //
 | 
      
         | 222 |  |  | `define OPT_PIPELINED_BUS_ACCESS
 | 
      
         | 223 |  |  | //
 | 
      
         | 224 |  |  | //
 | 
      
         | 225 |  |  | //
 | 
      
         | 226 |  |  | //
 | 
      
         | 227 |  |  | //
 | 
      
         | 228 | 50 | dgisselq | // The instruction set defines an optional compressed instruction set (CIS)
 | 
      
         | 229 |  |  | // complement.  These were at one time erroneously called Very Long Instruction
 | 
      
         | 230 |  |  | // Words.  They are more appropriately referred to as compressed instructions.
 | 
      
         | 231 |  |  | // The compressed instruction format allows two instructions to be packed into
 | 
      
         | 232 |  |  | // the same instruction word.  Some instructions can be compressed, not all.
 | 
      
         | 233 |  |  | // Compressed instructions take the same time to complete.  Set OPT_CIS to
 | 
      
         | 234 |  |  | // include these double instructions as part of the instruction set.  These
 | 
      
         | 235 |  |  | // instructions are designed to get more code density from the instruction set,
 | 
      
         | 236 |  |  | // and to hopefully take some pain off of the performance of the pre-fetch and
 | 
      
         | 237 |  |  | // instruction cache.
 | 
      
         | 238 | 3 | dgisselq | //
 | 
      
         | 239 |  |  | // These new instructions, however, also necessitate a change in the Zip
 | 
      
         | 240 |  |  | // CPU--the Zip CPU can no longer execute instructions atomically.  It must
 | 
      
         | 241 | 50 | dgisselq | // now execute non-CIS instructions, or CIS instruction pairs, atomically. 
 | 
      
         | 242 | 3 | dgisselq | // This logic has been added into the ZipCPU, but it has not (yet) been
 | 
      
         | 243 |  |  | // tested thoroughly.
 | 
      
         | 244 |  |  | //
 | 
      
         | 245 |  |  | //
 | 
      
         | 246 | 50 | dgisselq | `define OPT_CIS
 | 
      
         | 247 | 3 | dgisselq | //
 | 
      
         | 248 |  |  | //
 | 
      
         | 249 |  |  | //
 | 
      
         | 250 |  |  | `endif  // OPT_SINGLE_FETCH
 | 
      
         | 251 |  |  | //
 | 
      
         | 252 |  |  | //
 | 
      
         | 253 |  |  | //
 | 
      
         | 254 |  |  | // Now let's talk about peripherals for a moment.  These next two defines
 | 
      
         | 255 |  |  | // control whether the DMA controller is included in the Zip System, and
 | 
      
         | 256 |  |  | // whether or not the 8 accounting counters are also included.  Set these to
 | 
      
         | 257 |  |  | // include the respective peripherals, comment them out not to.
 | 
      
         | 258 |  |  | //
 | 
      
         | 259 |  |  | `define INCLUDE_DMA_CONTROLLER
 | 
      
         | 260 |  |  | `define INCLUDE_ACCOUNTING_COUNTERS
 | 
      
         | 261 |  |  | //
 | 
      
         | 262 |  |  | //
 | 
      
         | 263 | 30 | dgisselq | `define DEBUG_SCOPE	// NOTE(review): purpose not described in this file; presumably enables debug-scope support -- confirm where this macro is tested
 | 
      
         | 264 | 3 | dgisselq | //
 | 
      
         | 265 | 49 | dgisselq | // The following is experimental:
 | 
      
         | 266 |  |  | // `define      OPT_NO_USERMODE // Savings: about 143 LUTs or so
 | 
      
         | 267 |  |  | //
 | 
      
         | 268 | 3 | dgisselq | `endif  // CPUDEFS_H
 |