URL
https://opencores.org/ocsvn/ssbcc/ssbcc/trunk
Subversion Repositories ssbcc
Compare Revisions
- This comparison shows the changes necessary to convert path
/ssbcc
- from Rev 6 to Rev 7
- ↔ Reverse comparison
Rev 6 → Rev 7
/trunk/lib/9x8/math.s
2,40 → 2,56
; |
; Unsigned arithmetic operations. |
|
; Notation: |
; ux_n is the n'th byte of ux where n=0 is the LSB |
; example: ( u0_0 u0_1 ) are the LSB and MSB of a 2-byte 16-bit value. |
; u0 and u1 are two input vectors, us is their sum |
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; |
; Add two unsigned 8-bit values to produce an unsigned 16-bit value. |
; Method: Calculate the sum of the msb of the two raw values and the msb of the |
; sums of the 7 lsbs of the two values to get the msb of the sum and |
; the lsb of the MSB of the 16-bit sum. |
; 36 instructions |
; Method: calculate and push the carry bit onto the return stack, calculate the |
; 8-bit sum of the two 8-bit values, use the previously stored and |
; computed carry bit as the MSB of the 16-bit return value. |
; 6 instructions |
; |
; ( u1 u2 - (u1+u2)_LSB (u1+u2)_MSB ) |
; ( u0 u1 - us_0 us_1 ) |
.function math__add_u8_u8_u16 |
; and the two 7 lsbs and put the 7 lsb of that sum on the return stack |
over 0x7F & over 0x7F & + dup 0x7F & >r |
; add the msb of the sum of the 7 lsbs and the two inputs |
0x80 & <<msb swap 0x80 & <<msb + swap 0x80 & <<msb + |
; construct the MSB of the sum as bit 1 of the sums of the msbs |
dup 0>> swap |
; set the msb of the LSB if the lsb of the sum of the msbs is non-zero |
0x01 & 0<> 0x80 & r> or |
; swap the orders so that the MSB is on the top of the data stack |
.return(swap) |
; ( u0 u1 - u0 u1 ) r:( - c ) |
; ( u0 u1 - us_0 us_1 ) |
+c >r + r> |
.return |
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; miscellaneous unsigned addition operations |
|
; ( u0_0 u0_1 u1 - us_0 us_1 ) |
; 9 instructions |
.function math__add_u16_u8_u16 |
swap >r .call(math__add_u8_u8_u16) r> .return(+) |
; ( u0_0 u0_1 u1 - u0_0 u1 ) r:( - u0_1 ) |
swap >r |
; ( u0_0 u1 - us_0 c_0 ) |
+c >r + r> |
; ( c_0 - us_1 ) r:( u0_1 - ) |
r> .return(+) |
|
; ( u0_0 u0_1 u1 - us_0 us_1 us_2 ) |
; 13 instructions |
.function math__add_u16_u8_u24 |
swap >r .call(math__add_u8_u8_u16) r> .call(math__add_u8_u8_u16) .return |
; ( u0_0 u0_1 u1 - u0_0 u1 ) r:( - u0_1 ) |
swap >r |
; ( u0_0 u1 - us_0 c_0 ) |
+c >r + r> |
; ( c_0 - us_1 us_2 ) r:( u0_1 - ) |
r> +c >r + r> |
.return |
|
; Add an unsigned 8-bit value to an unsigned 24-bit value and keep a 24-bit sum. |
; The final "+" is an 8-bit addition, so any carry out of the most significant |
; byte is not returned. |
; ( u0_0 u0_1 u0_2 u1 - us_0 us_1 us_2 ) |
.function math__add_u24_u8_u24 |
; Set the MSB aside, then add u1 to the lower two bytes. |
; ( u0_0 u0_1 u0_2 u1 - us_0 us_1 c_1 ) r:( - u0_2 ) |
swap >r .call(math__add_u16_u8_u24) |
; Add the carry byte from the lower two bytes to the saved MSB. |
; ( c_1 - us_2 ) r:( u0_2 - ) |
r> .return(+) |
|
; ( u0_0 u0_1 u0_2 u1 - us_0 us_1 us_2 us_3 ) |
.function math__add_u24_u8_u32 |
swap >r .call(math__add_u16_u8_u24) |
r> .call(math__add_u8_u8_u16) |
/trunk/core/9x8/build/xilinx-xc3s50a/run
1,22 → 1,22
#!/bin/bash |
# Copyright 2013, Sinclair R.F., Inc. |
# Copyright 2013-2014, Sinclair R.F., Inc. |
# Test against Xilinx Spartan 3A and ISE. |
# |
# To replicate the performance runs: |
# - Minimum resource utilization, use -5 speed grade, 15 ns clock, XST and MAP |
# set for minimum area. ISE 11.4, 12.4, 13.3, and 14.4 gave identical |
# results: 130 slices, 231 LUTs |
# - Maximum speed |
# -4 11.4 8.1 ns "-timing -t 57" ==> 8.102 ns (123.4 MHz) |
# -4 12.4 8.2 ns "-timing -t 25" ==> 8.165 ns (122.5 MHz) |
# -4 13.3 8.0 ns "-timing -t 84" ==> 8.074 ns (123.3 MHz) |
# -4 14.4 8.1 ns "-timing -t 22" ==> 8.127 ns (123.0 MHz) |
# -5 11.4 6.7 ns "-timing -t 83" ==> 6.757 ns (148.0 MHz) |
# -5 12.4 6.8 ns "-timing -t 24" ==> 6.771 ns (147.7 MHz) |
# -5 13.3 6.7 ns "-timing -t 53" ==> 6.744 ns (148.3 MHz) |
# -5 14.4 6.8 ns "-timing -t 31" ==> 6.773 ns (147.6 MHz) |
# results: 129 slices, 232 LUTs |
# - Maximum speed (rounded down) |
# -4 11.4 8.1 ns "-timing -t 76" ==> 8.093 ns (123.5 MHz) |
# -4 12.4 8.1 ns "-timing -t 68" ==> 8.131 ns (122.9 MHz) |
# -4 13.3 8.1 ns "-timing -t 84" ==> 8.117 ns (123.1 MHz) |
# -4 14.7 8.1 ns "-timing -t 50" ==> 8.097 ns (123.5 MHz) |
# -5 11.4 6.7 ns "-timing -t 91" ==> 6.694 ns (149.3 MHz) |
# -5 12.4 6.7 ns "-timing -t 57" ==> 6.751 ns (148.1 MHz) |
# -5 13.3 6.7 ns "-timing -t 19" ==> 6.749 ns (148.1 MHz) |
# -5 14.7 6.7 ns "-timing -t 59" ==> 6.689 ns (149.4 MHz) |
|
TESTED="11.4 12.4 13.3 14.4"; |
TESTED="11.4 12.4 13.3 14.4 14.7"; |
TEST_BENCHES=""; |
TEST_BENCHES+=" uc_led"; |
TEST_BENCHES+=" uc_peripherals"; |
/trunk/core/9x8/build/xilinx-xc6slx4/run
1,25 → 1,23
#!/bin/bash |
# Copyright 2013, Sinclair R.F., Inc. |
# Copyright 2013-2014, Sinclair R.F., Inc. |
# Test against Xilinx Spartan 6 and ISE. |
# |
# To replicate the performance runs (using uc_led) |
# - Minimum resource utilization, use -3 speed grade, 10 ns clock, XST and MAP |
# set for minimum area, and ISE 13.3 or 14.4 |
# - Maximum speed |
# -1L 11.4 9.0 ns "-timing -t 52" ==> 9.112 ns (109.7 MHz) |
# -1L 12.4 9.4 ns "-timing -t 52" ==> 9.434 ns (106.0 MHz) |
# -1L 13.3 9.5 ns "-timing -t 36" ==> 9.657 ns (103.6 MHz) |
# -1L 14.4 9.5 ns "-timing -t 36" ==> 9.657 ns (103.6 MHz) |
# -2 11.4 7.5 ns "-timing -t 50" ==> 7.948 ns (125.8 MHz) |
# -2 12.4 7.0 ns "-timing -t 6" ==> 7.544 ns (132.6 MHz) |
# -2 13.3 5.7 ns "-timing -t 69" ==> 5.682 ns (176.0 MHz) |
# -2 14.4 5.7 ns "-timing -t 69" ==> 5.682 ns (176.0 MHz) |
# -3 11.4 5.6 ns "-timing -t 50" ==> 5.700 ns (175.4 MHz) |
# -3 12.4 5.0 ns "-timing -t 28" ==> 5.064 ns (197.5 MHz) |
# -3 13.3 5.0 ns "-timing -t 12" ==> 5.000 ns (200.0 MHz) |
# -3 14.4 5.0 ns "-timing -t 12" ==> 5.000 ns (200.0 MHz) |
# - results for ISE 13.3 and 14.4 or 14.7 are identical |
# - Maximum speed (rounded down) |
# -1L 11.4 9.6 ns "-timing -t 53" ==> 9.575 ns (104.4 MHz) |
# -1L 12.4 9.6 ns "-timing -t 52" ==> 9.906 ns (100.9 MHz) |
# -1L 13.3 9.8 ns "-timing -t 8" ==> 9.844 ns (101.5 MHz) |
# -2 11.4 8.2 ns "-timing -t 37" ==> 8.166 ns (122.4 MHz) |
# -2 12.4 7.7 ns "-timing -t 56" ==> 7.955 ns (125.7 MHz) |
# -2 13.3 5.8 ns "-timing -t 80" ==> 5.781 ns (172.9 MHz) |
# -3 11.4 5.9 ns "-timing -t 45" ==> 5.927 ns (167.7 MHz) |
# -3 12.4 5.4 ns "-timing -t 63" ==> 5.162 ns (193.7 MHz) |
# -3 13.3 5.3 ns "-timing -t 14" ==> 5.162 ns (193.7 MHz) |
|
TESTED="11.4 12.4 13.3 14.4 14.5"; |
TESTED="11.4 12.4 13.3 14.7"; |
|
TEST_BENCHES=""; |
TEST_BENCHES+=" uc_combine_instr_ds"; |
/trunk/core/9x8/build/uc/.gitignore
1,3 → 1,5
*.9x8-meta |
*.mem |
*.v |
*_pkg.vhd |
adder_16bit.s |
/trunk/core/9x8/build/vivado-xc7/vivado/.gitignore
1,5 → 1,8
*.xpr |
.srcs |
build.cache |
build.runs |
build.srcs |
clock.xdc |
vivado*.jou |
vivado*.log |
/trunk/core/9x8/build/vivado-xc7/vivado/make
3,13 → 3,30
# Script to build the micro controller for various 7-series FPGA. |
# |
# Usage: |
# source /opt/Xilinx/Vivado/2014.1/settings64.sh |
# export DEVICE=xc7a35t-3cpg236 |
# export PERIOD=100 |
# [export OPTART="-propconst -sweep -remap -resynth_area"] |
# ./make |
# ./make -v 2014.2 -d xc7a35t-3cpg236 -p 100 [-o "-propconst -sweep -remap -resynth_area"] |
# Note: See the pinouts directory for a list of available devices. |
|
# Parse the command-line options. |
# -h prints the usage message and exits |
# -d sets DEVICE (selects pinouts/${DEVICE}.xdc) |
# -o sets OPTPAR (optional arguments passed to opt_design) |
# -p sets PERIOD (clock period) |
# -v sets VERSION (Vivado version, used to locate the installation) |
while getopts "hd:o:p:v:" OPTNAME; do |
case ${OPTNAME} in |
( h ) echo "Usage: make -v Vivado_version -d device -p period [-o opt_design_options]" > /dev/stderr; |
echo "Where:" > /dev/stderr; |
echo " Vivado_version is a Vivado version number (e.g., 2014.2)" > /dev/stderr; |
echo " device is one of the devices listed in the pinouts directory" > /dev/stderr; |
echo " period is the clock period in ns" > /dev/stderr; |
echo " opt_design_options are optional arguments for opt_design" > /dev/stderr; |
exit 0;; |
( d ) DEVICE="${OPTARG}";; |
( o ) OPTPAR="${OPTARG}";; |
( p ) PERIOD="${OPTARG}";; |
( v ) VERSION="${OPTARG}";; |
esac |
done |
|
# Locate the requested Vivado installation and load its environment. |
# An empty version would otherwise resolve to the parent directory, pass the |
# directory test below, and only fail when settings64.sh cannot be sourced. |
if [ -z "${VERSION}" ]; then |
echo "FATAL ERROR: Vivado version not specified (use -v)" > /dev/stderr; |
exit 1; |
fi |
VERSION="/opt/Xilinx/Vivado/${VERSION}"; |
if [ ! -d "${VERSION}" ]; then |
echo "FATAL ERROR: \"${VERSION}\" not found" > /dev/stderr; |
exit 1; |
fi |
source "${VERSION}/settings64.sh"; |
|
# Ensure a version of vivado has been specified. |
|
if [ -z "`which vivado 2> /dev/null`" ]; then |
61,7 → 78,7
# Incorporate device-dependent pinout. |
read_xdc pinouts/${DEVICE}.xdc |
# Place and route the design. |
opt_design ${OPTARG} |
opt_design ${OPTPAR} |
place_design |
route_design |
# Performance reports. |
69,10 → 86,23
report_timing_summary |
EOF |
|
rm -rf clock.xdc; |
|
# |
# Extract desired performance statistics from the log file. |
# |
|
# TODO |
# Scan vivado.log and print a short resource and timing summary. |
# - usage_match is set on the "1. Slice Logic", "2. Slice Logic Distribution", |
#   and "3. Memory" headings and cleared on any other line that starts with a |
#   single digit followed by a period. |
# - While usage_match is set, the LUT, slice, and RAMB18 counts are read from |
#   fixed whitespace-separated field positions of the matching lines. |
# - slack is the smallest value found on lines beginning with "Slack"; it |
#   stays at 999999 if no such line is present. |
gawk ' |
BEGIN { usage_match=0; slack=999999; } |
/^1\. Slice Logic/ || /^2\. Slice Logic Distribution/ || /^3\. Memory/ {usage_match=1; next; } |
/^[0-9]\./ { usage_match=0; next; } |
usage_match && /Slice LUTs/ { LUTs=$5; } |
usage_match && ($2 == "Slice") { slices=$4; } |
usage_match && ($2 == "RAMB18") { ramb18=$4; } |
/^Slack/ { this_slack=1*$4; if (this_slack < slack) slack=this_slack; } |
END { |
print "*** Performance Statistics ***"; |
print "LUTs =",LUTs; |
print "Slices =",slices; |
print "Slack =",slack; |
print "RAMB18 =",ramb18; |
} |
' vivado.log |
/trunk/core/9x8/build/vivado-xc7/uc_led/make
24,10 → 24,12
; |
|
# Build the micro controller. |
../../../../../ssbcc \ |
-q \ |
--define-clog2 \ |
uc_led.9x8 \ |
../../../../../ssbcc \ |
-q \ |
--synth-instr-mem '(* ROM_STYLE="BLOCK" *)' \ |
--rand-instr-mem \ |
--define-clog2 \ |
uc_led.9x8 \ |
|| exit 1; |
|
# Generate the core. |
/trunk/core/9x8/build/vivado-xc7/README
1,15 → 1,23
Size and speed results for 7-Series family |
Note: The Zynq devices use the Artix-7 and Kintex-7 fabric. |
|
DEVICE and PERIOD are set before running vivado/make. Fastest speed is |
determined by the smallest PERIOD that doesn't fail timing. |
Fastest speed is determined by the smallest PERIOD that doesn't fail timing. |
The reported speed is rounded down from 1.e3/PERIOD. |
|
Use |
export OPTART="-propconst -sweep -remap -resynth_area" |
./make ... -p 100 -o "-propconst -sweep -remap -resynth_area" |
to reduce the resource utilization. |
|
Note: Vivado is not reliably generating a Block RAM for the opcode memory for |
the speed test. These tests need to be re-run when a work-around is implemented |
in the computer compiler. |
|
TOOL DEVICE PERIOD SPEED RESOURCE UTILIZATION |
Vivado 2014.1 xc7a35t-3cpg236 100 N/A 41 slices, 151 slice LUTs |
Vivado 2014.1 xc7a35t-3cpg236 3.16 316.5 68 slices, 237 slice LUTs |
Vivado 2014.1 xc7k70t-3fbg484 100 N/A 55 slices, 196 slice LUTs |
Vivado 2014.1 xc7k70t-3fbg484 2.11 473.9 72 slices, 223 slice LUTs |
Vivado 2014.1 xc7a35t-3cpg236 100 N/A 48 slices, 163 slice LUTs |
Vivado 2014.1 xc7a35t-3cpg236 3.02 333.1 68 slices, 220 slice LUTs |
Vivado 2014.1 xc7k70t-3fbg484 100 N/A 47 slices, 158 slice LUTs |
Vivado 2014.1 xc7k70t-3fbg484 2.04 490.1 64 slices, 206 slice LUTs |
Vivado 2014.2 xc7a35t-3cpg236 100 N/A 50 slices, 170 slice LUTs |
Vivado 2014.2 xc7a35t-3cpg236 3.05 327.8 69 slices, 227 slice LUTs |
Vivado 2014.2 xc7k70t-3fbg484 100 N/A 44 slices, 158 slice LUTs |
Vivado 2014.2 xc7k70t-3fbg484 2.00 500.0 67 slices, 221 slice LUTs |
/trunk/core/9x8/display_opcode.v
13,6 → 13,8
9'b00_0001_000 : s_opcode_name = "dup"; |
9'b00_0001_001 : s_opcode_name = "r@ "; |
9'b00_0001_010 : s_opcode_name = "ovr"; |
9'b00_0001_011 : s_opcode_name = "+c "; |
9'b00_0001_111 : s_opcode_name = "-c "; |
9'b00_0010_010 : s_opcode_name = "swp"; |
9'b00_0011_000 : s_opcode_name = "+ "; |
9'b00_0011_100 : s_opcode_name = "- "; |
/trunk/core/9x8/doc/opcodes.html
1,5 → 1,5
|
<!-- Copyright 2012, Sinclair R.F., Inc. --> |
<!-- Copyright 2012-2014, Sinclair R.F., Inc. --> |
<html> |
<title> |
Opcodes |
6,7 → 6,7
</title> |
<body> |
<h1>Opcodes for the 9x8 micro controller</h1><br/> |
Copyright 2012, Sinclair R.F., Inc.<br/><br/> |
Copyright 2012, 2014, Sinclair R.F., Inc.<br/><br/> |
This document describes the opcodes for the 9x8 micro controller. The first |
section lists the opcodes in alphabetic order, the second lists them by their |
numerical value, and the final section describes each one in detail.<br/><br/> |
19,9 → 19,11
Alphabetic listing: |
<a href="#&">&</a>, |
<a href="#+">+</a>, |
<a href="#+c">+c</a>, |
<a href="#-">-</a>, |
<a href="#-1<>">-1<></a>, |
<a href="#-1=">-1=</a>, |
<a href="#-c">-c</a>, |
<a href="#0<>">0<></a>, |
<a href="#0=">0=</a>, |
<a href="#0>>">0>></a>, |
110,6 → 112,14
<td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>0</td> |
<td align="left">push a duplicate of the next-to-top of the data stack onto the data stack</td> |
</tr> |
<th align="left"><a href="#+c">+c</a></th> |
<td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td> |
<td align="left">push the carry bit from N+T onto the data stack</td> |
</tr> |
<th align="left"><a href="#-c">-c</a></th> |
<td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>1</td><td>1</td><td>1</td> |
<td align="left">push the carry bit from N-T onto the data stack</td> |
</tr> |
<th align="left"><a href="#swap">swap</a></th> |
<td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td> |
<td align="left">swap the top and the next-to-top of the data stack</td> |
250,6 → 260,16
T ← N + T<br/> |
N ← <tt>stack--</tt><br/> |
<br/> |
<h2><a name="+c">Instruction: +c</a></h2> |
<b>Description:</b> Push the carry bit from N+T onto the data |
stack.<br/><br/> |
<b>Operation:</b><br/><br/> |
PC ← PC+1<br/> |
R and <tt>return</tt> unchanged<br/> |
T ← the msb of the 9-bit sum of N+T<br/> |
N ← T<br/> |
++stack ← N<br/> |
<br/> |
<h2><a name="-">Instruction: -</a></h2> |
<b>Description:</b> Pop the data stack and replace the top with the |
8-bit difference of the previous next-to-top and top (N - T).<br/><br/> |
277,6 → 297,16
T ← 0xFF if T=0xFF, 0x00 otherwise<br/> |
N and <tt>stack</tt> unchanged<br/> |
<br/> |
<h2><a name="-c">Instruction: -c</a></h2> |
<b>Description:</b> Push the carry bit from N-T onto the data |
stack.<br/><br/> |
<b>Operation:</b><br/><br/> |
PC ← PC+1<br/> |
R and <tt>return</tt> unchanged<br/> |
T ← the msb of the 9-bit difference N-T<br/> |
N ← T<br/> |
++stack ← N<br/> |
<br/> |
<h2><a name="0<>">Instruction: 0<></a></h2> |
<b>Description:</b> Set the top of the stack to all ones if the previous |
value was not all zeros, otherwise set it to all zeros.<br/><br/> |
/trunk/core/9x8/tb/core/core.s
71,6 → 71,10
; Test "call" and "callc" opcodes. |
.call(test_callc,3) drop |
|
; Test the carry bit operations |
0xBF 0x41 +c - +c - +c drop drop drop |
0x00 0x02 -c - -c - -c drop drop drop |
|
; Hang in an infinite loop. |
:infinite .jump(infinite) |
|
/trunk/core/9x8/tb/core/tb.v
17,7 → 17,7
initial begin |
repeat (5) @ (posedge s_clk); |
s_rst <= 1'b0; |
repeat (241) @ (posedge s_clk); |
repeat (261) @ (posedge s_clk); |
@ (negedge s_clk); // ensure $write's finish before the $finish is performed |
$finish; |
end |
/trunk/core/9x8/tb/core/tb.good
217,32 → 217,52
0ac 068 fetch0 : 1f 07 06 : 000 1f |
0ad 054 drop : 1e 00 07 : 000 1f |
0ae 054 drop : 1d 00 00 : 000 1f |
0af 1b6 push : 1e 00 b6 : 000 1f |
0af 1ca push : 1e 00 ca : 000 1f |
0b0 0c0 call : 1d 00 00 : 0b2 00 |
0b1 103 push : 1e 00 03 : 0b2 00 |
0b6 008 dup : 1f 03 03 : 0b2 00 |
0b7 05c 1- : 1f 03 02 : 0b2 00 |
0b8 1b6 push : 00 02 b6 : 0b2 00 |
0b9 0e0 callc : 1f 03 02 : 0bb 01 |
0ba 000 nop : 1f 03 02 : 0bb 01 |
0b6 008 dup : 00 02 02 : 0bb 01 |
0b7 05c 1- : 00 02 01 : 0bb 01 |
0b8 1b6 push : 01 01 b6 : 0bb 01 |
0b9 0e0 callc : 00 02 01 : 0bb 02 |
0ba 000 nop : 00 02 01 : 0bb 02 |
0b6 008 dup : 01 01 01 : 0bb 02 |
0b7 05c 1- : 01 01 00 : 0bb 02 |
0b8 1b6 push : 02 00 b6 : 0bb 02 |
0b9 0e0 callc : 01 01 00 : 0bb 02 |
0ba 000 nop : 01 01 00 : 0bb 02 |
0bb 028 return : 01 01 00 : 0bb 01 |
0bc 018 + : 00 02 01 : 0bb 01 |
0bb 028 return : 00 02 01 : 0b2 00 |
0bc 018 + : 1f 03 03 : 0b2 00 |
0bb 028 return : 1f 03 03 : 000 1f |
0bc 018 + : 1e 00 06 : 000 1f |
0ca 008 dup : 1f 03 03 : 0b2 00 |
0cb 05c 1- : 1f 03 02 : 0b2 00 |
0cc 1ca push : 00 02 ca : 0b2 00 |
0cd 0e0 callc : 1f 03 02 : 0cf 01 |
0ce 000 nop : 1f 03 02 : 0cf 01 |
0ca 008 dup : 00 02 02 : 0cf 01 |
0cb 05c 1- : 00 02 01 : 0cf 01 |
0cc 1ca push : 01 01 ca : 0cf 01 |
0cd 0e0 callc : 00 02 01 : 0cf 02 |
0ce 000 nop : 00 02 01 : 0cf 02 |
0ca 008 dup : 01 01 01 : 0cf 02 |
0cb 05c 1- : 01 01 00 : 0cf 02 |
0cc 1ca push : 02 00 ca : 0cf 02 |
0cd 0e0 callc : 01 01 00 : 0cf 02 |
0ce 000 nop : 01 01 00 : 0cf 02 |
0cf 028 return : 01 01 00 : 0cf 01 |
0d0 018 + : 00 02 01 : 0cf 01 |
0cf 028 return : 00 02 01 : 0b2 00 |
0d0 018 + : 1f 03 03 : 0b2 00 |
0cf 028 return : 1f 03 03 : 000 1f |
0d0 018 + : 1e 00 06 : 000 1f |
0b2 054 drop : 1d 00 00 : 000 1f |
0b3 1b3 push : 1e 00 b3 : 000 1f |
0b4 080 jump : 1d 00 00 : 000 1f |
0b5 000 nop : 1d 00 00 : 000 1f |
0b3 1b3 push : 1e 00 b3 : 000 1f |
0b3 1bf push : 1e 00 bf : 000 1f |
0b4 141 push : 1f bf 41 : 000 1f |
0b5 00b +c : 00 41 01 : 000 1f |
0b6 01c - : 1f bf 40 : 000 1f |
0b7 00b +c : 00 40 00 : 000 1f |
0b8 01c - : 1f bf 40 : 000 1f |
0b9 00b +c : 00 40 00 : 000 1f |
0ba 054 drop : 1f bf 40 : 000 1f |
0bb 054 drop : 1e 00 bf : 000 1f |
0bc 054 drop : 1d 00 00 : 000 1f |
0bd 100 push : 1e 00 00 : 000 1f |
0be 102 push : 1f 00 02 : 000 1f |
0bf 00f -c : 00 02 01 : 000 1f |
0c0 01c - : 1f 00 01 : 000 1f |
0c1 00f -c : 00 01 01 : 000 1f |
0c2 01c - : 1f 00 00 : 000 1f |
0c3 00f -c : 00 00 00 : 000 1f |
0c4 054 drop : 1f 00 00 : 000 1f |
0c5 054 drop : 1e 00 00 : 000 1f |
0c6 054 drop : 1d 00 00 : 000 1f |
0c7 1c7 push : 1e 00 c7 : 000 1f |
0c8 080 jump : 1d 00 00 : 000 1f |
0c9 000 nop : 1d 00 00 : 000 1f |
0c7 1c7 push : 1e 00 c7 : 000 1f |
/trunk/core/9x8/display_trace.v
37,6 → 37,8
9'b00_0001_000 : s_opcode_name = "dup "; |
9'b00_0001_001 : s_opcode_name = "r@ "; |
9'b00_0001_010 : s_opcode_name = "over "; |
9'b00_0001_011 : s_opcode_name = "+c "; |
9'b00_0001_111 : s_opcode_name = "-c "; |
9'b00_0010_010 : s_opcode_name = "swap "; |
9'b00_0011_000 : s_opcode_name = "+ "; |
9'b00_0011_100 : s_opcode_name = "- "; |
/trunk/core/9x8/core.v
1,6 → 1,6
/******************************************************************************* |
* |
* Copyright 2012-2013, Sinclair R.F., Inc. |
* Copyright 2012-2014, Sinclair R.F., Inc. |
* |
* SSBCC.9x8 -- Small Stack Based Computer Compiler, 9-bit opcode, 8-bit data. |
* |
39,11 → 39,12
|
/******************************************************************************* |
* |
* Instantiate the ALU operations. These are listed in the order in which they |
* first occur in the opcodes. |
* Instantiate the ALU operations. These are listed in order by opcode. |
* |
******************************************************************************/ |
|
reg [8:0] s_T_adder; |
|
// opcode = 000000_xxx |
// shifter operations (including "nop" as no shift) |
// 6-input LUT formulation -- 3-bit opcode, 3 bits of T centered at current bit |
66,26 → 67,27
reg [7:0] s_T_stack; |
always @ (*) |
case (s_opcode[0+:2]) |
2'b00 : s_T_stack = s_T; // dup |
2'b01 : s_T_stack = s_R[0+:8]; // r@ |
2'b10 : s_T_stack = s_N; // over |
2'b00 : s_T_stack = s_T; // dup |
2'b01 : s_T_stack = s_R[0+:8]; // r@ |
2'b10 : s_T_stack = s_N; // over |
2'b11 : s_T_stack = { 7'd0, s_T_adder[8] }; // +/-c |
default : s_T_stack = s_T; |
endcase |
|
// opcode = 000011_x00 (adder) and 001xxx_x.. (incrementers) |
reg [7:0] s_T_adder; |
always @ (*) |
if (s_opcode[6] == 1'b0) |
case (s_opcode[2]) |
1'b0: s_T_adder = s_N + s_T; |
1'b1: s_T_adder = s_N - s_T; |
1'b0: s_T_adder = { 1'b0, s_N } + { 1'b0, s_T }; |
1'b1: s_T_adder = { 1'b0, s_N } - { 1'b0, s_T }; |
endcase |
else |
else begin |
case (s_opcode[2]) |
1'b0: s_T_adder = s_T + 8'h01; |
1'b1: s_T_adder = s_T - 8'h01; |
default: s_T_adder = s_T + 8'h01; |
1'b0: s_T_adder = { 1'b0, s_T } + 9'h01; |
1'b1: s_T_adder = { 1'b0, s_T } - 9'h01; |
default: s_T_adder = { 1'b0, s_T } + 9'h01; |
endcase |
end |
|
// opcode = 000100_0xx |
// ^ 0 ==> "=", 1 ==> "<>" |
218,7 → 220,7
end else case (s_opcode[3+:4]) |
4'b0000: // nop, math_rotate |
; |
4'b0001: begin // dup, r@, over |
4'b0001: begin // dup, r@, over, +/-c |
s_bus_t = C_BUS_T_PRE; |
s_bus_n = C_BUS_N_T; |
s_stack = C_STACK_INC; |
401,7 → 403,7
C_BUS_T_OPCODE: s_T_pre = s_opcode[0+:8]; // push 8-bit value |
C_BUS_T_N: s_T_pre = s_N; |
C_BUS_T_PRE: s_T_pre = s_T_stack; |
C_BUS_T_ADDER: s_T_pre = s_T_adder; |
C_BUS_T_ADDER: s_T_pre = s_T_adder[0+:8]; |
C_BUS_T_COMPARE: s_T_pre = {(8){s_T_compare}}; |
C_BUS_T_INPORT: s_T_pre = s_T_inport; |
C_BUS_T_LOGIC: s_T_pre = s_T_logic; |
/trunk/core/9x8/asmDef_9x8.py
1221,9 → 1221,11
self.instructions = dict(list=list(), opcode=list()); |
self.AddInstruction('&', 0x050); |
self.AddInstruction('+', 0x018); |
self.AddInstruction('+c', 0x00B); |
self.AddInstruction('-', 0x01C); |
self.AddInstruction('-1<>', 0x023); |
self.AddInstruction('-1=', 0x022); |
self.AddInstruction('-c', 0x00F); |
self.AddInstruction('0<>', 0x021); |
self.AddInstruction('0=', 0x020); |
self.AddInstruction('0>>', 0x004); |
/trunk/README
20,7 → 20,7
The features are: |
- high speed, low fabric utilization |
- vendor-independent Verilog output with a VHDL package file |
- simple Forth-like assembly language (41 instructions) |
- simple Forth-like assembly language (43 instructions) |
- single cycle instruction execution |
- automatic generation of I/O ports |
- configurable instruction, data stack, return stack, and memory utilization |
50,6 → 50,7
|
The instructions are all single-cycle. The instructions include |
- 4 arithmetic instructions: addition, subtraction, increment, and decrement |
- 2 carry bit instructions: +c and -c for addition and subtraction respectively |
- 3 bit-wise logical instructions: and, or, and exclusive or |
- 7 shift and rotation instructions: <<0, <<1, 0>>, 1>>, <<msb, >>msb, and >>lsb |
- 4 logical instructions: 0=, 0<>, -1=, -1<> |
90,11 → 91,9
SPEED AND RESOURCE UTILIZATION |
================================================================================ |
These device speed and resource utilization results are copied from the build |
tests. The full results are listed in core/9x8/build/uc/uc_led.9x8 which |
represents a minimal processor implementation (clock, reset, and one output). |
See the uc_peripherals.9x8 file for results for a more complicated |
implementation. Device-specific scripts state how these performance numbers |
were obtained. |
tests. The full results are listed in the core/9x8/build directories. The |
tests use a minimal processor implementation (clock, reset, and one output). |
Device-specific scripts state how these performance numbers were obtained. |
|
VENDOR DEVICE BEST SPEED SMALLEST RESOURCE UTILIZATION |
------ ------ ---------- ------------------------------- |
103,10 → 102,10
Altera Stratix-V 372.9 MHz 198 ALUTs (preliminary) |
Lattice LCMXO2-640ZE-3 98.4 MHz 206 LUTs (preliminary) |
Lattice LFE2-6E-7 157.9 MHz 203 LUTs (preliminary) |
Xilinx Artix-7 316.5 MHz 151 slice LUTs (41 slices) |
Xilinx Kintex-7 473.9 MHz 196 slice LUTs (55 slices) |
Xilinx Spartan-3A 148.3 MHz 231 4-input LUTs (130 slices) |
Xilinx Spartan-6 200.0 MHz 120 Slice LUTs (36 slices) |
Xilinx Artix-7 TBD 163 slice LUTs (48 slices) |
Xilinx Kintex-7 TBD 158 slice LUTs (44 slices) |
Xilinx Spartan-3A 149.4 MHz 232 4-input LUTs (129 slices) |
Xilinx Spartan-6 193.7 MHz 124 Slice LUTs (34 slices) |
Xilinx Virtex-6 275.7 MHz 122 Slice LUTs (38 slices) (p.) |
|
Disclaimer: Like other embedded processors, these are the maximum performance |
381,7 → 380,7
INSTRUCTIONS |
================================================================================ |
|
The 41 instructions are as follows (see core/9x8/doc/opcodes.html for detailed |
The 43 instructions are as follows (see core/9x8/doc/opcodes.html for detailed |
descriptions). Here, T is the top of the data stack, N is the next-to-top of |
the data stack, and R is the top of the return stack. All of these are the |
values at the start of the instruction. |
399,6 → 398,12
^ bitwise exclusive or of N and T |
or bitwise or of N and T |
|
Push the carry bit for addition or subtraction onto the data stack (see |
lib/9x8/math.s for examples on using +c and -c for multi-byte arithmetic): |
|
+c carry bit for N+T |
-c carry bit for N-T |
|
Increment and decrement replace the top of the data stack with the stated |
result. |
|